# Series

In [2]:
import numpy as np
import pandas as pd

In [3]:
# Five draws from the standard normal distribution with string labels.
# No seed is set, so the values differ on every run.
my_series = pd.Series(
    data=np.random.randn(5),
    index=['a', 'b', 'c', 'd', 'e'],
)
my_series

a   -0.194296
b   -1.045299
c    0.427016
d   -2.195521
e   -1.256941
dtype: float64

In [5]:
# Integer data paired with an explicit string index.
my_series = pd.Series(
    data=[1, 2, 3, 4, 5],
    index=['a', 'b', 'c', 'd', 'e'],
)
my_series

a    1
b    2
c    3
d    4
e    5
dtype: int64

In [6]:
# Show the index labels of the Series.
my_series.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [7]:
# With no index argument, pandas assigns the default 0..n-1 integer index.
my_series = pd.Series(data=[1, 2, 3, 4, 5])
my_series

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [8]:
# Show the index; a Series built without an explicit index reports a RangeIndex.
my_series.index

RangeIndex(start=0, stop=5, step=1)

In [9]:
# Slice out the first three elements (the stop position is excluded).
my_series[:3]

0    1
1    2
2    3
dtype: int64

In [11]:
# Dict keys become the index labels. Mixing complex, float and int values
# upcasts the whole Series to a complex dtype.
my_dict = {
    'c1': 1j,
    'c2': 5.0,
    'c3': 1,
}
my_series = pd.Series(data=my_dict)
my_series

c1        1j
c2    (5+0j)
c3    (1+0j)
dtype: complex128

In [13]:
# Attribute-style access to the element labelled 'c2'.
my_series.c2

(5+0j)

In [14]:
# Bracket access by label.
my_series['c3']

(1+0j)

In [16]:
# .get() looks up a label; here the label exists, so its value is returned.
my_series.get('c1')

1j

In [18]:
# .get() returns None for a missing label instead of raising.
# print() is used because a bare None produces no cell output.
print(my_series.get('not available'))

None


In [19]:
# A scalar is broadcast to every label in the index.
pd.Series(
    data=1,
    index=['a', 'b', 'c', 'd', 'e'],
)

a    1
b    1
c    1
d    1
e    1
dtype: int64

In [20]:
# A single NaN among the integers makes the whole Series float64.
pd.Series(data=[1, 2, 3, np.nan, 5, 6])

0    1.0
1    2.0
2    3.0
3    NaN
4    5.0
5    6.0
dtype: float64

In [22]:
# np.arange already returns an ndarray, so it is handed to Series directly;
# wrapping it in np.array() would only make a redundant copy.
pd.Series(np.arange(12))

0      0
1      1
2      2
3      3
4      4
5      5
6      6
7      7
8      8
9      9
10    10
11    11
dtype: int64

In [23]:
# Eight consecutive calendar days starting on 2018-08-01.
pd.date_range(start='20180801', periods=8)

DatetimeIndex(['2018-08-01', '2018-08-02', '2018-08-03', '2018-08-04',
               '2018-08-05', '2018-08-06', '2018-08-07', '2018-08-08'],
              dtype='datetime64[ns]', freq='D')

In [27]:
# Twelve integers indexed by twelve consecutive days.
# np.arange already yields an ndarray, so no np.array() wrapper is needed.
pd.Series(
    np.arange(12),
    index=pd.date_range('20180801', periods=12),
)

2018-08-01     0
2018-08-02     1
2018-08-03     2
2018-08-04     3
2018-08-05     4
2018-08-06     5
2018-08-07     6
2018-08-08     7
2018-08-09     8
2018-08-10     9
2018-08-11    10
2018-08-12    11
Freq: D, dtype: int64

# DataFrames

In [34]:
# 8x3 grid holding 0..23, one row per day from 2016-01-01, columns c1..c3.
# np.arange already returns an ndarray, so it is reshaped directly instead of
# being copied through np.array() first.
my_df = pd.DataFrame(
    np.arange(24).reshape(8, 3),
    index=pd.date_range('20160101', periods=8),
    columns=['c1', 'c2', 'c3'],
)
my_df

Unnamed: 0,c1,c2,c3
2016-01-01,0,1,2
2016-01-02,3,4,5
2016-01-03,6,7,8
2016-01-04,9,10,11
2016-01-05,12,13,14
2016-01-06,15,16,17
2016-01-07,18,19,20
2016-01-08,21,22,23


In [30]:
# Each dict entry becomes one column. The scalar entries ('float', 'time',
# 'dull') are broadcast to the four rows given by the array-like entries.
df_from_dictionary = pd.DataFrame(
    {
        'float': 1.,
        'time': pd.Timestamp('20160825'),
        'series': pd.Series(1, index=list(range(4)), dtype='float32'),
        'array': np.full(4, 3, dtype='int32'),
        'categories': pd.Categorical(["test", "train", "taxes", "tools"]),
        'dull': 'boring data',
    }
)
df_from_dictionary

Unnamed: 0,array,categories,dull,float,series,time
0,3,test,boring data,1.0,1.0,2016-08-25
1,3,train,boring data,1.0,1.0,2016-08-25
2,3,taxes,boring data,1.0,1.0,2016-08-25
3,3,tools,boring data,1.0,1.0,2016-08-25


In [31]:
# Show the dtype of each column.
df_from_dictionary.dtypes

array                  int32
categories          category
dull                  object
float                float64
series               float32
time          datetime64[ns]
dtype: object

In [35]:
# First rows of the frame; head() with no argument shows five.
my_df.head()

Unnamed: 0,c1,c2,c3
2016-01-01,0,1,2
2016-01-02,3,4,5
2016-01-03,6,7,8
2016-01-04,9,10,11
2016-01-05,12,13,14


In [36]:
# Last two rows of the frame.
my_df.tail(2)

Unnamed: 0,c1,c2,c3
2016-01-07,18,19,20
2016-01-08,21,22,23


In [37]:
# The underlying data as a 2-D NumPy array, without index or column labels.
my_df.values

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14],
       [15, 16, 17],
       [18, 19, 20],
       [21, 22, 23]])

In [38]:
# Row index of the frame.
my_df.index

DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04',
               '2016-01-05', '2016-01-06', '2016-01-07', '2016-01-08'],
              dtype='datetime64[ns]', freq='D')

In [39]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
my_df.describe()

Unnamed: 0,c1,c2,c3
count,8.0,8.0,8.0
mean,10.5,11.5,12.5
std,7.348469,7.348469,7.348469
min,0.0,1.0,2.0
25%,5.25,6.25,7.25
50%,10.5,11.5,12.5
75%,15.75,16.75,17.75
max,21.0,22.0,23.0


In [40]:
# Transpose: the column labels become the rows and the dates become the columns.
my_df.T

Unnamed: 0,2016-01-01 00:00:00,2016-01-02 00:00:00,2016-01-03 00:00:00,2016-01-04 00:00:00,2016-01-05 00:00:00,2016-01-06 00:00:00,2016-01-07 00:00:00,2016-01-08 00:00:00
c1,0,3,6,9,12,15,18,21
c2,1,4,7,10,13,16,19,22
c3,2,5,8,11,14,17,20,23


In [41]:
# Order the columns by label, descending (c3, c2, c1); row order is untouched.
my_df.sort_index(axis='columns', ascending=False)

Unnamed: 0,c3,c2,c1
2016-01-01,2,1,0
2016-01-02,5,4,3
2016-01-03,8,7,6
2016-01-04,11,10,9
2016-01-05,14,13,12
2016-01-06,17,16,15
2016-01-07,20,19,18
2016-01-08,23,22,21


In [43]:
# Sort the rows by the values in column 'c2', largest first.
my_df.sort_values(by='c2', ascending=False)

Unnamed: 0,c1,c2,c3
2016-01-08,21,22,23
2016-01-07,18,19,20
2016-01-06,15,16,17
2016-01-05,12,13,14
2016-01-04,9,10,11
2016-01-03,6,7,8
2016-01-02,3,4,5
2016-01-01,0,1,2
