### Pandas

In this tutorial we will use *pandas*; for some operations we will also need *numpy*:

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Build a Series from a plain list; the np.nan entry is a missing value,
# and the resulting dtype is float64 (see the printed output).
my_series = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
print(my_series)

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64


In [3]:
# Six consecutive calendar days starting on 2019-01-01 (daily frequency).
my_dates_index = pd.date_range(start='20190101', periods=6)
print(my_dates_index)

DatetimeIndex(['2019-01-01', '2019-01-02', '2019-01-03', '2019-01-04',
               '2019-01-05', '2019-01-06'],
              dtype='datetime64[ns]', freq='D')


In [4]:
# The integers 0..23 laid out row by row in a 6x4 grid.
# np.arange already returns a fresh ndarray, so wrapping it in np.array()
# only made a redundant copy; reshape it directly instead.
sample_numpy_data = np.arange(24).reshape(6, 4)
print(sample_numpy_data)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]]


In [7]:
# Wrap the 6x4 array in a DataFrame: one row per date in my_dates_index,
# with the four columns labelled A, B, C and D.
sample_df = pd.DataFrame(
    data=sample_numpy_data,
    index=my_dates_index,
    columns=list('ABCD'),
)
sample_df

Unnamed: 0,A,B,C,D
2019-01-01,0,1,2,3
2019-01-02,4,5,6,7
2019-01-03,8,9,10,11
2019-01-04,12,13,14,15
2019-01-05,16,17,18,19
2019-01-06,20,21,22,23


Transpose operation:

In [8]:
# Swap the axes: the column labels A-D become the rows and the dates become the columns.
sample_df.transpose()

Unnamed: 0,2019-01-01,2019-01-02,2019-01-03,2019-01-04,2019-01-05,2019-01-06
A,0,4,8,12,16,20
B,1,5,9,13,17,21
C,2,6,10,14,18,22
D,3,7,11,15,19,23


In [9]:
# Order the columns by their labels, descending (D, C, B, A); row order is unchanged.
sample_df.sort_index(axis='columns', ascending=False)

Unnamed: 0,D,C,B,A
2019-01-01,3,2,1,0
2019-01-02,7,6,5,4
2019-01-03,11,10,9,8
2019-01-04,15,14,13,12
2019-01-05,19,18,17,16
2019-01-06,23,22,21,20


In [10]:
# Order the rows by the values in column B, largest first.
sample_df.sort_values("B", ascending=False)

Unnamed: 0,A,B,C,D
2019-01-06,20,21,22,23
2019-01-05,16,17,18,19
2019-01-04,12,13,14,15
2019-01-03,8,9,10,11
2019-01-02,4,5,6,7
2019-01-01,0,1,2,3


Selection by column name:

In [11]:
# Every row of the single column C, returned as a Series.
sample_df.loc[:, 'C']

2019-01-01     2
2019-01-02     6
2019-01-03    10
2019-01-04    14
2019-01-05    18
2019-01-06    22
Freq: D, Name: C, dtype: int64

Selecting rows by index (first by integer position, then by label):

In [12]:
# Positional row slice: rows at positions 1, 2 and 3 (the end position 4 is excluded).
sample_df.iloc[1:4]

Unnamed: 0,A,B,C,D
2019-01-02,4,5,6,7
2019-01-03,8,9,10,11
2019-01-04,12,13,14,15


In [13]:
# Label-based row slice on the date index; unlike a positional slice,
# both endpoints are included in the result.
sample_df.loc['2019-01-01':'2019-01-04']

Unnamed: 0,A,B,C,D
2019-01-01,0,1,2,3
2019-01-02,4,5,6,7
2019-01-03,8,9,10,11
2019-01-04,12,13,14,15


In [14]:
# Select the rows whose labels are the 2nd through 4th dates of the index, keeping all columns.
sample_df.loc[my_dates_index[1:4], :]

Unnamed: 0,A,B,C,D
2019-01-02,4,5,6,7
2019-01-03,8,9,10,11
2019-01-04,12,13,14,15


In [15]:
# A full slice on both axes: every row and every column.
sample_df.loc[:, :]

Unnamed: 0,A,B,C,D
2019-01-01,0,1,2,3
2019-01-02,4,5,6,7
2019-01-03,8,9,10,11
2019-01-04,12,13,14,15
2019-01-05,16,17,18,19
2019-01-06,20,21,22,23


In [16]:
# .loc takes a row selector and a column selector.
sample_df.loc[
    :,           # every row
    ['A', 'B']   # only columns A and B
]

Unnamed: 0,A,B
2019-01-01,0,1
2019-01-02,4,5
2019-01-03,8,9
2019-01-04,12,13
2019-01-05,16,17
2019-01-06,20,21


In [17]:
# Combine both selectors: a date-label row slice (endpoints included) and a column list.
sample_df.loc[
    '2019-01-01':'2019-01-04',   # rows from 2019-01-01 through 2019-01-04
    ['A', 'B']                   # only columns A and B
]

Unnamed: 0,A,B
2019-01-01,0,1
2019-01-02,4,5
2019-01-03,8,9
2019-01-04,12,13
