## Pandas

In [1]:
import numpy as np
import pandas as pd

### You can create a Series object from a list, a NumPy array, or a dictionary

### Series with List

In [2]:
# list() materialises any iterable directly, so no pass-through comprehension is needed.
my_list = list(range(5))     # data values 0..4
my_index = list('risha')     # one single-character label per data value

In [3]:
my_list, my_index

([0, 1, 2, 3, 4], ['r', 'i', 's', 'h', 'a'])

In [4]:
pd.Series(data=my_list, index=my_index)

r    0
i    1
s    2
h    3
a    4
dtype: int64

In [5]:
pd.Series(my_list)

0    0
1    1
2    2
3    3
4    4
dtype: int64

In [6]:
# Positional form: the first argument is taken as `data` and the second as `index`.
pd.Series(my_list,my_index)

r    0
i    1
s    2
h    3
a    4
dtype: int64

### Series with NumPy array

In [7]:
# One-dimensional integer array 0..4 (the start defaults to 0).
array = np.arange(5)

In [8]:
array, my_index

(array([0, 1, 2, 3, 4]), ['r', 'i', 's', 'h', 'a'])

In [9]:
pd.Series(data = array)

0    0
1    1
2    2
3    3
4    4
dtype: int32

In [10]:
pd.Series(data = array, index = my_index)

r    0
i    1
s    2
h    3
a    4
dtype: int32

### Series with Dictionary

In [11]:
# Number words mapped to their integer values, in ascending order.
dict1 = dict(one=1, two=2, three=3, four=4)

In [12]:
dict1

{'one': 1, 'two': 2, 'three': 3, 'four': 4}

In [13]:
# Built from a dict: the keys become the index labels and the values become the data.
ser1 = pd.Series(dict1)
ser1

one      1
two      2
three    3
four     4
dtype: int64

## A Series can hold a wide variety of data types

In [14]:
# Strings as data, labelled by the integers 1..4.
list1 = list('abcd')
list2 = list(range(1, 5))
ser2 = pd.Series(list1, index=list2)
ser2

1    a
2    b
3    c
4    d
dtype: object

In [15]:
# A Series can also store arbitrary Python objects, here built-in callables and classes.
list1 = [sum, min, max, type, range]
list2 = [obj.__name__ for obj in list1]   # 'sum', 'min', 'max', 'type', 'range'
ser3 = pd.Series(list1, index=list2)
ser3

sum      <built-in function sum>
min      <built-in function min>
max      <built-in function max>
type              <class 'type'>
range            <class 'range'>
dtype: object

## Grabbing data from a Series

In [16]:
# ser2 is indexed by the integer labels 1..4, so the key 1 is a *label*, not a position
# (position 1 would be 'b'). `.loc` makes the label-based lookup explicit.
ser2.loc[1]

'a'

In [17]:
# City -> value mapping; ser2 and ser3 each leave out one city so that the
# three Series carry partly different labels.
dict1 = {
    'Mumbai': 100,
    'Delhi': 200,
    'Kolkata': 300,
    'Chennai': 400,
    'Bengaluru': 500,
}
ser1 = pd.Series(dict1)
ser2 = ser1.drop('Delhi')      # every city except Delhi
ser3 = ser1.drop('Chennai')    # every city except Chennai

In [18]:
ser1['Mumbai']

100

In [19]:
ser1

Mumbai       100
Delhi        200
Kolkata      300
Chennai      400
Bengaluru    500
dtype: int64

In [20]:
ser2

Mumbai       100
Kolkata      300
Chennai      400
Bengaluru    500
dtype: int64

In [21]:
ser3

Mumbai       100
Delhi        200
Kolkata      300
Bengaluru    500
dtype: int64

### Basic operations on Series

In [22]:
# Element-wise addition aligned on index labels; a label present in only one
# operand ('Delhi' is missing from ser2) yields NaN in the result.
ser1.add(ser2)

Bengaluru    1000.0
Chennai       800.0
Delhi           NaN
Kolkata       600.0
Mumbai        200.0
dtype: float64

In [23]:
ser2 + ser3

Bengaluru    1000.0
Chennai         NaN
Delhi           NaN
Kolkata       600.0
Mumbai        200.0
dtype: float64

## Basic functions and attributes

In [24]:
# Label-aligned sum of ser2 and ser3; cities missing from either operand become NaN.
ser = ser2.add(ser3)
ser

Bengaluru    1000.0
Chennai         NaN
Delhi           NaN
Kolkata       600.0
Mumbai        200.0
dtype: float64

In [25]:
ser.isnull()

Bengaluru    False
Chennai       True
Delhi         True
Kolkata      False
Mumbai       False
dtype: bool

In [26]:
ser.notnull()

Bengaluru     True
Chennai      False
Delhi        False
Kolkata       True
Mumbai        True
dtype: bool

In [27]:
ser.size

5

In [28]:
ser.axes

[Index(['Bengaluru', 'Chennai', 'Delhi', 'Kolkata', 'Mumbai'], dtype='object')]

In [29]:
ser.values

array([1000.,   nan,   nan,  600.,  200.])

In [30]:
ser.empty

False

In [31]:
ser.max()

1000.0

In [32]:
ser.min()

200.0

In [33]:
ser.std()

400.0

In [34]:
ser.var()

160000.0

In [35]:
ser.sum()

1800.0

In [36]:
ser.mean()

600.0

In [37]:
ser.head(2)

Bengaluru    1000.0
Chennai         NaN
dtype: float64

In [38]:
ser.tail(3)

Delhi        NaN
Kolkata    600.0
Mumbai     200.0
dtype: float64

## Dataframes

### Creating a dataframe

In [40]:
# Row labels (countries) and column labels (c1..c10) for the demo frame.
rows = 'India Australia Japan US Russia England France Germany Bhutan Fiji'.split()
cols = 'c1 c2 c3 c4 c5 c6 c7 c8 c9 c10'.split()
# Seed NumPy's global RNG so that "Restart & Run All" reproduces the same numbers
# (the later cell that adds a random row through np.random benefits as well).
# NOTE: the outputs stored in this notebook predate the seed; re-run to refresh them.
np.random.seed(42)
# One random integer in [0, 100) per (row, column) pair; the shape follows the labels.
data = np.random.randint(0, 100, size=(len(rows), len(cols)))

In [41]:
rows

['India',
 'Australia',
 'Japan',
 'US',
 'Russia',
 'England',
 'France',
 'Germany',
 'Bhutan',
 'Fiji']

In [42]:
cols

['c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9', 'c10']

In [43]:
data

array([[89,  4, 64, 21, 81, 76,  0, 89, 65, 19],
       [61,  9, 85, 89, 55, 16, 41, 20, 92, 66],
       [89, 82, 72, 68, 58, 53, 83, 54, 60, 77],
       [60, 44, 10, 99, 99, 64, 23, 67,  2, 30],
       [13, 39, 99, 82, 81, 57, 50, 52, 75,  8],
       [87, 50, 21, 60, 24, 21, 87, 70,  4, 20],
       [70, 15, 64, 53, 41, 39, 65, 97, 64, 31],
       [98, 42, 99, 53, 90, 74, 87, 33, 22, 11],
       [88, 87, 49, 62, 65, 48, 45, 86, 63,  2],
       [94, 48, 67, 84, 39,  4, 63, 84, 11, 40]])

In [44]:
# Assemble the frame: `data` supplies the values, `rows` the index labels, `cols` the column labels.
df = pd.DataFrame(data=data, index=rows, columns=cols)

In [45]:
df

Unnamed: 0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10
India,89,4,64,21,81,76,0,89,65,19
Australia,61,9,85,89,55,16,41,20,92,66
Japan,89,82,72,68,58,53,83,54,60,77
US,60,44,10,99,99,64,23,67,2,30
Russia,13,39,99,82,81,57,50,52,75,8
England,87,50,21,60,24,21,87,70,4,20
France,70,15,64,53,41,39,65,97,64,31
Germany,98,42,99,53,90,74,87,33,22,11
Bhutan,88,87,49,62,65,48,45,86,63,2
Fiji,94,48,67,84,39,4,63,84,11,40


## Getting data out of DataFrames

### Selecting columns

In [46]:
df['c1']

India        89
Australia    61
Japan        89
US           60
Russia       13
England      87
France       70
Germany      98
Bhutan       88
Fiji         94
Name: c1, dtype: int32

In [47]:
df[['c1','c2']]

Unnamed: 0,c1,c2
India,89,4
Australia,61,9
Japan,89,82
US,60,44
Russia,13,39
England,87,50
France,70,15
Germany,98,42
Bhutan,88,87
Fiji,94,48


### Selecting rows

In [48]:
# .loc selects by index label; a single label returns that row as a Series named after the label.
df.loc['India']

c1     89
c2      4
c3     64
c4     21
c5     81
c6     76
c7      0
c8     89
c9     65
c10    19
Name: India, dtype: int32

In [49]:
# .iloc selects by zero-based integer position; position 3 is the fourth row ('US').
df.iloc[3]

c1     60
c2     44
c3     10
c4     99
c5     99
c6     64
c7     23
c8     67
c9      2
c10    30
Name: US, dtype: int32

In [50]:
df.loc[['India','England','Russia']]

Unnamed: 0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10
India,89,4,64,21,81,76,0,89,65,19
England,87,50,21,60,24,21,87,70,4,20
Russia,13,39,99,82,81,57,50,52,75,8


### Selecting a subset of a DataFrame

In [51]:
df

Unnamed: 0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10
India,89,4,64,21,81,76,0,89,65,19
Australia,61,9,85,89,55,16,41,20,92,66
Japan,89,82,72,68,58,53,83,54,60,77
US,60,44,10,99,99,64,23,67,2,30
Russia,13,39,99,82,81,57,50,52,75,8
England,87,50,21,60,24,21,87,70,4,20
France,70,15,64,53,41,39,65,97,64,31
Germany,98,42,99,53,90,74,87,33,22,11
Bhutan,88,87,49,62,65,48,45,86,63,2
Fiji,94,48,67,84,39,4,63,84,11,40


In [52]:
df.loc[['Japan','US','Russia'],['c5','c6','c7']]

Unnamed: 0,c5,c6,c7
Japan,58,53,83
US,99,64,23
Russia,81,57,50


### Adding a new column

In [53]:
# New column holding each row's position (0, 1, 2, ...). The length is taken from
# the frame itself so the assignment keeps working if the number of rows changes.
df['c11'] = np.arange(len(df))

In [54]:
df

Unnamed: 0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11
India,89,4,64,21,81,76,0,89,65,19,0
Australia,61,9,85,89,55,16,41,20,92,66,1
Japan,89,82,72,68,58,53,83,54,60,77,2
US,60,44,10,99,99,64,23,67,2,30,3
Russia,13,39,99,82,81,57,50,52,75,8,4
England,87,50,21,60,24,21,87,70,4,20,5
France,70,15,64,53,41,39,65,97,64,31,6
Germany,98,42,99,53,90,74,87,33,22,11,7
Bhutan,88,87,49,62,65,48,45,86,63,2,8
Fiji,94,48,67,84,39,4,63,84,11,40,9


### Adding a new row

In [56]:
# Assigning to a label that does not exist yet appends a new row. The row needs
# exactly one value per column, so the count is derived from the frame rather than
# hard-coded (11 was only correct once the 'c11' column had been added).
df.loc['Poland'] = np.random.randint(0, 100, size=len(df.columns))

In [57]:
df

Unnamed: 0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11
India,89,4,64,21,81,76,0,89,65,19,0
Australia,61,9,85,89,55,16,41,20,92,66,1
Japan,89,82,72,68,58,53,83,54,60,77,2
US,60,44,10,99,99,64,23,67,2,30,3
Russia,13,39,99,82,81,57,50,52,75,8,4
England,87,50,21,60,24,21,87,70,4,20,5
France,70,15,64,53,41,39,65,97,64,31,6
Germany,98,42,99,53,90,74,87,33,22,11,7
Bhutan,88,87,49,62,65,48,45,86,63,2,8
Fiji,94,48,67,84,39,4,63,84,11,40,9


### Dropping a column

In [58]:
# Returns a copy without column 'c11'; df itself is left unchanged
# (the column is still present in the cells that follow).
df.drop(columns='c11')

Unnamed: 0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10
India,89,4,64,21,81,76,0,89,65,19
Australia,61,9,85,89,55,16,41,20,92,66
Japan,89,82,72,68,58,53,83,54,60,77
US,60,44,10,99,99,64,23,67,2,30
Russia,13,39,99,82,81,57,50,52,75,8
England,87,50,21,60,24,21,87,70,4,20
France,70,15,64,53,41,39,65,97,64,31
Germany,98,42,99,53,90,74,87,33,22,11
Bhutan,88,87,49,62,65,48,45,86,63,2
Fiji,94,48,67,84,39,4,63,84,11,40


### Dropping a row

In [59]:
# Returns a copy without the 'Poland' row; df itself is left unchanged.
df.drop(index='Poland')

Unnamed: 0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11
India,89,4,64,21,81,76,0,89,65,19,0
Australia,61,9,85,89,55,16,41,20,92,66,1
Japan,89,82,72,68,58,53,83,54,60,77,2
US,60,44,10,99,99,64,23,67,2,30,3
Russia,13,39,99,82,81,57,50,52,75,8,4
England,87,50,21,60,24,21,87,70,4,20,5
France,70,15,64,53,41,39,65,97,64,31,6
Germany,98,42,99,53,90,74,87,33,22,11,7
Bhutan,88,87,49,62,65,48,45,86,63,2,8
Fiji,94,48,67,84,39,4,63,84,11,40,9


## Conditional Selection

In [60]:
df >=50

Unnamed: 0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11
India,True,False,True,False,True,True,False,True,True,False,False
Australia,True,False,True,True,True,False,False,False,True,True,False
Japan,True,True,True,True,True,True,True,True,True,True,False
US,True,False,False,True,True,True,False,True,False,False,False
Russia,False,False,True,True,True,True,True,True,True,False,False
England,True,True,False,True,False,False,True,True,False,False,False
France,True,False,True,True,False,False,True,True,True,False,False
Germany,True,False,True,True,True,True,True,False,False,False,False
Bhutan,True,True,False,True,True,False,False,True,True,False,False
Fiji,True,False,True,True,False,False,True,True,False,False,False


In [61]:
# Keep values that are >= 50 and replace every other cell with NaN;
# the shape of the frame is preserved.
df.where(df >= 50)

Unnamed: 0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11
India,89.0,,64.0,,81.0,76.0,,89.0,65.0,,
Australia,61.0,,85.0,89.0,55.0,,,,92.0,66.0,
Japan,89.0,82.0,72.0,68.0,58.0,53.0,83.0,54.0,60.0,77.0,
US,60.0,,,99.0,99.0,64.0,,67.0,,,
Russia,,,99.0,82.0,81.0,57.0,50.0,52.0,75.0,,
England,87.0,50.0,,60.0,,,87.0,70.0,,,
France,70.0,,64.0,53.0,,,65.0,97.0,64.0,,
Germany,98.0,,99.0,53.0,90.0,74.0,87.0,,,,
Bhutan,88.0,87.0,,62.0,65.0,,,86.0,63.0,,
Fiji,94.0,,67.0,84.0,,,63.0,84.0,,,


In [64]:
# Row filter: keep only the rows whose 'c1' value is at least 50 (all columns retained).
df.loc[df['c1'].ge(50)]

Unnamed: 0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11
India,89,4,64,21,81,76,0,89,65,19,0
Australia,61,9,85,89,55,16,41,20,92,66,1
Japan,89,82,72,68,58,53,83,54,60,77,2
US,60,44,10,99,99,64,23,67,2,30,3
England,87,50,21,60,24,21,87,70,4,20,5
France,70,15,64,53,41,39,65,97,64,31,6
Germany,98,42,99,53,90,74,87,33,22,11,7
Bhutan,88,87,49,62,65,48,45,86,63,2,8
Fiji,94,48,67,84,39,4,63,84,11,40,9


In [67]:
df[df['c1']>=50].loc[['India','Bhutan']]

Unnamed: 0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11
India,89,4,64,21,81,76,0,89,65,19,0
Bhutan,88,87,49,62,65,48,45,86,63,2,8


In [69]:
 df[(df>50) | (df%20==0)]

Unnamed: 0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11
India,89.0,,64.0,,81.0,76.0,0.0,89.0,65.0,,0.0
Australia,61.0,,85.0,89.0,55.0,,,20.0,92.0,66.0,
Japan,89.0,82.0,72.0,68.0,58.0,53.0,83.0,54.0,60.0,77.0,
US,60.0,,,99.0,99.0,64.0,,67.0,,,
Russia,,,99.0,82.0,81.0,57.0,,52.0,75.0,,
England,87.0,,,60.0,,,87.0,70.0,,20.0,
France,70.0,,64.0,53.0,,,65.0,97.0,64.0,,
Germany,98.0,,99.0,53.0,90.0,74.0,87.0,,,,
Bhutan,88.0,87.0,,62.0,65.0,,,86.0,63.0,,
Fiji,94.0,,67.0,84.0,,,63.0,84.0,,40.0,


### Useful Methods

In [70]:
### RESET INDEX
# Returns a new frame in which the old index labels move into a regular column
# named 'index' and the rows are renumbered from 0; df itself is not modified.
df.reset_index()

Unnamed: 0,index,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11
0,India,89,4,64,21,81,76,0,89,65,19,0
1,Australia,61,9,85,89,55,16,41,20,92,66,1
2,Japan,89,82,72,68,58,53,83,54,60,77,2
3,US,60,44,10,99,99,64,23,67,2,30,3
4,Russia,13,39,99,82,81,57,50,52,75,8,4
5,England,87,50,21,60,24,21,87,70,4,20,5
6,France,70,15,64,53,41,39,65,97,64,31,6
7,Germany,98,42,99,53,90,74,87,33,22,11,7
8,Bhutan,88,87,49,62,65,48,45,86,63,2,8
9,Fiji,94,48,67,84,39,4,63,84,11,40,9


In [73]:
# One lowercase letter per row ('a', 'b', 'c', ...). The labels are generated from the
# current row count instead of a hard-coded 11-item list, which only matched the frame
# once the 'Poland' row had been added. (Plain letters run out after 26 rows.)
df['new_ind'] = [chr(ord('a') + i) for i in range(len(df))]

In [74]:
df

Unnamed: 0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,new_ind
India,89,4,64,21,81,76,0,89,65,19,0,a
Australia,61,9,85,89,55,16,41,20,92,66,1,b
Japan,89,82,72,68,58,53,83,54,60,77,2,c
US,60,44,10,99,99,64,23,67,2,30,3,d
Russia,13,39,99,82,81,57,50,52,75,8,4,e
England,87,50,21,60,24,21,87,70,4,20,5,f
France,70,15,64,53,41,39,65,97,64,31,6,g
Germany,98,42,99,53,90,74,87,33,22,11,7,h
Bhutan,88,87,49,62,65,48,45,86,63,2,8,i
Fiji,94,48,67,84,39,4,63,84,11,40,9,j


In [75]:
# Promote 'new_ind' to the row index. Rebinding the result (rather than inplace=True)
# keeps the step explicit and chainable; the previous country labels are discarded.
# Re-running this cell alone raises KeyError because 'new_ind' is no longer a column.
df = df.set_index('new_ind')

In [76]:
df

Unnamed: 0_level_0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11
new_ind,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
a,89,4,64,21,81,76,0,89,65,19,0
b,61,9,85,89,55,16,41,20,92,66,1
c,89,82,72,68,58,53,83,54,60,77,2
d,60,44,10,99,99,64,23,67,2,30,3
e,13,39,99,82,81,57,50,52,75,8,4
f,87,50,21,60,24,21,87,70,4,20,5
g,70,15,64,53,41,39,65,97,64,31,6
h,98,42,99,53,90,74,87,33,22,11,7
i,88,87,49,62,65,48,45,86,63,2,8
j,94,48,67,84,39,4,63,84,11,40,9
