In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

In [2]:
# Draw a 5x4 matrix of random samples to use as demo data, then display it.
# NOTE(review): no random seed is set, so the values shown below will differ
# on every fresh run of the notebook.
rand_mat = np.random.randn(5,4)
rand_mat

array([[ 1.5633145 , -0.31285036,  0.87478507,  0.12504477],
       [-1.05640926, -0.00582228,  1.24539579, -1.57195948],
       [ 0.50491881, -0.60622197, -1.74195815, -0.72476432],
       [ 0.45063669,  1.2201413 ,  0.17104031, -1.18374347],
       [ 0.30930653,  0.01022717, -0.55207622, -0.80084596]])

In [3]:
# Wrap the random matrix in a labelled frame: one row label per matrix row
# (A-E) and one column label per matrix column (W-Z).
df = pd.DataFrame(
    data=rand_mat,
    index=['A', 'B', 'C', 'D', 'E'],
    columns=['W', 'X', 'Y', 'Z'],
)

In [4]:
# Show the full frame: 5 rows (A-E) by 4 columns (W-Z).
df

Unnamed: 0,W,X,Y,Z
A,1.563315,-0.31285,0.874785,0.125045
B,-1.056409,-0.005822,1.245396,-1.571959
C,0.504919,-0.606222,-1.741958,-0.724764
D,0.450637,1.220141,0.17104,-1.183743
E,0.309307,0.010227,-0.552076,-0.800846


In [5]:
# Confirm that the object built above is a pandas DataFrame.
type(df)

pandas.core.frame.DataFrame

In [6]:
# Label-based row lookup: returns row 'A' as a Series indexed by the column names.
df.loc['A']

W    1.563315
X   -0.312850
Y    0.874785
Z    0.125045
Name: A, dtype: float64

In [7]:
# A single row selected with .loc comes back as a Series, not a DataFrame.
type(df.loc['A'])

pandas.core.series.Series

### Selection and Indexing

In [8]:
# Row selection by label (repeated here to contrast with the positional
# lookup in the next cell).
df.loc['A']

W    1.563315
X   -0.312850
Y    0.874785
Z    0.125045
Name: A, dtype: float64

In [9]:
# Position-based row lookup: position 0 is the row labelled 'A', so the
# result matches df.loc['A'].
df.iloc[0]

W    1.563315
X   -0.312850
Y    0.874785
Z    0.125045
Name: A, dtype: float64

In [10]:
# Bracket indexing with one column name returns that column as a Series
# indexed by the row labels.
df['W']

A    1.563315
B   -1.056409
C    0.504919
D    0.450637
E    0.309307
Name: W, dtype: float64

In [11]:
# Passing a list of column names returns a frame holding only those columns.
df[['W','Z']]

Unnamed: 0,W,Z
A,1.563315,0.125045
B,-1.056409,-1.571959
C,0.504919,-0.724764
D,0.450637,-1.183743
E,0.309307,-0.800846


In [12]:
# A list-of-columns selection is itself a DataFrame.
type(df[['W','Z']])

pandas.core.frame.DataFrame

In [13]:
# The selections above did not modify df; show it again before adding a column.
df

Unnamed: 0,W,X,Y,Z
A,1.563315,-0.31285,0.874785,0.125045
B,-1.056409,-0.005822,1.245396,-1.571959
C,0.504919,-0.606222,-1.741958,-0.724764
D,0.450637,1.220141,0.17104,-1.183743
E,0.309307,0.010227,-0.552076,-0.800846


In [14]:
# Add a derived column: assigning to a new key creates column 'New' holding
# the element-wise sum of columns W and Y.
df['New'] = df['W']+df['Y']

In [15]:
# 'New' now appears as the last column and holds W + Y for each row.
df

Unnamed: 0,W,X,Y,Z,New
A,1.563315,-0.31285,0.874785,0.125045,2.4381
B,-1.056409,-0.005822,1.245396,-1.571959,0.188987
C,0.504919,-0.606222,-1.741958,-0.724764,-1.237039
D,0.450637,1.220141,0.17104,-1.183743,0.621677
E,0.309307,0.010227,-0.552076,-0.800846,-0.24277


In [18]:
# Remove the temporary 'New' column. drop() returns a new frame, so rebind df
# instead of mutating it with inplace=True. errors='ignore' keeps the cell
# safe to re-run once the column is already gone (otherwise a second run
# raises KeyError).
df = df.drop(columns='New', errors='ignore')

In [19]:
# 'New' is gone; df is back to the four columns W, X, Y, Z.
df

Unnamed: 0,W,X,Y,Z
A,1.563315,-0.31285,0.874785,0.125045
B,-1.056409,-0.005822,1.245396,-1.571959
C,0.504919,-0.606222,-1.741958,-0.724764
D,0.450637,1.220141,0.17104,-1.183743
E,0.309307,0.010227,-0.552076,-0.800846


### Adding a Row

In [20]:
# Add a row: assigning to a label that does not exist yet ('F') appends a new
# row, here the element-wise sum of rows A and B.
df.loc['F'] = df.loc['A']+df.loc['B']

In [21]:
# Row 'F' now sits at the bottom of the frame.
df

Unnamed: 0,W,X,Y,Z
A,1.563315,-0.31285,0.874785,0.125045
B,-1.056409,-0.005822,1.245396,-1.571959
C,0.504919,-0.606222,-1.741958,-0.724764
D,0.450637,1.220141,0.17104,-1.183743
E,0.309307,0.010227,-0.552076,-0.800846
F,0.506905,-0.318673,2.120181,-1.446915


In [22]:
# Remove row 'F' again. drop() returns a new frame, so rebind df instead of
# mutating it with inplace=True. errors='ignore' keeps the cell safe to
# re-run once the row is already gone (otherwise a second run raises KeyError).
df = df.drop(index='F', errors='ignore')

In [23]:
# Row 'F' has been removed; df again holds rows A-E.
df

Unnamed: 0,W,X,Y,Z
A,1.563315,-0.31285,0.874785,0.125045
B,-1.056409,-0.005822,1.245396,-1.571959
C,0.504919,-0.606222,-1.741958,-0.724764
D,0.450637,1.220141,0.17104,-1.183743
E,0.309307,0.010227,-0.552076,-0.800846


In [24]:
# Re-create row 'F' (sum of rows A and B) so the frame has six rows for the
# six labels attached in the cells that follow.
df.loc['F'] = df.loc['A']+df.loc['B']

In [25]:
# Row 'F' is back; df now has six rows (A-F).
df

Unnamed: 0,W,X,Y,Z
A,1.563315,-0.31285,0.874785,0.125045
B,-1.056409,-0.005822,1.245396,-1.571959
C,0.504919,-0.606222,-1.741958,-0.724764
D,0.450637,1.220141,0.17104,-1.183743
E,0.309307,0.010227,-0.552076,-0.800846
F,0.506905,-0.318673,2.120181,-1.446915


In [26]:
# Six short state codes, one per row of the frame, to be used as new labels.
newind = ['DEL', 'UP', 'UK', 'TN', 'AP', 'KL']

In [27]:
# Display the resulting list of six labels.
newind

['DEL', 'UP', 'UK', 'TN', 'AP', 'KL']

In [28]:
# Attach the labels as a new 'States' column. newind has six entries, matched
# in order to the six rows of df (A-F).
df['States'] = newind

In [29]:
# Each row now carries its label in the 'States' column (A -> DEL, ..., F -> KL).
df

Unnamed: 0,W,X,Y,Z,States
A,1.563315,-0.31285,0.874785,0.125045,DEL
B,-1.056409,-0.005822,1.245396,-1.571959,UP
C,0.504919,-0.606222,-1.741958,-0.724764,UK
D,0.450637,1.220141,0.17104,-1.183743,TN
E,0.309307,0.010227,-0.552076,-0.800846,AP
F,0.506905,-0.318673,2.120181,-1.446915,KL


In [30]:
# reset_index() returns a new frame in which the old row labels become a
# regular 'index' column and the rows are numbered 0-5. The result is only
# displayed, not assigned, so df itself is unchanged.
df.reset_index()

Unnamed: 0,index,W,X,Y,Z,States
0,A,1.563315,-0.31285,0.874785,0.125045,DEL
1,B,-1.056409,-0.005822,1.245396,-1.571959,UP
2,C,0.504919,-0.606222,-1.741958,-0.724764,UK
3,D,0.450637,1.220141,0.17104,-1.183743,TN
4,E,0.309307,0.010227,-0.552076,-0.800846,AP
5,F,0.506905,-0.318673,2.120181,-1.446915,KL


In [31]:
# Promote the 'States' column to the row index. set_index() returns a new
# frame, so rebind df rather than mutating it with inplace=True.
df = df.set_index('States')

In [32]:
# The 'States' values are now the row labels; the original A-F labels are no
# longer part of the frame.
df

Unnamed: 0_level_0,W,X,Y,Z
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
DEL,1.563315,-0.31285,0.874785,0.125045
UP,-1.056409,-0.005822,1.245396,-1.571959
UK,0.504919,-0.606222,-1.741958,-0.724764
TN,0.450637,1.220141,0.17104,-1.183743
AP,0.309307,0.010227,-0.552076,-0.800846
KL,0.506905,-0.318673,2.120181,-1.446915


#### Multi-Index Levels

In [33]:
# Outer level: a region for each row (three North, then three South).
outside = ['North'] * 3 + ['South'] * 3
# Inner level: the state codes defined earlier (same list object, not a copy).
inside = newind

In [34]:
# Pair each region with its state code to get one (outer, inner) tuple per row.
hier_index = [(region, state) for region, state in zip(outside, inside)]

In [35]:
# Display the list of (region, state) tuples.
hier_index

[('North', 'DEL'),
 ('North', 'UP'),
 ('North', 'UK'),
 ('South', 'TN'),
 ('South', 'AP'),
 ('South', 'KL')]

In [37]:
# Convert the list of tuples into a two-level pandas MultiIndex.
# NOTE(review): this rebinds hier_index from a list to a MultiIndex, so the
# name refers to different kinds of object before and after this cell.
hier_index = pd.MultiIndex.from_tuples(hier_index)

In [38]:
# Display the MultiIndex built from the tuples.
hier_index

MultiIndex([('North', 'DEL'),
            ('North',  'UP'),
            ('North',  'UK'),
            ('South',  'TN'),
            ('South',  'AP'),
            ('South',  'KL')],
           )

In [39]:
# Replace df's row index with the two-level index. Entries are matched to rows
# by position, so the first row becomes ('North', 'DEL') and so on; the single
# 'States' index is overwritten.
df.index = hier_index

In [40]:
# Rows are now labelled by an outer level (North/South) and an inner level
# (state code).
df

Unnamed: 0,Unnamed: 1,W,X,Y,Z
North,DEL,1.563315,-0.31285,0.874785,0.125045
North,UP,-1.056409,-0.005822,1.245396,-1.571959
North,UK,0.504919,-0.606222,-1.741958,-0.724764
South,TN,0.450637,1.220141,0.17104,-1.183743
South,AP,0.309307,0.010227,-0.552076,-0.800846
South,KL,0.506905,-0.318673,2.120181,-1.446915


In [41]:
# Cross-section on the outer level: returns the rows under 'North', indexed
# by the inner level (state code) only.
df.xs('North')

Unnamed: 0,W,X,Y,Z
DEL,1.563315,-0.31285,0.874785,0.125045
UP,-1.056409,-0.005822,1.245396,-1.571959
UK,0.504919,-0.606222,-1.741958,-0.724764


### Data Input & Output

#### CSV Input

In [42]:
# Load the sample CSV from the current user's Downloads folder. The location
# is built from Path.home() rather than a hard-coded 'C:\\Users\\<name>\\...'
# string, so the cell does not depend on one machine's user name or on
# backslash escaping.
# NOTE: this rebinds df; the multi-index frame from the previous section is
# no longer reachable under this name.
df = pd.read_csv(Path.home() / 'Downloads' / 'example.csv')

In [43]:
# The file loads as a 4x4 frame with columns a-d and a default 0-3 row index.
df

Unnamed: 0,a,b,c,d
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15


In [44]:
# Read the example CSV using a Windows path written with forward slashes,
# which needs no backslash escaping.
# NOTE(review): this is an absolute, machine-specific path (it embeds a user
# name) and will not resolve on another machine.
df2 = pd.read_csv('C:/Users/AEL04/Downloads/example.csv')

In [45]:
# Display the frame read via the forward-slash path.
df2

Unnamed: 0,a,b,c,d
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15


In [46]:
# Show the kernel's current working directory (IPython `pwd` magic, written
# here without the % prefix). Relative paths in the following cells are
# resolved against this directory.
pwd

'C:\\Users\\AEL04'

In [47]:
# Relative path: 'example.csv' is looked up in the working directory shown by
# the previous cell.
df3 = pd.read_csv('example.csv')

In [48]:
# Display the frame loaded from the relative path. (The cell previously held
# an incomplete `pd.` expression, which is a SyntaxError on re-run; the
# recorded output is the frame read in the cell above.)
df3

Unnamed: 0,a,b,c,d
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15


In [52]:
# Write df3 to 'example3.csv' (relative path). index=False leaves the row
# index out of the file.
df3.to_csv('example3.csv',index=False)

In [53]:
# Read the file just written to check the round trip.
df4 = pd.read_csv('example3.csv')

In [54]:
# Same columns (a-d) and values as df3; no extra index column appears in the
# data read back.
df4

Unnamed: 0,a,b,c,d
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15


#### Excel Input

In [55]:
# Load the sheet named 'Sheet1' from 'Excel_Sample.xlsx' (relative path).
# NOTE: this rebinds df, replacing the CSV frame loaded earlier under that name.
df = pd.read_excel('Excel_Sample.xlsx',sheet_name='Sheet1')

In [56]:
# Display the frame read from the Excel sheet.
df

Unnamed: 0,a,b,c,d
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15


In [57]:
# Write the frame to a new workbook, on a sheet named 'Sheet1', leaving the
# row index out of the file.
df.to_excel('Excel_Sample2.xlsx',sheet_name='Sheet1',index=False)

In [59]:
# Load the 2011 census table and display it (the result is not assigned).
# The original cell needed a non-default codec, so the file is presumably not
# UTF-8; decode it byte-for-byte as Latin-1. The 'unicode_escape' codec used
# before also decodes as Latin-1 but additionally interprets backslash
# sequences found inside the data, which can silently alter field values.
pd.read_csv('population_india_census2011.csv', encoding='latin-1')

Unnamed: 0,Sno,Region,State / Union Territory,Population,Rural population,Urban population,Area,Gender Ratio
0,1,North,Uttar Pradesh,199812341,155317278,44495063,"240,928 km2",912
1,2,West,Maharashtra,112374333,61556074,50818259,"307,713 km2",929
2,3,North,Bihar,104099452,92341436,11758016,"94,163 km2",918
3,4,East,West Bengal,91276115,62183113,29093002,"88,752 km2",953
4,5,Cntral,Madhya Pradesh,72626809,52557404,20069405,"308,245 km2",931
5,6,South,Tamil Nadu,72147030,37229590,34917440,"130,058 km2",996
6,7,North,Rajasthan,68548437,51500352,17048085,"342,239 km2",928
7,8,South,Karnataka,61095297,37469335,23625962,"191,791 km2",973
8,9,West,Gujarat,60439692,34694609,25745083,"196,024 km2",919
9,10,South,Andhra Pradesh,49577103,34966693,14610410,"162,968 km2",993


In [60]:
# read_csv is given a URL here instead of a local path, so this cell needs
# network access to run.
# NOTE: this rebinds df, replacing the Excel frame loaded earlier under that name.
df = pd.read_csv('https://raw.githubusercontent.com/ishant707/Covid19/master/covid_19_world.csv')

In [64]:
# Preview the first 10 rows rather than displaying the whole frame.
df.head(10)

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
0,1,01/22/2020,Anhui,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
1,2,01/22/2020,Beijing,Mainland China,1/22/2020 17:00,14.0,0.0,0.0
2,3,01/22/2020,Chongqing,Mainland China,1/22/2020 17:00,6.0,0.0,0.0
3,4,01/22/2020,Fujian,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
4,5,01/22/2020,Gansu,Mainland China,1/22/2020 17:00,0.0,0.0,0.0
5,6,01/22/2020,Guangdong,Mainland China,1/22/2020 17:00,26.0,0.0,0.0
6,7,01/22/2020,Guangxi,Mainland China,1/22/2020 17:00,2.0,0.0,0.0
7,8,01/22/2020,Guizhou,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
8,9,01/22/2020,Hainan,Mainland China,1/22/2020 17:00,4.0,0.0,0.0
9,10,01/22/2020,Hebei,Mainland China,1/22/2020 17:00,1.0,0.0,0.0


In [62]:
# (rows, columns) of the downloaded frame.
df.shape

(48095, 8)

In [63]:
# Show the last rows of the frame (five, as displayed below).
df.tail()

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
48090,48091,06/22/2020,Zacatecas,Mexico,2020-06-23 04:33:22,706.0,75.0,442.0
48091,48092,06/22/2020,Zakarpattia Oblast,Ukraine,2020-06-23 04:33:22,2188.0,57.0,842.0
48092,48093,06/22/2020,Zaporizhia Oblast,Ukraine,2020-06-23 04:33:22,555.0,16.0,391.0
48093,48094,06/22/2020,Zhejiang,Mainland China,2020-06-23 04:33:22,1269.0,1.0,1267.0
48094,48095,06/22/2020,Zhytomyr Oblast,Ukraine,2020-06-23 04:33:22,1270.0,23.0,633.0
