In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# A DataFrame is essentially a collection of Series that share the same index

In [3]:
# Build a small demo DataFrame of standard-normal values.
#
# The generator is seeded so that the frame is identical on every
# "Restart Kernel -> Run All"; without a seed each run would produce
# different numbers. Outputs saved from earlier, unseeded runs will not
# match these values until the notebook is re-executed.
data = np.random.default_rng(seed=42).standard_normal((5, 4))  # 5 rows x 4 columns
cols = ['A', 'B', 'C', 'D']

# Create the DataFrame (df) by passing the data and the column names separately;
# a DataFrame can also be created from a Python dictionary.
df = pd.DataFrame(data=data, columns=cols)

In [4]:
# Columns A, B, C and D are each a Series object; they all share the same index
df

Unnamed: 0,A,B,C,D
0,0.770391,1.424868,-0.577634,0.886262
1,-0.499311,0.503849,-0.234692,0.006987
2,-0.428533,-0.674532,-0.167588,0.606704
3,-1.281685,-1.352079,0.772554,0.071031
4,1.875226,0.855103,-0.252178,0.842963


In [5]:
# Replace the default integer row labels with custom ones.
# One label per row, listed in row order.
index = ['U', 'V', 'W', 'X', 'Y']
df.index = index

In [6]:
# Display the frame again: the rows should now be labelled U through Y
df

Unnamed: 0,A,B,C,D
U,0.770391,1.424868,-0.577634,0.886262
V,-0.499311,0.503849,-0.234692,0.006987
W,-0.428533,-0.674532,-0.167588,0.606704
X,-1.281685,-1.352079,0.772554,0.071031
Y,1.875226,0.855103,-0.252178,0.842963


In [7]:
# Select a single column by name.
# The bracket syntax works like a Python dictionary lookup and returns the column as a Series
df['A']

U    0.770391
V   -0.499311
W   -0.428533
X   -1.281685
Y    1.875226
Name: A, dtype: float64

In [8]:
# Select several columns at once by passing a list of column names;
# the result is a DataFrame holding just those columns.
df[['A', 'B', 'D']]

Unnamed: 0,A,B,D
U,0.770391,1.424868,0.886262
V,-0.499311,0.503849,0.006987
W,-0.428533,-0.674532,0.606704
X,-1.281685,-1.352079,0.071031
Y,1.875226,0.855103,0.842963


In [9]:
# Creating a new column

# Assigning to a column name that does not exist yet creates that column:
# E is the element-wise sum of columns A and B
df['E'] = df['A'] + df['B']

In [10]:
# Display the frame to confirm that column E was added
df

Unnamed: 0,A,B,C,D,E
U,0.770391,1.424868,-0.577634,0.886262,2.195259
V,-0.499311,0.503849,-0.234692,0.006987,0.004539
W,-0.428533,-0.674532,-0.167588,0.606704,-1.103066
X,-1.281685,-1.352079,0.772554,0.071031,-2.633765
Y,1.875226,0.855103,-0.252178,0.842963,2.730329


In [11]:
# Delete a column.
#
# drop() removes row labels (axis=0) unless told otherwise; the columns=
# keyword targets column labels instead, which is the same as passing the
# label together with axis=1.
# inplace=True modifies df itself; without it, drop() leaves df untouched and
# returns a new DataFrame with column E removed.
# Note: column E must exist, so running this cell a second time raises KeyError.
df.drop(columns='E', inplace=True)

In [12]:
# Display the frame to confirm that column E is gone
df

Unnamed: 0,A,B,C,D
U,0.770391,1.424868,-0.577634,0.886262
V,-0.499311,0.503849,-0.234692,0.006987
W,-0.428533,-0.674532,-0.167588,0.606704
X,-1.281685,-1.352079,0.772554,0.071031
Y,1.875226,0.855103,-0.252178,0.842963


In [13]:
# Delete a single row, here the one labelled X.
#
# index= names the row label explicitly; it is equivalent to relying on
# drop()'s default of axis=0. inplace=True changes df itself instead of
# returning a new DataFrame.
df.drop(index='X', inplace=True)

In [14]:
# Display the frame to confirm that row X is gone
df

Unnamed: 0,A,B,C,D
U,0.770391,1.424868,-0.577634,0.886262
V,-0.499311,0.503849,-0.234692,0.006987
W,-0.428533,-0.674532,-0.167588,0.606704
Y,1.875226,0.855103,-0.252178,0.842963


In [15]:
# So far we have seen how to select a column, add a column, drop a column and drop a row

# Select a single row by its index label with .loc; the row comes back as a Series
df.loc['U']

A    0.770391
B    1.424868
C   -0.577634
D    0.886262
Name: U, dtype: float64

In [16]:
# Select several rows by passing a list of index labels to .loc;
# the result is a DataFrame
df.loc[['U', 'Y']]

Unnamed: 0,A,B,C,D
U,0.770391,1.424868,-0.577634,0.886262
Y,1.875226,0.855103,-0.252178,0.842963


In [17]:
# Rows can also be selected by integer position instead of by index label.
# .iloc is integer-location based: this returns the rows at positions 0 and 2
# (the first and third rows).
df.iloc[[0, 2]]

Unnamed: 0,A,B,C,D
U,0.770391,1.424868,-0.577634,0.886262
W,-0.428533,-0.674532,-0.167588,0.606704


In [18]:
# Select a subset of the data: rows U and W, columns A and B.
#
# This happens in two steps: .loc[...] first picks rows U and W and returns a
# DataFrame, then the second [...] picks columns A and B from that result.
# Chaining two lookups like this is fine for reading values, but should be
# avoided when assigning values.
df.loc[['U', 'W']][['A', 'B']]

Unnamed: 0,A,B
U,0.770391,1.424868
W,-0.428533,-0.674532


In [19]:
# The same selection written as a single .loc call.
# As with NumPy indexing, the row labels come first and the column labels second.
df.loc[['U', 'W'], ['A', 'B']]

Unnamed: 0,A,B
U,0.770391,1.424868
W,-0.428533,-0.674532
