# Pandas DataFrames

In [132]:
import numpy as np
import pandas as pd

In [133]:
from numpy.random import randn

In [134]:
# Fix the global NumPy RNG state so the random values below are reproducible.
np.random.seed(seed=101)

## Creating dataframes

In [135]:
# 5x4 frame of random draws; rows are labelled A-E and columns W-Z.
row_labels = list("ABCDE")
col_labels = list("WXYZ")
df = pd.DataFrame(data=randn(5, 4), index=row_labels, columns=col_labels)

In [136]:
# Display the freshly built frame (a bare last expression is rendered as the cell output).
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


## Adding columns

In [137]:
# Add a column of random draws. The length is taken from the frame so it
# always matches the number of rows instead of relying on a hard-coded 5
# (a mismatched length would make the assignment raise).
df['new'] = randn(len(df))

In [138]:
# Show the frame again to confirm the 'new' column was appended.
df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,0.302665
B,0.651118,-0.319318,-0.848077,0.605965,1.693723
C,-2.018168,0.740122,0.528813,-0.589001,-1.706086
D,0.188695,-0.758872,-0.933237,0.955057,-1.159119
E,0.190794,1.978757,2.605967,0.683509,-0.134841


## Accessing elements in dataframes

In [140]:
# Bracket indexing with a single label returns that column as a Series.
df["W"]

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [141]:
# A list of labels selects several columns and returns a DataFrame.
wanted = ["W", "Z"]
df[wanted]

Unnamed: 0,W,Z
A,2.70685,0.503826
B,0.651118,0.605965
C,-2.018168,-0.589001
D,0.188695,0.955057
E,0.190794,0.683509


In [142]:
# Re-display the full frame as a reference for the .loc/.iloc examples that follow.
df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,0.302665
B,0.651118,-0.319318,-0.848077,0.605965,1.693723
C,-2.018168,0.740122,0.528813,-0.589001,-1.706086
D,0.188695,-0.758872,-0.933237,0.955057,-1.159119
E,0.190794,1.978757,2.605967,0.683509,-0.134841


In [143]:
# .loc is label-based: ":" keeps every row, "X" picks the column.
df.loc[:, "X"]

A    0.628133
B   -0.319318
C    0.740122
D   -0.758872
E    1.978757
Name: X, dtype: float64

In [144]:
# A single row label returns that row as a Series indexed by column name.
df.loc["A", :]

W      2.706850
X      0.628133
Y      0.907969
Z      0.503826
new    0.302665
Name: A, dtype: float64

In [145]:
# A row label plus a column label addresses a single scalar cell.
df.loc["B", "Y"]

-0.8480769834036315

In [146]:
# .iloc is position-based: position 0 is the first row (label "A").
df.iloc[0]

W      2.706850
X      0.628133
Y      0.907969
Z      0.503826
new    0.302665
Name: A, dtype: float64

In [147]:
# Label slices with .loc include both endpoints:
# rows from "B" onward, columns up to and including "Z".
df.loc["B":, :"Z"]

Unnamed: 0,W,X,Y,Z
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [148]:
# Lists of labels pick an arbitrary sub-grid of rows and columns.
rows, cols = ["A", "B"], ["X", "Y"]
df.loc[rows, cols]

Unnamed: 0,X,Y
A,0.628133,0.907969
B,-0.319318,-0.848077


## Dropping a column

By default `drop` uses `axis=0` (row labels); to drop a column, pass `axis=1` (or, equivalently, `columns=...`).

In [149]:
# columns="new" is the keyword form of labels="new", axis=1.
# The result is a new frame; df itself is not modified.
df.drop(columns="new")

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


`df.drop` returns a new DataFrame; it does not modify `df` in place.

In [150]:
# df is unchanged: the 'new' column is still present after the drop above.
df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,0.302665
B,0.651118,-0.319318,-0.848077,0.605965,1.693723
C,-2.018168,0.740122,0.528813,-0.589001,-1.706086
D,0.188695,-0.758872,-0.933237,0.955057,-1.159119
E,0.190794,1.978757,2.605967,0.683509,-0.134841


To modify `df` in place, pass `inplace=True`.

In [151]:
# inplace=True mutates df itself and returns None, so this cell shows no output.
df.drop(columns="new", inplace=True)

In [152]:
# Confirm the 'new' column is now gone from df.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


## Dropping a row

In [153]:
# index="E" is the keyword form of labels="E", axis=0.
# The result is a new frame; df itself is not modified.
df.drop(index="E")

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057


This is also not in place: `df` still contains row `E`.

In [154]:
# df still has row 'E' because the drop above returned a new frame.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [155]:
# Remove row "E" from df itself (inplace=True returns None).
df.drop(index="E", inplace=True)

In [156]:
# Row 'E' is now gone from df.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057


In [157]:
# (rows, columns) of the frame after dropping column 'new' and row 'E'.
df.shape

(4, 4)

## Operations on dataframe

In [162]:
# Element-wise comparison: True wherever the value is strictly positive.
booldf = df.gt(0)

In [163]:
# Show the boolean mask (same shape and labels as df).
booldf

Unnamed: 0,W,X,Y,Z
A,True,True,True,True
B,True,False,False,True
C,False,True,True,False
D,True,False,False,True


In [164]:
# Masking with a boolean frame keeps the shape and leaves NaN
# wherever the condition is False.
df.where(df > 0)

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057


In [174]:
# Boolean Series: one True/False per row of column W.
df["W"].gt(0)

A     True
B     True
C    False
D     True
Name: W, dtype: bool

In [176]:
# A boolean Series filters rows: only rows where W is positive are kept.
w_positive = df["W"] > 0
df[w_positive]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057


### And

In [180]:
# Combine row masks with & (element-wise AND). Python's `and` cannot be
# used here because it does not operate element-wise on a Series.
w_above_half = df["W"] > 0.5
x_negative = df["X"] < 0
df[w_above_half & x_negative]

Unnamed: 0,W,X,Y,Z
B,0.651118,-0.319318,-0.848077,0.605965


### Or

In [181]:
# Combine row masks with | (element-wise OR): keep rows where either
# condition holds.
w_positive = df["W"] > 0
x_negative = df["X"] < 0
df[w_positive | x_negative]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057


## Resetting and setting the index

In [182]:
# Move the current row labels into an 'index' column and install a default
# 0..n-1 index. Returns a new frame; df itself is not modified.
df.reset_index(drop=False)

Unnamed: 0,index,W,X,Y,Z
0,A,2.70685,0.628133,0.907969,0.503826
1,B,0.651118,-0.319318,-0.848077,0.605965
2,C,-2.018168,0.740122,0.528813,-0.589001
3,D,0.188695,-0.758872,-0.933237,0.955057


In [187]:
# Attach one state code per row as a regular column, then show the result.
newind = "CA NY WY OR".split()
df["newind"] = newind
df

Unnamed: 0,W,X,Y,Z,newind
A,2.70685,0.628133,0.907969,0.503826,CA
B,0.651118,-0.319318,-0.848077,0.605965,NY
C,-2.018168,0.740122,0.528813,-0.589001,WY
D,0.188695,-0.758872,-0.933237,0.955057,OR


In [190]:
# Promote the 'newind' column to the row index.
# Returns a new frame; df itself is not modified.
df.set_index(keys="newind")

Unnamed: 0_level_0,W,X,Y,Z
newind,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,2.70685,0.628133,0.907969,0.503826
NY,0.651118,-0.319318,-0.848077,0.605965
WY,-2.018168,0.740122,0.528813,-0.589001
OR,0.188695,-0.758872,-0.933237,0.955057
