In [2]:
import numpy as np
import pandas as pd

In [3]:
from numpy.random import randn

In [4]:
# Seed NumPy's global random generator so the randn() values used below are the same on every run.
np.random.seed(101)

In [5]:
# 5x4 frame of randn() values: rows labelled A-E, columns labelled W-Z.
df = pd.DataFrame(
    data=randn(5, 4),
    index=['A', 'B', 'C', 'D', 'E'],
    columns=['W', 'X', 'Y', 'Z'],
)

In [6]:
# Display the frame (a bare last expression is rendered as the cell output).
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [7]:
# Each column is a pandas Series, and all the columns share a common index

In [8]:
# Selecting a single column label returns that column as a Series.
df['W']

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [9]:
# Confirm that a single selected column is a pandas Series.
type(df['W'])

pandas.core.series.Series

In [10]:
# The table as a whole is a pandas DataFrame.
type(df)

pandas.core.frame.DataFrame

In [11]:
# Pass a list of column labels (note the double brackets) to select several columns at once.
df[['W','Z']]

Unnamed: 0,W,Z
A,2.70685,0.503826
B,0.651118,0.605965
C,-2.018168,-0.589001
D,0.188695,0.955057
E,0.190794,0.683509


In [12]:
# Selecting multiple columns (a list of labels) returns a DataFrame, not a Series

In [13]:
# Add a column 'new' holding the element-wise sum of columns W and X.
df['new'] = df['W'] + df['X']

In [14]:
# Display df: it now carries the extra 'new' column.
df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,3.334983
B,0.651118,-0.319318,-0.848077,0.605965,0.3318
C,-2.018168,0.740122,0.528813,-0.589001,-1.278046
D,0.188695,-0.758872,-0.933237,0.955057,-0.570177
E,0.190794,1.978757,2.605967,0.683509,2.169552


In [15]:
# Remove the 'new' column. drop() returns a new DataFrame and leaves the
# original untouched, so the result is assigned back to df to keep the change
# (passing inplace=True is the alternative, not a requirement).
# columns='new' is the keyword form of drop('new', axis=1); axis=0 targets row labels.
df = df.drop(columns='new')

In [16]:
# Drop row 'E' by its index label. index='E' is the keyword spelling of
# drop('E', axis=0); axis=0 (rows) is also what drop() uses when no axis is given.
# The result is only displayed, not stored, so df itself keeps all of its rows.
df.drop(index='E')

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057


In [17]:
# shape is (rows, columns); df still has 5 rows because the row drop above was not stored.
df.shape

(5, 4)

In [18]:
# Select a row by its index label with .loc; the row is returned as a Series.
df.loc['C']

W   -2.018168
X    0.740122
Y    0.528813
Z   -0.589001
Name: C, dtype: float64

In [19]:
# Select a row by integer position with .iloc; position 2 is the row labelled 'C'.
df.iloc[2]

W   -2.018168
X    0.740122
Y    0.528813
Z   -0.589001
Name: C, dtype: float64

In [20]:
# Single value: .loc[row_label, column_label].
df.loc['B','Y']

-0.8480769834036315

In [21]:
# Sub-frame: .loc[list_of_row_labels, list_of_column_labels].
df.loc[['A','B'],['W','Y']]

Unnamed: 0,W,Y
A,2.70685,0.907969
B,0.651118,-0.848077


In [22]:
# Element-wise comparison: yields a boolean DataFrame with the same shape and labels as df.
df > 0

Unnamed: 0,W,X,Y,Z
A,True,True,True,True
B,True,False,False,True
C,False,True,True,False
D,True,False,False,True
E,True,True,True,True


In [23]:
# Indexing with a boolean DataFrame keeps the frame's shape:
# values where the condition is False are shown as NaN.
df[df > 0]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [24]:
# Indexing with a boolean Series filters rows: only the rows where W > 0 are kept.
df[df['W']>0]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [25]:
# Same kind of row filter on another column: only the rows where Z < 0 are kept.
df[df['Z']<0]

Unnamed: 0,W,X,Y,Z
C,-2.018168,0.740122,0.528813,-0.589001


In [33]:
# Rows where W > 0, restricted to columns Y and X (in that order), selected in a
# single .loc[row_mask, column_list] call instead of two chained [] lookups.
# The column selection must be a list: ['Y', 'X'], not a bare 'Y', 'X'.
resultDf = df.loc[df['W'] > 0, ['Y', 'X']]

In [34]:
# Display the filtered two-column result.
resultDf

Unnamed: 0,Y,X
A,0.907969,0.628133
B,-0.848077,-0.319318
D,-0.933237,-0.758872
E,2.605967,1.978757


In [35]:
# Step-by-step row filter followed by a column selection.
myCols = ['Y', 'X']          # columns to keep, in display order
boolser = df['W'].gt(0)      # boolean Series, True where W > 0
result = df.loc[boolser]     # only the rows where the mask is True
result[myCols]

Unnamed: 0,Y,X
A,0.907969,0.628133
B,-0.848077,-0.319318
D,-0.933237,-0.758872
E,2.605967,1.978757


In [36]:
# Python's 'and' / 'or' operators only work on single True/False values.
# To combine boolean Series element-wise, use '&' (and) and '|' (or),
# wrapping each comparison in parentheses.
df[(df['W']>0)&(df['Y']>1)]

Unnamed: 0,W,X,Y,Z
E,0.190794,1.978757,2.605967,0.683509


In [37]:
# Element-wise OR: keep the rows where at least one of the two conditions holds.
df[(df['W']>0)|(df['Y']>1)]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [38]:
# Display df: the filters above only returned new objects, so df itself is unchanged.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [40]:
# reset_index() moves the current index into a regular column named 'index'
# and replaces it with a default 0..n-1 integer index.
# It returns a new frame; to keep the change, pass inplace=True:
# df.reset_index(inplace = True)
df.reset_index()

Unnamed: 0,index,W,X,Y,Z
0,A,2.70685,0.628133,0.907969,0.503826
1,B,0.651118,-0.319318,-0.848077,0.605965
2,C,-2.018168,0.740122,0.528813,-0.589001
3,D,0.188695,-0.758872,-0.933237,0.955057
4,E,0.190794,1.978757,2.605967,0.683509


In [41]:
# Build one label per row (five labels for five rows) by splitting on whitespace,
# then attach the list as a new 'States' column.
newWind = "CA MI LA MT ON".split()
df['States'] = newWind

In [42]:
# Display df: it now has the extra 'States' column.
df

Unnamed: 0,W,X,Y,Z,States
A,2.70685,0.628133,0.907969,0.503826,CA
B,0.651118,-0.319318,-0.848077,0.605965,MI
C,-2.018168,0.740122,0.528813,-0.589001,LA
D,0.188695,-0.758872,-0.933237,0.955057,MT
E,0.190794,1.978757,2.605967,0.683509,ON


In [43]:
# set_index('States') returns a new frame that uses the States column as its index.
# The result is not assigned and inplace is not used, so df itself is unchanged.
df.set_index('States')

Unnamed: 0_level_0,W,X,Y,Z
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,2.70685,0.628133,0.907969,0.503826
MI,0.651118,-0.319318,-0.848077,0.605965
LA,-2.018168,0.740122,0.528813,-0.589001
MT,0.188695,-0.758872,-0.933237,0.955057
ON,0.190794,1.978757,2.605967,0.683509


In [44]:
# Display df: it still has the A-E index and the States column, since the set_index result above was not stored.
df

Unnamed: 0,W,X,Y,Z,States
A,2.70685,0.628133,0.907969,0.503826,CA
B,0.651118,-0.319318,-0.848077,0.605965,MI
C,-2.018168,0.740122,0.528813,-0.589001,LA
D,0.188695,-0.758872,-0.933237,0.955057,MT
E,0.190794,1.978757,2.605967,0.683509,ON
