In [70]:
import numpy as np
import pandas as pd
from numpy.random import randn
np.random.seed(101)

In [71]:
# Build a 5x4 dataframe of random normal samples with labelled rows (A-E) and columns (W-Z).
df = pd.DataFrame(
    data=randn(5, 4),
    index=list('ABCDE'),
    columns=list('WXYZ'),
)
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


# Conditional Selection

In [72]:
df > 0        # Element-wise comparison: every value in df is tested against 0

Unnamed: 0,W,X,Y,Z
A,True,True,True,True
B,True,False,False,True
C,False,True,True,False
D,True,False,False,True
E,True,True,True,True


In [73]:
# Keep the boolean dataframe in a variable so it can be reused as a mask.
booldf = df > 0

In [74]:
booldf

Unnamed: 0,W,X,Y,Z
A,True,True,True,True
B,True,False,False,True
C,False,True,True,False
D,True,False,False,True
E,True,True,True,True


In [75]:
# Use the boolean dataframe as a mask on the original df.
df[booldf]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [76]:
# Positions where the mask is False come back as NaN (see the previous output).
# The mask does not have to be stored first; it can be built inline:
df[df > 0]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057
E,0.190794,1.978757,2.605967,0.683509


## Conditional Selection on rows and columns

We don't usually do selection on the whole dataframe; that is not a common approach. But we do it extensively on rows and columns, e.g. using a condition on one column to pick rows.

In [77]:
# Compare a single column with 0; the result is a boolean Series with one entry per row.
df['X'] > 0

A     True
B    False
C     True
D    False
E     True
Name: X, dtype: bool

In [78]:
# Keep the boolean Series so it can be used as a row filter.
boolser = df['X'] > 0
boolser

A     True
B    False
C     True
D    False
E     True
Name: X, dtype: bool

In [79]:
# Filter df with the boolean Series and save the resulting dataframe.
result = df[boolser]
result

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
C,-2.018168,0.740122,0.528813,-0.589001
E,0.190794,1.978757,2.605967,0.683509


In [80]:
# Rows where the X > 0 test was False (B and D) are dropped entirely.
# result is itself a dataframe, so we can keep operating on it, e.g. select columns.
result[['X', 'Y']]

Unnamed: 0,X,Y
A,0.628133,0.907969
C,0.740122,0.528813
E,1.978757,2.605967


### We can do it in a single line also 

In [81]:
# Single-step version: .loc takes the row filter and the column list together,
# so no intermediate dataframe is created and indexed a second time.
#   row filter  -> df['X'] > 0   (the boolean Series saved as boolser)
#   column list -> ['X', 'Y']    (the selection that was applied to result)
df.loc[df['X'] > 0, ['X', 'Y']]

Unnamed: 0,X,Y
A,0.628133,0.907969
C,0.740122,0.528813
E,1.978757,2.605967


# DataFrame is still unchanged

In [82]:
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


We have only asked for filtered results of the dataframe under some conditions; the data in `df` itself was never changed by those conditions.
The only new dataframe we kept is `result`, which stores the outcome of applying one such condition.

# Multiple Conditional Statements: the `and` / `or` problem with DataFrames

In [83]:
# Boolean Series: True for the rows where Y is greater than 0.
df['Y'] > 0

A     True
B    False
C     True
D    False
E     True
Name: Y, dtype: bool

In [84]:
df[df['Y'] > 0]         # Only the rows of df where Y > 0

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
C,-2.018168,0.740122,0.528813,-0.589001
E,0.190794,1.978757,2.605967,0.683509


In [85]:
# Boolean Series: True for the rows where Z is greater than 0.
df['Z'] > 0

A     True
B     True
C    False
D     True
E     True
Name: Z, dtype: bool

In [86]:
df[df['Z'] > 0]      # Only the rows of df where Z > 0

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


#### But we need the rows of df where both Y and Z are > 0


In [87]:
# df[ (df['Y']>0) and (df['Z']>0) ]
# ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
# This means the `and` operator cannot combine the boolean Series df['Y']>0 with the boolean Series df['Z']>0.
# `and` needs a single True/False on each side (True and False, True and True, etc.); it does not work element by element.

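To solve this problem we should use `&` in place of `and`, and `|` in place of `or`. Each comparison must be wrapped in parentheses, because `&` and `|` bind more tightly than `>`.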

In [88]:
# Element-wise AND: keep the rows where Y > 0 and Z > 0 both hold.
df[ (df['Y']>0) & (df['Z']>0) ]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
E,0.190794,1.978757,2.605967,0.683509


In [89]:
# Element-wise OR: keep the rows where at least one of Y > 0 or Z > 0 holds.
df[ (df['Y']>0) | (df['Z']>0) ]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


# Index Methods

## reset_index()

In [90]:
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [91]:
# Replace the current index with the default integer index (0, 1, 2, ...);
# the old labels are kept as a regular column named 'index'.
df.reset_index()

Unnamed: 0,index,W,X,Y,Z
0,A,2.70685,0.628133,0.907969,0.503826
1,B,0.651118,-0.319318,-0.848077,0.605965
2,C,-2.018168,0.740122,0.528813,-0.589001
3,D,0.188695,-0.758872,-0.933237,0.955057
4,E,0.190794,1.978757,2.605967,0.683509


### But these changes are not permanent: reset_index() returns a new dataframe, so to keep the change we have to set inplace = True (or reassign the result)

In [92]:
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


## set_index()

In [93]:
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [94]:
# When a column holds values suitable as row labels, it can be promoted to the
# index, replacing the old one. Prepare five such labels, one per row of df.
newcol = ['p', 'q', 'r', 's', 't']
newcol

['p', 'q', 'r', 's', 't']

In [95]:
# Attach the labels as a new column named 'Nindex' (this modifies df itself).
df['Nindex'] = newcol
df

Unnamed: 0,W,X,Y,Z,Nindex
A,2.70685,0.628133,0.907969,0.503826,p
B,0.651118,-0.319318,-0.848077,0.605965,q
C,-2.018168,0.740122,0.528813,-0.589001,r
D,0.188695,-0.758872,-0.933237,0.955057,s
E,0.190794,1.978757,2.605967,0.683509,t


In [96]:
# set_index() returns a new dataframe that uses the 'Nindex' column as its index;
# rebinding df keeps the change. The resulting df is the same as with
# set_index('Nindex', inplace=True), without mutating the object in place.
df = df.set_index('Nindex')

### Now the changes are permanent.

In [97]:
df

Unnamed: 0_level_0,W,X,Y,Z
Nindex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
p,2.70685,0.628133,0.907969,0.503826
q,0.651118,-0.319318,-0.848077,0.605965
r,-2.018168,0.740122,0.528813,-0.589001
s,0.188695,-0.758872,-0.933237,0.955057
t,0.190794,1.978757,2.605967,0.683509


In [98]:
# Label-based lookup now uses the new index values: row 'p' comes back as a Series.
df.loc['p']

W    2.706850
X    0.628133
Y    0.907969
Z    0.503826
Name: p, dtype: float64

In [99]:
# A list of labels selects several rows at once and returns a dataframe.
df.loc[['r', 't']]

Unnamed: 0_level_0,W,X,Y,Z
Nindex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
r,-2.018168,0.740122,0.528813,-0.589001
t,0.190794,1.978757,2.605967,0.683509
