# Conditional selection 

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Seed NumPy's global RNG so the np.random.randn draws below are reproducible.
np.random.seed(101)

In [3]:
# 4x4 frame of standard-normal draws: rows labelled A-D, columns labelled W-Z.
df1 = pd.DataFrame(
    data=np.random.randn(4, 4),
    index=['A', 'B', 'C', 'D'],
    columns=['W', 'X', 'Y', 'Z'],
)

In [4]:
df1

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057


In [5]:
df1 > 0

Unnamed: 0,W,X,Y,Z
A,True,True,True,True
B,True,False,False,True
C,False,True,True,False
D,True,False,False,True


In [6]:
# Element-wise comparison: a boolean frame with df1's labels, True where the value is positive.
booldf = df1 > 0

In [7]:
booldf

Unnamed: 0,W,X,Y,Z
A,True,True,True,True
B,True,False,False,True
C,False,True,True,False
D,True,False,False,True


In [8]:
df1[booldf]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057


In [9]:
df1[df1 > 0]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057


In [10]:
# Conditional selection based on a specific column

In [11]:
df1['W']

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
Name: W, dtype: float64

In [12]:
df1['W']>0

A     True
B     True
C    False
D     True
Name: W, dtype: bool

In [13]:
df1[df1['W']>0]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057


In [44]:
df1

Unnamed: 0_level_0,W,X,Y,Z
states,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AO,2.70685,0.628133,0.907969,0.503826
CK,0.651118,-0.319318,-0.848077,0.605965
BE,-2.018168,0.740122,0.528813,-0.589001
FR,0.188695,-0.758872,-0.933237,0.955057


In [15]:
df1[df1['Z']<0]

Unnamed: 0,W,X,Y,Z
C,-2.018168,0.740122,0.528813,-0.589001


In [16]:
# Keep the rows where W is positive under a name so they can be indexed further.
result = df1.loc[df1['W'] > 0]
result

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057


In [17]:
result['X']

A    0.628133
B   -0.319318
D   -0.758872
Name: X, dtype: float64

In [18]:
# The same selection can be done in a single step

In [19]:
# Pick the rows (W > 0) and the column ('X') in one .loc call rather than
# chaining two [] lookups: a single indexing operation with no intermediate
# frame, and the form that is safe to assign through.
df1.loc[df1['W'] > 0, 'X']

A    0.628133
B   -0.319318
D   -0.758872
Name: X, dtype: float64

In [20]:
# Same kind of selection, broken into named pieces:
# the columns to keep, the row mask, the filtered rows, then the column subset.
mycol = ['X', 'Y']
boolser = df1['W'] > 0
result = df1[boolser]
result[mycol]

Unnamed: 0,X,Y
A,0.628133,0.907969
B,-0.319318,-0.848077
D,-0.758872,-0.933237


In [21]:
# for multiple conditions 

In [22]:
# Combine the two row masks element-wise with & (Python's `and` cannot be
# applied to a Series). Each comparison needs its own parentheses because
# & binds more tightly than >.
df1[(df1['W']>0) & (df1['X']>0.5)]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826


In [23]:
# Resetting our DataFrame index

In [24]:
# Returns a new frame with a 0..n-1 index and the old labels moved into an
# 'index' column; the result is not assigned, so df1 itself is unchanged.
df1.reset_index()


Unnamed: 0,index,W,X,Y,Z
0,A,2.70685,0.628133,0.907969,0.503826
1,B,0.651118,-0.319318,-0.848077,0.605965
2,C,-2.018168,0.740122,0.528813,-0.589001
3,D,0.188695,-0.758872,-0.933237,0.955057


In [25]:
df1

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057


In [26]:
newid = 'AO CK BE FR'.split()
newid

['AO', 'CK', 'BE', 'FR']

In [27]:
# Adds a 'states' column to df1 in place (one label per row, in row order),
# then displays the modified frame.
df1['states'] = newid
df1

Unnamed: 0,W,X,Y,Z,states
A,2.70685,0.628133,0.907969,0.503826,AO
B,0.651118,-0.319318,-0.848077,0.605965,CK
C,-2.018168,0.740122,0.528813,-0.589001,BE
D,0.188695,-0.758872,-0.933237,0.955057,FR


In [28]:
# Promote the 'states' column to the row index. The returned frame is rebound
# to df1 instead of using inplace=True.
# NOTE: 'states' is no longer a column afterwards, so re-running this cell on
# its own raises KeyError.
df1 = df1.set_index('states')
df1

Unnamed: 0_level_0,W,X,Y,Z
states,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AO,2.70685,0.628133,0.907969,0.503826
CK,0.651118,-0.319318,-0.848077,0.605965
BE,-2.018168,0.740122,0.528813,-0.589001
FR,0.188695,-0.758872,-0.933237,0.955057


# MultiIndex and Index Hierarchy

In [29]:
# Index levels
# `outside` holds the outer-level label and `inside` the inner-level label for
# each of the seven rows, position by position.
outside = ['Sudarshan'] * 3 + ['sourabh'] * 4
inside = [0, 1, 2, 1, 2, 3, 4]
# Build the two-level index directly from the parallel lists, so hier_index is
# only ever bound to a MultiIndex (never to an intermediate list of tuples).
hier_index = pd.MultiIndex.from_arrays([outside, inside])


In [30]:
outside

['Sudarshan',
 'Sudarshan',
 'Sudarshan',
 'sourabh',
 'sourabh',
 'sourabh',
 'sourabh']

In [31]:
inside

[0, 1, 2, 1, 2, 3, 4]

In [32]:
hier_index

MultiIndex([('Sudarshan', 0),
            ('Sudarshan', 1),
            ('Sudarshan', 2),
            (  'sourabh', 1),
            (  'sourabh', 2),
            (  'sourabh', 3),
            (  'sourabh', 4)],
           )

In [33]:
# 7x3 frame of standard-normal draws, one row per (outer, inner) pair in hier_index.
df = pd.DataFrame(
    data=np.random.randn(7, 3),
    index=hier_index,
    columns=['Ak', 'YM', 'PH'],
)
df

Unnamed: 0,Unnamed: 1,Ak,YM,PH
Sudarshan,0,0.190794,1.978757,2.605967
Sudarshan,1,0.683509,0.302665,1.693723
Sudarshan,2,-1.706086,-1.159119,-0.134841
sourabh,1,0.390528,0.166905,0.184502
sourabh,2,0.807706,0.07296,0.638787
sourabh,3,0.329646,-0.497104,-0.75407
sourabh,4,-0.943406,0.484752,-0.116773


In [34]:
# Next we select by the outer level, then name the outside and inside index levels (via df.index.names, a few cells below)

In [46]:
df.loc['Sudarshan']


Unnamed: 0_level_0,Ak,YM,PH
num,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.190794,1.978757,2.605967
1,0.683509,0.302665,1.693723
2,-1.706086,-1.159119,-0.134841


In [36]:
df.loc['sourabh'].loc[2]

Ak    0.807706
YM    0.072960
PH    0.638787
Name: 2, dtype: float64

In [37]:
# Label the two index levels: outer -> 'Employee', inner -> 'num'.
# This modifies df's index in place; later cells refer to the 'num' level by name.
df.index.names = ['Employee','num']
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Ak,YM,PH
Employee,num,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Sudarshan,0,0.190794,1.978757,2.605967
Sudarshan,1,0.683509,0.302665,1.693723
Sudarshan,2,-1.706086,-1.159119,-0.134841
sourabh,1,0.390528,0.166905,0.184502
sourabh,2,0.807706,0.07296,0.638787
sourabh,3,0.329646,-0.497104,-0.75407
sourabh,4,-0.943406,0.484752,-0.116773


In [38]:
# Retrieving a specific value

In [39]:
# Address the single cell with one .loc call instead of chaining two:
# the (outer, inner) tuple picks the row, 'PH' picks the column.
df.loc[('sourabh', 2), 'PH']

0.638787013499328

In [40]:
# cross section 

In [47]:
df.xs('Sudarshan')

Unnamed: 0_level_0,Ak,YM,PH
num,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.190794,1.978757,2.605967
1,0.683509,0.302665,1.693723
2,-1.706086,-1.159119,-0.134841


In [49]:
# Cross-section on the inner level: the rows whose 'num' label is 1, for every
# employee. The 'num' level is dropped, leaving 'Employee' as the index.
sundar = df.xs(key=1, level='num')
sundar

Unnamed: 0_level_0,Ak,YM,PH
Employee,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Sudarshan,0.683509,0.302665,1.693723
sourabh,0.390528,0.166905,0.184502


In [43]:
sundar

Unnamed: 0_level_0,Ak,YM,PH
Employee,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Sudarshan,0.683509,0.302665,1.693723
sourabh,0.390528,0.166905,0.184502
