In [4]:
import pandas as pd
import numpy as np

In [8]:
# Seed NumPy's legacy global RNG so the random values below are reproducible.
np.random.seed(101)

# 5x4 frame of standard-normal draws: rows labelled A-E, columns Col1-Col4.
df = pd.DataFrame(
    np.random.randn(5, 4),
    index=list('ABCDE'),
    columns=[f'Col{n}' for n in range(1, 5)],
)
df

Unnamed: 0,Col1,Col2,Col3,Col4
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


## Rows or Rows & Columns can be selected by label using '.loc[]'

In [7]:
# Select one row by its index label. A single (non-list) label returns the row as a Series
# whose index is the column names.

df.loc['A']

Col1    2.706850
Col2    0.628133
Col3    0.907969
Col4    0.503826
Name: A, dtype: float64

In [12]:
# A row label plus a column label returns the content of that single cell.
# Both labels go into ONE .loc call instead of chaining df.loc['A']['Col2']: chaining
# first builds the whole row Series and then indexes it again, and it is unreliable
# when the same pattern is later used for assignment.

df.loc['A', 'Col2']

0.6281327087844596

In [55]:
# Slice rows by label. A .loc label slice includes BOTH endpoints,
# so rows 'A' through 'D' (inclusive) are returned.

df.loc['A':'D']

Unnamed: 0,Col1,Col2,Col3,Col4
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057


In [240]:
# Pass a list of labels to select exactly those rows (here 'A' and 'C'); the result is a DataFrame.

df.loc[['A','C']]

Unnamed: 0,Col1,Col2,Col3,Col4
A,2.70685,0.628133,0.907969,0.503826
C,-2.018168,0.740122,0.528813,-0.589001


In [242]:
# Row slice from 'A' to the end, combined with a one-element column list.
# Because the column selector is a list, the result stays a DataFrame.
df.loc['A':,['Col3']]

Unnamed: 0,Col3
A,0.907969
B,-0.848077
C,0.528813
D,-0.933237
E,2.605967


In [53]:
# List of row labels with a single (scalar) column label: returns those cells of 'Col3' as a Series.

df.loc[['A','C'], 'Col3']

A    0.907969
C    0.528813
Name: Col3, dtype: float64

In [59]:
# Lists of row labels AND column labels (not ranges: only 'A' and 'E', 'Col1' and 'Col3' are picked).
# Passing both selectors as lists keeps the result a DataFrame.

df.loc[['A','E'],['Col1','Col3']]

Unnamed: 0,Col1,Col3
A,2.70685,0.907969
E,0.190794,2.605967


In [73]:
# The row selector is a scalar label rather than a list, so the result collapses to a Series.

df.loc['A',['Col2','Col4']]

Col2    0.628133
Col4    0.503826
Name: A, dtype: float64

In [41]:
# Redisplay the full DataFrame for reference before the conditional selections below.
df

Unnamed: 0,Col1,Col2,Col3,Col4
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [88]:
# Boolean mask on a single column: keep only the rows where Col3 is greater than 0.6.

df.loc[df['Col3'] > .6]

Unnamed: 0,Col1,Col2,Col3,Col4
A,2.70685,0.628133,0.907969,0.503826
E,0.190794,1.978757,2.605967,0.683509


In [105]:
# Row-wise comparison of two columns: keep the rows where Col1 is at least as large as Col3.

df.loc[df['Col1'] >= df['Col3']]

Unnamed: 0,Col1,Col2,Col3,Col4
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057


In [113]:
# Combine two conditions with | (element-wise OR): rows where Col1 > 1 or Col4 < 0.
# Each condition is wrapped in its own parentheses.
df.loc[(df['Col1'] > 1) | (df['Col4'] < 0)]

Unnamed: 0,Col1,Col2,Col3,Col4
A,2.70685,0.628133,0.907969,0.503826
C,-2.018168,0.740122,0.528813,-0.589001


## .iloc[] - Purely integer-location based indexing for selection by position

In [117]:
# Redisplay the full DataFrame for reference before the position-based selections below.
df

Unnamed: 0,Col1,Col2,Col3,Col4
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [133]:
# Select a row by integer position (4 is the fifth row, labelled 'E'); a scalar position returns a Series.

df.iloc[4]

Col1    0.190794
Col2    1.978757
Col3    2.605967
Col4    0.683509
Name: E, dtype: float64

In [137]:
# Same row by position, but passed as a one-element list, so the result is a one-row DataFrame.

df.iloc[[4]]

Unnamed: 0,Col1,Col2,Col3,Col4
E,0.190794,1.978757,2.605967,0.683509


In [157]:
# Positional equivalent of df.loc['B', 'Col4']: .iloc[row_position, column_position] returns a single cell.

df.iloc[1,3]

0.6059653494949336

In [168]:
# Positional equivalent of df.loc[['E'], ['Col3']]: list selectors on both axes give a 1x1 DataFrame.

df.iloc[[4],[2]]

Unnamed: 0,Col3
E,2.605967


In [183]:
# Positional slices exclude the stop position, so :3 covers positions 0-2 on each axis.
# Equivalent to df.loc[:'C', :'Col3'], where the label slice includes its endpoints.
df.iloc[:3,:3]

Unnamed: 0,Col1,Col2,Col3
A,2.70685,0.628133,0.907969
B,0.651118,-0.319318,-0.848077
C,-2.018168,0.740122,0.528813


## .query() -- DataFrame.query(expr, *, inplace=False, **kwargs)

In [255]:
# The query expression is passed as a string (single or double quotes both work).
# Equivalent to df[df['Col4'] > 0].

df.query('Col4 > 0')

Unnamed: 0,Col1,Col2,Col3,Col4
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [200]:
# Conditions can be combined with `and`: rows where Col4 > 0 and Col1 > 0.5.
df.query(" Col4 > 0 and Col1 > 0.5 ")

Unnamed: 0,Col1,Col2,Col3,Col4
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965


In [251]:
# `and` binds tighter than `or`, so this is evaluated as
# Col2 > 0.8 or (Col2 < 0 and Col1 > 0.5).

df.query('Col2 > 0.8 or Col2 < 0 and Col1 > 0.5')

Unnamed: 0,Col1,Col2,Col3,Col4
B,0.651118,-0.319318,-0.848077,0.605965
E,0.190794,1.978757,2.605967,0.683509


In [249]:
# Parentheses override the default precedence: (Col2 > 0.8 or Col2 < 0) is evaluated first,
# and its result is then combined with Col1 > 0.5.

df.query('(Col2 > 0.8 or Col2 < 0) and Col1 > 0.5')

Unnamed: 0,Col1,Col2,Col3,Col4
B,0.651118,-0.319318,-0.848077,0.605965


In [253]:
# Practice - select the rows where Col3 is greater than 0.4 or Col4 is greater than 0.7,
# then keep only those rows where Col1 is also smaller than 0.2.

df.query('(Col3 > .4 or Col4> .7) and Col1 < .2')

Unnamed: 0,Col1,Col2,Col3,Col4
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509
