In [1]:
import pandas as pd

# Let's create a DataFrame first

In [2]:
# Raw player attributes: one list per field, all in the same player order
names = [
    'L. Messi',
    'Cristiano Ronaldo',
    'Neymar Jr',
    'J. Oblak',
    'E. Hazard',
]
age = [32, 34, 27, 26, 28]
height_cm = [170, 187, 175, 188, 175]
nationality = [
    'Argentina',
    'Portugal',
    'Brazil',
    'Slovenia',
    'Belgium',
]
club = [
    'Paris Saint-Germain',
    'Manchester United',
    'Paris Saint-Germain',
    'Atlético Madrid',
    'Real Madrid',
]

# Columns shared by both frames
player_columns = {
    'age': age,
    'height_cm': height_cm,
    'nationality': nationality,
    'club': club,
}

# df: the player names are the row labels (index)
df = pd.DataFrame(player_columns, index=names)

# df1: default integer index, with the names kept as an ordinary first column
df1 = pd.DataFrame({'names': names, **player_columns})


In [3]:
# Show the frame whose row labels are the player names
df

Unnamed: 0,age,height_cm,nationality,club
L. Messi,32,170,Argentina,Paris Saint-Germain
Cristiano Ronaldo,34,187,Portugal,Manchester United
Neymar Jr,27,175,Brazil,Paris Saint-Germain
J. Oblak,26,188,Slovenia,Atlético Madrid
E. Hazard,28,175,Belgium,Real Madrid


# Single Value Selection

In [4]:
# Single value with .loc: row label 'Neymar Jr', column label 'height_cm'
df.loc['Neymar Jr','height_cm']

175

In [5]:
# Single value with .iloc: row position 2, column position 1 (both zero-based),
# i.e. the same cell as .loc['Neymar Jr', 'height_cm']
df.iloc[2,1]

175

# Single row selection 

In [6]:
# A list of row labels (even with a single entry) returns a DataFrame
df.loc[['Cristiano Ronaldo'],:] 

Unnamed: 0,age,height_cm,nationality,club
Cristiano Ronaldo,34,187,Portugal,Manchester United


In [7]:
# A scalar row label returns a Series, not a DataFrame; its dtype is object
# here because the row mixes integers and strings
df.loc['Cristiano Ronaldo',:]

age                           34
height_cm                    187
nationality             Portugal
club           Manchester United
Name: Cristiano Ronaldo, dtype: object

In [8]:
# Positional counterpart of .loc[['Cristiano Ronaldo'], :]:
# a list of row positions returns a DataFrame
df.iloc[[1],:]

Unnamed: 0,age,height_cm,nationality,club
Cristiano Ronaldo,34,187,Portugal,Manchester United


In [9]:
# Positional counterpart of .loc['Cristiano Ronaldo', :]:
# a scalar row position returns a Series
df.iloc[1,:]

age                           34
height_cm                    187
nationality             Portugal
club           Manchester United
Name: Cristiano Ronaldo, dtype: object

# Single column selection

In [10]:
# A list of column labels (even with a single entry) returns a DataFrame
df.loc[:,['age']]

Unnamed: 0,age
L. Messi,32
Cristiano Ronaldo,34
Neymar Jr,27
J. Oblak,26
E. Hazard,28


In [11]:
# A scalar column label returns a Series; its dtype is int64 for 'age'
df.loc[:,'age']

L. Messi             32
Cristiano Ronaldo    34
Neymar Jr            27
J. Oblak             26
E. Hazard            28
Name: age, dtype: int64

In [12]:
# Positional counterpart of .loc[:, ['age']]:
# a list of column positions returns a DataFrame
df.iloc[:,[0]]

Unnamed: 0,age
L. Messi,32
Cristiano Ronaldo,34
Neymar Jr,27
J. Oblak,26
E. Hazard,28


In [13]:
# Positional counterpart of .loc[:, 'age']:
# a scalar column position returns a Series
df.iloc[:,0]

L. Messi             32
Cristiano Ronaldo    34
Neymar Jr            27
J. Oblak             26
E. Hazard            28
Name: age, dtype: int64

# Multiple rows and columns selection

In [14]:
# ex1: label slice for the rows plus a list of column labels.
# .loc slices include BOTH endpoints, so 'E. Hazard' is part of the result.
df.loc['L. Messi':'E. Hazard',['age','height_cm']]

Unnamed: 0,age,height_cm
L. Messi,32,170
Cristiano Ronaldo,34,187
Neymar Jr,27,175
J. Oblak,26,188
E. Hazard,28,175


In [15]:
# ex2: explicit lists of row labels and column labels
df.loc[['Neymar Jr','E. Hazard'],['nationality','club']]

Unnamed: 0,nationality,club
Neymar Jr,Brazil,Paris Saint-Germain
E. Hazard,Belgium,Real Madrid


In [16]:
# ex1 with .iloc: positional slices exclude the stop value,
# so 0:5 selects rows 0-4 and 0:2 selects columns 0-1
df.iloc[0:5,0:2]

Unnamed: 0,age,height_cm
L. Messi,32,170
Cristiano Ronaldo,34,187
Neymar Jr,27,175
J. Oblak,26,188
E. Hazard,28,175


In [17]:
# ex2 with .iloc: explicit lists of row positions and column positions
df.iloc[[2,4],[2,3]]

Unnamed: 0,nationality,club
Neymar Jr,Brazil,Paris Saint-Germain
E. Hazard,Belgium,Real Madrid


In [18]:
# Show the frame with the default integer index, where 'names' is a regular column
df1

Unnamed: 0,names,age,height_cm,nationality,club
0,L. Messi,32,170,Argentina,Paris Saint-Germain
1,Cristiano Ronaldo,34,187,Portugal,Manchester United
2,Neymar Jr,27,175,Brazil,Paris Saint-Germain
3,J. Oblak,26,188,Slovenia,Atlético Madrid
4,E. Hazard,28,175,Belgium,Real Madrid


In [19]:
# On df1 the row labels are integers. .loc still treats 0:2 as LABELS,
# so the slice is inclusive and the row labelled 2 is returned.
df1.loc[0:2,'names':'height_cm']

Unnamed: 0,names,age,height_cm
0,L. Messi,32,170
1,Cristiano Ronaldo,34,187
2,Neymar Jr,27,175


In [20]:
# .iloc treats 0:3 as POSITIONS and excludes the stop value,
# so this selects rows 0-2 and columns 0-2
df1.iloc[0:3,0:3]

Unnamed: 0,names,age,height_cm
0,L. Messi,32,170
1,Cristiano Ronaldo,34,187
2,Neymar Jr,27,175


# Selecting with conditions

In [21]:
# Boolean mask: True for the players taller than 180 cm
taller_than_180 = df['height_cm'] > 180
# .loc keeps the rows where the mask is True; ':' keeps every column
df.loc[taller_than_180, :]

Unnamed: 0,age,height_cm,nationality,club
Cristiano Ronaldo,34,187,Portugal,Manchester United
J. Oblak,26,188,Slovenia,Atlético Madrid


In [22]:
# Same filter through .iloc: the label-indexed boolean Series is first
# converted to a plain list of booleans, which .iloc applies by position
taller_than_180 = df['height_cm'] > 180
df.iloc[list(taller_than_180), :]

Unnamed: 0,age,height_cm,nationality,club
Cristiano Ronaldo,34,187,Portugal,Manchester United
J. Oblak,26,188,Slovenia,Atlético Madrid


In [23]:
columns = ['age', 'height_cm', 'club']
df.loc[df['height_cm']>180]

Unnamed: 0,age,height_cm,nationality,club
Cristiano Ronaldo,34,187,Portugal,Manchester United
J. Oblak,26,188,Slovenia,Atlético Madrid


# Selecting with multiple conditions

In [24]:
# Build each condition as its own boolean mask, then combine them
# element-wise with & (both must hold for a row to be kept)
taller_than_170 = df['height_cm'] > 170
plays_for_man_utd = df['club'] == 'Manchester United'
df.loc[taller_than_170 & plays_for_man_utd, :]

Unnamed: 0,age,height_cm,nationality,club
Cristiano Ronaldo,34,187,Portugal,Manchester United


# Time Difference

In [25]:
import time
# Row labels to select (0, 1, 2, 3 on df1's integer index)
rows = range(0,4)
# Time one row selection using .loc[].
# perf_counter() is the clock intended for measuring short intervals;
# time.time() is a wall clock whose resolution can be too coarse for a
# sub-millisecond operation and may yield an elapsed time of 0.0.
loc_start_time = time.perf_counter()
df1.loc[rows]
loc_end_time = time.perf_counter()

# Elapsed time in seconds for this single run
loc_time = loc_end_time - loc_start_time

print("Time using.loc[] : {} seconds".format(
    loc_time))


Time using.loc[] : 0.0013022422790527344 seconds


In [26]:
import time
# Row positions to select (0, 1, 2, 3)
rows = range(0,4)
# Time one row selection using .iloc[].
# perf_counter() is the clock intended for measuring short intervals;
# time.time() is a wall clock whose resolution can be too coarse for a
# sub-millisecond operation and may yield an elapsed time of 0.0.
iloc_start_time = time.perf_counter()
df1.iloc[rows]
iloc_end_time = time.perf_counter()

# Elapsed time in seconds for this single run
iloc_time = iloc_end_time - iloc_start_time

print("Time using.iloc[] : {} seconds".format(
    iloc_time))

Time using.iloc[] : 0.0007312297821044922 seconds


In [27]:
# Percentage by which .iloc[] was faster than .loc[] in the runs above:
# the time saved, relative to the .iloc[] time.
# (loc_time/iloc_time)*100 on its own is loc's time expressed as a percentage
# of iloc's time, which overstates the speed-up by 100 percentage points.
round((loc_time - iloc_time) / iloc_time * 100, 2)

78.09