In [1]:
# importing pandas and numpy
import pandas as pd
import numpy as np

# create a sample dataframe with seven rows; favourite_color has two missing values
data = pd.DataFrame({
    'age' :     [ 10, 22, 13, 21, 12, 11, 17],
    'section' : [ 'A', 'B', 'C', 'B', 'B', 'A', 'A'],
    'city' :    [ 'Gurgaon', 'Delhi', 'Mumbai', 'Delhi', 'Mumbai', 'Delhi', 'Mumbai'],
    'gender' :  [ 'M', 'F', 'F', 'M', 'M', 'M', 'F'],
    # use np.nan: the np.NAN alias was removed in NumPy 2.0 and raises AttributeError there
    'favourite_color' : [ 'red', np.nan, 'yellow', np.nan, 'black', 'green', 'red']
})

# view the data
print(data)

   age section     city gender favourite_color
0   10       A  Gurgaon      M             red
1   22       B    Delhi      F             NaN
2   13       C   Mumbai      F          yellow
3   21       B    Delhi      M             NaN
4   12       B   Mumbai      M           black
5   11       A    Delhi      M           green
6   17       A   Mumbai      F             red


# Find all the rows based on a condition in a column: loc

In [2]:
 # keep only the rows whose age is at least 15
data.loc[data['age'] >= 15]

Unnamed: 0,age,section,city,gender,favourite_color
1,22,B,Delhi,F,
3,21,B,Delhi,M,
6,17,A,Mumbai,F,red


# Find all the rows with more than one condition

In [3]:
# keep only the rows where age is at least 12 AND gender is 'M';
# each comparison is parenthesised because & binds tighter than >= and ==
data.loc[(data['age'] >= 12) & (data['gender'] == 'M')]

Unnamed: 0,age,section,city,gender,favourite_color
3,21,B,Delhi,M,
4,12,B,Mumbai,M,black


# Select a range of rows using loc

In [4]:
# loc can also slice the dataframe over a range of index labels.
# Unlike ordinary Python slicing, a loc slice includes BOTH endpoints,
# so 1:3 returns the rows labelled 1, 2 and 3.
# loc always slices by label (this also works when the labels are not numbers);
# to slice by integer position instead, use iloc.
data.loc[1:3]

Unnamed: 0,age,section,city,gender,favourite_color
1,22,B,Delhi,F,
2,13,C,Mumbai,F,yellow
3,21,B,Delhi,M,


# Select only required columns with a condition

If our dataset contains hundreds of columns and we want to view only a few of them, we can add a list of columns after the condition within the loc statement itself:

In [5]:
# rows with age of at least 12, restricted to the city and gender columns
data.loc[data['age'] >= 12, ['city', 'gender']]

Unnamed: 0,city,gender
1,Delhi,F
2,Mumbai,F
3,Delhi,M
4,Mumbai,M
6,Mumbai,F


# Update the values of a particular column on selected rows

If the values in age are greater than or equal to 12, then we want to update the values of the column section to be “M”. We could do this with a for loop as well, but if our dataset is large, it would take forever to complete the task. Using loc in Pandas, we can do this within seconds, even on bigger datasets! We just need to specify the condition followed by the target column and then assign the value with which we want to update:

In [6]:
# overwrite section with 'M' on every row whose age is at least 12
# (this modifies data in place, so later cells see the updated values)
data.loc[data['age'] >= 12, 'section'] = 'M'
data

Unnamed: 0,age,section,city,gender,favourite_color
0,10,A,Gurgaon,M,red
1,22,M,Delhi,F,
2,13,M,Mumbai,F,yellow
3,21,M,Delhi,M,
4,12,M,Mumbai,M,black
5,11,A,Delhi,M,green
6,17,M,Mumbai,F,red


# Update the values of multiple columns on selected rows

If the value in the column age is greater than or equal to 20, then loc will update the values in the column section with “S” and the values in the column city with “Pune”:

In [7]:
# on rows whose age is at least 20, set section to 'S' and city to 'Pune';
# the values on the right are matched to the listed columns in order
data.loc[data['age'] >= 20, ['section', 'city']] = ['S', 'Pune']
data

Unnamed: 0,age,section,city,gender,favourite_color
0,10,A,Gurgaon,M,red
1,22,S,Pune,F,
2,13,M,Mumbai,F,yellow
3,21,S,Pune,M,
4,12,M,Mumbai,M,black
5,11,A,Delhi,M,green
6,17,M,Mumbai,F,red


# iloc
Select rows with indices using iloc

When we are using iloc, we need to specify the rows and columns by their integer position. If we want to select only the first and third rows, we simply need to put their positions into a list in the iloc statement with our dataframe:

In [8]:
# select rows by integer position: positions 0 and 2 are the first and third rows
data.iloc[[0,2]]

Unnamed: 0,age,section,city,gender,favourite_color
0,10,A,Gurgaon,M,red
2,13,M,Mumbai,F,yellow


# Select rows with particular indices and particular columns
With iloc, we need to provide the integer position of each column instead of its name:

In [9]:
# rows at positions 0 and 2, columns at positions 1 and 3 (section and gender)
data.iloc[[0,2],[1,3]]

Unnamed: 0,section,gender
0,A,M
2,M,F


# Select a range of rows using iloc

In [10]:
# iloc slices by position and excludes the stop value, so 1:3 returns
# the rows at positions 1 and 2 only (compare with loc[1:3], which includes 3)
data.iloc[1:3]

Unnamed: 0,age,section,city,gender,favourite_color
1,22,S,Pune,F,
2,13,M,Mumbai,F,yellow


# Select a range of rows and columns using iloc

In [11]:
# rows at positions 1 and 2, columns at positions 2 and 3 (city and gender);
# the stop value of each slice is excluded
data.iloc[1:3,2:4]

Unnamed: 0,city,gender
1,Pune,F
2,Mumbai,F
