# How to Select Rows by Column Value in Pandas

In [1]:
import pandas as pd

# Source CSV: the highest and lowest point of each continent.
DATA_URL = 'https://raw.githubusercontent.com/softhints/Pandas-Tutorials/master/data/csv/extremes.csv'

# Load once; every example below selects rows from this same frame.
df = pd.read_csv(DATA_URL)
df

Unnamed: 0,Continent,Highest point,Elevation high,Lowest point,Elevation low
0,Asia,Mount Everest,8848,Dead Sea,−427
1,South America,Aconcagua,6960,Laguna del Carbón,−105
2,North America,Denali,6198,Death Valley,−86
3,Africa,Mount Kilimanjaro,5895,Lake Assal,−155
4,Europe,Mount Elbrus,5642,Caspian Sea,−28
5,Antarctica,Vinson Massif,4892,"Deep Lake, Vestfold Hills",−50
6,Australia,Puncak Jaya,4884,Lake Eyre,−15


## 3. Select rows by values with boolean indexing and `isin()`

### 3.1. What is boolean indexing?

In [2]:
# A boolean mask holds one flag per row: True keeps the row, False drops it.
selection = [
    True,   # 0 Asia
    False,  # 1 South America
    True,   # 2 North America
    False,  # 3 Africa
    True,   # 4 Europe
    False,  # 5 Antarctica
    True,   # 6 Australia
]

df[selection]

Unnamed: 0,Continent,Highest point,Elevation high,Lowest point,Elevation low
0,Asia,Mount Everest,8848,Dead Sea,−427
2,North America,Denali,6198,Death Valley,−86
4,Europe,Mount Elbrus,5642,Caspian Sea,−28
6,Australia,Puncak Jaya,4884,Lake Eyre,−15


### 3.2. Select Rows by Column Value with boolean indexing

In [3]:
# Element-wise test on the column: yields one boolean per row (a mask).
df['Continent'].str.startswith('A')

0     True
1    False
2    False
3     True
4    False
5     True
6     True
Name: Continent, dtype: bool

In [4]:
# Build the mask first, then pass it to [] to keep only the True rows.
starts_with_a = df['Continent'].str.startswith('A')
df[starts_with_a]

Unnamed: 0,Continent,Highest point,Elevation high,Lowest point,Elevation low
0,Asia,Mount Everest,8848,Dead Sea,−427
3,Africa,Mount Kilimanjaro,5895,Lake Assal,−155
5,Antarctica,Vinson Massif,4892,"Deep Lake, Vestfold Hills",−50
6,Australia,Puncak Jaya,4884,Lake Eyre,−15


In [5]:
# Comparing the column to a value gives a boolean mask; [] keeps the matching rows.
df[df['Continent'] == 'Africa']

Unnamed: 0,Continent,Highest point,Elevation high,Lowest point,Elevation low
3,Africa,Mount Kilimanjaro,5895,Lake Assal,−155


In [6]:
# isin() flags rows whose value is any member of the given list.
wanted = ['Africa', 'Europe']
in_wanted = df['Continent'].isin(wanted)
df[in_wanted]

Unnamed: 0,Continent,Highest point,Elevation high,Lowest point,Elevation low
3,Africa,Mount Kilimanjaro,5895,Lake Assal,−155
4,Europe,Mount Elbrus,5642,Caspian Sea,−28


## 4. Select rows with the `.loc[]` indexer

In [7]:
# A single index label returns that row as a Series.
df.loc[0]

Continent                  Asia
Highest point     Mount Everest
Elevation high             8848
Lowest point           Dead Sea
Elevation low              −427
Name: 0, dtype: object

In [8]:
# A list of index labels returns the matching rows as a DataFrame.
df.loc[[0,2,4]]

Unnamed: 0,Continent,Highest point,Elevation high,Lowest point,Elevation low
0,Asia,Mount Everest,8848,Dead Sea,−427
2,North America,Denali,6198,Death Valley,−86
4,Europe,Mount Elbrus,5642,Caspian Sea,−28


In [9]:
# Make 'Continent' the index so a row can be looked up by continent name.
by_continent = df.set_index('Continent')
by_continent.loc["Africa"]

Highest point     Mount Kilimanjaro
Elevation high                 5895
Lowest point             Lake Assal
Elevation low                  −155
Name: Africa, dtype: object

In [10]:
# .loc also accepts a boolean mask, just like plain [] indexing.
is_africa = df['Continent'] == 'Africa'
df.loc[is_africa]

Unnamed: 0,Continent,Highest point,Elevation high,Lowest point,Elevation low
3,Africa,Mount Kilimanjaro,5895,Lake Assal,−155


## 5. Select rows by position with `.iloc[]`

In [11]:
import numpy as np

# regex=False: match 'Mount' as a literal substring, not a regular expression.
# na=False: report missing values as False. Without it a missing value can
# stay in the mask as NaN, which np.flatnonzero counts as nonzero, so that
# row would be selected by mistake.
mask = df['Highest point'].str.contains('Mount', regex=False, na=False)

# .iloc takes integer positions, so convert the mask into the positions of
# its True entries.
pos = np.flatnonzero(mask)
df.iloc[pos]

Unnamed: 0,Continent,Highest point,Elevation high,Lowest point,Elevation low
0,Asia,Mount Everest,8848,Dead Sea,−427
3,Africa,Mount Kilimanjaro,5895,Lake Assal,−155
4,Europe,Mount Elbrus,5642,Caspian Sea,−28


## 6. Select rows with `df.query()`

In [12]:
# query() filters rows using a boolean expression written as a string.
df.query('Continent == "Europe"')

Unnamed: 0,Continent,Highest point,Elevation high,Lowest point,Elevation low
4,Europe,Mount Elbrus,5642,Caspian Sea,−28
