# How to Select Rows by List of Values in Pandas DataFrame

In [1]:
import pandas as pd

# Location of the sample CSV used throughout this notebook
# (one row per continent with its highest and lowest points).
DATA_URL = 'https://raw.githubusercontent.com/softhints/Pandas-Tutorials/master/data/csv/extremes.csv'

df = pd.read_csv(DATA_URL)
df

Unnamed: 0,Continent,Highest point,Elevation high,Lowest point,Elevation low
0,Asia,Mount Everest,8848,Dead Sea,−427
1,South America,Aconcagua,6960,Laguna del Carbón,−105
2,North America,Denali,6198,Death Valley,−86
3,Africa,Mount Kilimanjaro,5895,Lake Assal,−155
4,Europe,Mount Elbrus,5642,Caspian Sea,−28
5,Antarctica,Vinson Massif,4892,"Deep Lake, Vestfold Hills",−50
6,Australia,Puncak Jaya,4884,Lake Eyre,−15


## 3. Select rows by values with method isin()

### 3.1. Positive selection

In [2]:
# isin() compares whole values, so 'America' matches neither 'South America'
# nor 'North America' -- only the Asia and Europe rows are selected.
sel_continents = ['America', 'Europe', 'Asia']

in_selection = df['Continent'].isin(sel_continents)
df[in_selection]

Unnamed: 0,Continent,Highest point,Elevation high,Lowest point,Elevation low
0,Asia,Mount Everest,8848,Dead Sea,−427
4,Europe,Mount Elbrus,5642,Caspian Sea,−28


### 3.2. Negative selection

In [3]:
# ~ inverts the boolean mask, keeping rows whose continent is NOT in the list.
not_in_selection = ~df['Continent'].isin(sel_continents)
df[not_in_selection]

Unnamed: 0,Continent,Highest point,Elevation high,Lowest point,Elevation low
1,South America,Aconcagua,6960,Laguna del Carbón,−105
2,North America,Denali,6198,Death Valley,−86
3,Africa,Mount Kilimanjaro,5895,Lake Assal,−155
5,Antarctica,Vinson Massif,4892,"Deep Lake, Vestfold Hills",−50
6,Australia,Puncak Jaya,4884,Lake Eyre,−15


## 4. Query rows by values - method df.query()

### 4.1. List of values

In [4]:
# Inside a query string the @ prefix refers to a Python variable from the
# surrounding scope instead of a DataFrame column. `in` matches whole values,
# so only the Asia and Europe rows are returned.
sel_continents = ['America', 'Europe', 'Asia']
df.query('Continent in @sel_continents')

Unnamed: 0,Continent,Highest point,Elevation high,Lowest point,Elevation low
0,Asia,Mount Everest,8848,Dead Sea,−427
4,Europe,Mount Elbrus,5642,Caspian Sea,−28


### 4.2. Range of numbers

In [5]:
# Backticks quote a column name that contains a space; the chained comparison
# keeps rows whose `Elevation high` is strictly between 5000 and 6000.
df.query('5000 < `Elevation high` < 6000')

Unnamed: 0,Continent,Highest point,Elevation high,Lowest point,Elevation low
3,Africa,Mount Kilimanjaro,5895,Lake Assal,−155
4,Europe,Mount Elbrus,5642,Caspian Sea,−28


### 4.3. Select rows by multiple queries

In [6]:
# Two conditions joined with `and`: the continent must be in the list AND the
# highest point must exceed 8000. Parentheses delimit each sub-expression.
sel_continents = ['America', 'Europe', 'Asia']
df.query('(Continent in @sel_continents) and (`Elevation high` > 8000)')

Unnamed: 0,Continent,Highest point,Elevation high,Lowest point,Elevation low
0,Asia,Mount Everest,8848,Dead Sea,−427


## 5. Select rows by values - `df.loc` + `df.apply(lambda ...)`

In [7]:
sel_continents = ['America', 'Europe', 'Asia']

# axis=1 passes each row to the lambda; the result is one boolean per row,
# which df.loc then uses as a row filter.
row_matches = df.apply(lambda row: row['Continent'] in sel_continents, axis=1)
df.loc[row_matches]

Unnamed: 0,Continent,Highest point,Elevation high,Lowest point,Elevation low
0,Asia,Mount Everest,8848,Dead Sea,−427
4,Europe,Mount Elbrus,5642,Caspian Sea,−28


In [8]:
# Lower-case both the wanted values and each row's continent so the
# membership test ignores letter case.
wanted_continents = ['America', 'Europe', 'Asia']
sel_continents = [name.lower() for name in wanted_continents]

row_matches = df.apply(lambda row: row['Continent'].lower() in sel_continents, axis=1)
df.loc[row_matches]

Unnamed: 0,Continent,Highest point,Elevation high,Lowest point,Elevation low
0,Asia,Mount Everest,8848,Dead Sea,−427
4,Europe,Mount Elbrus,5642,Caspian Sea,−28


In [9]:
def some_function(x):
    """Return True when ``x`` is one of the values in ``sel_continents``.

    ``sel_continents`` is read from the notebook's global namespace each time
    the function is called, so it must be assigned before the function is
    applied (it does not need to exist when the function is defined).

    Parameters
    ----------
    x : object
        A single value to test, e.g. one element of the 'Continent' column.

    Returns
    -------
    bool
        Result of the membership test ``x in sel_continents``.
    """
    # The membership test already yields a bool; no if/else is needed.
    return x in sel_continents

sel_continents = ['America', 'Europe', 'Asia']

# Series.apply calls some_function once per 'Continent' value; the resulting
# boolean Series is used as the row filter.
continent_matches = df['Continent'].apply(some_function)
df[continent_matches]

Unnamed: 0,Continent,Highest point,Elevation high,Lowest point,Elevation low
0,Asia,Mount Everest,8848,Dead Sea,−427
4,Europe,Mount Elbrus,5642,Caspian Sea,−28
