# Selecting pandas DataFrame Rows Based On Conditions

## Preliminaries

In [1]:
# Import modules
import pandas as pd
import numpy as np

In [2]:
# Build a five-row example frame; the last three first names are
# deliberately missing (NaN) so later cells can filter on nulls
raw_data = dict(
    first_name=['Jason', 'Molly'] + [np.nan] * 3,
    nationality=['USA', 'USA', 'France', 'UK', 'UK'],
    age=[42, 52, 36, 24, 70],
)

# Pass the column list explicitly so the display order is fixed
df = pd.DataFrame(
    raw_data,
    columns=['first_name', 'nationality', 'age'],
)
df

Unnamed: 0,first_name,nationality,age
0,Jason,USA,42
1,Molly,USA,52
2,,France,36
3,,UK,24
4,,UK,70


## Method 1: Using Boolean Variables

In [3]:
# Boolean mask: True on rows whose nationality is exactly "USA"
american = df['nationality'].eq("USA")

# Boolean mask: True on rows whose age is strictly greater than 50
elderly = df['age'].gt(50)

# Keep only the rows where both masks are True (element-wise AND)
df.loc[american & elderly]

Unnamed: 0,first_name,nationality,age
1,Molly,USA,52


## Method 2: Using variable attributes

In [4]:
# Keep rows that have a first name recorded AND whose nationality is USA;
# both conditions are built inline from Series methods rather than stored
# in separate mask variables
df.loc[df['first_name'].notna() & df['nationality'].eq("USA")]

Unnamed: 0,first_name,nationality,age
0,Jason,USA,42
1,Molly,USA,52


## Method 3: Using `isin` with an Iterable

In [5]:
# Keep rows that have a first name recorded AND whose nationality is not in
# the given list; `~` negates the membership mask returned by isin
df.loc[df['first_name'].notna() & ~df['nationality'].isin(["UK"])]

Unnamed: 0,first_name,nationality,age
0,Jason,USA,42
1,Molly,USA,52
