# Pandas DataFrames: select rows with NA values in any column

In [1]:
# Import libraries
import pandas as pd
import numpy as np

# Build a small example frame indexed by city name; each column
# deliberately contains missing values (NaN) so the selections below
# have something to find.
df = pd.DataFrame(
    data={
        'population': [2148000, np.nan, 861635, 232741, np.nan],
        'area_km': [105.4, 47.87, 240.6, np.nan, 78.26],
    },
    index=['Paris', 'Lyon', 'Marseille', 'Lille', 'Strasbourg'],
)
df

Unnamed: 0,population,area_km
Paris,2148000.0,105.4
Lyon,,47.87
Marseille,861635.0,240.6
Lille,232741.0,
Strasbourg,,78.26


## Select rows that have at least one NA value

In [2]:
# Usual approach: build a boolean mask that is True for every row
# holding at least one missing value, then index the frame with it.
df[df.isna().any(axis='columns')]

Unnamed: 0,population,area_km
Lyon,,47.87
Lille,232741.0,
Strasbourg,,78.26


In [3]:
# Same selection through .loc with a callable: the callable receives the
# frame being indexed, so this form slots into a method chain without
# needing a named intermediate variable.
df.loc[lambda frame: frame.isna().any(axis='columns')]

Unnamed: 0,population,area_km
Lyon,,47.87
Lille,232741.0,
Strasbourg,,78.26


## Filter out rows with NA values

In [4]:
# Keep only the complete rows: drop every row (axis='index') in which
# any value is missing (how='any'). A new frame is returned; df itself
# is left unchanged.
df.dropna(axis='index', how='any')

Unnamed: 0,population,area_km
Paris,2148000.0,105.4
Marseille,861635.0,240.6
