# Pandas Missing Data

In [1]:
import pandas as pd
import numpy as np

## Example Data

In [2]:
# Toy table for the examples below: columns A and C contain NaN entries,
# column B has a value in every row.
data = {
    "A": [10, 20, np.nan, 12],
    "B": [40, 50, 46, 13],
    "C": [11, np.nan, np.nan, 14],
}

In [3]:
# Build the example frame: each dict key becomes a column.
df = pd.DataFrame(data=data)

In [4]:
# Show the whole example frame (four rows) so the NaN positions are visible.
df

Unnamed: 0,A,B,C
0,10.0,40,11.0
1,20.0,50,
2,,46,
3,12.0,13,14.0


## pd.isnull

https://pandas.pydata.org/docs/reference/api/pandas.isnull.html

In [5]:
# Element-wise null mask: True wherever a value is missing.
# Function form of the same check as the df.isnull() method.
pd.isnull(df)

Unnamed: 0,A,B,C
0,False,False,False
1,False,False,True
2,True,False,True
3,False,False,False


https://www.kite.com/python/answers/how-to-find-rows-with-nan-values-in-a-pandas-dataframe-in-python

In [6]:
# Collapse the mask across the columns: one boolean per row,
# True when that row holds at least one null.
df.isnull().any(axis="columns")

0    False
1     True
2     True
3    False
dtype: bool

In [7]:
# Keep only the rows flagged by the per-row null mask.
df.loc[df.isnull().any(axis=1)]

Unnamed: 0,A,B,C
1,20.0,50,
2,,46,


## pd.isna

### Which rows have NaNs?

https://stackoverflow.com/questions/43424199/display-rows-with-one-or-more-nan-values-in-pandas-dataframe

In [8]:
# Element-wise NaN mask: True wherever a value is missing.
# Function form of the same check as the df.isna() method.
pd.isna(df)

Unnamed: 0,A,B,C
0,False,False,False
1,False,False,True
2,True,False,True
3,False,False,False


In [9]:
# One boolean per row: True when any column in that row is NaN.
df.isna().any(axis="columns")

0    False
1     True
2     True
3    False
dtype: bool

In [10]:
# Select the rows that contain at least one NaN.
# Uses isna() (not the isnull() alias) to stay consistent with the rest of
# this pd.isna section; the selected rows are the same either way.
df[df.isna().any(axis=1)]

Unnamed: 0,A,B,C
1,20.0,50,
2,,46,


### Which columns have NaNs?

https://stackoverflow.com/questions/36226083/how-to-find-which-columns-contain-any-nan-value-in-pandas-dataframe

In [11]:
# Re-display the example frame for reference before the column-wise checks.
df

Unnamed: 0,A,B,C
0,10.0,40,11.0
1,20.0,50,
2,,46,
3,12.0,13,14.0


In [12]:
# Reduce the NaN mask down the rows (one boolean per column), then use it
# to pick the labels of the columns that contain at least one NaN.
df.columns[df.isna().any(axis="index")].tolist()

['A', 'C']

### Which columns don't have NaNs?

In [13]:
# Columns with no NaN at all: every entry of the column must be non-missing.
# notna().all() replaces the `any(...) == False` comparison, which is
# equivalent but an anti-idiom for boolean masks.
list(df.columns[df.notna().all(axis=0)])

['B']

In [17]:
# Same selection as a set: labels of the columns where every value is present.
# notna().all() replaces the `any(...) == False` comparison on the mask.
set(df.columns[df.notna().all(axis=0)])

{'B'}

### How many NaNs are there?

In [14]:
# Re-display the example frame for reference before counting the NaNs.
df

Unnamed: 0,A,B,C
0,10.0,40,11.0
1,20.0,50,
2,,46,
3,12.0,13,14.0


In [15]:
# Summing the boolean mask down the rows counts the NaNs in each column.
df.isna().sum(axis=0)

A    1
B    0
C    2
dtype: int64

In [16]:
# Total number of NaNs in the whole frame.
(
    df.isna()
    .sum()  # NaNs per column
    .sum()  # summed over all columns
)

3