# Count and Remove NaNs in DataFrame

https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.sum.html

https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.dropna.html

In [2]:
import pandas as pd
import numpy as np

## Create dataframe

In [31]:
# 10 rows x 6 columns of random floats; labels default to 0..9 and 0..5.
df = pd.DataFrame(data=np.random.random(size=(10, 6)))
df

Unnamed: 0,0,1,2,3,4,5
0,0.432736,0.732597,0.425808,0.783352,0.019382,0.4153
1,0.767546,0.566711,0.601399,0.304204,0.333865,0.698684
2,0.670369,0.507466,0.090007,0.62019,0.835883,0.185997
3,0.828716,0.390336,0.684362,0.990188,0.680699,0.706278
4,0.460318,0.447486,0.222043,0.69947,0.513488,0.398697
5,0.990892,0.344408,0.195109,0.702128,0.269169,0.274922
6,0.040426,0.913309,0.502228,0.809862,0.937477,0.930431
7,0.994586,0.890029,0.666244,0.618129,0.435536,0.211373
8,0.052044,0.463306,0.343766,0.176876,0.660021,0.36962
9,0.129857,0.418827,0.88146,0.614465,0.920535,0.77799


## Replace some values with NaNs

In [37]:
# Blank out the whole of row 8, then columns 1 and 3 on rows 0, 4 and 6.
# The None values show up as NaN in the frame (see the output below).
df.iloc[8] = None
df.iloc[[0, 4, 6], [1, 3]] = None
df

Unnamed: 0,0,1,2,3,4,5
0,0.432736,,0.425808,,0.019382,0.4153
1,0.767546,0.566711,0.601399,0.304204,0.333865,0.698684
2,0.670369,0.507466,0.090007,0.62019,0.835883,0.185997
3,0.828716,0.390336,0.684362,0.990188,0.680699,0.706278
4,0.460318,,0.222043,,0.513488,0.398697
5,0.990892,0.344408,0.195109,0.702128,0.269169,0.274922
6,0.040426,,0.502228,,0.937477,0.930431
7,0.994586,0.890029,0.666244,0.618129,0.435536,0.211373
8,,,,,,
9,0.129857,0.418827,0.88146,0.614465,0.920535,0.77799


## Identify NaNs


In [38]:
# Boolean mask with the same shape as df: True wherever a value is missing.
df.isna()

Unnamed: 0,0,1,2,3,4,5
0,False,True,False,True,False,False
1,False,False,False,False,False,False
2,False,False,False,False,False,False
3,False,False,False,False,False,False
4,False,True,False,True,False,False
5,False,False,False,False,False,False
6,False,True,False,True,False,False
7,False,False,False,False,False,False
8,True,True,True,True,True,True
9,False,False,False,False,False,False


## Count NaNs

### Row-wise

In [39]:
# Each True counts as 1, so summing across the columns gives NaNs per row.
df.isna().sum(axis="columns")

0    2
1    0
2    0
3    0
4    2
5    0
6    2
7    0
8    6
9    0
dtype: int64

### Column-wise

In [40]:
# Each True counts as 1, so summing down the rows gives NaNs per column.
df.isna().sum(axis="index")

0    1
1    4
2    1
3    4
4    1
5    1
dtype: int64

## Drop NaNs

### Drop rows with all NaNs


In [43]:
# Remove rows in which every value is NaN; inplace=True mutates df itself.
df.dropna(axis="index", how="all", inplace=True)
df

Unnamed: 0,0,1,2,3,4,5
0,0.432736,,0.425808,,0.019382,0.4153
1,0.767546,0.566711,0.601399,0.304204,0.333865,0.698684
2,0.670369,0.507466,0.090007,0.62019,0.835883,0.185997
3,0.828716,0.390336,0.684362,0.990188,0.680699,0.706278
4,0.460318,,0.222043,,0.513488,0.398697
5,0.990892,0.344408,0.195109,0.702128,0.269169,0.274922
6,0.040426,,0.502228,,0.937477,0.930431
7,0.994586,0.890029,0.666244,0.618129,0.435536,0.211373
9,0.129857,0.418827,0.88146,0.614465,0.920535,0.77799


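Note that the row with index 8, the only row in which every value was NaN, has been removed. Because `inplace=True` was used, `df` itself was modified rather than a copy being returned.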


### Drop columns with any NaNs

In [44]:
# Return a copy without any column that holds at least one NaN; df is untouched.
df.dropna(axis="columns", how="any")

Unnamed: 0,0,2,4,5
0,0.432736,0.425808,0.019382,0.4153
1,0.767546,0.601399,0.333865,0.698684
2,0.670369,0.090007,0.835883,0.185997
3,0.828716,0.684362,0.680699,0.706278
4,0.460318,0.222043,0.513488,0.398697
5,0.990892,0.195109,0.269169,0.274922
6,0.040426,0.502228,0.937477,0.930431
7,0.994586,0.666244,0.435536,0.211373
9,0.129857,0.88146,0.920535,0.77799


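Only the columns that were free of NaNs (0, 2, 4 and 5) are retained in the output; columns 1 and 3 are dropped. Because `inplace=False`, a new DataFrame is returned and `df` itself is left unchanged.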