# Treating _null_ or missing values
---

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Toy dataset: five columns of five values each, with NaNs scattered
# through COL_1, COL_2 and COL_5 to act as the "missing" entries.
s = dict(
    COL_1=[212, 434, np.nan, 44, np.nan],
    COL_2=[43, np.nan, np.nan, np.nan, np.nan],
    COL_3=[555, 603, 102, 77, 809],
    COL_4=[567, 560, 614, 88, 128],
    COL_5=[555, 603, 102, 64, np.nan],
)

In [3]:
# Build a pandas DataFrame from the dict of column lists
# (each key becomes a column), then preview the first rows.
df = pd.DataFrame(data=s)
df.head(n=5)

Unnamed: 0,COL_1,COL_2,COL_3,COL_4,COL_5
0,212.0,43.0,555,567,555.0
1,434.0,,603,560,603.0
2,,,102,614,102.0
3,44.0,,77,88,64.0
4,,,809,128,


## Excluding (with `dropna()`)

In [4]:
# Drop every row that contains at least one missing value.
# Returns a new frame; `df` itself is left untouched.
df.dropna(axis='index')

Unnamed: 0,COL_1,COL_2,COL_3,COL_4,COL_5
0,212.0,43.0,555,567,555.0


In [5]:
# Drop rows, but keep any row that has at least `thresh` non-NA values
# (here: 4 of the 5 columns must be populated).
df.dropna(axis='index', thresh=4)

Unnamed: 0,COL_1,COL_2,COL_3,COL_4,COL_5
0,212.0,43.0,555,567,555.0
1,434.0,,603,560,603.0
3,44.0,,77,88,64.0


In [6]:
# Drop columns instead of rows. 'thresh' is optional: with thresh=4 a
# column survives only if it holds at least 4 non-NA values.
df.dropna(axis='columns', thresh=4)

Unnamed: 0,COL_3,COL_4,COL_5
0,555,567,555.0
1,603,560,603.0
2,102,614,102.0
3,77,88,64.0
4,809,128,


## Replacing (with `fillna()`)

In [7]:
# Replace every missing value with the same constant (0).
# Returns a new frame; `df` itself is left untouched.
df.fillna(0)

Unnamed: 0,COL_1,COL_2,COL_3,COL_4,COL_5
0,212.0,43.0,555,567,555.0
1,434.0,0.0,603,560,603.0
2,0.0,0.0,102,614,102.0
3,44.0,0.0,77,88,64.0
4,0.0,0.0,809,128,0.0


In [8]:
# Or a smarter way :) -- fill the gaps with the column's own mean.
# Only the first column (COL_1) is set; the other columns keep their NaNs.
# Note: this assigns back into `df`, so `df` is modified from here on.
col_1_mean = df['COL_1'].mean()
df['COL_1'] = df['COL_1'].fillna(col_1_mean)
df

Unnamed: 0,COL_1,COL_2,COL_3,COL_4,COL_5
0,212.0,43.0,555,567,555.0
1,434.0,,603,560,603.0
2,230.0,,102,614,102.0
3,44.0,,77,88,64.0
4,230.0,,809,128,
