- fillna - Fill missing values
- interpolate - Estimate missing values from the neighboring known values
- dropna - Drop rows with missing values

In [1]:
import pandas as pd
import numpy as np
import random 
pd.options.mode.chained_assignment = None

#### Original Unchanged Data

In [2]:
# Reference frame that is joined next to every result so the original NaN
# pattern stays visible. All four columns start out as NaN (float64) and the
# known values are then written with one .loc call each.
#
# Chained assignment (`compare.compare1[10:] = ...`) is avoided on purpose:
# with pandas copy-on-write enabled it writes to a temporary object and the
# frame is left unchanged.
compare = pd.DataFrame(
    np.nan,
    index=pd.RangeIndex(12),
    columns=['compare1', 'compare2', 'compare3', 'compare4'],
)
# compare1: 1 everywhere except the last two rows.
compare.loc[compare.index[:10], 'compare1'] = 1
# compare2: 0 in the first four rows, 10 in the last four, NaN in between.
compare.loc[compare.index[0:4], 'compare2'] = 0
compare.loc[compare.index[8:], 'compare2'] = 10
# compare3: only the first (0) and last (100) rows are known.
compare.loc[compare.index[0:1], 'compare3'] = 0
compare.loc[compare.index[11:], 'compare3'] = 100
# compare4: starts with NaN, 20 in rows 2-3, 50 in the last two rows.
compare.loc[compare.index[2:4], 'compare4'] = 20
compare.loc[compare.index[10:], 'compare4'] = 50

In [3]:
# Working frame with the same NaN pattern as `compare`, under the column
# names c1..c4. All columns start out as NaN (float64) and the known values
# are written with one .loc call each.
#
# Chained assignment (`data.c1[10:] = ...`) is avoided on purpose: with
# pandas copy-on-write enabled it writes to a temporary object and the frame
# is left unchanged.
data = pd.DataFrame(
    np.nan,
    index=pd.RangeIndex(12),
    columns=['c1', 'c2', 'c3', 'c4'],
)
# c1: 1 everywhere except the last two rows.
data.loc[data.index[:10], 'c1'] = 1
# c2: 0 in the first four rows, 10 in the last four, NaN in between.
data.loc[data.index[0:4], 'c2'] = 0
data.loc[data.index[8:], 'c2'] = 10
# c3: only the first (0) and last (100) rows are known.
data.loc[data.index[0:1], 'c3'] = 0
data.loc[data.index[11:], 'c3'] = 100
# c4: starts with NaN, 20 in rows 2-3, 50 in the last two rows.
data.loc[data.index[2:4], 'c4'] = 20
data.loc[data.index[10:], 'c4'] = 50
data

Unnamed: 0,c1,c2,c3,c4
0,1.0,0.0,0.0,
1,1.0,0.0,,
2,1.0,0.0,,20.0
3,1.0,0.0,,20.0
4,1.0,,,
5,1.0,,,
6,1.0,,,
7,1.0,,,
8,1.0,10.0,,
9,1.0,10.0,,


##### Interpolate Missing Data - Replaces NaNs With Evenly Spaced Values Between the Neighboring Known Values (Linear Interpolation)
- Imputes Along Columns (axis = 0)
- Imputes Along Rows (axis = 1)

In [4]:
# Interpolate down each column (axis=0); `data` itself is not modified.
interpolate = data.interpolate(axis=0)
# Show the interpolated columns side by side with the untouched reference.
interpolate.join(compare)

Unnamed: 0,c1,c2,c3,c4,compare1,compare2,compare3,compare4
0,1.0,0.0,0.0,,1.0,0.0,0.0,
1,1.0,0.0,9.090909,,1.0,0.0,,
2,1.0,0.0,18.181818,20.0,1.0,0.0,,20.0
3,1.0,0.0,27.272727,20.0,1.0,0.0,,20.0
4,1.0,2.0,36.363636,24.285714,1.0,,,
5,1.0,4.0,45.454545,28.571429,1.0,,,
6,1.0,6.0,54.545455,32.857143,1.0,,,
7,1.0,8.0,63.636364,37.142857,1.0,,,
8,1.0,10.0,72.727273,41.428571,1.0,10.0,,
9,1.0,10.0,81.818182,45.714286,1.0,10.0,,


##### Fill NaN With Your Own Values (e.g. 0, Means)
- Imputes Along Columns (axis = 0)
- Imputes Along Rows (axis = 1)

In [5]:
# Replace every NaN with a constant; fillna returns a new frame by default.
zeros = data.fillna(0)
# Replace each column's NaNs with that column's mean. inplace=True must not
# be used here: an in-place fillna returns None, so `mean` would be None and
# the join below would fail (and `data` would be silently overwritten).
mean = data.fillna(data.mean())
# Show the mean-filled columns side by side with the untouched reference.
mean.join(compare)

Unnamed: 0,c1,c2,c3,c4,compare1,compare2,compare3,compare4
0,1.0,0.0,0.0,35.0,1.0,0.0,0.0,
1,1.0,0.0,50.0,35.0,1.0,0.0,,
2,1.0,0.0,50.0,20.0,1.0,0.0,,20.0
3,1.0,0.0,50.0,20.0,1.0,0.0,,20.0
4,1.0,5.0,50.0,35.0,1.0,,,
5,1.0,5.0,50.0,35.0,1.0,,,
6,1.0,5.0,50.0,35.0,1.0,,,
7,1.0,5.0,50.0,35.0,1.0,,,
8,1.0,10.0,50.0,35.0,1.0,10.0,,
9,1.0,10.0,50.0,35.0,1.0,10.0,,


##### Forward Fill - Use Data Taken Before NaN Values to Replace the NaNs
- Imputes Along Columns (axis = 0)
- Imputes Along Rows (axis = 1)

In [6]:
# Fresh copy of the working frame for the forward-fill demo. All columns
# start out as NaN (float64) and the known values are written with one .loc
# call each.
#
# Chained assignment (`data.c1[10:] = ...`) is avoided on purpose: with
# pandas copy-on-write enabled it writes to a temporary object and the frame
# is left unchanged.
data = pd.DataFrame(
    np.nan,
    index=pd.RangeIndex(12),
    columns=['c1', 'c2', 'c3', 'c4'],
)
# c1: 1 everywhere except the last two rows.
data.loc[data.index[:10], 'c1'] = 1
# c2: 0 in the first four rows, 10 in the last four, NaN in between.
data.loc[data.index[0:4], 'c2'] = 0
data.loc[data.index[8:], 'c2'] = 10
# c3: only the first (0) and last (100) rows are known.
data.loc[data.index[0:1], 'c3'] = 0
data.loc[data.index[11:], 'c3'] = 100
# c4: starts with NaN, 20 in rows 2-3, 50 in the last two rows.
data.loc[data.index[2:4], 'c4'] = 20
data.loc[data.index[10:], 'c4'] = 50

In [7]:
"""pad and ffill"""
"""NaN With No Real Values Before It Will Not Be Imputed (e.g c4)"""
"""In c2: NaNs Have 0.0 Before It So Is Replaced By 0.0"""
"""In c3: NaNs Have 0.0 Before It So Is Replaced By 0.0"""
"""In c4: NaNs Have 20.0 Before It So Is Replaced By 20.0"""
# Forward-fill down each column (axis=0), modifying `data` in place.
# DataFrame.ffill is used instead of fillna(method='ffill') because the
# `method` argument of fillna is deprecated in recent pandas releases.
data.ffill(axis=0, inplace=True)
# Show the forward-filled columns side by side with the untouched reference.
data.join(compare)

Unnamed: 0,c1,c2,c3,c4,compare1,compare2,compare3,compare4
0,1.0,0.0,0.0,,1.0,0.0,0.0,
1,1.0,0.0,0.0,,1.0,0.0,,
2,1.0,0.0,0.0,20.0,1.0,0.0,,20.0
3,1.0,0.0,0.0,20.0,1.0,0.0,,20.0
4,1.0,0.0,0.0,20.0,1.0,,,
5,1.0,0.0,0.0,20.0,1.0,,,
6,1.0,0.0,0.0,20.0,1.0,,,
7,1.0,0.0,0.0,20.0,1.0,,,
8,1.0,10.0,0.0,20.0,1.0,10.0,,
9,1.0,10.0,0.0,20.0,1.0,10.0,,


##### Back Fill - Use Data Taken After NaN Values to Replace the NaNs
- Imputes Along Columns (axis = 0)
- Imputes Along Rows (axis = 1)

In [8]:
# Fresh copy of the working frame for the back-fill demo. All columns start
# out as NaN (float64) and the known values are written with one .loc call
# each.
#
# Chained assignment (`data.c1[10:] = ...`) is avoided on purpose: with
# pandas copy-on-write enabled it writes to a temporary object and the frame
# is left unchanged.
data = pd.DataFrame(
    np.nan,
    index=pd.RangeIndex(12),
    columns=['c1', 'c2', 'c3', 'c4'],
)
# c1: 1 everywhere except the last two rows.
data.loc[data.index[:10], 'c1'] = 1
# c2: 0 in the first four rows, 10 in the last four, NaN in between.
data.loc[data.index[0:4], 'c2'] = 0
data.loc[data.index[8:], 'c2'] = 10
# c3: only the first (0) and last (100) rows are known.
data.loc[data.index[0:1], 'c3'] = 0
data.loc[data.index[11:], 'c3'] = 100
# c4: starts with NaN, 20 in rows 2-3, 50 in the last two rows.
data.loc[data.index[2:4], 'c4'] = 20
data.loc[data.index[10:], 'c4'] = 50

In [9]:
"""backfill and bfill"""
"""NaN With No Real Values After It Will Not Be Imputed (e.g. c1)"""
"""In c2: NaNs Have 10.0 Before It So Is Replaced By 10.0"""
"""In c3: NaNs Have 100.0 Before It So Is Replaced By 100.0"""
"""In c4: NaNs Have 50.0 Before It So Is Replaced By 50.0"""
# Back-fill each column, modifying `data` in place: every NaN takes the next
# valid value that follows it; trailing NaNs with nothing after them stay NaN.
# DataFrame.bfill is used instead of fillna(method='bfill') because the
# `method` argument of fillna is deprecated in recent pandas releases.
data.bfill(inplace=True)
# Show the back-filled columns side by side with the untouched reference.
data.join(compare)

Unnamed: 0,c1,c2,c3,c4,compare1,compare2,compare3,compare4
0,1.0,0.0,0.0,20.0,1.0,0.0,0.0,
1,1.0,0.0,100.0,20.0,1.0,0.0,,
2,1.0,0.0,100.0,20.0,1.0,0.0,,20.0
3,1.0,0.0,100.0,20.0,1.0,0.0,,20.0
4,1.0,10.0,100.0,50.0,1.0,,,
5,1.0,10.0,100.0,50.0,1.0,,,
6,1.0,10.0,100.0,50.0,1.0,,,
7,1.0,10.0,100.0,50.0,1.0,,,
8,1.0,10.0,100.0,50.0,1.0,10.0,,
9,1.0,10.0,100.0,50.0,1.0,10.0,,
