# Missing Values
---

**Purpose:**

Experimenting with the issues caused by leaving values blank, and comparing different techniques for filling them in:

1. [Zero](#Zero-Fill)
1. [Average](#Average)
1. [Rolling Average](#Rolling-Average)
1. [Back Fill](#Back-Fill)
1. [Forward Fill](#Forward-Fill)
1. [Pad](#Pad-Interpolation)
1. [Linear](#Linear-Interpolation)
1. [Polynomial](#Polynomial-Interpolation)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Apply matplotlib's "bmh" style sheet to the plots drawn in this notebook.
plt.style.use("bmh")

In [None]:
# Toy yearly series indexed by `period`. `col2` starts out as an
# independent copy of `col1`.
df = pd.DataFrame({
    'period': range(2000, 2011),
    'col1': [296.0, 321.0, 369.0, 450.0, 499.0, 577.0,
             708.0, 923.0, 1209.0, 1294.0, 1492.0],
})
df = df.assign(col2=df['col1'].copy()).set_index('period')
df

In [None]:
# Plot every column; the trailing semicolon suppresses the notebook's text output.
df.plot(marker='o', alpha=0.5);

In [None]:
# First and last period (both inclusive with label-based slicing) of the
# stretch that the later cells blank out.
beg = 2005
end = 2008

# Preview the rows that fall inside that stretch.
df.loc[beg:end]

### Zero Fill
---

In [None]:
# Blank out col2 between beg and end, then build col3 by replacing the
# resulting missing values with 0.
df.loc[beg:end, ['col2']] = None
df['col3'] = df['col2'].fillna(value=0)

df.describe()

In [None]:
# Per-period difference between the original series and the filled one.
df['col1'] - df['col3']

In [None]:
# Mean absolute deviation of the differences around their own mean.
# `Series.mad()` was removed in pandas 2.0, so the formula is spelled out.
# NOTE(review): this is not the mean absolute error; if MAE was intended,
# use `diff.abs().mean()` instead — confirm.
diff = df['col1'] - df['col3']
(diff - diff.mean()).abs().mean()

In [None]:
# Overlay the original (col1) and filled (col3) series.
df[['col1', 'col3']].plot(marker='o', alpha=0.5);

### Average
---

In [None]:
# Blank out col2 between beg and end.
df.loc[beg:end, ['col2']] = None

# Mean of the remaining col2 values (NaNs are skipped by default),
# used as the replacement for every missing entry.
avg = df['col2'].mean()
df['col3'] = df['col2'].fillna(value=avg)

df.describe()

In [None]:
# Per-period difference between the original series and the filled one.
df['col1'] - df['col3']

In [None]:
# Mean absolute deviation of the differences around their own mean.
# `Series.mad()` was removed in pandas 2.0, so the formula is spelled out.
# NOTE(review): this is not the mean absolute error; if MAE was intended,
# use `diff.abs().mean()` instead — confirm.
diff = df['col1'] - df['col3']
(diff - diff.mean()).abs().mean()

In [None]:
# Overlay the original (col1) and filled (col3) series.
df[['col1', 'col3']].plot(marker='o', alpha=0.5);

### Rolling Average
---

In [None]:
# Blank out col2 between beg and end.
df.loc[beg:end, ['col2']] = None

# Trailing 6-row mean of col2; a window needs at least 2 non-missing
# values to produce a result.
rolling = df['col2'].rolling(6, min_periods=2).mean()

# Use the rolling mean only where col2 is missing.
df['col3'] = df['col2'].fillna(value=rolling)

df.describe()

In [None]:
# Per-period difference between the original series and the filled one.
df['col1'] - df['col3']

In [None]:
# Mean absolute deviation of the differences around their own mean.
# `Series.mad()` was removed in pandas 2.0, so the formula is spelled out.
# NOTE(review): this is not the mean absolute error; if MAE was intended,
# use `diff.abs().mean()` instead — confirm.
diff = df['col1'] - df['col3']
(diff - diff.mean()).abs().mean()

In [None]:
# Overlay the original (col1) and filled (col3) series.
df[['col1', 'col3']].plot(marker='o', alpha=0.5);

### Back Fill
---

In [None]:
# Blank out col2 between beg and end.
df.loc[beg:end, ['col2']] = None

# Back fill: each missing value takes the next valid observation.
# `fillna(method='bfill')` is deprecated in recent pandas; `bfill()` is the
# supported spelling and gives the same result.
df['col3'] = df['col2'].bfill()

df.describe()

In [None]:
# Per-period difference between the original series and the filled one.
df['col1'] - df['col3']

In [None]:
# Mean absolute deviation of the differences around their own mean.
# `Series.mad()` was removed in pandas 2.0, so the formula is spelled out.
# NOTE(review): this is not the mean absolute error; if MAE was intended,
# use `diff.abs().mean()` instead — confirm.
diff = df['col1'] - df['col3']
(diff - diff.mean()).abs().mean()

In [None]:
# Overlay the original (col1) and filled (col3) series.
df[['col1', 'col3']].plot(marker='o', alpha=0.5);

### Forward Fill
---

In [None]:
# Blank out col2 between beg and end.
df.loc[beg:end, ['col2']] = None

# Forward fill: each missing value takes the last valid observation.
# `fillna(method='ffill')` is deprecated in recent pandas; `ffill()` is the
# supported spelling and gives the same result.
df['col3'] = df['col2'].ffill()

df.describe()

In [None]:
# Per-period difference between the original series and the filled one.
df['col1'] - df['col3']

In [None]:
# Mean absolute deviation of the differences around their own mean.
# `Series.mad()` was removed in pandas 2.0, so the formula is spelled out.
# NOTE(review): this is not the mean absolute error; if MAE was intended,
# use `diff.abs().mean()` instead — confirm.
diff = df['col1'] - df['col3']
(diff - diff.mean()).abs().mean()

In [None]:
# Overlay the original (col1) and filled (col3) series.
df[['col1', 'col3']].plot(marker='o', alpha=0.5);

### Pad Interpolation
---

In [None]:
# Blank out col2 between beg and end.
df.loc[beg:end, ['col2']] = None

# "Pad" interpolation carries the last valid observation forward.
# `interpolate(method='pad')` is deprecated in recent pandas, which points
# to `ffill()` as the replacement; the filled values are the same.
df['col3'] = df['col2'].ffill()

df.describe()

In [None]:
# Per-period difference between the original series and the filled one.
df['col1'] - df['col3']

In [None]:
# Mean absolute deviation of the differences around their own mean.
# `Series.mad()` was removed in pandas 2.0, so the formula is spelled out.
# NOTE(review): this is not the mean absolute error; if MAE was intended,
# use `diff.abs().mean()` instead — confirm.
diff = df['col1'] - df['col3']
(diff - diff.mean()).abs().mean()

In [None]:
# Overlay the original (col1) and filled (col3) series.
df[['col1', 'col3']].plot(marker='o', alpha=0.5);

### Linear Interpolation
---

In [None]:
# Blank out col2 between beg and end.
df.loc[beg:end, ['col2']] = None

# Fill the missing values with pandas' 'linear' interpolation between the
# surrounding valid values.
filled = df['col2'].interpolate(method='linear')
df['col3'] = filled

df.describe()

In [None]:
# Per-period difference between the original series and the filled one.
df['col1'] - df['col3']

In [None]:
# Mean absolute deviation of the differences around their own mean.
# `Series.mad()` was removed in pandas 2.0, so the formula is spelled out.
# NOTE(review): this is not the mean absolute error; if MAE was intended,
# use `diff.abs().mean()` instead — confirm.
diff = df['col1'] - df['col3']
(diff - diff.mean()).abs().mean()

In [None]:
# Overlay the original (col1) and filled (col3) series.
df[['col1', 'col3']].plot(marker='o', alpha=0.5);

### Polynomial Interpolation
---

In [None]:
# Blank out col2 between beg and end.
df.loc[beg:end, ['col2']] = None

# Fill the missing values with pandas' 'polynomial' interpolation of order 3.
filled = df['col2'].interpolate(method='polynomial', order=3)
df['col3'] = filled

df.describe()

In [None]:
# Per-period difference between the original series and the filled one.
df['col1'] - df['col3']

In [None]:
# Mean absolute deviation of the differences around their own mean.
# `Series.mad()` was removed in pandas 2.0, so the formula is spelled out.
# NOTE(review): this is not the mean absolute error; if MAE was intended,
# use `diff.abs().mean()` instead — confirm.
diff = df['col1'] - df['col3']
(diff - diff.mean()).abs().mean()

In [None]:
# Overlay the original (col1) and filled (col3) series.
df[['col1', 'col3']].plot(marker='o', alpha=0.5);