### Simple examples of data imputation with scikit-learn
#### (read and play)

In [0]:
import numpy as np
import pandas as pd
from io import StringIO

Creating some data with missing values

In [0]:
# Small inline CSV used as the sample data set; the empty fields are parsed
# by pandas as missing values (NaN).
csvdata = '''
A,B,C,D,E
1,2,3,4,
5,6,,8,
0,,11,12,13
'''

# Parse the text through an in-memory buffer instead of a file on disk.
csv_buffer = StringIO(csvdata)
df = pd.read_csv(csv_buffer)
df

Radical choice: delete the whole column

In [0]:
# Remove column "E" entirely and keep working with the reduced frame.
df = df.drop(columns=["E"])
df

Recreating the original DataFrame

In [0]:
# Start again from the untouched CSV text so the previous column drop is undone.
csv_buffer = StringIO(csvdata)
df = pd.read_csv(csv_buffer)
df

Less radical: delete only the rows with a missing value in column "C"

In [0]:
# Drop every row whose "C" value is missing; other columns are not inspected.
# `thresh` is deliberately not passed: pandas >= 1.5 raises a TypeError when
# both `how` and `thresh` are given explicitly (even as `thresh=None`).
df = df.dropna(axis=0, how="any", subset=["C"])
df

If you do not specify the columns, it will delete every row with any missing value

In [0]:
# With no `subset`, every column is checked: a row is removed as soon as it
# contains at least one missing value.
# `thresh` is deliberately not passed: pandas >= 1.5 raises a TypeError when
# both `how` and `thresh` are given explicitly (even as `thresh=None`).
df = df.dropna(axis=0, how="any")
df

Imputing with scikit-learn

In [0]:
from sklearn.impute import SimpleImputer

In [0]:
# Reload the sample data so the imputation examples start from the raw values.
csv_buffer = StringIO(csvdata)
df = pd.read_csv(csv_buffer)
df

Imputing mean values

In [0]:
# Fill the missing entries of column "C" with the mean of its observed values.
# The imputer is given 2-D input, so the column is reshaped to (n_rows, 1).
c_values = df["C"].values.reshape(-1, 1)

imp = SimpleImputer(strategy='mean', missing_values=np.nan)
imp.fit(c_values)

df["C"] = imp.transform(c_values)
df

In [0]:
# Reset to the raw sample data before trying the next imputation strategy.
csv_buffer = StringIO(csvdata)
df = pd.read_csv(csv_buffer)
df

Imputing a constant value

In [0]:
# Fill the missing entries of column "C" with the fixed value 200.
# The imputer is given 2-D input, so the column is reshaped to (n_rows, 1).
c_values = df["C"].values.reshape(-1, 1)

imp = SimpleImputer(strategy='constant', fill_value=200, missing_values=np.nan)
imp.fit(c_values)

df["C"] = imp.transform(c_values)
df

In [0]:
# Reset to the raw sample data once more, this time for the iterative imputer.
csv_buffer = StringIO(csvdata)
df = pd.read_csv(csv_buffer)
df

Iterative imputing (experimental)

In [0]:
# IterativeImputer is an experimental scikit-learn estimator: the enabling
# import below looks unused but must stay, and must come before the
# IterativeImputer import itself.
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

In [0]:
# Fit the iterative imputer on the whole frame, then fill its missing values.
# random_state is fixed so repeated runs give the same result.
imp_mean = IterativeImputer(random_state=0)
imp_mean.fit(df)

# transform() returns a plain array, so the column labels and the row index
# are re-attached explicitly; without `index=` the rows would be silently
# renumbered from 0.
columns = df.columns
df = pd.DataFrame(imp_mean.transform(df), columns=columns, index=df.index)
df