In [1]:
import numpy as np
import pandas as pd

In [2]:
# Toy frame for the imputation demos: every column has at least one NaN
# (one in 'items', one in 'age', two in 'cost').
data_fake = pd.DataFrame(
    dict(
        items=[np.nan, 2, 4, 8, 10],
        age=[23, np.nan, 28, 32, 40],
        cost=[9500, 11000, np.nan, np.nan, 14760],
    )
)

In [3]:
# Show the raw frame so the position of each NaN is visible before imputing.
data_fake

Unnamed: 0,items,age,cost
0,,23.0,9500.0
1,2.0,,11000.0
2,4.0,28.0,
3,8.0,32.0,
4,10.0,40.0,14760.0


# Simple Imputer (Univariate)

In [4]:
from sklearn.impute import SimpleImputer

# Univariate imputation: every NaN is replaced by the median of its own column.
imp = SimpleImputer(strategy='median', missing_values=np.nan)
# Fit and transform on the same frame; the result is a plain NumPy array,
# so the column names are not carried over.
imp.fit(data_fake).transform(data_fake)

array([[6.000e+00, 2.300e+01, 9.500e+03],
       [2.000e+00, 3.000e+01, 1.100e+04],
       [4.000e+00, 2.800e+01, 1.100e+04],
       [8.000e+00, 3.200e+01, 1.100e+04],
       [1.000e+01, 4.000e+01, 1.476e+04]])

In [6]:
# Same univariate imputer, but each NaN is now filled with its column's mean.
imp = SimpleImputer(strategy='mean', missing_values=np.nan)
imp.fit(data_fake)
imp.transform(data_fake)

array([[6.00000000e+00, 2.30000000e+01, 9.50000000e+03],
       [2.00000000e+00, 3.07500000e+01, 1.10000000e+04],
       [4.00000000e+00, 2.80000000e+01, 1.17533333e+04],
       [8.00000000e+00, 3.20000000e+01, 1.17533333e+04],
       [1.00000000e+01, 4.00000000e+01, 1.47600000e+04]])

In [8]:
# Mean imputation plus missing-value indicators: with add_indicator=True the
# output gains one extra 0/1 column per input column that contained NaNs.
imp = SimpleImputer(
    strategy='mean',
    missing_values=np.nan,
    add_indicator=True,
)
imp.fit(data_fake)
# The result has no labels: columns 0-2 are the imputed items/age/cost values,
# columns 3-5 are the matching "was missing" flags.
pd.DataFrame(data=imp.transform(data_fake))

Unnamed: 0,0,1,2,3,4,5
0,6.0,23.0,9500.0,1.0,0.0,0.0
1,2.0,30.75,11000.0,0.0,1.0,0.0
2,4.0,28.0,11753.333333,0.0,0.0,1.0
3,8.0,32.0,11753.333333,0.0,0.0,1.0
4,10.0,40.0,14760.0,0.0,0.0,0.0


# Multivariate Imputers

## Iterative Imputer

In [9]:
# IterativeImputer lives behind scikit-learn's experimental flag: the
# enable_iterative_imputer import looks unused, but it has to run before
# IterativeImputer can be imported from sklearn.impute. Do not remove or reorder.
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

In [11]:
# Multivariate imputation with the iterative imputer, left at its defaults.
it = IterativeImputer()
# Wrap the returned array in a DataFrame for a readable table
# (columns come back unlabeled, in the order items, age, cost).
pd.DataFrame(data=it.fit_transform(X=data_fake))

Unnamed: 0,0,1,2
0,0.994732,23.0,9500.0
1,2.0,27.846014,11000.0
2,4.0,28.0,11046.924962
3,8.0,32.0,12283.865998
4,10.0,40.0,14760.0


## K-NN Imputer

In [16]:
from sklearn.impute import KNNImputer

# Nearest-neighbour imputation using the 2 closest rows for each missing value.
knn = KNNImputer(n_neighbors=2)
knn.fit(data_fake)
# Wrap the returned array in a DataFrame for a readable table
# (columns come back unlabeled, in the order items, age, cost).
pd.DataFrame(data=knn.transform(data_fake))

Unnamed: 0,0,1,2
0,6.0,23.0,9500.0
1,2.0,30.0,11000.0
2,4.0,28.0,10250.0
3,8.0,32.0,12880.0
4,10.0,40.0,14760.0


In [13]:
# Re-display the source frame: it still holds its NaNs, because no cell
# reassigns data_fake and the imputers' results were only displayed, not stored.
data_fake

Unnamed: 0,items,age,cost
0,,23.0,9500.0
1,2.0,,11000.0
2,4.0,28.0,
3,8.0,32.0,
4,10.0,40.0,14760.0
