# Multivariate Imputation by Chained Equations (MICE)

In [1]:
import pandas as pd

from feature_engine.imputation import (AddMissingIndicator,
                                      CategoricalImputer,
                                      MeanMedianImputer)
from sklearn.compose import ColumnTransformer
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import (SimpleImputer,
                            IterativeImputer)
from sklearn.linear_model import BayesianRidge
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

In [2]:
# Columns to read from the CSV: six feature columns plus the 'target' label.
variables = [
    'A2', 'A3', 'A8',
    'A11', 'A14', 'A15',
    'target',
]

In [3]:
# Read only the columns listed in `variables` from the credit-approval CSV,
# then preview the first rows.
data = pd.read_csv(
    '../data/preprocessing/credit_approval_uci.csv',
    usecols=variables,
)
data.head()

Unnamed: 0,A2,A3,A8,A11,A14,A15,target
0,30.83,0.0,1.25,1,202.0,0,1
1,58.67,4.46,3.04,6,43.0,560,1
2,24.5,,,0,280.0,824,1
3,27.83,1.54,3.75,5,100.0,3,1
4,20.17,5.625,1.71,0,120.0,0,1


In [5]:
# Inspect the dtype of every loaded column.
data.dtypes

A2        float64
A3        float64
A8        float64
A11         int64
A14       float64
A15         int64
target      int64
dtype: object

In [6]:
# Separate the features from the 'target' column and hold out 30% of the rows
# for testing; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    data.drop(columns="target"),
    data["target"],
    test_size=0.3,
    random_state=0,
)

In [7]:
# Build the MICE-style imputer (Bayesian ridge regression per feature, at most
# 10 rounds) and fit it on the training split only.
imputer = IterativeImputer(
    estimator=BayesianRidge(),
    max_iter=10,
    random_state=42,
).fit(X_train)

# Impute both splits with the models learned from the training data.
# NOTE: by default transform() returns NumPy arrays, so column labels are
# not carried over to the results.
X_train, X_test = imputer.transform(X_train), imputer.transform(X_test)

In [13]:
# Wrap the imputed training data in a DataFrame again so the feature
# columns are labelled, then preview the first rows.
X_train = pd.DataFrame(
    data=X_train,
    columns=['A2', 'A3', 'A8', 'A11', 'A14', 'A15'],
)
X_train.head()

Unnamed: 0,A2,A3,A8,A11,A14,A15
0,46.08,3.0,2.375,8.0,396.0,4159.0
1,15.92,2.875,0.085,0.0,120.0,0.0
2,36.33,2.125,0.085,1.0,50.0,1187.0
3,22.17,0.585,0.0,0.0,100.0,0.0
4,57.83,7.04,14.0,6.0,360.0,1332.0


In [None]:
# Sanity check after imputation: count the remaining missing values per
# column (every count is expected to be zero).
X_train.isna().sum()