# Import Library

In [18]:
import pandas as pd
import numpy as np
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.linear_model import LinearRegression  # Example estimator
from sklearn.ensemble import RandomForestRegressor #Another example estimator
from sklearn.preprocessing import OrdinalEncoder

# Create dataset

In [12]:
# Create a sample DataFrame with missing values
data = {'A': [1, 2, np.nan, 4, 5, np.nan],
        'B': [7, np.nan, 9, 10, 11, 12],
        'C': [13, 14, 15, np.nan, 17, 18]}
df = pd.DataFrame(data)
df.head()

Unnamed: 0,A,B,C
0,1.0,7.0,13.0
1,2.0,,14.0
2,,9.0,15.0
3,4.0,10.0,
4,5.0,11.0,17.0


# Initialize the IterativeImputer (LinearRegression)

In [13]:
# Initialize the IterativeImputer
imputer = IterativeImputer(random_state=0, estimator=LinearRegression()) #you can change the estimator.


# Fit and Transform

In [15]:
# Fit and transform the DataFrame
trf1 = imputer.fit_transform(df)
df_imputed = pd.DataFrame(trf1, columns=df.columns)
df_imputed

Unnamed: 0,A,B,C
0,1.0,7.0,13.0
1,2.0,8.000005,14.0
2,3.0,9.0,15.0
3,4.0,10.0,15.999999
4,5.0,11.0,17.0
5,6.000001,12.0,18.0


#Example with a different estimator. (RandomForestRegression)

In [17]:
#Example with a different estimator.
imputer2 = IterativeImputer(random_state=0, estimator=RandomForestRegressor())
df_imputed2 = pd.DataFrame(imputer2.fit_transform(df), columns=df.columns)
df_imputed2




Unnamed: 0,A,B,C
0,1.0,7.0,13.0
1,2.0,8.12,14.0
2,2.4,9.0,15.0
3,4.0,10.0,16.4
4,5.0,11.0,17.0
5,4.42,12.0,18.0


# Example Categorical Data

## Create DataFrame

In [19]:
data2 = {'A': [1, 2, np.nan, 4, 5, 6],
        'B': ['cat', np.nan, 'dog', 'cat', 'bird', 'dog']}
df2 = pd.DataFrame(data2)
df2

Unnamed: 0,A,B
0,1.0,cat
1,2.0,
2,,dog
3,4.0,cat
4,5.0,bird
5,6.0,dog


## One Hot Encoder apply

In [21]:
encoder = OrdinalEncoder()
df2['B'] = encoder.fit_transform(df2[['B']])
df2

Unnamed: 0,A,B
0,1.0,1.0
1,2.0,
2,,2.0
3,4.0,1.0
4,5.0,0.0
5,6.0,2.0


# Apply `LinearRegression` on Categorical Column

In [22]:
imputer3 = IterativeImputer(random_state=0, estimator=LinearRegression())
df2_imputed = pd.DataFrame(imputer3.fit_transform(df2), columns = df2.columns)
df2_imputed

Unnamed: 0,A,B
0,1.0,1.0
1,2.0,1.050106
2,4.054665,2.0
3,4.0,1.0
4,5.0,0.0
5,6.0,2.0


In [27]:
df2_imputed['B_inverse_trf'] = encoder.inverse_transform(df2_imputed[['B']])

In [28]:
df2_imputed

Unnamed: 0,A,B,B_inverse_trf
0,1.0,1.0,1.0
1,2.0,1.050106,1.0
2,4.054665,2.0,2.0
3,4.0,1.0,1.0
4,5.0,0.0,0.0
5,6.0,2.0,2.0
