In [67]:
import pandas as pd

In [68]:
import numpy as np

In [69]:
# Single-column frame of each runner's farthest run (in miles).
# One entry is NaN so the imputers below have something to fill;
# the NaN also forces the column to a float dtype.
miles = pd.Series(
    [50, 62, np.nan, 100, 26, 13, 31, 50],
    name='farthest_run_mi',
).to_frame()

In [70]:
# Display the raw frame (bare last expression -> rendered as the cell output).
miles

Unnamed: 0,farthest_run_mi
0,50.0
1,62.0
2,
3,100.0
4,26.0
5,13.0
6,31.0
7,50.0


In [71]:
# Count the missing values in each column of 'miles':
# isna() flags NaN cells, sum() totals the flags per column.
miles.isna().sum()

Unnamed: 0,0
farthest_run_mi,1


In [72]:
# Importing SimpleImputer for handling missing values
from sklearn.impute import SimpleImputer

In [73]:
# Imputer that replaces missing entries with the mean of the column.
imp_mean = SimpleImputer(strategy='mean')

In [74]:
# Fit the mean imputer on 'miles' and show the imputed values as an array;
# the single NaN is filled with the mean of the observed values.
imp_mean.fit_transform(miles)

array([[ 50.        ],
       [ 62.        ],
       [ 47.42857143],
       [100.        ],
       [ 26.        ],
       [ 13.        ],
       [ 31.        ],
       [ 50.        ]])

In [75]:
# Imputer that replaces missing entries with the median of the column.
imp_median = SimpleImputer(strategy='median')

In [76]:
# Fit the median imputer on 'miles' and show the imputed values as an array;
# the single NaN is filled with the median of the observed values.
imp_median.fit_transform(miles)

array([[ 50.],
       [ 62.],
       [ 50.],
       [100.],
       [ 26.],
       [ 13.],
       [ 31.],
       [ 50.]])

In [77]:
# Imputer that replaces missing entries with the most frequent value (mode) of the column.
imp_mode = SimpleImputer(strategy='most_frequent')

In [78]:
# Fit the mode imputer on 'miles' and show the imputed values as an array;
# the single NaN is filled with the most frequent observed value.
imp_mode.fit_transform(miles)

array([[ 50.],
       [ 62.],
       [ 50.],
       [100.],
       [ 26.],
       [ 13.],
       [ 31.],
       [ 50.]])

In [79]:
# Imputer that replaces every missing entry with the fixed value 13.
imp_constant = SimpleImputer(strategy='constant', fill_value=13)

In [80]:
# Fit the constant imputer on 'miles' and show the imputed values as an array;
# the single NaN is filled with the configured fill_value (13).
imp_constant.fit_transform(miles)

array([[ 50.],
       [ 62.],
       [ 13.],
       [100.],
       [ 26.],
       [ 13.],
       [ 31.],
       [ 50.]])

In [81]:
# Single-column frame of text values with one missing entry (NaN),
# used to demonstrate imputation on non-numeric data.
names = pd.DataFrame(
    data={
        'names': ['ryan', 'nolan', 'honus', 'wagner', np.nan, 'ruth'],
    },
)

In [82]:
# Display the text frame (bare last expression -> rendered as the cell output).
names

Unnamed: 0,names
0,ryan
1,nolan
2,honus
3,wagner
4,
5,ruth


In [83]:
# Constant-strategy imputer for text data: every missing entry becomes 'babe'.
imp_constant_cat = SimpleImputer(strategy='constant', fill_value='babe')

In [84]:
# Fit the constant imputer on 'names' and show the imputed values as an array;
# the missing name is filled with the configured fill_value ('babe').
imp_constant_cat.fit_transform(names)

array([['ryan'],
       ['nolan'],
       ['honus'],
       ['wagner'],
       ['babe'],
       ['ruth']], dtype=object)

In [85]:
# Mean imputer that also appends a missing-value indicator column
# (add_indicator=True), so downstream steps can tell which rows were imputed.
imp_mean_marked = SimpleImputer(strategy='mean', add_indicator=True)

In [86]:
# Apply mean imputation to 'miles'. Because the imputer was created with
# add_indicator=True, the result has a second column that is 1 where the
# original value was missing and 0 elsewhere.
imp_mean_marked.fit_transform(miles)

array([[ 50.        ,   0.        ],
       [ 62.        ,   0.        ],
       [ 47.42857143,   1.        ],
       [100.        ,   0.        ],
       [ 26.        ,   0.        ],
       [ 13.        ,   0.        ],
       [ 31.        ,   0.        ],
       [ 50.        ,   0.        ]])

In [87]:
# Two-column frame mixing text and numeric data, each with one missing entry,
# so that a different imputation strategy can be applied per column.
df = pd.DataFrame(
    data={
        'Name': ['Ryan', 'Nolan', 'Walter', 'Honus', 'Christy', np.nan, 'Nepoleon', 'Tris'],
        'farthest_run_mi': [50, 62, np.nan, 100, 26, 13, 31, 50],
    },
)

In [88]:
# Display the mixed-type frame (bare last expression -> rendered as the cell output).
df

Unnamed: 0,Name,farthest_run_mi
0,Ryan,50.0
1,Nolan,62.0
2,Walter,
3,Honus,100.0
4,Christy,26.0
5,,13.0
6,Nepoleon,31.0
7,Tris,50.0


In [89]:
# Column Transformation with Multiple Imputation Strategies
from sklearn.compose import make_column_transformer

In [90]:
# Column transformer that routes each column to its own imputer:
#   'Name'            -> constant fill (imp_constant_cat)
#   'farthest_run_mi' -> mean fill (imp_mean)
# Any column not listed here is dropped from the output.
ct = make_column_transformer(
    (imp_constant_cat, ['Name']),
    (imp_mean, ['farthest_run_mi']),
    remainder='drop',
)

In [91]:
# Display the column transformer to inspect its configured steps.
ct

In [92]:
# Configure the transformer to return pandas DataFrames from
# transform / fit_transform instead of NumPy arrays.
ct.set_output(transform='pandas')

In [93]:
# Fit the column transformer on df and apply both imputers in one step.
# The result is a pandas DataFrame because ct was configured with
# set_output(transform='pandas'); no separate conversion happens here.
df_pandas = ct.fit_transform(df)

In [94]:
# Display the imputed frame; output column names carry the generated
# transformer-name prefix (e.g. 'simpleimputer-1__Name').
df_pandas

Unnamed: 0,simpleimputer-1__Name,simpleimputer-2__farthest_run_mi
0,Ryan,50.0
1,Nolan,62.0
2,Walter,47.428571
3,Honus,100.0
4,Christy,26.0
5,babe,13.0
6,Nepoleon,31.0
7,Tris,50.0
