In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, MinMaxScaler, StandardScaler
# Toy dataset used throughout the notebook: one categorical column and two
# numeric columns, each containing a few missing entries (np.nan) so the
# later cells have something to impute, encode and scale.
data = dict(
    Category=['A', 'B', 'A', 'C', 'B', 'A', np.nan, 'C', 'B', 'C'],
    Value1=[1, 2, np.nan, 4, 5, 6, 7, np.nan, 9, 10],
    Value2=[20.5, 30.5, 40.5, 50.5, np.nan, 70.5, 80.5, 90.5, 100.5, np.nan],
)
df = pd.DataFrame(data=data)
df

Unnamed: 0,Category,Value1,Value2
0,A,1.0,20.5
1,B,2.0,30.5
2,A,,40.5
3,C,4.0,50.5
4,B,5.0,
5,A,6.0,70.5
6,,7.0,80.5
7,C,,90.5
8,B,9.0,100.5
9,C,10.0,


In [None]:
# Statistical imputation: fill the categorical column with its most frequent
# label and each numeric column with its mean.
#
# The fill values are passed to DataFrame.fillna as one per-column mapping,
# which returns a new frame and leaves `df` untouched. The previous form,
# `df_imputed[col].fillna(..., inplace=True)`, is a chained in-place update:
# it raises a FutureWarning on pandas 2.x and modifies only a temporary
# (leaving the NaNs in place) once Copy-on-Write is active (pandas 3.0).
fill_values = {
    'Category': df['Category'].mode()[0],  # mode() ignores NaN; [0] picks the first mode on ties
    'Value1': df['Value1'].mean(),         # mean() skips NaN by default
    'Value2': df['Value2'].mean(),
}
df_imputed = df.fillna(value=fill_values)
df_imputed

Unnamed: 0,Category,Value1,Value2
0,A,1.0,20.5
1,B,2.0,30.5
2,A,5.5,40.5
3,C,4.0,50.5
4,B,5.0,60.5
5,A,6.0,70.5
6,A,7.0,80.5
7,C,5.5,90.5
8,B,9.0,100.5
9,C,10.0,60.5


In [None]:
# Forward fill: replace each missing value with the last valid value above it
# in the same column.
#
# DataFrame.ffill() returns a NEW frame, so the result must be assigned.
# Previously the return value of `fillna(method='ffill')` was discarded and
# `df_ffill` stayed an unmodified copy of `df` (NaNs still present). The
# `method=` argument of fillna is also deprecated in favour of ffill()/bfill().
df_ffill = df.ffill()
df_ffill

Unnamed: 0,Category,Value1,Value2
0,A,1.0,20.5
1,B,2.0,30.5
2,A,,40.5
3,C,4.0,50.5
4,B,5.0,
5,A,6.0,70.5
6,,7.0,80.5
7,C,,90.5
8,B,9.0,100.5
9,C,10.0,


In [None]:
# Backward fill: replace each missing value with the next valid value below it
# in the same column.
#
# DataFrame.bfill() returns a NEW frame, so the result must be assigned.
# Previously the return value of `fillna(method='bfill')` was discarded and
# `df_bfill` stayed an unmodified copy of `df`. The `method=` argument of
# fillna is also deprecated in favour of ffill()/bfill().
#
# Note: a NaN in the last row of a column (here Value2, row 9) has no later
# value to copy from and therefore remains NaN after a backward fill.
df_bfill = df.bfill()
df_bfill

Unnamed: 0,Category,Value1,Value2
0,A,1.0,20.5
1,B,2.0,30.5
2,A,,40.5
3,C,4.0,50.5
4,B,5.0,
5,A,6.0,70.5
6,,7.0,80.5
7,C,,90.5
8,B,9.0,100.5
9,C,10.0,


In [None]:
# One-hot encoding: expand the imputed 'Category' column into one 0/1
# indicator column per label (Category_A, Category_B, Category_C).
#
# The encoder is created with its defaults and the sparse result is densified
# with .toarray(). The `sparse=False` keyword used before was renamed to
# `sparse_output` in scikit-learn 1.2 and removed in 1.4, so passing it raises
# a TypeError on current releases; .toarray() works on old and new versions.
onehot_encoder = OneHotEncoder()
category_encoded = onehot_encoder.fit_transform(df_imputed[['Category']]).toarray()

# Reuse df_imputed's index so the concat below aligns row-for-row even if the
# frame does not carry a default 0..n-1 index.
category_encoded_df = pd.DataFrame(
    category_encoded,
    columns=onehot_encoder.get_feature_names_out(['Category']),
    index=df_imputed.index,
)

# Swap the original label column for its indicator columns.
df_onehot_encoded = pd.concat(
    [df_imputed.drop('Category', axis=1), category_encoded_df],
    axis=1,
)
df_onehot_encoded



Unnamed: 0,Value1,Value2,Category_A,Category_B,Category_C
0,1.0,20.5,1.0,0.0,0.0
1,2.0,30.5,0.0,1.0,0.0
2,5.5,40.5,1.0,0.0,0.0
3,4.0,50.5,0.0,0.0,1.0
4,5.0,60.5,0.0,1.0,0.0
5,6.0,70.5,1.0,0.0,0.0
6,7.0,80.5,1.0,0.0,0.0
7,5.5,90.5,0.0,0.0,1.0
8,9.0,100.5,0.0,1.0,0.0
9,10.0,60.5,0.0,0.0,1.0


In [None]:
# Label encoding: replace each 'Category' label with an integer code
# (A -> 0, B -> 1, C -> 2 for this data), keeping the numeric columns as-is.
# Work on a copy so df_imputed keeps its original string labels.
df_label_encoded = df_imputed.copy()
label_encoder = LabelEncoder()
df_label_encoded['Category'] = label_encoder.fit_transform(
    df_imputed['Category']
)
df_label_encoded

Unnamed: 0,Category,Value1,Value2
0,0,1.0,20.5
1,1,2.0,30.5
2,0,5.5,40.5
3,2,4.0,50.5
4,1,5.0,60.5
5,0,6.0,70.5
6,0,7.0,80.5
7,2,5.5,90.5
8,1,9.0,100.5
9,2,10.0,60.5


In [None]:
# Min-Max scaling: linearly rescale Value1 and Value2 so that each column's
# minimum maps to 0 and its maximum maps to 1. 'Category' is left unscaled.
# The scaler is fitted on the label-encoded frame and the scaled values are
# written into a copy, so df_label_encoded itself is not modified.
minmax_scaler = MinMaxScaler().fit(df_label_encoded[['Value1', 'Value2']])
df_minmax_scaled = df_label_encoded.copy()
df_minmax_scaled[['Value1', 'Value2']] = minmax_scaler.transform(
    df_label_encoded[['Value1', 'Value2']]
)
df_minmax_scaled

Unnamed: 0,Category,Value1,Value2
0,0,0.0,0.0
1,1,0.111111,0.125
2,0,0.5,0.25
3,2,0.333333,0.375
4,1,0.444444,0.5
5,0,0.555556,0.625
6,0,0.666667,0.75
7,2,0.5,0.875
8,1,0.888889,1.0
9,2,1.0,0.5


In [None]:
# Standardization (z-score): centre Value1 and Value2 on their column mean and
# divide by their column standard deviation. 'Category' is left unscaled.
# As with the Min-Max cell, the result goes into a fresh copy so that
# df_label_encoded stays available in its unscaled form.
standard_scaler = StandardScaler().fit(df_label_encoded[['Value1', 'Value2']])
df_standard_scaled = df_label_encoded.copy()
df_standard_scaled[['Value1', 'Value2']] = standard_scaler.transform(
    df_label_encoded[['Value1', 'Value2']]
)
df_standard_scaled

Unnamed: 0,Category,Value1,Value2
0,0,-1.70084,-1.632993
1,1,-1.322876,-1.224745
2,0,0.0,-0.816497
3,2,-0.566947,-0.408248
4,1,-0.188982,0.0
5,0,0.188982,0.408248
6,0,0.566947,0.816497
7,2,0.0,1.224745
8,1,1.322876,1.632993
9,2,1.70084,0.0
