In [1]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Toy dataset: two numeric columns (each with one missing entry) and one
# categorical column.
data = {
    'Age': [25, 30, None, 28, 35],
    'Gender': ['Female', 'Male', 'Male', 'Female', 'Male'],
    'Income': [50000, 60000, 45000, None, 70000],
}
df = pd.DataFrame(data)

# Column groups shared by the steps below.
numeric_cols = ['Age', 'Income']
categorical_cols = ['Gender']

# Step 1: imputation. Missing numeric entries are replaced by the mean of
# their column, and the filled values are written back into ``df``.
imputer = SimpleImputer(strategy='mean')
filled_numeric = imputer.fit_transform(df[numeric_cols])
df[numeric_cols] = filled_numeric
print("📌 After handling missing values:")
print(df)

# Step 2: one-hot encoding. The encoder output is densified with
# ``toarray()`` and labelled with the encoder's generated feature names.
encoder = OneHotEncoder()
encoded_matrix = encoder.fit_transform(df[categorical_cols])
encoded = encoded_matrix.toarray()
encoded_columns = encoder.get_feature_names_out(categorical_cols)
encoded_df = pd.DataFrame(encoded, columns=encoded_columns)
print("\n📌 After encoding categorical values:")
print(encoded_df)

# Step 3: standardisation. The (already imputed) numeric columns are scaled
# and stored in a separate frame; ``df`` itself is left unscaled.
scaler = StandardScaler()
scaled = scaler.fit_transform(df[numeric_cols])
scaled_df = pd.DataFrame(scaled, columns=['Scaled Age', 'Scaled Income'])
print("\n📌 After feature scaling:")
print(scaled_df)


📌 After handling missing values:
    Age  Gender   Income
0  25.0  Female  50000.0
1  30.0    Male  60000.0
2  29.5    Male  45000.0
3  28.0  Female  56250.0
4  35.0    Male  70000.0

📌 After encoding categorical values:
   Gender_Female  Gender_Male
0            1.0          0.0
1            0.0          1.0
2            0.0          1.0
3            1.0          0.0
4            0.0          1.0

📌 After feature scaling:
   Scaled Age  Scaled Income
0   -1.382164      -0.727778
1    0.153574       0.436667
2    0.000000      -1.310001
3   -0.460721       0.000000
4    1.689312       1.601112
