Feature Encoding & Feature Scaling

In [1]:
# FEATURE ENCODING & FEATURE SCALING PRACTICAL
# 1. Import Required Libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.compose import ColumnTransformer

# 2. Create Sample Dataset
# Five records with two text columns (Name, City) and two numeric columns
# (Experience, Salary) whose magnitudes differ by several orders.
data = dict(
    Name=["Amit", "Sita", "Rohit", "Priya", "Kiran"],
    City=["Delhi", "Mumbai", "Delhi", "Chennai", "Mumbai"],  # categorical feature
    Experience=[1, 3, 2, 5, 4],                               # numerical feature
    Salary=[30000, 55000, 40000, 80000, 65000],               # numerical feature
)

# Each dict key becomes a column; rows get the default 0..4 index.
df = pd.DataFrame.from_dict(data)
print("Original Dataset:\n")
display(df)

#                FEATURE ENCODING
# 3. Label Encoding (for Single Categorical Column)
# LabelEncoder maps every distinct city to an integer code, numbered in the
# sorted order of the class names (Chennai=0, Delhi=1, Mumbai=2 here).
# NOTE: scikit-learn documents LabelEncoder as an encoder for target labels (y).
# The integer codes imply an ordering that cities do not have, so one-hot
# encoding is usually the safer representation for a nominal input feature.
print("\n--- Label Encoding City Column ---")
label_encoder = LabelEncoder()
df['City_Label'] = label_encoder.fit_transform(df['City'])

display(df)

print("\nMapping of Label Encoding:")
# .tolist() turns NumPy scalars into built-in Python objects so the mapping
# prints as {'Chennai': 0, ...} rather than {'Chennai': np.int64(0), ...}.
city_names = label_encoder.classes_.tolist()
city_codes = label_encoder.transform(label_encoder.classes_).tolist()
print(dict(zip(city_names, city_codes)))

# 4. One-Hot Encoding (Dummy Variables)
# Produces one 0/1 indicator column per distinct city (City_<name>).
print("\n--- One Hot Encoding City Column ---")

# sparse_output=False makes the encoder return a dense NumPy array, which can
# be wrapped in a DataFrame directly.
ohe = OneHotEncoder(sparse_output=False)

# Double brackets: the encoder expects a 2-D input (a one-column DataFrame).
city_encoded = ohe.fit_transform(df[['City']])
# Reuse df's index: pd.concat(axis=1) aligns rows by index label, so a fresh
# 0..n-1 index would produce NaN-padded rows if df ever carried a different one.
city_encoded_df = pd.DataFrame(
    city_encoded,
    columns=ohe.get_feature_names_out(['City']),
    index=df.index,
)

df_ohe = pd.concat([df, city_encoded_df], axis=1)
display(df_ohe)

#                FEATURE SCALING
# 5. StandardScaler (Mean=0, Std=1)
# Standardizes each column as (x - mean) / std. scikit-learn uses the
# population standard deviation (ddof=0), so results differ slightly from a
# manual computation based on pandas' default .std() (ddof=1).
print("\n--- Standard Scaling (Experience & Salary) ---")

scaler_standard = StandardScaler()
scaled_standard = scaler_standard.fit_transform(df[['Experience', 'Salary']])

# Reuse df's index: pd.concat(axis=1) aligns rows by index label, so a fresh
# 0..n-1 index would produce NaN-padded rows if df ever carried a different one.
df_standard = pd.DataFrame(
    scaled_standard,
    columns=['Experience_Std', 'Salary_Std'],
    index=df.index,
)
df_scaled1 = pd.concat([df, df_standard], axis=1)
display(df_scaled1)

# 6. MinMaxScaler (Range 0 to 1)
# Rescales each column as (x - min) / (max - min), so the smallest value in a
# column maps to 0 and the largest to 1.
print("\n--- MinMax Scaling (Experience & Salary) ---")

scaler_minmax = MinMaxScaler()
scaled_minmax = scaler_minmax.fit_transform(df[['Experience', 'Salary']])

# Reuse df's index: pd.concat(axis=1) aligns rows by index label, so a fresh
# 0..n-1 index would produce NaN-padded rows if df ever carried a different one.
df_minmax = pd.DataFrame(
    scaled_minmax,
    columns=['Experience_MinMax', 'Salary_MinMax'],
    index=df.index,
)
df_scaled2 = pd.concat([df, df_minmax], axis=1)
display(df_scaled2)

# 7. Final Output
# Completion banner; it is only reached if every statement above ran without raising.
print("\nPractical Completed Successfully!")


Original Dataset:



Unnamed: 0,Name,City,Experience,Salary
0,Amit,Delhi,1,30000
1,Sita,Mumbai,3,55000
2,Rohit,Delhi,2,40000
3,Priya,Chennai,5,80000
4,Kiran,Mumbai,4,65000



--- Label Encoding City Column ---


Unnamed: 0,Name,City,Experience,Salary,City_Label
0,Amit,Delhi,1,30000,1
1,Sita,Mumbai,3,55000,2
2,Rohit,Delhi,2,40000,1
3,Priya,Chennai,5,80000,0
4,Kiran,Mumbai,4,65000,2



Mapping of Label Encoding:
{'Chennai': np.int64(0), 'Delhi': np.int64(1), 'Mumbai': np.int64(2)}

--- One Hot Encoding City Column ---


Unnamed: 0,Name,City,Experience,Salary,City_Label,City_Chennai,City_Delhi,City_Mumbai
0,Amit,Delhi,1,30000,1,0.0,1.0,0.0
1,Sita,Mumbai,3,55000,2,0.0,0.0,1.0
2,Rohit,Delhi,2,40000,1,0.0,1.0,0.0
3,Priya,Chennai,5,80000,0,1.0,0.0,0.0
4,Kiran,Mumbai,4,65000,2,0.0,0.0,1.0



--- Standard Scaling (Experience & Salary) ---


Unnamed: 0,Name,City,Experience,Salary,City_Label,Experience_Std,Salary_Std
0,Amit,Delhi,1,30000,1,-1.414214,-1.354398
1,Sita,Mumbai,3,55000,2,0.0,0.056433
2,Rohit,Delhi,2,40000,1,-0.707107,-0.790066
3,Priya,Chennai,5,80000,0,1.414214,1.467265
4,Kiran,Mumbai,4,65000,2,0.707107,0.620766



--- MinMax Scaling (Experience & Salary) ---


Unnamed: 0,Name,City,Experience,Salary,City_Label,Experience_MinMax,Salary_MinMax
0,Amit,Delhi,1,30000,1,0.0,0.0
1,Sita,Mumbai,3,55000,2,0.5,0.5
2,Rohit,Delhi,2,40000,1,0.25,0.2
3,Priya,Chennai,5,80000,0,1.0,1.0
4,Kiran,Mumbai,4,65000,2,0.75,0.7



Practical Completed Successfully!
