In [1]:
import pandas as pd

In [2]:
df = pd.DataFrame({
    "Color": ["Red", "Green", "Blue", "Green", "Red"],
    "Size": ["Small", "Medium", "Large", "Medium", "Small"],
    "City": ["Hyderbad", "Delhi", "Mumbai", "Delhi", "Chennai"],
    "Target": [10,20,15,25,12]
})
df

Unnamed: 0,Color,Size,City,Target
0,Red,Small,Hyderbad,10
1,Green,Medium,Delhi,20
2,Blue,Large,Mumbai,15
3,Green,Medium,Delhi,25
4,Red,Small,Chennai,12


# Label Encoding

In [3]:
from sklearn.preprocessing import LabelEncoder

In [4]:
df["Color_Label"] = LabelEncoder().fit_transform(df["Color"])
df[["Color", "Color_Label"]]

Unnamed: 0,Color,Color_Label
0,Red,2
1,Green,1
2,Blue,0
3,Green,1
4,Red,2


# One-Hot Encoding

In [5]:
one_hot = pd.get_dummies(df["Color"], prefix="Color")
one_hot

Unnamed: 0,Color_Blue,Color_Green,Color_Red
0,False,False,True
1,False,True,False
2,True,False,False
3,False,True,False
4,False,False,True


In [6]:
one_hot = pd.get_dummies(df, columns=["Color"])
print(one_hot)

     Size      City  Target  Color_Label  Color_Blue  Color_Green  Color_Red
0   Small  Hyderbad      10            2       False        False       True
1  Medium     Delhi      20            1       False         True      False
2   Large    Mumbai      15            0        True        False      False
3  Medium     Delhi      25            1       False         True      False
4   Small   Chennai      12            2       False        False       True


In [7]:
from sklearn.preprocessing import OneHotEncoder

In [8]:
encoder = OneHotEncoder(sparse_output=False)
encoded = encoder.fit_transform(df[["City"]])
encoded_df = pd.DataFrame(encoded,columns=encoder.get_feature_names_out(["City"]))
encoded_df

Unnamed: 0,City_Chennai,City_Delhi,City_Hyderbad,City_Mumbai
0,0.0,0.0,1.0,0.0
1,0.0,1.0,0.0,0.0
2,0.0,0.0,0.0,1.0
3,0.0,1.0,0.0,0.0
4,1.0,0.0,0.0,0.0


# Ordinal Encoding

In [9]:
from sklearn.preprocessing import OrdinalEncoder

In [10]:
order = [["Small", "Medium", "Large"]]
df["Size_Ordinal"] = OrdinalEncoder(categories=order).fit_transform(df[["Size"]])
df[["Size","Size_Ordinal"]]

Unnamed: 0,Size,Size_Ordinal
0,Small,0.0
1,Medium,1.0
2,Large,2.0
3,Medium,1.0
4,Small,0.0


# Targetting Encoding

In [11]:
te = df.groupby("City")["Target"].mean()
df["City_Target_Enc"] = df["City"].map(te)
df[["City", "City_Target_Enc"]]

Unnamed: 0,City,City_Target_Enc
0,Hyderbad,10.0
1,Delhi,22.5
2,Mumbai,15.0
3,Delhi,22.5
4,Chennai,12.0


# Frequency Encoding

In [12]:
freq = df["City"].value_counts()
df["City_Target_Enc"] = df["City"].map(freq)
df[["City", "City_Target_Enc"]]

Unnamed: 0,City,City_Target_Enc
0,Hyderbad,1
1,Delhi,2
2,Mumbai,1
3,Delhi,2
4,Chennai,1


# Binary Encoding

In [13]:
import category_encoders as ce

In [14]:
data_binary = ce.BinaryEncoder(cols=["City"]).fit_transform(df["City"])

In [15]:
data_binary

Unnamed: 0,City_0,City_1,City_2
0,0,0,1
1,0,1,0
2,0,1,1
3,0,1,0
4,1,0,0
