In [1]:
import pandas as pd

In [2]:
# Toy dataset used by every encoding demo below: three categorical
# columns (Color, Size, City) and one numeric Target column, five rows.
toy_columns = dict(
    Color=["Black", "Blue", "Green", "Purple", "Red"],
    Size=["Small", "Medium", "Large", "Medium", "Small"],
    City=["Anakapalli", "Kakinada", "Vizag", "Hyderabad", "Mumbai"],
    Target=[45, 34, 22, 26, 12],
)
df = pd.DataFrame(data=toy_columns)
df

Unnamed: 0,Color,Size,City,Target
0,Black,Small,Anakapalli,45
1,Blue,Medium,Kakinada,34
2,Green,Large,Vizag,22
3,Purple,Medium,Hyderabad,26
4,Red,Small,Mumbai,12


## LABEL ENCODING

In [3]:
from sklearn.preprocessing import LabelEncoder

In [4]:
df["Color_Label"] = LabelEncoder().fit_transform(df["Color"])
df[["Color", "Color_Label"]]

Unnamed: 0,Color,Color_Label
0,Black,0
1,Blue,1
2,Green,2
3,Purple,3
4,Red,4


## ONE-HOT ENCODING

In [5]:
# One-hot encode the Color column on its own: every distinct colour becomes a
# boolean indicator column named "Color_<value>".
one_hot = df["Color"].pipe(pd.get_dummies, prefix="Color")
one_hot

Unnamed: 0,Color_Black,Color_Blue,Color_Green,Color_Purple,Color_Red
0,True,False,False,False,False
1,False,True,False,False,False
2,False,False,True,False,False
3,False,False,False,True,False
4,False,False,False,False,True


In [6]:
# One-hot encode Color inside the full frame: the Color column is replaced by
# "Color_<value>" indicator columns, while every other column currently in df
# (including Color_Label added earlier) is carried through unchanged.
one_hot = pd.get_dummies(df, columns=["Color"])
# Leave the frame as the cell's last expression instead of print(): it then
# renders as a rich table, consistent with the other cells, rather than as
# plain text that wraps wide frames across several blocks.
one_hot

     Size        City  Target  Color_Label  Color_Black  Color_Blue  \
0   Small  Anakapalli      45            0         True       False   
1  Medium    Kakinada      34            1        False        True   
2   Large       Vizag      22            2        False       False   
3  Medium   Hyderabad      26            3        False       False   
4   Small      Mumbai      12            4        False       False   

   Color_Green  Color_Purple  Color_Red  
0        False         False      False  
1        False         False      False  
2         True         False      False  
3        False          True      False  
4        False         False       True  


In [7]:
from sklearn.preprocessing import OneHotEncoder

## ORDINAL ENCODING

In [8]:
from sklearn.preprocessing import OrdinalEncoder

In [9]:
order = [["Small", "Medium", "Large"]]
df["Size_Ordinal"] = OrdinalEncoder(categories=order).fit_transform(df[["Size"]])
df[["Size","Size_Ordinal"]]

Unnamed: 0,Size,Size_Ordinal
0,Small,0.0
1,Medium,1.0
2,Large,2.0
3,Medium,1.0
4,Small,0.0


## TARGET ENCODING

In [11]:
te = df.groupby("City")["Target"].mean()
df["City_Traget_Enc"] = df["City"].map(te)


In [12]:
# Show the whole frame, including the encoded columns added so far.
df

Unnamed: 0,Color,Size,City,Target,Color_Label,Size_Ordinal,City_Traget_Enc
0,Black,Small,Anakapalli,45,0,0.0,45.0
1,Blue,Medium,Kakinada,34,1,1.0,34.0
2,Green,Large,Vizag,22,2,2.0,22.0
3,Purple,Medium,Hyderabad,26,3,1.0,26.0
4,Red,Small,Mumbai,12,4,0.0,12.0


## FREQUENCY ENCODING

In [14]:
freq = df["City"].value_counts()
df["City_freq_Enc"] = df["City"].map(freq)
df

Unnamed: 0,Color,Size,City,Target,Color_Label,Size_Ordinal,City_Traget_Enc,City_freq_Enc
0,Black,Small,Anakapalli,45,0,0.0,45.0,1
1,Blue,Medium,Kakinada,34,1,1.0,34.0,1
2,Green,Large,Vizag,22,2,2.0,22.0,1
3,Purple,Medium,Hyderabad,26,3,1.0,26.0,1
4,Red,Small,Mumbai,12,4,0.0,12.0,1


## BINARY ENCODING

In [15]:
import category_encoders as ce

In [16]:
# Binary encoding of City: the result holds 0/1 columns named City_0, City_1, ...
# instead of one column per category.
binary_encoder = ce.BinaryEncoder(cols=["City"])
data_binary = binary_encoder.fit_transform(df["City"])

In [17]:
# Show the binary-encoded City columns.
data_binary

Unnamed: 0,City_0,City_1,City_2
0,0,0,1
1,0,1,0
2,0,1,1
3,1,0,0
4,1,0,1


## OUTLIERS