In [1]:
# Encoding techniques covered: one-hot, label, frequency, ordinal, and mean encoding.
# Related Python tools worth knowing: map, reduce, filter, lambda, apply, and list comprehensions.
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import numpy as np

In [2]:
# Toy dataset: three categorical features, six rows, shared by the encoding demos.
data = {
    'Color': ['Red', 'Green', 'Blue', 'Green', 'Blue', 'Red'],
    'Size': ['S', 'M', 'L', 'M', 'L', 'S'],
    'Category': ['A', 'B', 'A', 'A', 'B', 'C'],
}

# Build the working frame; the dict keys become the column names.
df = pd.DataFrame(data)
df

Unnamed: 0,Color,Size,Category
0,Red,S,A
1,Green,M,B
2,Blue,L,A
3,Green,M,A
4,Blue,L,B
5,Red,S,C


In [3]:
# 1. One Hot Encoding
# Expand 'Color' into one 0/1 indicator column per distinct colour value.
one_hot = pd.get_dummies(data=df['Color'], dtype=int)

# Append the indicator columns to the right of the existing columns
# (column-wise concatenation, aligned on the shared row index).
df_one_hot = pd.concat(
    [df, one_hot],
    axis='columns',
)

print("\nAfter One Hot Encoding: ")
df_one_hot


After One Hot Encoding: 


Unnamed: 0,Color,Size,Category,Blue,Green,Red
0,Red,S,A,0,0,1
1,Green,M,B,0,1,0
2,Blue,L,A,1,0,0
3,Green,M,A,0,1,0
4,Blue,L,B,1,0,0
5,Red,S,C,0,0,1


In [4]:
# 2. Label Encoding
# LabelEncoder assigns integer codes 0..n_classes-1 following the sorted order of
# the distinct values, so the codes carry no semantic ordering
# (for 'Size' this yields L -> 0, M -> 1, S -> 2).
#
# Each column gets its own fitted encoder: refitting one shared instance would
# discard the class mappings learned for the earlier columns, making it
# impossible to inverse_transform (or re-apply) the 'Color' and 'Size' codes.
label_encoders = {}
for column in ['Color', 'Size', 'Category']:
    label_encoders[column] = LabelEncoder()
    df[f'{column}_Label'] = label_encoders[column].fit_transform(df[column])

# Keep the original name bound to the last-fitted ('Category') encoder so any
# later cell that uses `label_encoder` sees the same fitted state as before.
label_encoder = label_encoders['Category']

print("\nAfter Label Encoding: ")
df[['Color', 'Color_Label', 'Size', 'Size_Label', 'Category', 'Category_Label']]


After Label Encoding: 


Unnamed: 0,Color,Color_Label,Size,Size_Label,Category,Category_Label
0,Red,2,S,2,A,0
1,Green,1,M,1,B,1
2,Blue,0,L,0,A,0
3,Green,1,M,1,A,0
4,Blue,0,L,0,B,1
5,Red,2,S,2,C,2


In [5]:
# 3. Frequency Encoding
# Build a plain {category: number of occurrences} lookup.
freq_encoding = (
    df['Category']
    .value_counts()
    .to_dict()
)

# Replace every category by how many times it appears in the column.
df['Category_Frequency'] = df['Category'].map(freq_encoding)

print("\nAfter Frequency Encoding: ")
df.loc[:, ['Category', 'Category_Frequency']]


After Frequency Encoding: 


Unnamed: 0,Category,Category_Frequency
0,A,3
1,B,2
2,A,3
3,A,3
4,B,2
5,C,1


In [6]:
# 4. Ordinal Encoding
# Hand-written rank for each size, following the predefined order S < M < L.
ordinal_mapping = dict(S=1, M=2, L=3)

# Look up each row's size in the rank table.
df['Size_Ordinal'] = df['Size'].map(ordinal_mapping)

print("\nAfter Ordinal Encoding: ")
df.loc[:, ['Size', 'Size_Ordinal']]


After Ordinal Encoding: 


Unnamed: 0,Size,Size_Ordinal
0,S,1
1,M,2
2,L,3
3,M,2
4,L,3
5,S,1


In [7]:
# 5. Mean Encoding
# Replace each category by the mean of a numeric value within that category.
# Here the numeric value is the size score (S=1, M=2, L=3) of each row.
size_scores = df['Size'].map({'S': 1, 'M': 2, 'L': 3})

# Map once, then use the built-in grouped mean; this avoids groupby().apply()
# with a lambda that would rebuild and re-apply the mapping for every group.
mean_encoding = size_scores.groupby(df['Category']).mean().to_dict()
df['Category_Mean'] = df['Category'].map(mean_encoding)

print("\nAfter Mean Encoding: ")
df[['Category', 'Category_Mean']]


After Mean Encoding: 


Unnamed: 0,Category,Category_Mean
0,A,2.0
1,B,2.5
2,A,2.0
3,A,2.0
4,B,2.5
5,C,1.0
