In [31]:
# The distinct class names; a name's position here is the integer code it is given below.
classes = ['ClassA', 'ClassB', 'ClassC', 'ClassD']
# Ten sample labels: two full passes over the class names, then the first two again.
instances = classes * 2 + classes[:2]

In [32]:
# Pair every class name with its position in `classes` (ClassA -> 0, ClassB -> 1, ...).
label_to_int = dict(zip(classes, range(len(classes))))
# Look up the integer code of each sample label.
encoded_labels = [label_to_int[name] for name in instances]
print("Encoded labels:", encoded_labels)

Encoded labels: [0, 1, 2, 3, 0, 1, 2, 3, 0, 1]


In [33]:
# Invert the name -> code mapping so that codes can be turned back into names.
int_to_label = dict(zip(label_to_int.values(), label_to_int.keys()))
# Decode every integer code back to its class name.
decoded_labels = [int_to_label[code] for code in encoded_labels]

print("Encoded labels: ", encoded_labels)
print("Decoded labels: ", decoded_labels)

Encoded labels:  [0, 1, 2, 3, 0, 1, 2, 3, 0, 1]
Decoded labels:  ['ClassA', 'ClassB', 'ClassC', 'ClassD', 'ClassA', 'ClassB', 'ClassC', 'ClassD', 'ClassA', 'ClassB']


# Sklearn - Label Encoder

In [34]:
from sklearn.preprocessing import LabelEncoder

In [35]:
# Encoder instance; it is fitted to the labels by the fit_transform call in the next cell.
label_encoder = LabelEncoder()

In [36]:
# Fit the encoder on `instances` and encode them in one call.
# NOTE: this rebinds `encoded_labels`, replacing the list produced by the manual mapping above.
encoded_labels = label_encoder.fit_transform(instances)
print("Encoded labels:", encoded_labels)

Encoded labels: [0 1 2 3 0 1 2 3 0 1]


In [37]:
# Map the integer codes back to the string labels they were derived from.
original_labels = label_encoder.inverse_transform(encoded_labels)
print("Encoded labels:", encoded_labels)
print("Original labels:", original_labels)

Encoded labels: [0 1 2 3 0 1 2 3 0 1]
Original labels: ['ClassA' 'ClassB' 'ClassC' 'ClassD' 'ClassA' 'ClassB' 'ClassC' 'ClassD'
 'ClassA' 'ClassB']


# One-Hot Encoding

In [38]:
import pandas as pd 

In [39]:
# Nine rows in a single 'Category' column: the values A, B, C repeated three times.
data = {'Category': ['A', 'B', 'C'] * 3}

In [40]:
# Build a DataFrame from the dict; its 'Category' key becomes the frame's only column.
df = pd.DataFrame(data)

In [41]:
# Preview the first rows to confirm the frame's layout before encoding.
df.head()

Unnamed: 0,Category
0,A
1,B
2,C
3,A
4,B


In [42]:
# Replace 'Category' with one indicator column per distinct value
# (Category_A, Category_B, Category_C).
one_hot_encoded_df = pd.get_dummies(df, columns=['Category'])
# Bare last expression so the notebook renders the resulting frame.
one_hot_encoded_df

Unnamed: 0,Category_A,Category_B,Category_C
0,True,False,False
1,False,True,False
2,False,False,True
3,True,False,False
4,False,True,False
5,False,False,True
6,True,False,False
7,False,True,False
8,False,False,True


In [43]:
# Same encoding, but the new columns are prefixed with 'Encoded' and drop_first=True
# omits the first category's column, leaving only Encoded_B and Encoded_C.
# NOTE: this rebinds `one_hot_encoded_df`, replacing the three-column result from the previous cell.
one_hot_encoded_df = pd.get_dummies(df, columns=['Category'], prefix ='Encoded', drop_first=True)
one_hot_encoded_df

Unnamed: 0,Encoded_B,Encoded_C
0,False,False
1,True,False
2,False,True
3,False,False
4,True,False
5,False,True
6,False,False
7,True,False
8,False,True


# Binary Encoding

In [44]:
!pip install category_encoders




[notice] A new release of pip available: 22.2.2 -> 24.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [45]:
import pandas as pd
import category_encoders as ce

In [46]:
# Rebuild the nine-row sample for this section: A, B, C repeated three times.
data = {'Category': ['A', 'B', 'C'] * 3}
# The dict's single key becomes the frame's only column.
df = pd.DataFrame.from_dict(data)

In [47]:
# Preview the first rows of the freshly rebuilt frame.
df.head()

Unnamed: 0,Category
0,A
1,B
2,C
3,A
4,B


In [48]:
# (rows, columns) of the input, for comparison with the encoded result below.
df.shape

(9, 1)

In [49]:
# Binary encoder limited to the 'Category' column; return_df=True asks for a DataFrame result.
encoder = ce.BinaryEncoder(cols=['Category'], return_df=True)

In [50]:
# Fit the encoder and transform `df` in one call. The three categories end up spread over
# two 0/1 columns (Category_0, Category_1) rather than one column per category.
df_binary_encoded = encoder.fit_transform(df)
# Bare last expression so the notebook renders the encoded frame.
df_binary_encoded

Unnamed: 0,Category_0,Category_1
0,0,1
1,1,0
2,1,1
3,0,1
4,1,0
5,1,1
6,0,1
7,1,0
8,1,1


# Ordinal Encoding 

In [51]:
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder

In [52]:
# Ten single-value rows of review labels.
# NOTE: the spelling 'execellent' (sic) must stay identical to the one used in the
# encoder's category order defined further down, otherwise the label would not match.
data = [[review] for review in (
    'good', 'bad', 'execellent', 'average',
    'good', 'average', 'execellent', 'bad',
    'average', 'good',
)]

In [53]:
# Turn the nested list into a one-column DataFrame named 'reviews'.
# NOTE: this rebinds `data` from a list to a DataFrame; the cells below use the DataFrame.
data = pd.DataFrame(data=data, columns=['reviews'])
# Preview the first rows.
data.head()

Unnamed: 0,reviews
0,good
1,bad
2,execellent
3,average
4,good


In [54]:
# (rows, columns) of the reviews frame.
data.shape

(10, 1)

In [63]:
# Explicit ordering for the 'reviews' column: a label's position in the inner list is the
# code it is encoded to. The outer list holds one ordering per encoded column.
categories = [
    ['bad', 'average', 'good', 'execellent'],
]

In [64]:
# Display the ordering that is handed to the encoder in the next cell.
categories

[['bad', 'average', 'good', 'execellent']]

In [65]:
# Ordinal encoder that uses the explicit label order in `categories`.
# NOTE: this rebinds `encoder`, which previously held the BinaryEncoder.
encoder = OrdinalEncoder(categories=categories)

In [66]:
# Fit and transform in one call: each review is replaced by the position of its label in
# `categories` (bad=0, average=1, good=2, execellent=3), returned as a float array.
encoded_data = encoder.fit_transform(data)
# Bare last expression so the notebook shows the encoded array.
encoded_data

array([[2.],
       [0.],
       [3.],
       [1.],
       [2.],
       [1.],
       [3.],
       [0.],
       [1.],
       [2.]])