# Handling categorical Features:

![](images/Types.png)

In [23]:
import pandas as pd

In [24]:
df = pd.read_csv('Datasets/data_for_encoding.csv')

In [25]:
df.sample(5)

Unnamed: 0,income level,satisfaction rating,purchased
94,50K-100K,like,no
180,less than 50K,extremely like,yes
187,50K-100K,extremely like,no
173,over 100K,neutral,yes
201,less than 50K,like,yes


## When to use:

![](images/chart.png)

## 1. OrdinalEncoder

`Ordinal encoding` consists of converting categorical data into numeric data by assigning a unique integer to each category.

![](images/ordinal.png)

In [26]:
from sklearn.preprocessing import OrdinalEncoder

In [27]:
from sklearn.model_selection import train_test_split

x = df.drop('purchased',axis=1)
y = df['purchased']
X_train,X_test,y_train,y_test = train_test_split(x,y,test_size=0.3)

X_train.shape , X_test.shape

((141, 2), (61, 2))

In [28]:
X_train

Unnamed: 0,income level,satisfaction rating
33,less than 50K,extremely dislike
197,over 100K,neutral
121,50K-100K,dislike
80,over 100K,extremely dislike
97,50K-100K,neutral
...,...,...
184,50K-100K,dislike
67,50K-100K,neutral
83,over 100K,neutral
157,50K-100K,dislike


In [29]:
print(df['income level'].unique())
print(df['satisfaction rating'].unique())

['less than 50K' '50K-100K' 'over 100K']
['neutral' 'like' 'extremely dislike' 'extremely like' 'dislike']


In [30]:
oe = OrdinalEncoder(categories=[['less than 50K', '50K-100K', 'over 100K'],['extremely dislike','dislike','neutral','like','extremely like']])

In [31]:
oe.fit(X_train)

In [32]:
X_train_oe = oe.transform(X_train)
X_test_oe = oe.transform(X_test)

In [33]:
X_train_oe

array([[0., 0.],
       [2., 2.],
       [1., 1.],
       [2., 0.],
       [1., 2.],
       [2., 4.],
       [0., 2.],
       [2., 2.],
       [1., 3.],
       [1., 0.],
       [2., 2.],
       [2., 3.],
       [1., 4.],
       [1., 3.],
       [1., 2.],
       [0., 3.],
       [0., 2.],
       [0., 4.],
       [0., 3.],
       [1., 2.],
       [0., 2.],
       [2., 1.],
       [0., 1.],
       [0., 0.],
       [0., 3.],
       [0., 3.],
       [1., 0.],
       [2., 3.],
       [2., 2.],
       [2., 3.],
       [2., 3.],
       [2., 1.],
       [2., 0.],
       [0., 0.],
       [0., 2.],
       [2., 1.],
       [0., 4.],
       [2., 2.],
       [1., 1.],
       [1., 2.],
       [1., 4.],
       [2., 1.],
       [1., 1.],
       [0., 2.],
       [0., 2.],
       [1., 4.],
       [0., 3.],
       [0., 1.],
       [1., 1.],
       [2., 1.],
       [2., 4.],
       [0., 2.],
       [0., 3.],
       [1., 1.],
       [0., 1.],
       [0., 1.],
       [0., 4.],
       [2., 2.],
       [2., 1.

In [37]:
X_train_oe = pd.DataFrame(X_train_oe, columns=X_train.columns,dtype=int)
X_test_oe = pd.DataFrame(X_test_oe, columns=X_test.columns,dtype=int)

In [40]:
X_train

Unnamed: 0,income level,satisfaction rating
33,less than 50K,extremely dislike
197,over 100K,neutral
121,50K-100K,dislike
80,over 100K,extremely dislike
97,50K-100K,neutral
...,...,...
184,50K-100K,dislike
67,50K-100K,neutral
83,over 100K,neutral
157,50K-100K,dislike


In [39]:
X_train_oe

Unnamed: 0,income level,satisfaction rating
0,0,0
1,2,2
2,1,1
3,2,0
4,1,2
...,...,...
136,1,1
137,1,2
138,2,2
139,1,1


## 2. LabelEncoder:

Works same as the ordinal encoding but it is applied on output feature..

In [41]:
from sklearn.preprocessing import LabelEncoder

In [42]:
le = LabelEncoder()

In [43]:
le.fit(y_train)

In [44]:
le.classes_

array(['no', 'yes'], dtype=object)

In [45]:
y_train = le.transform(y_train)
y_test = le.transform(y_test)

In [46]:
y_train

array([0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1,
       1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0,
       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0,
       1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0,
       1, 1, 0, 1, 1, 1, 1, 1, 1])

## Ordinal Encoding VS Label Encoding:

![](images/diff.png)