## Ordinal Encoding

Ordinal encoding is an encoding technique used when the categorical variables in the data are ordinal, i.e. their labels have a natural order. It converts each label into an integer value that reflects that order.

In [1]:
#@ Importing the libraries

import pandas as pd
import numpy as np

In [2]:
#@ Reading the customer dataset from CSV into a DataFrame and previewing it

csv_path = './dataset/customer_data.csv'
data = pd.read_csv(csv_path)
data.head()

Unnamed: 0,age,gender,review,education,purchased
0,30,Female,Average,School,No
1,68,Female,Poor,UG,No
2,70,Female,Good,PG,No
3,72,Female,Good,PG,No
4,16,Female,Average,UG,No


In [3]:
# Here, we skip the 'age' and 'gender' columns and keep only the columns we want to encode.
# The columns are selected by name rather than by position so that this cell is
# idempotent: a positional slice such as data.iloc[:, 2:] would drop two more
# columns every time the cell is re-run.

data = data[['review', 'education', 'purchased']]
data.head()

Unnamed: 0,review,education,purchased
0,Average,School,No
1,Poor,UG,No
2,Good,PG,No
3,Good,PG,No
4,Average,UG,No


In [4]:
#@ First we do the train/test split: the features are the first two columns
#@ and the target is the last column; 20% of the rows are held out for testing

from sklearn.model_selection import train_test_split

# random_state pins the shuffle so the same rows land in train and test on every
# "Restart & Run All"; without it each run produces a different split.
X_train, X_test, y_train, y_test = train_test_split(data.iloc[:, 0:2],
                                                    data.iloc[:, -1],
                                                    test_size = 0.2,
                                                    random_state = 42)

In [5]:
#@ Inspecting the training features before they are encoded
X_train

Unnamed: 0,review,education
45,Poor,PG
22,Poor,PG
43,Poor,PG
19,Poor,PG
18,Good,School
24,Average,PG
25,Good,School
26,Poor,PG
5,Average,School
0,Average,School


In [6]:
#@ Importing ordinal encoder

from sklearn.preprocessing import OrdinalEncoder

In [7]:
# Building the ordinal encoder. The 'categories' parameter takes a list containing one
# inner list per feature column; each inner list spells out that column's labels in the
# order in which they should be ranked.

review_levels = ['Poor', 'Average', 'Good']
education_levels = ['School', 'UG', 'PG']
oe = OrdinalEncoder(categories = [review_levels, education_levels])

In [8]:
#@ Fitting the encoder
# Note: the encoder is fitted on the training data only; the same fitted encoder
# is then used to transform both the train and the test data
oe.fit(X_train)

OrdinalEncoder(categories=[['Poor', 'Average', 'Good'], ['School', 'UG', 'PG']])

In [9]:
#@ Now, we transform the training data with the fitted encoder
# Note: X_train is overwritten by the encoded array, so re-running this cell on its
# own would pass the already-encoded values back into oe.transform; re-run the
# train/test split cell first.

X_train = oe.transform(X_train)
X_train

array([[0., 2.],
       [0., 2.],
       [0., 2.],
       [0., 2.],
       [2., 0.],
       [1., 2.],
       [2., 0.],
       [0., 2.],
       [1., 0.],
       [1., 0.],
       [0., 2.],
       [0., 2.],
       [2., 2.],
       [2., 1.],
       [2., 1.],
       [2., 0.],
       [1., 1.],
       [2., 1.],
       [2., 0.],
       [0., 2.],
       [1., 1.],
       [1., 1.],
       [2., 2.],
       [0., 1.],
       [0., 0.],
       [1., 1.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [2., 1.],
       [1., 1.],
       [2., 1.],
       [1., 0.],
       [1., 1.],
       [0., 1.],
       [2., 2.],
       [1., 2.],
       [1., 0.],
       [1., 2.]])

In [10]:
#@ Transforming the test data with the same encoder that was fitted on the training data
X_test = oe.transform(X_test)
X_test

array([[1., 0.],
       [2., 2.],
       [0., 1.],
       [0., 2.],
       [2., 2.],
       [2., 0.],
       [0., 1.],
       [2., 0.],
       [2., 1.],
       [2., 2.]])

In [11]:
#@ Checking the category order the encoder holds for each column
oe.categories_

[array(['Poor', 'Average', 'Good'], dtype=object),
 array(['School', 'UG', 'PG'], dtype=object)]

### Now, we will encode the output column, 'purchased', using LabelEncoder

In [12]:
from sklearn.preprocessing import LabelEncoder

In [13]:
#@ Creating the label encoder and fitting it on the training target only
le = LabelEncoder()           # creating the object
le.fit(y_train)

LabelEncoder()

In [14]:
#@ Checking the target labels the encoder holds after fitting
le.classes_

array(['No', 'Yes'], dtype=object)

In [15]:
#@ Encoding the train and test targets with the fitted label encoder
# Note: both names are overwritten by the encoded values, so re-running this cell on
# its own would pass already-encoded values back into le.transform; re-run the
# train/test split cell first.
y_train = le.transform(y_train)
y_test = le.transform(y_test)

In [16]:
#@ Inspecting the encoded training target
# presumably 0 = 'No' and 1 = 'Yes', following the order shown by le.classes_ - verify
y_train

array([1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0,
       0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0])