#### Import libraries and packages

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read the customer dataset from the notebook's working storage and
# preview five randomly drawn rows to get a feel for the columns.
df = pd.read_csv(filepath_or_buffer='/content/customer.csv')
df.sample(n=5)

Unnamed: 0,age,gender,review,education,purchased
35,74,Male,Poor,School,Yes
1,68,Female,Poor,UG,No
24,16,Female,Average,PG,Yes
25,57,Female,Good,School,No
20,57,Female,Average,School,Yes


In [3]:
# Keep only the two ordinal features and the target, selected by name.
# Selecting by label instead of by position keeps this cell idempotent:
# re-running it on the already-trimmed frame returns the same three columns
# rather than slicing two more columns off the left each time.
df = df[['review', 'education', 'purchased']]
df.head()

Unnamed: 0,review,education,purchased
0,Average,School,No
1,Poor,UG,No
2,Good,PG,No
3,Good,PG,No
4,Average,UG,No


#### Ordinal Encoder from Scikit Learn

In [4]:
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing. Everything except 'purchased' is a
# feature; 'purchased' is the target. The fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns='purchased'),
    df['purchased'],
    test_size=0.3,
    random_state=40,
)

In [5]:
# Preview the first rows of the training features before they are encoded.
X_train.head()

Unnamed: 0,review,education
41,Good,PG
23,Good,School
36,Good,UG
5,Average,School
13,Average,School


#### Create object for Ordinal Encoder for review and education features

In [6]:
# `categories` takes one list per feature column. Each list spells out that feature's
# categories from lowest to highest rank, and a category's position in its list becomes
# its integer code (e.g. 'Poor' -> 0, 'Average' -> 1, 'Good' -> 2).
# The lists must appear in the same order as the columns of the data passed to fit
# (here: review first, then education).
# If no order is given the encoder does not choose one at random: it infers the categories
# from the training data and uses them in sorted (alphabetical) order, which would not
# reflect the intended ranking.
oe = OrdinalEncoder(categories=[['Poor','Average','Good'],['School','UG','PG']])

In [7]:
# Learn the encoding from the training split only, then apply that same
# fitted encoder to the test split so both share identical category codes.
X_train = oe.fit_transform(X_train)
X_test = oe.transform(X_test)

In [8]:
# Display the encoded training features: column 0 holds the review codes,
# column 1 the education codes.
X_train

array([[2., 2.],
       [2., 0.],
       [2., 1.],
       [1., 0.],
       [1., 0.],
       [0., 2.],
       [0., 1.],
       [0., 2.],
       [1., 2.],
       [2., 2.],
       [0., 2.],
       [2., 0.],
       [0., 2.],
       [1., 0.],
       [1., 0.],
       [0., 0.],
       [0., 2.],
       [0., 1.],
       [1., 1.],
       [1., 1.],
       [0., 2.],
       [1., 1.],
       [2., 1.],
       [2., 1.],
       [2., 2.],
       [2., 1.],
       [0., 0.],
       [0., 2.],
       [2., 2.],
       [0., 0.],
       [0., 1.],
       [1., 2.],
       [0., 0.],
       [0., 2.],
       [2., 0.]])

In [9]:
# Show the category order the fitted encoder holds for each feature;
# a category's position in its array is the code it was mapped to.
oe.categories_

[array(['Poor', 'Average', 'Good'], dtype=object),
 array(['School', 'UG', 'PG'], dtype=object)]

#### Create object for Label Encoder for target feature only

In [10]:
from sklearn.preprocessing import LabelEncoder

# LabelEncoder is used for the target column only. Unlike OrdinalEncoder it
# accepts no user-defined order: the classes it learns are kept in sorted
# order, so here 'No' becomes 0 and 'Yes' becomes 1.
le = LabelEncoder().fit(y_train)
le.classes_

array(['No', 'Yes'], dtype=object)

In [11]:
# Convert the 'No'/'Yes' labels of both splits to integer codes using the
# encoder that was fitted on the training labels.
y_train, y_test = le.transform(y_train), le.transform(y_test)

In [12]:
# Display the encoded training labels (0 = 'No', 1 = 'Yes', following le.classes_).
y_train

array([1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0])