### Import the modules

In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import LabelEncoder

### Load the Data

In [2]:
# Read the raw customer table from the CSV file into a DataFrame.
df = pd.read_csv(filepath_or_buffer="/content/customer.csv")

In [3]:
# Preview the first ten rows of the raw table.
df.head(n=10)

Unnamed: 0,age,gender,review,education,purchased
0,30,Female,Average,School,No
1,68,Female,Poor,UG,No
2,70,Female,Good,PG,No
3,72,Female,Good,PG,No
4,16,Female,Average,UG,No
5,31,Female,Average,School,Yes
6,18,Male,Good,School,No
7,60,Female,Poor,School,Yes
8,65,Female,Average,UG,No
9,74,Male,Good,UG,Yes


In [4]:
# Keep only the two categorical features and the target column.
# Selecting by name instead of by position keeps this cell idempotent:
# running it a second time returns the same three columns, whereas a
# positional slice would strip two more columns on every re-run.
df = df[["review", "education", "purchased"]]

In [5]:
# Confirm which columns remain after the selection step.
df.head(n=5)

Unnamed: 0,review,education,purchased
0,Average,School,No
1,Poor,UG,No
2,Good,PG,No
3,Good,PG,No
4,Average,UG,No


### Separate independent and dependent features

In [6]:
# Features: every column except the target.
X = df.drop(columns=["purchased"])
# Target: the "purchased" column on its own.
y = df["purchased"]

### Train/Test Split

In [7]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)

In [8]:
# Peek at the first rows of the training features.
X_train.head(n=5)

Unnamed: 0,review,education
24,Average,PG
48,Good,UG
17,Poor,UG
12,Poor,School
27,Poor,PG


In [9]:
# Peek at the first rows of the test features.
X_test.head(n=5)

Unnamed: 0,review,education
36,Good,UG
47,Good,PG
28,Poor,School
9,Good,UG
13,Average,School


### Ordinal Encoding

In [10]:
# Spell out the category order for each feature so the integer codes follow
# the natural ranking instead of alphabetical order. One list per column,
# in the same order as the columns of X.
oe = OrdinalEncoder(
    categories=[
        ["Poor", "Average", "Good"],  # review: Poor -> 0, Average -> 1, Good -> 2
        ["School", "UG", "PG"],       # education: School -> 0, UG -> 1, PG -> 2
    ]
)

In [11]:
# Fit the encoder on the training features only, then apply that same
# fitted mapping to both the training and the test features.
oe.fit(X_train)
X_train = oe.transform(X_train)
X_test = oe.transform(X_test)

In [12]:
# Show the first two encoded training rows.
X_train[:2]

array([[1., 2.],
       [2., 1.]])

In [13]:
# Inspect the per-column category order held by the fitted encoder.
oe.categories_

[array(['Poor', 'Average', 'Good'], dtype=object),
 array(['School', 'UG', 'PG'], dtype=object)]

### Label Encoder

In [14]:
# Encoder for the target column; it is fitted on y_train in the next cell.
le = LabelEncoder()

In [15]:
# Learn the label set from the training target only, then encode both
# splits with that same fitted mapping.
le.fit(y_train)
y_train = le.transform(y_train)
y_test = le.transform(y_test)

In [16]:
# Inspect the learned classes; each label's integer code is its position here.
le.classes_

array(['No', 'Yes'], dtype=object)

In [17]:
# Display the encoded training target.
y_train

array([1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0,
       0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0])

In [18]:
# Display the encoded test target.
y_test

array([1, 1, 0, 1, 0, 0, 0, 0, 0, 0])