## Import libraries

In [1]:
import pandas as pd
import numpy as np

In [2]:
#load dataset
df = pd.read_csv("data/customer.csv")
df.head()

Unnamed: 0,age,gender,review,education,purchased
0,30,Female,Average,School,No
1,68,Female,Poor,UG,No
2,70,Female,Good,PG,No
3,72,Female,Good,PG,No
4,16,Female,Average,UG,No


In [3]:
#taking only review, education and purchased column
df = df.iloc[:, 2: ]
df.head()

Unnamed: 0,review,education,purchased
0,Average,School,No
1,Poor,UG,No
2,Good,PG,No
3,Good,PG,No
4,Average,UG,No


In [4]:
print(df.education.unique())
print(df.review.unique())

['School' 'UG' 'PG']
['Average' 'Poor' 'Good']


## Spliting data into train and test set

In [5]:
X = df.drop("purchased", axis=1)
y = df["purchased"]

In [6]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

X_train.shape, X_test.shape

((40, 2), (10, 2))

## Ordinal Encoding

In [7]:
from sklearn.preprocessing import OrdinalEncoder

oe = OrdinalEncoder(categories=[['Poor','Average','Good'],['School','UG','PG']])

oe.fit(X_train)

OrdinalEncoder(categories=[['Poor', 'Average', 'Good'], ['School', 'UG', 'PG']])

In [8]:
X_train_encoded = oe.transform(X_train)
X_test_encoded = oe.transform(X_test)

In [9]:
oe.categories_

[array(['Poor', 'Average', 'Good'], dtype=object),
 array(['School', 'UG', 'PG'], dtype=object)]

In [10]:
X_train_encoded = pd.DataFrame(X_train_encoded, columns=X_train.columns)
X_test_encoded = pd.DataFrame(X_test_encoded, columns=X_test.columns)

In [11]:
X_train_encoded

Unnamed: 0,review,education
0,1.0,2.0
1,2.0,1.0
2,0.0,1.0
3,0.0,0.0
4,0.0,2.0
5,2.0,2.0
6,0.0,1.0
7,2.0,2.0
8,2.0,0.0
9,0.0,2.0


In [12]:
X_test_encoded

Unnamed: 0,review,education
0,2.0,1.0
1,2.0,2.0
2,0.0,0.0
3,2.0,1.0
4,1.0,0.0
5,1.0,0.0
6,1.0,1.0
7,0.0,2.0
8,0.0,2.0
9,2.0,0.0


## Encoding target data (Label encoder)

In [13]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()

le.fit(y_train)

LabelEncoder()

In [14]:
le.classes_

array(['No', 'Yes'], dtype=object)

In [15]:
y_train = le.transform(y_train)
y_test = le.transform(y_test)

In [16]:
y_train

array([1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0,
       0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0])

In [17]:
y_test

array([1, 1, 0, 1, 0, 0, 0, 0, 0, 0])

### Note:
* Ordinal encoding - to encode ordinal value in input data
* Label encoding - to encode target data