#### Ordinal encoding is a method for transforming categorical variables that have a natural order or ranking among the categories into numerical values. This technique assigns integers to the categories according to their relative order.

In [18]:
import pandas as pd
import numpy as np

In [19]:
# Load the customer dataset and discard the "age" and "gender" columns,
# keeping the review, education and purchased columns for this walkthrough.
df = (
    pd.read_csv(r"C:\Users\hp\OneDrive\codes\python\Learn_ML\1. Datasets\customer.csv")
    .drop(columns=["age", "gender"])
)

In [20]:
# Preview the first five rows of the columns that remain after the drop
df.head()

Unnamed: 0,review,education,purchased
0,Average,School,No
1,Poor,UG,No
2,Good,PG,No
3,Good,PG,No
4,Average,UG,No


### Ordinal Encoding

In [21]:
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split

In [22]:
# Features are the first two columns (review, education); the target is the
# last column (purchased). 20% of the rows are held out for testing, and the
# fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df.iloc[:, :2],
    df.iloc[:, -1],
    test_size=0.2,
    random_state=42,
)

In [23]:
X_train

Unnamed: 0,review,education
12,Poor,School
4,Average,UG
37,Average,PG
8,Average,UG
3,Good,PG
6,Good,School
41,Good,PG
46,Poor,PG
47,Good,PG
15,Poor,UG


In [24]:
# One list per column, written from lowest to highest rank. The encoder
# assigns 0, 1, 2 following exactly this order (review first, then education).
oe = OrdinalEncoder(
    categories=[
        ["Poor", "Average", "Good"],
        ["School", "UG", "PG"],
    ],
)

In [25]:
# Fit the encoder on the training split only, then apply that same fitted
# mapping to both the training and the test split.
oe.fit(X_train)
X_train = oe.transform(X_train)
X_test = oe.transform(X_test)

In [26]:
X_train

array([[0., 0.],
       [1., 1.],
       [1., 2.],
       [1., 1.],
       [2., 2.],
       [2., 0.],
       [2., 2.],
       [0., 2.],
       [2., 2.],
       [0., 1.],
       [2., 1.],
       [0., 1.],
       [1., 2.],
       [1., 0.],
       [0., 0.],
       [1., 0.],
       [1., 1.],
       [0., 2.],
       [2., 2.],
       [1., 0.],
       [1., 1.],
       [2., 1.],
       [2., 1.],
       [0., 1.],
       [1., 2.],
       [2., 2.],
       [0., 2.],
       [0., 0.],
       [2., 0.],
       [2., 0.],
       [2., 1.],
       [0., 2.],
       [2., 0.],
       [2., 1.],
       [1., 0.],
       [0., 0.],
       [2., 2.],
       [0., 2.],
       [0., 0.],
       [2., 0.]])

In [27]:
X_test

array([[1., 0.],
       [0., 2.],
       [1., 1.],
       [0., 2.],
       [0., 1.],
       [2., 1.],
       [0., 2.],
       [2., 0.],
       [1., 1.],
       [0., 2.]])

In [28]:
oe.categories_ 
# The categories of each encoded feature, listed in the order used for encoding

[array(['Poor', 'Average', 'Good'], dtype=object),
 array(['School', 'UG', 'PG'], dtype=object)]

In [29]:
oe.feature_names_in_
# Names of the input features (columns) the encoder was fitted on

array(['review', 'education'], dtype=object)

In [30]:
# Names of the transformed output features; the output below shows the same
# names as the input columns
oe.get_feature_names_out()

array(['review', 'education'], dtype=object)

In [31]:
# inverse_transform maps encoded values back to the original category labels:
# code 0 in "review" is "Poor" and code 2 in "education" is "PG".
oe.inverse_transform(np.array([[0, 2]]))

array([['Poor', 'PG']], dtype=object)

In [32]:
# handle_unknown only accepts "error" or "use_encoded_value"; a boolean is not
# a valid option and is rejected when the encoder is fitted. With
# "use_encoded_value", any category not seen during fit is encoded as
# unknown_value. -1 is used because it cannot collide with the codes 0-2
# assigned to the listed categories.
oe1 = OrdinalEncoder(
    categories=[["Poor", "Average", "Good"], ["School", "UG", "PG"]],
    handle_unknown="use_encoded_value",
    unknown_value=-1,
)

### Handling Rare Cases

##### min_frequency method

In [42]:
import numpy as np
# A single object-dtype column of 50 animal labels with deliberately uneven
# counts: 10 Cat, 15 Dog, 20 Horse, 3 Rabbit and 2 Snake.
df = np.array(
    ["Cat"] * 10 + ["Dog"] * 15 + ["Horse"] * 20 + ["Rabbit"] * 3 + ["Snake"] * 2,
    dtype=object,
).reshape(-1, 1)

In [43]:
df

array([['Cat'],
       ['Cat'],
       ['Cat'],
       ['Cat'],
       ['Cat'],
       ['Cat'],
       ['Cat'],
       ['Cat'],
       ['Cat'],
       ['Cat'],
       ['Dog'],
       ['Dog'],
       ['Dog'],
       ['Dog'],
       ['Dog'],
       ['Dog'],
       ['Dog'],
       ['Dog'],
       ['Dog'],
       ['Dog'],
       ['Dog'],
       ['Dog'],
       ['Dog'],
       ['Dog'],
       ['Dog'],
       ['Horse'],
       ['Horse'],
       ['Horse'],
       ['Horse'],
       ['Horse'],
       ['Horse'],
       ['Horse'],
       ['Horse'],
       ['Horse'],
       ['Horse'],
       ['Horse'],
       ['Horse'],
       ['Horse'],
       ['Horse'],
       ['Horse'],
       ['Horse'],
       ['Horse'],
       ['Horse'],
       ['Horse'],
       ['Horse'],
       ['Rabbit'],
       ['Rabbit'],
       ['Rabbit'],
       ['Snake'],
       ['Snake']], dtype=object)

In [48]:
# With min_frequency=4, every category that occurs fewer than 4 times in the
# fitted data (here Rabbit with 3 rows and Snake with 2) is treated as
# infrequent, and all infrequent categories are grouped under a single code.
enc = (
    OrdinalEncoder(min_frequency=4)
    .fit(df)
)

In [49]:
# Encode one sample of each animal to see which code every category receives.
enc.transform(np.array([["Cat"], ["Dog"], ["Snake"], ["Rabbit"], ["Horse"]]))

array([[0.],
       [1.],
       [3.],
       [3.],
       [2.]])

##### max_categories method

In [54]:
# max_categories=3 caps the number of output codes for the feature at three,
# counting the single group that collects all infrequent categories.
enc = (
    OrdinalEncoder(max_categories=3)
    .fit(df)
)

In [55]:
# Encode one sample of each animal to see which categories keep their own
# code and which ones are merged into the shared infrequent group.
enc.transform(np.array([["Cat"], ["Dog"], ["Snake"], ["Rabbit"], ["Horse"]]))

array([[2.],
       [0.],
       [2.],
       [2.],
       [1.]])

### Handling missing values

In [67]:
# A tiny single-column dataset in which every other row is missing (NaN).
d1 = [[value] for value in ("Cat", np.nan, "Dog", np.nan)]

In [68]:
d1

[['Cat'], [nan], ['Dog'], [nan]]

In [77]:
# encoded_missing_value sets the code emitted for missing entries; with
# np.nan, missing values remain NaN after encoding.
enc = (
    OrdinalEncoder(encoded_missing_value=np.nan)
    .fit(d1)
)

In [78]:
# Cat and Dog are encoded as 0 and 1, while the NaN rows stay NaN
enc.transform(d1)

array([[ 0.],
       [nan],
       [ 1.],
       [nan]])