# Ordinal Data Encoding

In [1]:
import pandas as pd 
from sklearn.preprocessing import OrdinalEncoder

In [2]:
# Toy dataset: ten garments, each with a size label (ordinal category)
# and a numeric price.
df = pd.DataFrame(
    {
        "size": ["xl", "xxl", "l", "m", "s", "m", "l", "xl", "s", "l"],
        "price": [23, 43, 12, 43, 21, 41, 12, 32, 42, 11],
    }
)
# Leave the frame as the last expression so the notebook renders it.
df

Unnamed: 0,size,price
0,xl,23
1,xxl,43
2,l,12
3,m,43
4,s,21
5,m,41
6,l,12
7,xl,32
8,s,42
9,l,11


In [3]:
# Summarise the size column (entry count, non-null count, dtype) before encoding it.
df.loc[:, "size"].info()

<class 'pandas.core.series.Series'>
RangeIndex: 10 entries, 0 to 9
Series name: size
Non-Null Count  Dtype 
--------------  ----- 
10 non-null     object
dtypes: object(1)
memory usage: 208.0+ bytes


Remember: 
👉 OrdinalEncoder needs 2D input, i.e. shape (rows, columns).

If you pass df['size'] (a 1D Series), scikit-learn rejects it with an error, because it expects a 2D array.

If you pass df[['size']] (a 2D DataFrame with a single column), it works as expected.

In [4]:
# List the size labels from smallest to largest; the encoder assigns each label
# its position in this list (s -> 0, m -> 1, l -> 2, xl -> 3, xxl -> 4).
oe = OrdinalEncoder(
    categories=[["s", "m", "l", "xl", "xxl"]],
)
# Double brackets select a one-column DataFrame, giving the encoder 2D input.
oe.fit(df[["size"]])
df["en_size"] = oe.transform(df[["size"]])

In [5]:
# Display the frame again to compare the new en_size column with the original size labels.
df

Unnamed: 0,size,price,en_size
0,xl,23,3.0
1,xxl,43,4.0
2,l,12,2.0
3,m,43,1.0
4,s,21,0.0
5,m,41,1.0
6,l,12,2.0
7,xl,32,3.0
8,s,42,0.0
9,l,11,2.0


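We can also perform ordinal encoding with the pandas `map` function. The advantage of `map` is that we can choose the exact value assigned to each category. Note that here the mapped column holds integers (0, 1, 2, ...), whereas the OrdinalEncoder column above holds floats (0.0, 1.0, 2.0, ...).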

In [None]:
# Hand-written lookup table: each size label is paired with the integer chosen for it.
order_data = {
    "s": 0,
    "m": 1,
    "l": 2,
    "xl": 3,
    "xxl": 4,
}
# Translate every size label through the lookup table into a new column.
df["en_size_map"] = df["size"].map(order_data)
# Show the frame so both encodings can be compared side by side.
df

Unnamed: 0,size,price,en_size,en_size_map
0,xl,23,3.0,3
1,xxl,43,4.0,4
2,l,12,2.0,2
3,m,43,1.0,1
4,s,21,0.0,0
5,m,41,1.0,1
6,l,12,2.0,2
7,xl,32,3.0,3
8,s,42,0.0,0
9,l,11,2.0,2
