## Ordinal Encoding

""" Similar to label encoding, ordinal encoding assigns numeric labels to categories.
However, with ordinal encoding, the labels are chosen based on the order or rank of the categories. For
example, if you have categories "low," "medium," and "high," you can assign them labels 1, 2, and 3,
respectively. This encoding preserves the ordinal relationship between categories. However, because the categories
are strings, the ordinal encoder will by default number the values in alphabetical order instead of their natural order.
"""

In [79]:
from sklearn.preprocessing import OrdinalEncoder
import numpy as np
import pandas as pd

In [11]:
# Sample feature column: one clothing size per row. Each label is wrapped in
# its own single-item list so the data is laid out as rows x 1 column.
data = [
    [label]
    for label in ('Extra Small', 'Small', 'Medium', 'Large', 'Extra Large')
]

data

[['Extra Small'], ['Small'], ['Medium'], ['Large'], ['Extra Large']]

In [12]:
# Create an OrdinalEncoder with default settings. No category order is given
# here, so the encoder picks the order itself when fitted (the printed codes
# further down show the categories end up in alphabetical order).
encoder = OrdinalEncoder()

In [13]:
# Fit the encoder on `data` so it learns the categories present in the column
encoder.fit(data)

OrdinalEncoder()

In [14]:
# Replace each category in `data` with the numeric code learned during fit
encoded_data = encoder.transform(data)

In [15]:
# Print the encoded data: one code per input row, in the original row order
print(encoded_data)

[[1.]
 [4.]
 [3.]
 [2.]
 [0.]]


In [24]:
# Put each original label next to its code, column-wise, for inspection.
# Mixing strings and floats in one array turns the codes into strings.
data_check = np.hstack((data, encoded_data))
data_check

array([['Extra Small', '1.0'],
       ['Small', '4.0'],
       ['Medium', '3.0'],
       ['Large', '2.0'],
       ['Extra Large', '0.0']], dtype='<U32')

In [26]:
# Sort the rows by their ordinal code to see which order the encoder used.
# `data_check` is a string array, so the code column is converted back to a
# number for the sort key; comparing the raw strings would place '10.0'
# before '2.0' as soon as there are more than ten categories.
sorted_data = sorted(data_check, key=lambda row: float(row[1]))

# Print the sorted rows, one per line
for row in sorted_data:
    print(row)

['Extra Large' '0.0']
['Extra Small' '1.0']
['Large' '2.0']
['Medium' '3.0']
['Small' '4.0']


In [37]:
# Turn the list of sorted rows into a DataFrame so it renders as a table
sorted_data = pd.DataFrame(sorted_data)
sorted_data

Unnamed: 0,0,1
0,Extra Large,0.0
1,Extra Small,1.0
2,Large,2.0
3,Medium,3.0
4,Small,4.0


""" Sometimes the codes produced by the encoder do not make sense as an ordering,
because the categories are ranked in alphabetical order instead of their
natural order. Here is how to handle that. """

In [71]:
# Second sample: the five sizes in lower case, four of them repeated, built
# directly as a one-column DataFrame (the column gets the default label 0).
data2 = pd.DataFrame([
    [label]
    for label in (
        'extra small',
        'small',
        'medium',
        'large',
        'extra large',
        'extra small',
        'small',
        'medium',
        'large',
    )
])
data2

Unnamed: 0,0
0,extra small
1,small
2,medium
3,large
4,extra large
5,extra small
6,small
7,medium
8,large


In [72]:
# Give the single column (default label 0) a descriptive name
data2 = data2.rename(columns={0: 'size'})
data2

Unnamed: 0,size
0,extra small
1,small
2,medium
3,large
4,extra large
5,extra small
6,small
7,medium
8,large


In [73]:
# Refit the same encoder, this time on the `data2` DataFrame; still no
# explicit category order is supplied
encoder.fit(data2)

OrdinalEncoder()

In [74]:
# Encode the 'size' column with the refitted encoder. The codes in the output
# below again follow alphabetical order rather than the sizes' natural order.
encoded_data1 = encoder.transform(data2[["size"]])
encoded_data1 

array([[1.],
       [4.],
       [3.],
       [2.],
       [0.],
       [1.],
       [4.],
       [3.],
       [2.]])

In [88]:
# The size categories written out in their natural order, smallest to largest
data_size = ['extra small','small', 'medium', 'large', 'extra large']

In [89]:
# Pass the desired order explicitly through `categories` (a list holding one
# category list for the single column) so the codes follow that order.
# NOTE(review): despite its name, `encoded_data1` holds the encoder object
# here; it is rebound to the encoded array in the next cell.
encoded_data1 = OrdinalEncoder(categories = [data_size])

In [92]:
# Fit and encode in one step; `encoded_data1` changes from the encoder to the
# resulting array of codes (0 = 'extra small' ... 4 = 'extra large')
encoded_data1= encoded_data1.fit_transform(data2[['size']])
encoded_data1

array([[0.],
       [1.],
       [2.],
       [3.],
       [4.],
       [0.],
       [1.],
       [2.],
       [3.]])

In [94]:
# Wrap the code array in a DataFrame (its single column gets the default label 0)
encoded_data1 = pd.DataFrame(encoded_data1)
encoded_data1

Unnamed: 0,0
0,0.0
1,1.0
2,2.0
3,3.0
4,4.0
5,0.0
6,1.0
7,2.0
8,3.0


In [95]:
# Place the codes beside the original sizes: the two frames are joined
# side by side (axis=1), matching rows on their index
data3 = pd.concat([data2, encoded_data1], axis=1)
data3

Unnamed: 0,size,0
0,extra small,0.0
1,small,1.0
2,medium,2.0
3,large,3.0
4,extra large,4.0
5,extra small,0.0
6,small,1.0
7,medium,2.0
8,large,3.0


In [101]:
# Sort by the code column (still labelled 0) so rows appear in natural size order
data3 = data3.sort_values(by=0)
data3

Unnamed: 0,size,0
0,extra small,0.0
5,extra small,0.0
1,small,1.0
6,small,1.0
2,medium,2.0
7,medium,2.0
3,large,3.0
8,large,3.0
4,extra large,4.0


In [102]:
# Final tidy-up: give both columns readable names
data3 = data3.rename(columns={'size': 'Size',0:'Size Order'})
data3

Unnamed: 0,Size,Size Order
0,extra small,0.0
5,extra small,0.0
1,small,1.0
6,small,1.0
2,medium,2.0
7,medium,2.0
3,large,3.0
8,large,3.0
4,extra large,4.0
