# 5.3.8 OrdinalEncoder

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OrdinalEncoder

In [3]:
# Toy dataset: three samples, each described by three categorical string
# features. The same nested list `X` is what the encoder cells consume.
COLUMN_NAMES = ["sex", "from", "uses"]

X = [
    ["male", "from US", "uses Safari"],
    ["female", "from Europe", "uses Firefox"],
    ["non_binary", "from America", "uses Chrome"],
]

# Labelled tabular view of the same rows, shown as the cell output.
df = pd.DataFrame(data=X, columns=COLUMN_NAMES)
df

Unnamed: 0,sex,from,uses
0,male,from US,uses Safari
1,female,from Europe,uses Firefox
2,non_binary,from America,uses Chrome


In [4]:
# Build the encoder with its options spelled out explicitly.
ordinalEncoder = OrdinalEncoder(
    # Categories (unique values) per feature:
    #   "auto" -> determine the categories automatically from the training data
    #   list   -> categories[i] holds the categories expected in the i-th column
    categories="auto",
    # Desired dtype of the encoded output.
    dtype=np.float64,
    # The two options below are intentionally left commented out:
    #
    # handle_unknown="error",
    #   When set to "error", an error is raised if an unknown categorical
    #   feature is present during transform.
    #
    # unknown_value=None,
    #   Required when handle_unknown is set to "use_encoded_value"; sets the
    #   encoded value used for unknown categories.
)

# Learn the per-column categories from X and encode X in a single call.
# NOTE: despite its name, `encoder` holds the encoded array, not an encoder
# object; the name is kept because later cells read it.
encoder = ordinalEncoder.fit_transform(X)
encoder

array([[1., 2., 2.],
       [0., 1., 1.],
       [2., 0., 0.]])

In [5]:
# Categories found for each input column during fit, one array per column.
# In the outputs shown in this notebook, a value's position in its column's
# array matches its encoded number (e.g. 'male' is at index 1 and encodes to 1.).
ordinalEncoder.categories_

[array(['female', 'male', 'non_binary'], dtype=object),
 array(['from America', 'from Europe', 'from US'], dtype=object),
 array(['uses Chrome', 'uses Firefox', 'uses Safari'], dtype=object)]

In [6]:
# Map the encoded numeric array back to the original string labels; the
# displayed result reproduces the rows of X.
original_data = ordinalEncoder.inverse_transform(encoder)
original_data

array([['male', 'from US', 'uses Safari'],
       ['female', 'from Europe', 'uses Firefox'],
       ['non_binary', 'from America', 'uses Chrome']], dtype=object)

In [7]:
# Prints a fixed marker string; presumably signals that the notebook ran
# through to the last cell — TODO confirm intent.
print('ok_')

ok_
