# One Hot Encoding

### import the required packages

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### load the data

In [2]:
df = pd.DataFrame([
    {"model": "triber", "color": "silver", "price": 10},
    {"model": "XUV700", "color": "black", "price": 30},
    {"model": "fortuner", "color": "white", "price": 60},
    {"model": "X5", "color": "silver", "price": 50},
    {"model": "alto", "color": "white", "price": 5},
    {"model": "nano", "color": "black", "price": 3},
])
df

Unnamed: 0,model,color,price
0,triber,silver,10
1,XUV700,black,30
2,fortuner,white,60
3,X5,silver,50
4,alto,white,5
5,nano,black,3


In [3]:
# import both the encoders
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# import the column transformer
from sklearn.compose import ColumnTransformer

# create One hot encoder
one_hot_encoder = OneHotEncoder()

# select column(s) for one hot encoding
one_hot_encoder_columns = ["color"]

In [4]:
# create label encoder
label_encoder = LabelEncoder()

# transform the model column to numeric
df['model'] = label_encoder.fit_transform(df['model'])

In [5]:
label_encoder.classes_

array(['X5', 'XUV700', 'alto', 'fortuner', 'nano', 'triber'], dtype=object)

In [6]:
df['color'].unique()

array(['silver', 'black', 'white'], dtype=object)

In [7]:
# create the transformer
transformer = ColumnTransformer(
    [
        ("OneHotEncoder", one_hot_encoder, one_hot_encoder_columns)
    ],
    remainder="passthrough"
)

# transform the columns
transformer.fit_transform(df)

array([[ 0.,  1.,  0.,  5., 10.],
       [ 1.,  0.,  0.,  1., 30.],
       [ 0.,  0.,  1.,  3., 60.],
       [ 0.,  1.,  0.,  0., 50.],
       [ 0.,  0.,  1.,  2.,  5.],
       [ 1.,  0.,  0.,  4.,  3.]])

In [8]:
one_hot_encoder.categories

'auto'

In [9]:
df

Unnamed: 0,model,color,price
0,5,silver,10
1,1,black,30
2,3,white,60
3,0,silver,50
4,2,white,5
5,4,black,3
