# One Hot Encoding

In [1]:
from sklearn.preprocessing import OneHotEncoder
import pandas as pd

## Using Pandas

In [2]:
# Small demo frame with two categorical string columns.
df = pd.DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'a', 'c']})
df

Unnamed: 0,A,B
0,a,b
1,b,a
2,a,c


In [3]:
# One-hot encode column B; dtype=int keeps the 0/1 display on pandas versions
# where get_dummies defaults to boolean columns.
one_hot = pd.get_dummies(df['B'], dtype=int)
# Build a new frame instead of rebinding df, so this cell can be re-run
# without failing on an already-dropped column B.
df_encoded = df.drop(columns='B').join(one_hot)
df_encoded

Unnamed: 0,A,a,b,c
0,a,0,1,0
1,b,1,0,0
2,a,0,0,1


## Using Sklearn - OneHotEncoder class

### Example: integer-coded features

In [4]:
# Fit the encoder on four samples, each with three integer-coded features.
training_rows = [[0, 0, 3], [1, 1, 0], [0, 2, 1], [1, 0, 2]]
enc = OneHotEncoder()
enc.fit(training_rows)

OneHotEncoder()

In [19]:
# Categories learned during fit: one array per input column, in column order.
enc.categories_

[array([0, 1]), array([0, 1, 2]), array([0, 1, 2, 3])]

In [20]:
# Names of the encoded output columns, formed as <input feature>_<category>
# (the output below uses the default feature names x0, x1, x2).
enc.get_feature_names_out()

array(['x0_0', 'x0_1', 'x1_0', 'x1_1', 'x1_2', 'x2_0', 'x2_1', 'x2_2',
       'x2_3'], dtype=object)

In [21]:
# Encode one new sample; .toarray() densifies the result for display.
sample = [[0, 1, 1]]
enc.transform(sample).toarray()

array([[1., 0., 0., 1., 0., 0., 1., 0., 0.]])

### Example: mixed features with `handle_unknown='ignore'`

In [22]:
# Training rows mix a string feature with an integer feature.
X = [
    ['Male', 1],
    ['Female', 3],
    ['Female', 2],
]
# handle_unknown='ignore': categories not seen during fit do not raise at transform time.
enc = OneHotEncoder(handle_unknown='ignore')
enc.fit(X)

OneHotEncoder(handle_unknown='ignore')

In [23]:
# Categories learned for each input column. Left as a bare last expression so
# the notebook displays it directly instead of going through print().
enc.categories_

[array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]


In [24]:
# The second row's value 4 was not seen during fit; with handle_unknown='ignore'
# its block of output columns is all zeros (see the output below).
new_rows = [['Female', 1], ['Male', 4]]
enc.transform(new_rows).toarray()

array([[1., 0., 1., 0., 0.],
       [0., 1., 0., 0., 0.]])

In [27]:
# Decode one-hot rows back to original values. The second row has no active
# column for the first feature, so that feature decodes to None.
one_hot_rows = [[0, 1, 1, 0, 0], [0, 0, 0, 1, 0]]
enc.inverse_transform(one_hot_rows)

array([['Male', 1],
       [None, 2]], dtype=object)