In [1]:
import numpy as np
import pandas as pd

In [2]:
# Toy categorical column: three distinct labels ('one', 'two', 'three') spread over seven rows.
df = pd.DataFrame(
    {'class': ['one', 'two', 'one', 'two', 'three', 'three', 'three']}
)
df

Unnamed: 0,class
0,one
1,two
2,one
3,two
4,three
5,three
6,three


## with pandas get_dummies

In [3]:
# One-hot encode the 'class' column: one indicator column is created per
# distinct label (the output below shows them as one, three, two).
# The dtype is pinned explicitly because the default indicator dtype differs
# between pandas releases (uint8 in older versions, bool in newer ones);
# pinning it keeps the integer 0/1 matrix shown below reproducible.
pandas_df = pd.get_dummies(df['class'], dtype=np.uint8)
pandas_df

Unnamed: 0,one,three,two
0,1,0,0
1,0,0,1
2,1,0,0
3,0,0,1
4,0,1,0
5,0,1,0
6,0,1,0


In [4]:
# Some consumers need a plain NumPy array instead of a DataFrame;
# to_numpy() returns just the values, without the index or column labels.
pandas_df.to_numpy()

array([[1, 0, 0],
       [0, 0, 1],
       [1, 0, 0],
       [0, 0, 1],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0]], dtype=uint8)

## with Scikit-Learn

In [5]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

In [6]:
# Single encoder instance shared by both "binary encoding" sections of this
# notebook; each fit_transform() call there fits it again on that section's input.
onehot_encoder = OneHotEncoder()

### binary encoding I: without previous integer encoding

In [7]:
# The encoder is given 2-D input, so the single 'class' column is turned into
# a column vector with one row per sample.
# to_numpy() always yields a real ndarray, whereas .values can hand back a
# pandas extension array (depending on the column dtype) that is not
# guaranteed to offer .reshape().
enc = df['class'].to_numpy().reshape(-1, 1)
# The encoded result is converted to a dense ndarray via .toarray() so it can
# be displayed and wrapped in a DataFrame.
onehot_encoded_1 = onehot_encoder.fit_transform(enc).toarray()
onehot_encoded_1

array([[1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.]])

In [8]:
# Wrap the dense array in a DataFrame for display; no column names are passed,
# so the columns are labelled 0, 1, 2 rather than with the class names.
pd.DataFrame(onehot_encoded_1)

Unnamed: 0,0,1,2
0,1.0,0.0,0.0
1,0.0,0.0,1.0
2,1.0,0.0,0.0
3,0.0,0.0,1.0
4,0.0,1.0,0.0
5,0.0,1.0,0.0
6,0.0,1.0,0.0


### integer encoding

In [9]:
# Map each class label to an integer code. fit() returns the fitted encoder
# itself, so fitting and encoding are written as two explicit steps.
label_encoder = LabelEncoder().fit(df['class'])
label_enc = label_encoder.transform(df['class'])

### binary encoding II: with previous integer encoding

In [10]:
# Turn the 1-D integer codes into a single-column 2-D array for the encoder.
# NOTE: this rebinds `label_enc` to the column-vector form; the Keras cell
# further down reads that rebound value.
label_enc = label_enc.reshape(-1, 1)
# Fit the shared encoder again, this time on the integer codes, and densify.
onehot_encoded_2 = onehot_encoder.fit_transform(label_enc).toarray()
onehot_encoded_2

array([[1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.]])

In [11]:
# Same display as for the first variant: a DataFrame with positional column
# labels (0, 1, 2), since no column names are supplied.
pd.DataFrame(onehot_encoded_2)

Unnamed: 0,0,1,2
0,1.0,0.0,0.0
1,0.0,0.0,1.0
2,1.0,0.0,0.0
3,0.0,0.0,1.0
4,0.0,1.0,0.0
5,0.0,1.0,0.0
6,0.0,1.0,0.0


## ... and Keras
`keras.utils.to_categorical` converts a class vector of integers (!) into a binary class matrix.

In [12]:
from keras import utils

Using TensorFlow backend.


In [13]:
# to_categorical() needs integer class ids as input, e.g. the codes produced
# with scikit-learn's LabelEncoder earlier in this notebook.
keras_ohe = utils.to_categorical(label_enc)

In [14]:
# Display the one-hot matrix returned by to_categorical().
keras_ohe

array([[1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.]], dtype=float32)

In [15]:
# Wrap the Keras result in a DataFrame for display; columns are positional
# (0, 1, 2) because no column names are supplied.
pd.DataFrame(keras_ohe)

Unnamed: 0,0,1,2
0,1.0,0.0,0.0
1,0.0,0.0,1.0
2,1.0,0.0,0.0
3,0.0,0.0,1.0
4,0.0,1.0,0.0
5,0.0,1.0,0.0
6,0.0,1.0,0.0
