# Imports Always First

In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, LabelBinarizer
from collections import defaultdict
import warnings
import numpy as np

# Creating Test Dataset

In [None]:
# Small demo frame: one string (categorical) column and one numeric column.
df_encode1 = pd.DataFrame(
    {
        'col1': ['this', 'will', 'be', 'encoded'],
        'col2': [1, 2, 3, 4],
    }
)

In [None]:
# Keep an independent copy of the raw data; df_encode1 is modified by the
# label-encoding step further down, while df_encode2 is reused later.
df_encode2 = df_encode1.copy(deep=True)
# Trailing expression: the notebook renders the original frame.
df_encode1

---
# Encoding Using get_dummies

In [None]:
# One-hot encode df_encode1 with pandas alone. drop_first=True leaves out the
# indicator column of the first category level, so k levels yield k-1 columns.
pd.get_dummies(df_encode1, drop_first=True)

---
# Encoding Using LabelEncoder and OneHotEncoder

In [None]:
# Instantiate both scikit-learn encoders with their default settings:
# lenc turns labels into integer codes, oenc produces one-hot columns.
lenc, oenc = LabelEncoder(), OneHotEncoder()

In [None]:
# Label-encode the categorical column: every distinct string in col1 is
# replaced by the integer code the encoder assigns to it.
df_encode1['col1'] = lenc.fit_transform(df_encode1['col1'])

In [None]:
# Render the frame again; col1 now holds the label-encoded values.
df_encode1

In [None]:
# one hot encoding, all values are encoded: the whole frame is passed in, so
# col2 is expanded into indicator columns just like col1.
# .toarray() turns the encoder's sparse result into a dense array for display.
oenc.fit_transform(df_encode1).toarray()

---
# Multi-column Encoding

## Label Encoding

In [None]:
# Extend the untouched copy with a second string column (col3), so there is
# more than one categorical column to encode.
df_multiencode = pd.concat(
    [
        df_encode2,
        pd.DataFrame({'col3': ['Another', 'encoding', 'attempt', 'here']}),
    ],
    axis=1,
)
display(df_multiencode)

In [None]:
# One LabelEncoder per column, created lazily on first access and kept in the
# dict so the same fitted encoder can be used later to invert the codes.
inverse_dict = defaultdict(LabelEncoder)
df_encoded = pd.DataFrame()

# explicit loop variant: fit an encoder on each column and store its codes
for column, values in df_multiencode.items():
    df_encoded[column] = inverse_dict[column].fit_transform(values)

display(df_encoded)

In [None]:
# one line version
# apply() calls the lambda once per column; x.name is the column label, which
# selects (or lazily creates) that column's LabelEncoder in inverse_dict.
# The encoders are re-fitted here, so inverse_dict stays usable for decoding.
# (The former df_multiencode.copy() was dropped: its result was overwritten
# immediately by the frame that apply() returns.)
df_encoded = df_multiencode.apply(lambda x: inverse_dict[x.name].fit_transform(x))
display(df_encoded)

### Label Encoding Inverse

In [None]:
# NOTE: this silences every warning for the rest of the session, not only the
# ones raised by the calls below.
warnings.filterwarnings('ignore')

# Decode each column with the encoder that produced it, rebuilding the
# original labels from the integer codes.
df_rencoded = pd.DataFrame()
for column, codes in df_encoded.items():
    df_rencoded[column] = inverse_dict[column].inverse_transform(codes)
df_rencoded

## One Hot Encoding

In [None]:
# One OneHotEncoder per column, kept in a dict so each fitted encoder remains
# available after the loop.
one_hot_dict = defaultdict(OneHotEncoder)
onehot_frames = []

for column in df_encoded.columns:
    # The encoder expects a 2-D input, so the column becomes an (n, 1) array.
    one_hot_target = df_encoded[column].values.reshape(-1, 1)
    X = one_hot_dict[column].fit_transform(one_hot_target).toarray()
    # Indicator columns are named <column>_<position>.
    one_hot_column = [column + '_' + str(x) for x in range(X.shape[1])]
    onehot_frames.append(pd.DataFrame(X, columns=one_hot_column))

# Concatenate once at the end rather than growing the frame inside the loop,
# which re-copied everything accumulated so far on every iteration.
# pd.concat rejects an empty list, hence the fallback to an empty frame.
df_onehot = pd.concat(onehot_frames, axis=1) if onehot_frames else pd.DataFrame()

df_onehot

### One Hot Inverse Deprecated

---
# Label Binarizer Encoder

In [None]:
# One LabelBinarizer per column, kept in a dict so each fitted binarizer (and
# its classes_) can be inspected afterwards.
bina_dict = defaultdict(LabelBinarizer)
bina_frames = []

for column in df_rencoded.columns:
    # Works directly on the decoded (original) labels; no label-encoding step.
    X = bina_dict[column].fit_transform(df_rencoded[column].values)
    # Indicator columns are named <column>_<position>.
    bina_column = [column + '_' + str(x) for x in range(X.shape[1])]
    bina_frames.append(pd.DataFrame(X, columns=bina_column))

# Concatenate once at the end rather than growing the frame inside the loop,
# which re-copied everything accumulated so far on every iteration.
# pd.concat rejects an empty list, hence the fallback to an empty frame.
df_bina = pd.concat(bina_frames, axis=1) if bina_frames else pd.DataFrame()

df_bina

In [None]:
# Look up the LabelBinarizer stored for col1. bina_dict is a defaultdict, so a
# key that was never fitted would silently return a fresh, unfitted binarizer.
x = bina_dict['col1']

In [None]:
# classes_ lists the labels the binarizer learned when it was fitted.
x.classes_

# Bonus

In [None]:
# custom helper function to encode your categorical data
def cust_label_binarizer():
    """Placeholder for a custom categorical-encoding helper.

    Not implemented yet: takes no arguments and returns None.
    """
    return None

# Sources:

1. http://www.insightsbot.com/blog/McTKK/python-one-hot-encoding-with-scikit-learn