# Label Encoding

In [1]:
# import required libraries
import pandas as pd
import numpy as np
# Seven bridge categories used as the toy dataset for the encoding examples
bridge_types = ('Arch','Beam','Truss','Cantilever','Tied Arch','Suspension','Cable')
# One row per category, held in a single 'Bridge_Types' column
bridge_df1 = pd.DataFrame({'Bridge_Types': list(bridge_types)})

bridge_df1

Unnamed: 0,Bridge_Types
0,Arch
1,Beam
2,Truss
3,Cantilever
4,Tied Arch
5,Suspension
6,Cable


In [3]:
# Cast the text column to pandas' 'category' dtype and store it back in place
bridge_categories = bridge_df1['Bridge_Types'].astype('category')
bridge_df1['Bridge_Types'] = bridge_categories
# The integer code of each row's category goes into a separate column
bridge_df1['Bridge_Types_Cat'] = bridge_categories.cat.codes
bridge_df1

Unnamed: 0,Bridge_Types,Bridge_Types_Cat
0,Arch,0
1,Beam,1
2,Truss,6
3,Cantilever,3
4,Tied Arch,5
5,Suspension,4
6,Cable,2


# Using the scikit-learn library approach:

In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
# Same seven bridge categories, rebuilt as a fresh frame for the scikit-learn examples
bridge_types = ('Arch','Beam','Truss','Cantilever','Tied Arch','Suspension','Cable')
# One row per category, held in a single 'Bridge_Types' column
bridge_df = pd.DataFrame({'Bridge_Types': list(bridge_types)})

bridge_df

Unnamed: 0,Bridge_Types
0,Arch
1,Beam
2,Truss
3,Cantilever
4,Tied Arch
5,Suspension
6,Cable


In [5]:
# Encoder instance; kept in `labelencoder` so its fitted classes can be inspected afterwards
labelencoder = LabelEncoder()
bridge_labels = bridge_df['Bridge_Types']
# Learn the set of labels first, then map every row to its integer code
labelencoder.fit(bridge_labels)
bridge_df['Bridge_Types_Cat'] = labelencoder.transform(bridge_labels)
bridge_df

Unnamed: 0,Bridge_Types,Bridge_Types_Cat
0,Arch,0
1,Beam,1
2,Truss,6
3,Cantilever,3
4,Tied Arch,5
5,Suspension,4
6,Cable,2


In [14]:
# Optional: list the labels the encoder learned during fitting
[*labelencoder.classes_]

['Arch', 'Beam', 'Cable', 'Cantilever', 'Suspension', 'Tied Arch', 'Truss']

# One-Hot Encoder


In [7]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
# One-hot encoder; handle_unknown='ignore' means categories not seen during fit
# do not raise an error at transform time
enc = OneHotEncoder(handle_unknown='ignore')
# Encode the label-encoded column (Bridge_Types_Cat) and densify the result
onehot_matrix = enc.fit_transform(bridge_df[['Bridge_Types_Cat']])
enc_df = pd.DataFrame(onehot_matrix.toarray())
# Attach the indicator columns to the main frame, aligning rows on the index
bridge_df2 = bridge_df.join(enc_df)
bridge_df2

Unnamed: 0,Bridge_Types,Bridge_Types_Cat,0,1,2,3,4,5,6
0,Arch,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Beam,1,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2,Truss,6,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,Cantilever,3,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4,Tied Arch,5,0.0,0.0,0.0,0.0,0.0,1.0,0.0
5,Suspension,4,0.0,0.0,0.0,0.0,1.0,0.0,0.0
6,Cable,2,0.0,0.0,1.0,0.0,0.0,0.0,0.0


# Using the dummy-values approach (`pd.get_dummies`):


This approach is more flexible because it lets you encode as many categorical columns as you like and choose how to label the resulting columns with a prefix. Proper naming makes the rest of the analysis a little easier.

In [10]:
import pandas as pd
import numpy as np
# Fresh single-column frame of bridge categories for the get_dummies example
bridge_types = ('Arch','Beam','Truss','Cantilever','Tied Arch','Suspension','Cable')
bridge_df3 = pd.DataFrame({'Bridge_Types': list(bridge_types)})
# Indicator columns for 'Bridge_Types', each named with the 'Type_is' prefix
dum_df = pd.get_dummies(
    bridge_df3,
    prefix=["Type_is"],
    columns=["Bridge_Types"],
)
# Put the indicator columns next to the original column, aligning rows on the index
bridge_df3 = bridge_df3.join(dum_df)
bridge_df3

Unnamed: 0,Bridge_Types,Type_is_Arch,Type_is_Beam,Type_is_Cable,Type_is_Cantilever,Type_is_Suspension,Type_is_Tied Arch,Type_is_Truss
0,Arch,1,0,0,0,0,0,0
1,Beam,0,1,0,0,0,0,0
2,Truss,0,0,0,0,0,0,1
3,Cantilever,0,0,0,1,0,0,0
4,Tied Arch,0,0,0,0,0,1,0
5,Suspension,0,0,0,0,1,0,0
6,Cable,0,0,1,0,0,0,0


In [11]:
# Replacing the category variable with dummy values: with no `columns=` argument,
# get_dummies encodes every object/category column and passes the other columns through.
# bridge_df3 (not bridge_df) is the frame that carries the Type_is_* columns shown in
# this cell's recorded output, so it is the frame that must be passed here.
pd.get_dummies(bridge_df3)

Unnamed: 0,Type_is_Arch,Type_is_Beam,Type_is_Cable,Type_is_Cantilever,Type_is_Suspension,Type_is_Tied Arch,Type_is_Truss,Bridge_Types_Arch,Bridge_Types_Beam,Bridge_Types_Cable,Bridge_Types_Cantilever,Bridge_Types_Suspension,Bridge_Types_Tied Arch,Bridge_Types_Truss
0,1,0,0,0,0,0,0,1,0,0,0,0,0,0
1,0,1,0,0,0,0,0,0,1,0,0,0,0,0
2,0,0,0,0,0,0,1,0,0,0,0,0,0,1
3,0,0,0,1,0,0,0,0,0,0,1,0,0,0
4,0,0,0,0,0,1,0,0,0,0,0,0,1,0
5,0,0,0,0,1,0,0,0,0,0,0,1,0,0
6,0,0,1,0,0,0,0,0,0,1,0,0,0,0


KeyError: 'bridge_types'

Reference: https://towardsdatascience.com/categorical-encoding-using-label-encoding-and-one-hot-encoder-911ef77fb5bd