# Data

In [None]:
import pandas as pd

In [None]:
# Toy employee table used by every encoding example below:
# one identifier, one numeric column, one nominal column (marital_status)
# and one ordinal column (performance).
data = dict(
    id_employee=[f'E{number}' for number in range(1, 11)],
    age=[22, 30, 24, 26, 23, 24, 27, 21, 32, 29],
    marital_status=[
        'single', 'married', 'married', 'divorced', 'married',
        'single', 'married', 'single', 'married', 'divorced',
    ],
    performance=[
        'good', 'poor', 'good', 'excellent', 'bad',
        'good', 'poor', 'excellent', 'excellent', 'good',
    ],
)

df = pd.DataFrame.from_dict(data)

In [None]:
# Display the full frame; it has only ten rows, so no .head() is needed.
df

Unnamed: 0,id_employee,age,marital_status,performance
0,E1,22,single,good
1,E2,30,married,poor
2,E3,24,married,good
3,E4,26,divorced,excellent
4,E5,23,married,bad
5,E6,24,single,good
6,E7,27,married,poor
7,E8,21,single,excellent
8,E9,32,married,excellent
9,E10,29,divorced,good


# Nominal Encoding

In [None]:
# Convert marital_status column using scikit-learn's OneHotEncoder.
# drop='first' omits the first category column ('divorced'), so a row with
# zeros in both remaining columns represents a divorced employee.

from sklearn.preprocessing import OneHotEncoder

# `sparse_output` replaces the `sparse` argument, which was deprecated in
# scikit-learn 1.2 and removed in 1.4; False requests a dense NumPy array.
ohe = OneHotEncoder(sparse_output=False, drop='first')
marital_encoded = ohe.fit_transform(df[['marital_status']])

# Wrap the array in a DataFrame with readable column names, and reuse df's
# index so the encoded rows stay aligned with the source rows.
df_encoded_sk = pd.DataFrame(
    marital_encoded,
    columns=ohe.get_feature_names_out(['marital_status']),
    index=df.index,
)
df_encoded_sk




Unnamed: 0,marital_status_married,marital_status_single
0,0.0,1.0
1,1.0,0.0
2,1.0,0.0
3,0.0,0.0
4,1.0,0.0
5,0.0,1.0
6,1.0,0.0
7,0.0,1.0
8,1.0,0.0
9,0.0,0.0


In [None]:
# Convert marital_status column using category_encoders.OneHotEncoder.
# The encoder receives the whole frame and expands only the listed column;
# use_cat_names=True names the new columns after the category values
# (e.g. marital_status_single).

import category_encoders as ce

encoder = ce.OneHotEncoder(
    cols=['marital_status'],
    use_cat_names=True,
)
df_encoded_ce = encoder.fit(df).transform(df)
df_encoded_ce


Unnamed: 0,id_employee,age,marital_status_single,marital_status_married,marital_status_divorced,performance
0,E1,22,1,0,0,good
1,E2,30,0,1,0,poor
2,E3,24,0,1,0,good
3,E4,26,0,0,1,excellent
4,E5,23,0,1,0,bad
5,E6,24,1,0,0,good
6,E7,27,0,1,0,poor
7,E8,21,1,0,0,excellent
8,E9,32,0,1,0,excellent
9,E10,29,0,0,1,good


In [None]:
# Convert marital_status column using pandas' get_dummies (dummy encoding).
# Every category gets its own indicator column, and the indicator columns
# are placed after the columns that were left untouched.

df_encoded = pd.get_dummies(data=df, columns=['marital_status'])
df_encoded

Unnamed: 0,id_employee,age,performance,marital_status_divorced,marital_status_married,marital_status_single
0,E1,22,good,False,False,True
1,E2,30,poor,False,True,False
2,E3,24,good,False,True,False
3,E4,26,excellent,True,False,False
4,E5,23,bad,False,True,False
5,E6,24,good,False,False,True
6,E7,27,poor,False,True,False
7,E8,21,excellent,False,False,True
8,E9,32,excellent,False,True,False
9,E10,29,good,True,False,False


# Ordinal Encoding

In [None]:
# Convert performance using Label Encoding.
# NOTE: LabelEncoder numbers the labels in sorted (alphabetical) order --
# bad=0, excellent=1, good=2, poor=3 -- so the codes do NOT follow the
# bad < poor < good < excellent ranking.

from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
le.fit(df['performance'])
df['performance_numeric'] = le.transform(df['performance'])
df


Unnamed: 0,id_employee,age,marital_status,performance,category
0,E1,22,single,good,2
1,E2,30,married,poor,3
2,E3,24,married,good,2
3,E4,26,divorced,excellent,1
4,E5,23,married,bad,0
5,E6,24,single,good,2
6,E7,27,married,poor,3
7,E8,21,single,excellent,1
8,E9,32,married,excellent,1
9,E10,29,divorced,good,2


In [None]:
# Convert performance using scikit-learn's OrdinalEncoder.
# The explicit category list fixes the rank order, so the codes are
# bad=0 < poor=1 < good=2 < excellent=3 (stored as floats).

from sklearn.preprocessing import OrdinalEncoder

categories = [['bad', 'poor', 'good', 'excellent']]
encoder = OrdinalEncoder(categories=categories)
encoder.fit(df[['performance']])
df['performance_encoded'] = encoder.transform(df[['performance']])
df

Unnamed: 0,id_employee,age,marital_status,performance,performance_encoded
0,E1,22,single,good,2.0
1,E2,30,married,poor,1.0
2,E3,24,married,good,2.0
3,E4,26,divorced,excellent,3.0
4,E5,23,married,bad,0.0
5,E6,24,single,good,2.0
6,E7,27,married,poor,1.0
7,E8,21,single,excellent,3.0
8,E9,32,married,excellent,3.0
9,E10,29,divorced,good,2.0


In [None]:
# Convert performance using category_encoders.OrdinalEncoder.
# The explicit mapping assigns the ranks bad=1 < poor=2 < good=3 < excellent=4;
# in the returned frame the performance column holds these integers.

import category_encoders as ce

mapping = [{
    'col': 'performance',
    'mapping': {'bad': 1, 'poor': 2, 'good': 3, 'excellent': 4},
}]

encoder = ce.OrdinalEncoder(cols=['performance'], mapping=mapping)
df_encoded = encoder.fit_transform(df)

# End the cell with the frame itself so the notebook renders it as a rich
# table rather than the plain-text dump produced by print().
df_encoded


  id_employee  age marital_status  performance  category
0          E1   22         single            3         2
1          E2   30        married            2         3
2          E3   24        married            3         2
3          E4   26       divorced            4         1
4          E5   23        married            1         0
5          E6   24         single            3         2
6          E7   27        married            2         3
7          E8   21         single            4         1
8          E9   32        married            4         1
9         E10   29       divorced            3         2


In [None]:
pip install category_encoders

Collecting category_encoders
  Downloading category_encoders-2.6.3-py2.py3-none-any.whl (81 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/81.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.9/81.9 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: category_encoders
Successfully installed category_encoders-2.6.3
