### 1. Importing dependencies

In [16]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

| Variable Type | Preferred Encoding | Why?                                                            |
| ------------- | ------------------ | --------------------------------------------------------------- |
| Nominal       | One-Hot Encoding   | No inherent order → avoids implying false ordinal relationships |
| Ordinal       | Label Encoding     | Preserves order → small integers represent increasing levels    |



- Gender <Male, Female>

One-Hot Encoding Example
|        | Gender\_Male | Gender\_Female |
| ------ | ------------ | -------------- |
| Male   | 1            | 0              |
| Female | 0            | 1              |

- Label Encoding Example (assuming, for illustration only, that Gender were ordinal)
  - Male -> 1
  - Female -> 2


- Gender - nominal
- Geography - nominal
- CreditScoreBines - ordinal

In [17]:
# Read the processed CSV into `df` (the file name indicates binning was already
# applied) and preview the first ten rows.
df = pd.read_csv(
    filepath_or_buffer="data/processed/CEHHbInToW_binning_applied.csv"
)
df.head(n=10)

Unnamed: 0,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,CreditScoreBines
0,France,Female,42.0,2,0.0,1,1,1,101348.88,1,Fair
1,Spain,Female,41.0,1,83807.86,1,0,1,112542.58,0,Fair
2,France,Female,42.0,8,159660.8,3,1,0,113931.57,1,Poor
3,France,Female,38.91,1,0.0,2,0,0,93826.63,0,Good
4,Spain,Female,43.0,2,125510.82,1,1,1,79084.1,0,
5,Spain,Male,44.0,8,113755.78,2,1,0,149756.71,1,Fair
6,France,Male,50.0,7,0.0,2,1,1,10062.8,0,Excellent
7,Germany,Female,29.0,4,115046.74,4,1,0,119346.88,1,Poor
8,France,Male,44.0,4,142051.07,2,0,1,74940.5,0,Poor
9,France,Male,27.0,2,134603.88,1,1,1,71725.73,0,Good


##### We can also use scikit-learn to do this in a simpler way (e.g. `OneHotEncoder` and `OrdinalEncoder`)

### 2. Nominal Encoding

In [18]:
# One-hot encode the two nominal columns; each category becomes its own
# indicator column named "<prefix>_<category>".
gender_dummies = pd.get_dummies(df['Gender'], prefix='Gender')
geography_dummies = pd.get_dummies(df['Geography'], prefix='Geography')

# Build the encoded frame in one step: drop the original text columns, then
# append the Gender indicators followed by the Geography indicators.
df_encoded = pd.concat(
    [
        df.drop(columns=['Gender', 'Geography']),
        gender_dummies,
        geography_dummies,
    ],
    axis=1,
)
df_encoded


Unnamed: 0,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,CreditScoreBines,Gender_Female,Gender_Male,Geography_France,Geography_Germany,Geography_Spain
0,42.00,2,0.00,1,1,1,101348.88,1,Fair,True,False,True,False,False
1,41.00,1,83807.86,1,0,1,112542.58,0,Fair,True,False,False,False,True
2,42.00,8,159660.80,3,1,0,113931.57,1,Poor,True,False,True,False,False
3,38.91,1,0.00,2,0,0,93826.63,0,Good,True,False,True,False,False
4,43.00,2,125510.82,1,1,1,79084.10,0,,True,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,39.00,5,0.00,2,1,0,96270.64,0,Very Good,False,True,True,False,False
9996,35.00,10,57369.61,1,1,1,101699.77,0,Poor,False,True,True,False,False
9997,36.00,7,0.00,1,0,1,42085.58,1,Good,True,False,True,False,False
9998,42.00,3,75075.31,2,1,0,92888.52,1,Very Good,False,True,False,True,False


### 3. Ordinal Encoding

In [19]:
# Ordinal encoding: rank the credit-score bands from lowest (0) to highest (4)
# so the integer order mirrors the category order.
encode_dict_creditscorebin = {
    'Poor': 0,
    'Fair': 1,
    'Good': 2,
    'Very Good': 3,
    'Excellent': 4,
}

# Map from the untouched source frame `df` instead of from `df_encoded` itself,
# so this cell is idempotent. Re-mapping the already-encoded column would turn
# every value into NaN, because the integers 0-4 are not keys of the mapping.
# `df_encoded` was built from `df` by column-wise concat, so both frames share
# the same index and the assignment aligns row by row.
creditscore_labels = df["CreditScoreBines"]

# Series.map silently converts labels that are absent from the mapping to NaN.
# Fail loudly on unexpected labels (e.g. a typo in a bin name); rows whose bin
# is genuinely missing (NaN in the input) are left as NaN.
unknown_labels = set(creditscore_labels.dropna().unique()) - set(encode_dict_creditscorebin)
if unknown_labels:
    raise ValueError(
        f"Unmapped CreditScoreBines labels: {sorted(map(str, unknown_labels))}"
    )

df_encoded["CreditScoreBines"] = creditscore_labels.map(encode_dict_creditscorebin)
df_encoded.head(10)

Unnamed: 0,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,CreditScoreBines,Gender_Female,Gender_Male,Geography_France,Geography_Germany,Geography_Spain
0,42.0,2,0.0,1,1,1,101348.88,1,1.0,True,False,True,False,False
1,41.0,1,83807.86,1,0,1,112542.58,0,1.0,True,False,False,False,True
2,42.0,8,159660.8,3,1,0,113931.57,1,0.0,True,False,True,False,False
3,38.91,1,0.0,2,0,0,93826.63,0,2.0,True,False,True,False,False
4,43.0,2,125510.82,1,1,1,79084.1,0,,True,False,False,False,True
5,44.0,8,113755.78,2,1,0,149756.71,1,1.0,False,True,False,False,True
6,50.0,7,0.0,2,1,1,10062.8,0,4.0,False,True,True,False,False
7,29.0,4,115046.74,4,1,0,119346.88,1,0.0,True,False,False,True,False
8,44.0,4,142051.07,2,0,1,74940.5,0,0.0,False,True,True,False,False
9,27.0,2,134603.88,1,1,1,71725.73,0,2.0,False,True,True,False,False


In [21]:
# Persist the encoded frame; index=False keeps the row index out of the CSV.
df_encoded.to_csv(
    "data/processed/CEHHbInToW_encoded.csv",
    index=False,
)