# import dependancies

In [1]:
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, StandardScaler


In [2]:
df = pd.read_csv('data/dataset.csv')
df.columns

Index(['Geography', 'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts',
       'HasCrCard', 'IsActiveMember', 'EstimatedSalary', 'Exited',
       'CreditScoreBins'],
      dtype='object')

In [3]:
remainder_features = ['NumOfProducts','HasCrCard','IsActiveMember']
numerical_features=['Age','Tenure','Balance','EstimatedSalary']
nominal_features = ['Gender','Geography']
ordinal_features = ['CreditScoreBins']

In [4]:
numerical_transformer = Pipeline(
    steps=[
        ('imputer',SimpleImputer(strategy='median')),
        ('scaler',StandardScaler())
    ]
)

In [5]:
nominal_transformer = Pipeline(
    steps=[
        ('imputer',SimpleImputer(strategy='constant',
        fill_value='missing')),
        ('encoder',OneHotEncoder())
    ]
)

In [6]:
ordinal_transformer = Pipeline(
    steps=[
        ('imputer',SimpleImputer(strategy='constant',
        fill_value='missing')),
        ('encoder',OrdinalEncoder())
    ]
)

In [7]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num',numerical_transformer,numerical_features),
        ('nom',nominal_transformer,nominal_features),
        ('ord',ordinal_transformer,ordinal_features),
    ],
    remainder='drop'
)


nominal_feature_name = []

for feature in nominal_features:
    unique_values = df[feature].unique()
    nominal_feature_name.extend([f"{feature}_{val}" for val in unique_values])

df_cp = df.copy()

df_transformed = pd.DataFrame(
    preprocessor.fit_transform(df_cp),
    columns=numerical_features+nominal_feature_name+ordinal_features
)

df_transformed




Unnamed: 0,Age,Tenure,Balance,EstimatedSalary,Gender_Female,Gender_Male,Geography_France,Geography_Spain,Geography_Germany,CreditScoreBins
0,0.428433,-1.041526,-1.225704,0.021852,1.0,0.0,1.0,0.0,0.0,1.0
1,0.332317,-1.387294,0.117331,0.216474,1.0,0.0,0.0,0.0,1.0,1.0
2,0.428433,1.033083,1.332886,0.240624,1.0,0.0,1.0,0.0,0.0,3.0
3,0.124038,-1.387294,-1.225704,-0.108935,1.0,0.0,1.0,0.0,0.0,2.0
4,0.522339,-1.041526,0.785627,-0.365260,1.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...
9988,0.133021,-0.004221,-1.225704,-0.066442,0.0,1.0,1.0,0.0,0.0,4.0
9989,-0.297349,1.724620,-0.306347,0.027953,0.0,1.0,1.0,0.0,0.0,3.0
9990,-0.185431,0.687315,-1.225704,-1.008543,1.0,0.0,1.0,0.0,0.0,2.0
9991,0.428433,-0.695758,-0.022610,-0.125246,0.0,1.0,0.0,1.0,0.0,4.0
