# import dependancies

In [10]:
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, StandardScaler


In [11]:
df = pd.read_csv('data/dataset.csv')
df.columns

Index(['Geography', 'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts',
       'HasCrCard', 'IsActiveMember', 'EstimatedSalary', 'Exited',
       'CreditScoreBins'],
      dtype='object')

In [12]:
remainder_features = ['NumOfProducts','HasCrCard','IsActiveMember']
numerical_features=['Age','Tenure','Balance','EstimatedSalary']
nominal_features = ['Gender','Geography']
ordinal_features = ['CreditScoreBins']

In [13]:
numerical_transformer = Pipeline(
    steps=[
        ('imputer',SimpleImputer(strategy='median')),
        ('scaler',StandardScaler())
    ]
)

In [14]:
nominal_transformer = Pipeline(
    steps=[
        ('imputer',SimpleImputer(strategy='constant',
        fill_value='missing')),
        ('encoder',OneHotEncoder())
    ]
)

In [15]:
ordinal_transformer = Pipeline(
    steps=[
        ('imputer',SimpleImputer(strategy='constant',
        fill_value='missing')),
        ('encoder',OrdinalEncoder())
    ]
)

In [16]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num',numerical_transformer,numerical_features),
        ('nom',nominal_transformer,nominal_features),
        ('ord',ordinal_transformer,ordinal_features),
    ],
    remainder='drop'
)


nominal_feature_name = []

for feature in nominal_features:
    unique_values = df[feature].unique()
    nominal_feature_name.extend([f"{feature}_{val}" for val in unique_values])

df_cp = df.copy()

df_transformed = pd.DataFrame(
    preprocessor.fit_transform(df_cp),
    columns=numerical_features+nominal_feature_name+ordinal_features
)

df_transformed.columns





Index(['Age', 'Tenure', 'Balance', 'EstimatedSalary', 'Gender_Female',
       'Gender_Male', 'Geography_France', 'Geography_Spain',
       'Geography_Germany', 'CreditScoreBins'],
      dtype='object')