<a href="https://colab.research.google.com/github/niteshctrl/credit_lead_prediction/blob/main/Credit_Lead_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Importing Libraries

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import plot_confusion_matrix

import kerastuner as kt
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.metrics import AUC
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import LeakyReLU
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, InputLayer

In [None]:
# Load the labelled training split and the unlabelled test split from the
# working directory.
df_train, df_test = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv'))

In [None]:
# Missing Credit_Product entries are kept as their own explicit category
# ('unk_credit') instead of being dropped, in both the train and test frames.
for frame in (df_train, df_test):
    frame['Credit_Product'] = frame['Credit_Product'].replace(np.nan, 'unk_credit')

In [None]:
# TBD : Try Gender with 1 and 2

def preprocess(data_in):
    '''Convert a raw dataframe into a purely numeric feature matrix for modelling.

    Parameters
    ----------
    data_in : pandas.DataFrame
        Must contain the columns Gender, Age, Occupation, Channel_Code,
        Vintage, Credit_Product, Avg_Account_Balance, Is_Active and
        Region_Code. Any other columns are ignored.

    Returns
    -------
    pandas.DataFrame
        One row per NaN-free input row (index reset to 0..n-1), with columns
        in this order: Gender (Female=1, Male=0), standardized Age, Occupation
        dummies, Channel_Code dummies, standardized Vintage, Credit_Product
        dummies, standardized Avg_Account_Balance, Is_Active dummies,
        Region_Code dummies.

    Notes
    -----
    * Rows containing any NaN are dropped, so the output can be shorter than
      the input. NaN handling should be done before calling this function.
    * Scaling statistics and the set of dummy columns are derived from
      `data_in` itself, so two different frames (e.g. train and test) are
      scaled independently and may yield different columns.
    * Dummy columns are named `<column>_<level>` so that levels shared by
      several categorical columns cannot produce duplicate column names.
    '''
    # Safety net only: ensures the scaler and the encoders never see NaN.
    data_in = data_in.dropna().reset_index(drop=True)

    def _standardize(column):
        # Scale one numeric column to mean 0 / standard deviation 1.
        scaled = StandardScaler().fit_transform(
            data_in[column].values.reshape(-1, 1))
        return pd.Series(scaled.ravel(), index=data_in.index, name=column)

    def _one_hot(column):
        # drop_first removes the redundant reference level. The explicit
        # integer dtype keeps the matrix numeric on pandas versions whose
        # get_dummies default dtype is bool.
        return pd.get_dummies(data_in[column], prefix=column,
                              drop_first=True, dtype=np.uint8)

    features = [
        # Labels other than 'Female'/'Male' become NaN rather than staying
        # behind as strings.
        data_in['Gender'].map({'Female': 1, 'Male': 0}),
        _standardize('Age'),
        _one_hot('Occupation'),
        _one_hot('Channel_Code'),
        _standardize('Vintage'),
        _one_hot('Credit_Product'),
        _standardize('Avg_Account_Balance'),
        _one_hot('Is_Active'),
        _one_hot('Region_Code'),
    ]

    return pd.concat(features, axis=1)

In [None]:
# preprocess() drops rows containing NaN and resets the index, so the target
# has to come from the same filtered frame to stay row-aligned with X.
train_clean = df_train.dropna().reset_index(drop=True)
X = preprocess(train_clean)
y = train_clean.Is_Lead
assert len(X) == len(y), 'features and target must have the same number of rows'

In [None]:
# Hyperparameters: #neurons, #layers, dropout rate
def model_builder(hp):
    '''Build and compile a dense binary classifier for one KerasTuner trial.

    Tuned hyperparameters:
      * drop_rate  - dropout rate applied after every hidden layer
      * num_layers - number of hidden layers after the input-sized layer
      * num_units  - width shared by all of those hidden layers

    Parameters
    ----------
    hp : HyperParameters object supplied by the tuner.

    Returns
    -------
    A compiled Sequential model with a single sigmoid output, trained with
    binary cross-entropy and reporting AUC under the metric name 'auc'.

    The input width is read from the notebook-level feature frame `X`.
    '''
    n_features = len(X.columns)
    drop_rate = hp.Float('drop_rate', min_value=0.1, max_value=0.5, step=0.2)

    model = Sequential()
    model.add(Dense(n_features, input_dim=n_features, activation='relu'))
    model.add(Dropout(drop_rate))

    # Tune the number of layers
    num_layers = hp.Int('num_layers', min_value=5, max_value=12, step=3)
    # A single hyperparameter name means every hidden layer gets the same
    # width, so it is requested once rather than on every loop iteration.
    num_units = hp.Int('num_units', min_value=32, max_value=512, step=92)
    for _ in range(num_layers):
        model.add(Dense(units=num_units, activation='relu',
                        kernel_initializer="he_normal"))
        model.add(Dropout(drop_rate))

    model.add(Dense(1, activation='sigmoid'))     # Output Layer

    # The metric name is pinned: an unnamed AUC() is auto-suffixed (auc_1,
    # auc_2, ...) when models are built repeatedly, and the logged key would
    # then no longer be 'val_auc', which the tuner objective and the
    # early-stopping callback look up.
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=[AUC(name='auc')])

    return model

In [None]:
# Build the Hyperband tuner: maximise validation AUC, training any single
# configuration for at most 50 epochs.
# No directory/project name is given, so tuner state is stored under (and
# reloaded from) ./untitled_project, as the logged output of this cell shows.
auc_objective = kt.Objective('val_auc', direction='max')
tuner = kt.Hyperband(
    model_builder,
    objective=auc_objective,
    max_epochs=50,
)

INFO:tensorflow:Reloading Oracle from existing project ./untitled_project/oracle.json
INFO:tensorflow:Reloading Tuner from ./untitled_project/tuner0.json


In [None]:
# Define early stopping parameters
# Stop a run once validation AUC has not improved for 7 consecutive epochs.
# mode='max' is stated explicitly because AUC is a higher-is-better metric and
# the default 'auto' mode does not infer that direction on every Keras version.
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_auc', mode='max', patience=7)

In [None]:
# Run the hyperparameter search. Every trial holds out the last 20% of the
# rows for validation, trains in batches of 1024 and may be cut short by the
# early-stopping callback.
tuner.search(
    X,
    y,
    epochs=50,
    validation_split=0.2,
    callbacks=[stop_early],
    batch_size=1024,
)

INFO:tensorflow:Oracle triggered exit


In [None]:
# Keep only the top-ranked hyperparameter set found by the search.
top_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)
best_hps = top_hyperparameters[0]

In [None]:
# Rebuild the best configuration from scratch and train it on the full
# training data. The batch size matches the one used during the search
# (1024); with Keras' default batch size the logged run needed 6144 steps
# per epoch and was interrupted before finishing.
model = tuner.hypermodel.build(best_hps)
history = model.fit(X, y, epochs=50, validation_split=0.2, batch_size=1024)

Epoch 28/50
 479/6144 [=>............................] - ETA: 22s - loss: 0.3475 - auc: 0.8703

KeyboardInterrupt: ignored

In [None]:
# Show the selected hyperparameter values; displaying the object itself only
# prints an opaque repr with a memory address.
best_hps.values

<kerastuner.engine.hyperparameters.HyperParameters at 0x7f5fb432c290>