<a href="https://colab.research.google.com/github/niteshctrl/credit_lead_prediction/blob/main/Credit_Lead_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install keras-tuner

In [13]:
# Importing Libraries

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import plot_confusion_matrix

import kerastuner as kt
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.metrics import AUC
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, InputLayer

In [3]:
# Load the training set; the relative path means train.csv must sit in the
# current working directory.
df_train = pd.read_csv('train.csv')
# The test-set load is left disabled:
# df_test = pd.read_csv('test.csv')

In [4]:
# Missing Credit_Product entries become the explicit category 'unk_credit'
# so they show up as their own level during data analysis.
df_train['Credit_Product'] = df_train['Credit_Product'].replace(
    to_replace=np.nan,
    value='unk_credit',
)
# Same replacement for the test set, kept disabled together with its load:
# df_test['Credit_Product'] = df_test['Credit_Product'].replace(np.nan, 'unk_credit')

In [27]:
# Preview the first rows of the training frame.
df_train.head()

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,Is_Lead
0,NNVBBKZB,Female,73,RG268,Other,X3,43,No,1045696,No,0
1,IDD62UNG,Female,30,RG277,Salaried,X1,32,No,581988,No,0
2,HD3DSEMC,Female,56,RG268,Self_Employed,X3,26,No,1484315,Yes,0
3,BF3NC7KV,Male,34,RG270,Salaried,X1,19,No,470454,No,0
4,TEASRWXV,Female,30,RG282,Salaried,X1,33,No,886787,No,0


In [6]:
# Feature frame: every column except the row identifier and the target.
X_tr = df_train.drop(columns=['ID', 'Is_Lead'])
# Target series: the Is_Lead label column.
y_tr = df_train['Is_Lead']

# Preprocessing

In [5]:
def preprocess(X_tr):
    '''Standardise the numeric columns and one-hot encode the object columns.

    Parameters
    ----------
    X_tr : pandas.DataFrame
        Feature frame. It must exclude the target variable. Identifier-like
        string columns should be dropped beforehand as well, because every
        object column is expanded into dummy columns.

    Returns
    -------
    pandas.DataFrame
        The scaled numeric columns (original column names kept) followed by
        the dummy columns, all with float dtype and indexed like ``X_tr``.

    Notes
    -----
    A new ``StandardScaler`` is fitted on ``X_tr`` on every call, so the
    scaling statistics always come from the frame that is passed in.
    '''
    numeric = X_tr.select_dtypes(exclude=['object'])
    categorical = X_tr.select_dtypes(include=['object'])

    scaler = StandardScaler()
    # Re-attach the input's index and column names: fit_transform returns a
    # bare array, and a frame with a fresh 0..n-1 index would be misaligned
    # by the axis=1 concat below whenever X_tr's index is not 0..n-1.
    scaled = pd.DataFrame(scaler.fit_transform(numeric),
                          index=X_tr.index,
                          columns=numeric.columns)

    # Request float dummies explicitly so the result has a single numeric
    # dtype regardless of the pandas default dummy dtype.
    dummies = pd.get_dummies(categorical, drop_first=True, dtype=float)

    return pd.concat([scaled, dummies], axis=1)

# Modelling

In [None]:
# preprocess() expects features only: passing df_train unchanged would scale
# the Is_Lead target into X (label leakage) and one-hot encode the ID strings.
X = preprocess(df_train.drop(columns=['ID', 'Is_Lead']))
y = df_train['Is_Lead']

In [None]:
# Callbacks

# Stop once val_loss has gone 6 epochs without improving, then roll the model
# back to the weights of its best epoch.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=6,
    restore_best_weights=True,
)

# Multiply the learning rate by 0.1 after 3 epochs without val_loss improvement.
lr_scheduler = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=3,
)

In [24]:
# Hyperparameters: #neurons, #layers, dropout rate
def model_builder(hp):
    '''Build and compile a fully connected binary classifier for Keras Tuner.

    Parameters
    ----------
    hp : Keras Tuner hyperparameter container
        Supplies the tuned values:
        ``dropout_rate`` from 0.1 to 0.5 in steps of 0.2,
        ``num_layers`` from 5 to 12 in steps of 3,
        ``num_units`` from 32 to 512 in steps of 92.

    Returns
    -------
    A compiled ``Sequential`` model: ``num_layers`` blocks of
    Dense(``num_units``, relu, he_normal) + Dropout(``dropout_rate``),
    followed by a single sigmoid output unit. The input width is not
    declared here, so it is inferred when the model first sees data.
    '''
    dropout_rate = hp.Float('dropout_rate', min_value=0.1, max_value=0.5, step=0.2)

    # Tune the number of layers
    num_layers = hp.Int('num_layers', min_value=5, max_value=12, step=3)
    # A single width shared by every hidden layer; it is one hyperparameter,
    # so it is read once instead of on every loop iteration.
    num_units = hp.Int('num_units', min_value=32, max_value=512, step=92)

    model = Sequential()
    for _ in range(num_layers):
        model.add(Dense(units=num_units, activation='relu',
                        kernel_initializer="he_normal"))
        model.add(Dropout(dropout_rate))

    model.add(Dense(1, activation='sigmoid'))     # Output Layer

    # Name the metric explicitly so its validation log key is always
    # 'val_auc' instead of depending on Keras' auto-generated metric name.
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=[AUC(name='auc')])

    return model

In [29]:
def build_model(max_epochs=50, batch_size=1024, validation_split=0.2):
    '''Run a Hyperband search and build a model from the best hyperparameters.

    Reads the module-level ``df_train`` and relies on ``preprocess`` and
    ``model_builder`` defined elsewhere in this notebook.

    Parameters
    ----------
    max_epochs : int, default 50
        Epoch budget given to Hyperband and to each search fit.
    batch_size : int, default 1024
        Batch size used while fitting the trial models.
    validation_split : float, default 0.2
        Fraction of the training data held out for validation during the search.

    Returns
    -------
    tuple
        ``(model, X_tr, y_tr)``: a model newly built from the best
        hyperparameters, the preprocessed feature frame and the target series.
        The model is built after the search, so it still has to be fitted.
    '''
    # Prepare and preprocess the data: features exclude the ID and the target.
    features = df_train.drop(['ID', 'Is_Lead'], axis=1)
    y_tr = df_train.Is_Lead
    X_tr = preprocess(features)

    # Only early stopping is applied during the search: stop after 6 epochs
    # without val_loss improvement and restore the best weights.
    early_stop = EarlyStopping(monitor='val_loss',
                               patience=6,
                               restore_best_weights=True)

    # Hyperband instance maximising the validation AUC.
    tuner = kt.Hyperband(model_builder,
                         objective=kt.Objective('val_auc', direction='max'),
                         max_epochs=max_epochs)

    # Search the parameter space.
    tuner.search(X_tr, y_tr,
                 epochs=max_epochs,
                 validation_split=validation_split,
                 callbacks=[early_stop],
                 batch_size=batch_size)

    # Rebuild a model from the single best hyperparameter set.
    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    model = tuner.hypermodel.build(best_hps)

    return model, X_tr, y_tr

In [None]:
# Run the hyperparameter search (slow: the captured log below shows more than
# 18 minutes elapsed) and keep the rebuilt best model together with the
# preprocessed features and the target.
model, X_tr, y_tr = build_model()

Trial 23 Complete [00h 00m 08s]
val_auc: 0.5

Best val_auc So Far: 0.8709453344345093
Total elapsed time: 00h 18m 20s

Search: Running Trial #24

Hyperparameter    |Value             |Best Value So Far 
dropout_rate      |0.1               |0.1               
num_layers        |11                |8                 
num_units         |400               |308               
tuner/epochs      |2                 |2                 
tuner/initial_e...|0                 |0                 
tuner/bracket     |3                 |3                 
tuner/round       |0                 |0                 

Epoch 1/2
Epoch 2/2


In [None]:
# Proceeding with model training

