### Machine Learning Model Training and Evaluation:

In [None]:
import numpy as np
import pandas as pd
import kerastuner as kt

In [None]:
# Load the pre-transformed loan-default table and preview a few random rows
df = pd.read_csv(filepath_or_buffer='../Dataset/Loan_default_transformed.csv')
df.sample(n=4)

In [None]:
# Show every row that has at least one missing value
df.loc[df.isna().any(axis='columns')]

In [None]:
## Splitting the Dataset:
from sklearn.model_selection import train_test_split

# Features are every column except the target; y is kept as a one-column DataFrame
X = df.drop('Default', axis=1)
y = df[['Default']]

# A fixed seed makes the split (and every score computed from it) reproducible
# across kernel restarts; stratifying keeps the Default class ratio the same in
# the train and test partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y['Default']
)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

#### Create an Evaluate Function to give all metrics after model Training:

In [None]:
## All classification models:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier

# Candidate classifiers keyed by the label printed in the comparison report.
# Estimators with internal randomness are seeded so that a re-run reproduces
# the same scores.
models = {
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    # 'SVM': SVC(),  # disabled in the original comparison
    'Gaussian': GaussianNB(),
    # 'KNN': KNeighborsClassifier(),  # disabled in the original comparison
    'AdaBoost': AdaBoostClassifier(random_state=42),
    'Gradient Boost': GradientBoostingClassifier(random_state=42),
    'XGBoost': XGBClassifier(random_state=42),
}

In [None]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

model_list = []
acc_list = []

# Flatten the one-column label frames once instead of on every iteration
y_train_1d = y_train.values.ravel()
y_test_1d = y_test.values.ravel()

## Fit each model, predict and store it with model's accuracy
for name, model in models.items():
    # Fitting model
    model.fit(X_train, y_train_1d)
    # Prediction
    y_pred = model.predict(X_test)

    # Accuracy alone can hide poor detection of the positive class, so
    # precision / recall / F1 are reported next to it.
    # Assumes the positive class is encoded as 1 - TODO confirm.
    acc = accuracy_score(y_test_1d, y_pred)
    precision = precision_score(y_test_1d, y_pred, zero_division=0)
    recall = recall_score(y_test_1d, y_pred, zero_division=0)
    f1 = f1_score(y_test_1d, y_pred, zero_division=0)

    model_list.append(name)
    acc_list.append(acc)

    print(f'Model performance of {name} for Test set')
    print("- Accuracy score: {:.4f}".format(acc))
    print("- Precision score: {:.4f}".format(precision))
    print("- Recall score: {:.4f}".format(recall))
    print("- F1 score: {:.4f}".format(f1))

    print('='*35)
    print('\n')

XGBoost and Logistic Regression came out as the best models.

In [None]:
## Fitting Logistic Regression to its best:
# Base estimator with default settings; it is handed to the grid search further down
LR = LogisticRegression()

# Search space for LogisticRegression, split by solver because each solver
# supports a different set of penalties. Every (solver, penalty) pair from the
# original grid is kept; `l1_ratio` is only crossed with the elastic-net
# penalty, the one place it has an effect, so the search no longer refits
# identical l1 / l2 models once per l1_ratio value.
param_grid = [
    {
        'penalty': ['l1', 'l2'],
        'solver': ['liblinear'],  # liblinear handles l1 and l2
        'max_iter': [100, 200, 500],
    },
    {
        'penalty': ['l2'],
        'solver': ['lbfgs', 'newton-cg', 'sag'],  # solvers that only support l2
        'max_iter': [100, 200, 500],
    },
    {
        'penalty': ['l1', 'l2'],
        'solver': ['saga'],  # saga handles l1, l2 and elasticnet
        'max_iter': [100, 200, 500],
    },
    {
        'penalty': ['elasticnet'],
        'solver': ['saga'],
        'max_iter': [100, 200, 500],
        'l1_ratio': [0.1, 0.5, 0.9],  # l1/l2 mixing weight, elastic-net only
    },
]

In [None]:
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV

# Exhaustive 3-fold search over param_grid on all cores; any failing
# candidate raises instead of being scored.
clf = GridSearchCV(
    estimator=LR,
    param_grid=param_grid,
    cv=3,
    verbose=2,
    n_jobs=-1,
    error_score='raise',
)
best_LR = clf.fit(X_train, y_train.to_numpy().ravel())

In [None]:
# Winning hyper-parameter combination from the grid search
Best_LR = best_LR.best_params_
print("Best parameters found: ", Best_LR)

# Score the fitted search object on the held-out test split
pred_LR = best_LR.predict(X_test)
print(
    "Best Accuracy from Logistic Regression: ",
    accuracy_score(y_true=y_test, y_pred=pred_LR),
)

In [None]:
## Parameters for XGBoost
# Candidate values sampled by the randomized search. The number of boosting
# rounds is exposed by XGBClassifier as `n_estimators`; `max_iter` is not one
# of its parameters, so that dimension of the search had no effect.
Param_dict = {
    'max_depth': [3, 4, 5, 6, 8, 10],
    'min_child_weight': [3, 5, 7],
    'gamma': [0, 0.1, 0.2, 0.3, 0.4],
    'n_estimators': [100, 200, 500],
}

In [None]:
from sklearn.model_selection import RandomizedSearchCV

XG = XGBClassifier()

# 30 random draws from Param_dict, each scored with 3-fold CV on all cores.
# random_state pins which candidates are drawn so the search is repeatable.
clf = RandomizedSearchCV(
    XG,
    param_distributions=Param_dict,
    n_iter=30,
    cv=3,
    verbose=2,
    n_jobs=-1,
    random_state=42,
)
# Labels are passed as a flat 1-D vector, matching every other fit in this notebook
best_XG = clf.fit(X_train, y_train.values.ravel())

In [None]:
# Winning hyper-parameter combination from the randomized search
Best_XG = best_XG.best_params_
print("Best parameters found: ", Best_XG)

# Score the fitted search object on the held-out test split
pred_XG = best_XG.predict(X_test)
print(
    "Best Accuracy XGBoost: ",
    accuracy_score(y_true=y_test, y_pred=pred_XG),
)

#### Now we will fit a neural network to the same problem and compare its performance with the classical machine-learning models:

In [None]:
import tensorflow as tf
from tensorflow import keras as kr
# import optuna

In [None]:
## Creating Neural Network:
model = kr.Sequential() # Network base
model.add(kr.layers.Input(shape=(20,))) # Input layer

## Neural Network
# model.add(kr.layers.Dense(256, activation='relu'))
model.add(kr.layers.Dense(128, activation='relu'))
model.add(kr.layers.Dense(64, activation='relu'))
model.add(kr.layers.Dense(32, activation='relu'))
model.add(kr.layers.Dense(16, activation='relu'))

## Output layer
model.add(kr.layers.Dense(1, activation='sigmoid'))

In [None]:
# Display a summary of the network assembled in the previous cell
model.summary()

In [None]:
# Binary cross-entropy pairs with the single sigmoid output unit
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# NOTE(review): the test split is also used as validation data here, so the
# per-epoch validation numbers are computed on the same rows as the final
# test score.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=20,
    validation_data=(X_test, y_test),
)

In [None]:
# Sigmoid outputs for the test rows, thresholded at 0.5 into 0/1 labels
pred_ANN = model.predict(X_test)
pred_ANN_labels = np.greater(pred_ANN, 0.5).astype(int)

print(
    "Best Accuracy Neural Network: ",
    accuracy_score(y_true=y_test, y_pred=pred_ANN_labels),
)

Now we will tune this neural network:

In [None]:
## Function to build a model using various values from hyperparameters
def Build_best_classifier(hp):
    """Build and compile a binary classifier from Keras Tuner hyperparameters.

    Parameters
    ----------
    hp : hyperparameter container supplied by the tuner; this function calls
        its ``Int``, ``Float``, ``Boolean`` and ``Choice`` methods.

    Returns
    -------
    A compiled ``kr.Sequential`` model taking 20 input features and ending in
    a single sigmoid unit, trained with binary cross-entropy and tracking
    accuracy.

    Tuned values: ``num_layers`` (1-5), ``Unit_count`` (16-128 in steps of 16),
    ``rate`` (0.0-0.5 in steps of 0.1), ``dropout`` (on/off) and
    ``learning_rate`` (0.1, 1e-2 or 1e-3).
    """
    net = kr.Sequential()
    net.add(kr.Input(shape=(20,)))

    depth = hp.Int('num_layers', min_value=1, max_value=5)
    for _ in range(depth):
        # NOTE(review): the hyperparameter names are not indexed by layer, so
        # presumably one value is shared by every hidden block - confirm.
        width = hp.Int("Unit_count", 16, 128, step=16)
        net.add(
            kr.layers.Dense(
                units=width,
                kernel_initializer='he_normal',
                kernel_regularizer=kr.regularizers.l2(0.01),
            )
        )

        # Activation is applied as its own layer, followed by batch normalisation
        net.add(kr.layers.LeakyReLU(negative_slope=0.2))
        net.add(kr.layers.BatchNormalization())

        # The rate is requested before the on/off switch, whether or not
        # dropout ends up being added
        rate = hp.Float('rate', min_value=0.0, max_value=0.5, step=0.1)
        use_dropout = hp.Boolean('dropout')
        if use_dropout:
            net.add(kr.layers.Dropout(rate))

    # Output layer
    net.add(kr.layers.Dense(1, activation='sigmoid'))

    step_size = hp.Choice('learning_rate', [0.1, 1e-2, 1e-3])
    net.compile(
        optimizer=kr.optimizers.Adam(step_size),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

    return net

In [None]:
from sklearn.model_selection import train_test_split

# Random search over the hyperparameters declared in Build_best_classifier,
# ranked by validation accuracy. The seed makes the sampled trials repeatable.
tuner = kt.RandomSearch(
    Build_best_classifier,
    objective='val_accuracy',
    max_trials=10,
    seed=42,
    directory='project',
    project_name='Loan_default',
)

# Hyperparameters are selected on a validation split carved out of the
# training data. Selecting them on X_test / y_test would leak the test set
# into model selection and make the final test score optimistic.
X_tune, X_val, y_tune, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42, stratify=y_train['Default']
)

## Fitting model
tuner.search(X_tune, y_tune, epochs=2, validation_data=(X_val, y_val))

> #### Note: Hyperparameter tuning of the neural network, or even using an ANN at all, doesn't show any significant improvement. <br> XGBoost gives the best accuracy with less computation.