# In [1]:
# -*- coding: utf-8 -*-
"""
Created on Tue Feb  4 17:28:58 2025

@author: csc6pkj - Taken from Chapter 5 of Deep Learning with Python by
N. Ketkar and J. Moolayil (with errors corrected!)
"""

#Import required libraries - PKJ commented out those not used
import torch.nn as nn
import torch as tch
import numpy as np
import pandas as pd
#from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score, recall_score, roc_curve, auc, roc_auc_score
from sklearn.model_selection import train_test_split
#from sklearn.utils import shuffle
import matplotlib.pyplot as plt

#Load data into memory using pandas
#PKJ Note... It is assumed that "bank.csv" has been downloaded and that it is
#stored in the current working folder
df = pd.read_csv("bank.csv")

print("Shape and head of Dataset -")
print("DF Shape:",df.shape)
print(df.head()) #PKJ embedded in the print

print("Distribution of Target Values in Dataset -")
print(df.deposit.value_counts()) #PKJ embedded in the print

print("na values in Dataset -")
print(df.isna().sum()) #PKJ embedded in the print

print("Distinct datatypes in Dataset -")
print(df.dtypes.value_counts()) #PKJ embedded in the print

#Extract categorical columns from dataset
categorical_columns = df.select_dtypes(include="object").columns
print("Categorical cols:",list(categorical_columns)) #PKJ embedded in the print

#Convert each yes/no categorical column into a 1/0 flag.
#Only columns whose distinct values are exactly {"yes","no"} are converted:
#testing merely for "two distinct values" would turn any other binary column
#(e.g. "male"/"female") into all zeros. Such columns are instead left for the
#one-hot encoding step below.
for col in categorical_columns:
    if set(df[col].dropna().unique()) == {"yes", "no"}:
        df[col] = np.where(df[col]=="yes",1,0)
print(df.head())

#For the remaining categorical variables;
#create one-hot encoded version of the dataset
new_df = pd.get_dummies(df)
#Define target and predictors for the model
target = "deposit"
#Keep the predictors in dataframe column order. Building the list from a set
#difference gives an ordering that can change from run to run, which makes
#the feature-to-input mapping (and hence the results) irreproducible.
predictors = [col for col in new_df.columns if col != target]
print("new_df.shape:",new_df.shape)
print(new_df[predictors].head()) #PKJ embedded in the print

#Convert all datatypes within pandas dataframe to Float32
#(Compatibility with PyTorch tensors)
new_df = new_df.astype(np.float32)
#Split dataset into Train/Test [80:20]
#random_state fixes the shuffle so that the split is the same on every run
#(the same seed value is used for the network weight initialisation)
X_train,x_test, Y_train,y_test = train_test_split(
    new_df[predictors],
    new_df[target],
    test_size=0.2,
    random_state=2020,
)
#Convert Pandas dataframe, first to numpy and then to Torch Tensors
#Targets are reshaped to a column vector (n,1)
X_train = tch.from_numpy(X_train.values)
x_test = tch.from_numpy(x_test.values)
Y_train = tch.from_numpy(Y_train.values).reshape(-1,1)
y_test = tch.from_numpy(y_test.values).reshape(-1,1)
#Print the dataset size to verify
print("X_train.shape:",X_train.shape)
print("x_test.shape:",x_test.shape)
print("Y_train.shape:",Y_train.shape)
print("y_test.shape:",y_test.shape)

#Define function to train the network
def train_network(model,optimizer,loss_function,num_epochs,batch_size,X_train,Y_train,lambda_L1=0.0):
    """Train ``model`` with mini-batches and an optional L1 weight penalty.

    The training data is walked through in its given order (no shuffling)
    in consecutive slices of ``batch_size`` rows; the last slice of an epoch
    may be shorter.

    Parameters
    ----------
    model : callable network exposing ``train()`` and ``parameters()``.
    optimizer : optimizer exposing ``zero_grad()`` and ``step()``.
    loss_function : callable ``loss_function(output, labels)`` returning a
        scalar loss that supports ``backward()`` and ``item()``.
    num_epochs : number of full passes over the training data.
    batch_size : number of rows per mini-batch.
    X_train, Y_train : training inputs and labels, sliced along dimension 0.
    lambda_L1 : multiplier for the L1 penalty (sum of absolute values of all
        model parameters). ``0`` disables the penalty.

    Returns
    -------
    list of float
        One entry per epoch: the sample-weighted mean of the batch losses
        (including the L1 penalty when it is enabled).
    """
    num_samples = X_train.shape[0]
    #With lambda_L1 == 0 the penalty adds nothing to the loss, so avoid
    #summing every parameter (and back-propagating through it) on each batch
    use_L1 = lambda_L1 != 0
    loss_across_epochs = []
    for epoch in range(num_epochs):
        train_loss = 0.0
        #Explicitly start model training
        model.train()
        for start in range(0, num_samples, batch_size):
            #Extract train batch from X and Y (slicing stops at the last row,
            #so the final batch is simply shorter)
            stop = start + batch_size
            input_data = X_train[start:stop]
            labels = Y_train[start:stop]
            #Set the gradients to zero before starting backpropagation
            optimizer.zero_grad()
            #Forward pass
            output_data = model(input_data)
            #Calculate loss
            loss = loss_function(output_data, labels)
            if use_L1:
                #Add L1 penalty (sum of |p| over all parameters) to the loss
                L1_loss = sum(p.abs().sum() for p in model.parameters())
                loss = loss + lambda_L1 * L1_loss
            #Backpropagate
            loss.backward()
            #Update weights
            optimizer.step()
            #Weight the batch loss by batch size so that the epoch figure is
            #a per-sample mean even when the last batch is shorter
            train_loss += loss.item() * input_data.size(0)
        epoch_loss = train_loss / num_samples
        loss_across_epochs.append(epoch_loss)
        #Changed by PKJ - output every 50 (not 500!)
        if epoch % 50 == 0:
            print("Epoch: {} - Loss:{:.4f}".format(epoch, epoch_loss))
    return loss_across_epochs

#Define function for evaluating NN
def evaluate_model(model,x_test,y_test,X_train,Y_train,loss_list):
    """Print classification metrics and plot the loss and ROC curves.

    The model is switched to evaluation mode and run once on the training
    inputs and once on the test (validation) inputs. Its outputs are treated
    as probabilities and thresholded at 0.5 to obtain class predictions.
    Accuracy, precision, recall and ROC AUC are printed for both splits,
    then a figure with two panels is shown: the loss per epoch and the ROC
    curves of both splits.

    Parameters
    ----------
    model : trained network; called as ``model(inputs)``.
    x_test, y_test : validation inputs and true labels.
    X_train, Y_train : training inputs and true labels.
    loss_list : sequence of per-epoch loss values to plot.

    Returns
    -------
    None
    """
    model.eval() #Explicitly set to evaluate mode
    #Predict on Train and Validation Datasets.
    #This is inference only, so disable gradient tracking: no autograd graph
    #is built for the (full-dataset) forward passes. The outputs are
    #converted to numpy once and reused by every metric below.
    with tch.no_grad():
        y_test_prob = model(x_test).detach().numpy()
        Y_train_prob = model(X_train).detach().numpy()
    y_test_pred = np.where(y_test_prob > 0.5, 1, 0)
    Y_train_pred = np.where(Y_train_prob > 0.5, 1, 0)
    #Compute Training and Validation Metrics
    print("\n Model Performance -")
    splits = (
        ("Training", Y_train, Y_train_pred, Y_train_prob),
        ("Validation", y_test, y_test_pred, y_test_prob),
    )
    for split_name, y_true, y_pred, y_prob in splits:
        print(split_name + " Accuracy-", round(accuracy_score(y_true, y_pred), 3))
        print(split_name + " Precision-", round(precision_score(y_true, y_pred), 3))
        print(split_name + " Recall-", round(recall_score(y_true, y_pred), 3))
        print(split_name + " ROCAUC", round(roc_auc_score(y_true, y_prob), 3))
    print("\n")
    #Plot the Loss curve and ROC Curve
    plt.figure(figsize=(20,5))
    plt.subplot(1, 2, 1)
    plt.plot(loss_list)
    plt.title('Loss across epochs')
    plt.ylabel('Loss')
    plt.xlabel('Epochs')
    plt.subplot(1, 2, 2)
    #Validation
    fpr_v, tpr_v, _ = roc_curve(y_test, y_test_prob)
    roc_auc_v = auc(fpr_v, tpr_v)
    #Training
    fpr_t, tpr_t, _ = roc_curve(Y_train, Y_train_prob)
    roc_auc_t = auc(fpr_t, tpr_t)
    plt.title('Receiver Operating Characteristic:Validation')
    plt.plot(fpr_v, tpr_v, 'b', label = 'Validation AUC = %0.2f' % roc_auc_v)
    plt.plot(fpr_t, tpr_t, 'r', label = 'Training AUC = %0.2f' % roc_auc_t)
    plt.legend(loc = 'lower right')
    #Diagonal reference line (drawn after legend() so it gets no legend entry)
    plt.plot([0, 1], [0, 1],'r--')
    plt.xlim([0, 1])
    plt.ylim([0, 1])
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    plt.show()
    
#Define Neural Network
class NeuralNetwork(nn.Module):
    """Fully connected binary classifier: input -> 96 -> 192 -> 384 -> 1.

    Each hidden layer is followed by ReLU and the single output unit by a
    sigmoid, so the forward pass returns one value in (0, 1) per input row.

    Parameters
    ----------
    input_dim : number of input features (default 48, the value previously
        hard-coded for this script's dataset).

    Note: the constructor calls ``tch.manual_seed(2020)`` before creating the
    layers, which resets the global torch random number generator so that
    every instance starts from the same initial weights.
    """
    def __init__(self, input_dim=48):
        super().__init__()
        #Seed before the layers are created so weight initialisation is repeatable
        tch.manual_seed(2020)
        self.fc1 = nn.Linear(input_dim, 96)
        self.fc2 = nn.Linear(96, 192)
        self.fc3 = nn.Linear(192, 384)
        self.out = nn.Linear(384, 1)
        self.relu = nn.ReLU()
        self.final = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid output of the network for the batch ``x``."""
        op = self.relu(self.fc1(x))
        op = self.relu(self.fc2(op))
        op = self.relu(self.fc3(op))
        return self.final(self.out(op))
    
#Define Network with Dropout Layers
class NeuralNetworkDropOut(nn.Module):
    """Fully connected binary classifier with dropout to reduce overfitting.

    Layout: input -> 96 -> 192 -> 384 -> 1, with ReLU after each hidden layer
    and a sigmoid on the single output unit. One shared dropout module is
    applied to the input and after every hidden activation.

    Parameters
    ----------
    input_dim : number of input features (default 48, the value previously
        hard-coded for this script's dataset).
    dropout_p : dropout probability passed to ``nn.Dropout`` (default 0.1,
        the value previously hard-coded).

    Note: the constructor calls ``tch.manual_seed(2020)`` before creating the
    layers, which resets the global torch random number generator so that
    every instance starts from the same initial weights.
    """
    def __init__(self, input_dim=48, dropout_p=0.1):
        super().__init__()
        #Seed before the layers are created so weight initialisation is repeatable
        tch.manual_seed(2020)
        self.fc1 = nn.Linear(input_dim, 96)
        self.fc2 = nn.Linear(96, 192)
        self.fc3 = nn.Linear(192, 384)
        self.relu = nn.ReLU()
        self.out = nn.Linear(384, 1)
        self.final = nn.Sigmoid()
        #Dropout Layer (the same module is reused at every dropout point)
        self.drop = nn.Dropout(dropout_p)

    def forward(self, x):
        """Return the sigmoid output of the network for the batch ``x``."""
        op = self.drop(x) #Dropout for input layer
        op = self.drop(self.relu(self.fc1(op))) #Dropout for hidden layer 1
        op = self.drop(self.relu(self.fc2(op))) #Dropout for hidden layer 2
        op = self.drop(self.relu(self.fc3(op))) #Dropout for hidden layer 3
        return self.final(self.out(op))
        
#Experiment without Dropout... L1 and/or L2 regularization may be switched on
#through the two coefficients below (a value of 0 switches that one off)
num_epochs, batch_size = 300, 128
weight_decay = 0.001 #L2 Regularization strength (0 ==> L2 Reg not used)
lambda_L1 = 0.00 #L1 Regularization strength (0 ==> L1 Reg not used)
model = NeuralNetwork()
loss_function = nn.BCELoss() #Binary Cross Entropy Loss
adam_optimizer = tch.optim.Adam(
    params=model.parameters(),
    lr=0.001,
    weight_decay=weight_decay,
)
#Train network and keep the per-epoch loss history
adam_loss = train_network(
    model=model,
    optimizer=adam_optimizer,
    loss_function=loss_function,
    num_epochs=num_epochs,
    batch_size=batch_size,
    X_train=X_train,
    Y_train=Y_train,
    lambda_L1=lambda_L1,
)
#Evaluate model on the training and test splits
evaluate_model(
    model=model,
    x_test=x_test,
    y_test=y_test,
    X_train=X_train,
    Y_train=Y_train,
    loss_list=adam_loss,
)

#Experiment with Dropout... L1 and/or L2 regularization may be switched on
#through the two coefficients below (a value of 0 switches that one off)
num_epochs, batch_size = 500, 128
weight_decay = 0.0 #L2 Regularization strength (0 ==> L2 Reg not used)
lambda_L1 = 0.0 #L1 Regularization strength (0 ==> L1 Reg not used)
model = NeuralNetworkDropOut()
loss_function = nn.BCELoss() #Binary Cross Entropy Loss
adam_optimizer = tch.optim.Adam(
    params=model.parameters(),
    lr=0.001,
    weight_decay=weight_decay,
)
#Train network and keep the per-epoch loss history
adam_loss = train_network(
    model=model,
    optimizer=adam_optimizer,
    loss_function=loss_function,
    num_epochs=num_epochs,
    batch_size=batch_size,
    X_train=X_train,
    Y_train=Y_train,
    lambda_L1=lambda_L1,
)
#Evaluate model on the training and test splits
evaluate_model(
    model=model,
    x_test=x_test,
    y_test=y_test,
    X_train=X_train,
    Y_train=Y_train,
    loss_list=adam_loss,
)


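# Recorded notebook output: ModuleNotFoundError: No module named 'sklearn'
# (the scikit-learn package must be installed before running this script)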