<a href="https://colab.research.google.com/github/njgeorge000158/Student-Loan-Repayment-Prediction-with-Deep-Learning/blob/main/student_loans_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#*******************************************************************************************
 #
 #  File Name:  student_loans_colab.ipynb
 #
 #  File Description:
 #      This interactive Python notebook, student_loans_colab.ipynb, reads a csv file,
 #      student_loans.csv, and uses deep learning methods to process the features
 #      in the provided dataset and create a binary classifier that can predict
 #      whether student loan applicants will default or not.
 #
 #
 #  Date            Description                             Programmer
 #  ----------      ------------------------------------    ------------------
 #  04/15/2024      Initial Development                     Nicholas J. George
 #
 #******************************************************************************************/

from google.colab import drive
drive.mount('/content/gdrive/')

import sys
sys.path.insert(0,'./gdrive/MyDrive/student_loan_prediction')

import os
os.environ['HV_DOC_HTML'] = 'true'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

!apt-get update
!apt install firefox
!pip install -U geckodriver
!pip install -U dataframe_image
!pip install -U selenium
!pip install -U kaleido
!pip install -U hvplot
!pip install -U plotly
!pip install -U panel
!pip install -U bokeh
!pip install -U imblearn
!pip install -U silence-tensorflow
!pip install -U keras-tuner

import hvplot
import hvplot.pandas

import pandas as pd

import holoviews as hv
hv.extension('bokeh')

import logx
logx.set_logs_directory_path('./gdrive/MyDrive/student_loan_prediction/logs')
logx.set_images_directory_path('./gdrive/MyDrive/student_loan_prediction/images')
logx.set_resources_directory_path('./gdrive/MyDrive/credit_risk_classification/resources')
logx.set_models_directory_path('./gdrive/MyDrive/credit_risk_classification/models')
logx.create_directory(logx.MODELS_DIRECTORY_PATH)

import pandasx
pandasx.set_google_colab(True)

import deep_learningx
import logx
import pandasx
import student_loans_constants

import copy
import pickle

import pandas as pd
import tensorflow as tf

import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from silence_tensorflow import silence_tensorflow
silence_tensorflow()

pd.options.mode.chained_assignment = None

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).
Hit:1 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:2 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [119 kB]
Get:3 http://security.ubuntu.com/ubuntu jammy-security InRelease [110 kB]
Hit:4 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:5 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:6 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:7 https://ppa.launchpadcontent.net/c2d4u.team/c2d4u4.0+/ubuntu jammy InRelease
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Get:11 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 Packages [2,037 kB]
Fetched 2,266 kB in 12s (188 

In [2]:
CONSTANT_LOCAL_FILE_NAME = 'student_loans_colab.ipynb'


logx.set_log_mode(False)

logx.set_image_mode(False)


logx.begin_program('student_loans_colab')

Program execution begins...



# <br> **Section 1: Extraction**

## **1.1: Read the CSV data into a Pandas DataFrame**

In [3]:
student_loan_dataframe = pd.read_csv(student_loans_constants.CONSTANT_INPUT_FILE_PATH)

logx.log_write_object(student_loan_dataframe)

## **1.2: Display Student Loan DataFrame**

In [4]:
pandasx.return_formatted_table(student_loan_dataframe, 'Table 1.2: Student Loan Table')

payment_history,location_parameter,stem_degree_score,gpa_ranking,alumni_success,study_major_code,time_to_completion,finance_workshop_score,cohort_ranking,total_loan_score,financial_aid_score,credit_ranking
7.4,0.7,0.0,1.9,0.08,11.0,34.0,1.0,3.51,0.56,9.4,5
7.8,0.88,0.0,2.6,0.1,25.0,67.0,1.0,3.2,0.68,9.8,5
7.8,0.76,0.04,2.3,0.09,15.0,54.0,1.0,3.26,0.65,9.8,5
11.2,0.28,0.56,1.9,0.07,17.0,60.0,1.0,3.16,0.58,9.8,6
7.4,0.7,0.0,1.9,0.08,11.0,34.0,1.0,3.51,0.56,9.4,5
7.4,0.66,0.0,1.8,0.07,13.0,40.0,1.0,3.51,0.56,9.4,5
7.9,0.6,0.06,1.6,0.07,15.0,59.0,1.0,3.3,0.46,9.4,5
7.3,0.65,0.0,1.2,0.07,15.0,21.0,0.99,3.39,0.47,10.0,7
7.8,0.58,0.02,2.0,0.07,9.0,18.0,1.0,3.36,0.57,9.5,7
7.5,0.5,0.36,6.1,0.07,17.0,102.0,1.0,3.35,0.8,10.5,5


# <br> **Section 2: Preprocessing**

## **2.1: Create the labels series (`y`)  from the “spam” column, and then create the features (`X`) DataFrame from the remaining columns.**

### **Separate the Y Variable, the Labels**

In [5]:
y_series = student_loan_dataframe['credit_ranking']

logx.log_write_object(y_series)

In [6]:
y_series.value_counts()

credit_ranking
5    681
6    638
7    199
4     53
8     18
3     10
Name: count, dtype: int64

### **Separate the X Variable, the Features**

In [7]:
x_dataframe = student_loan_dataframe.drop(columns = 'credit_ranking', axis = 1)

logx.log_write_object(x_dataframe)

In [8]:
pandasx.return_formatted_table(x_dataframe, 'Table 2.1: Student Loan Features DataFrame')

payment_history,location_parameter,stem_degree_score,gpa_ranking,alumni_success,study_major_code,time_to_completion,finance_workshop_score,cohort_ranking,total_loan_score,financial_aid_score
7.4,0.7,0.0,1.9,0.08,11.0,34.0,1.0,3.51,0.56,9.4
7.8,0.88,0.0,2.6,0.1,25.0,67.0,1.0,3.2,0.68,9.8
7.8,0.76,0.04,2.3,0.09,15.0,54.0,1.0,3.26,0.65,9.8
11.2,0.28,0.56,1.9,0.07,17.0,60.0,1.0,3.16,0.58,9.8
7.4,0.7,0.0,1.9,0.08,11.0,34.0,1.0,3.51,0.56,9.4
7.4,0.66,0.0,1.8,0.07,13.0,40.0,1.0,3.51,0.56,9.4
7.9,0.6,0.06,1.6,0.07,15.0,59.0,1.0,3.3,0.46,9.4
7.3,0.65,0.0,1.2,0.07,15.0,21.0,0.99,3.39,0.47,10.0
7.8,0.58,0.02,2.0,0.07,9.0,18.0,1.0,3.36,0.57,9.5
7.5,0.5,0.36,6.1,0.07,17.0,102.0,1.0,3.35,0.8,10.5


## **2.2: Split the Data into Training and Testing Datasets by Using `train_test_split`.**

In [9]:
x_train_dataframe, x_test_dataframe, \
y_train_series, y_test_series \
    = train_test_split \
        (x_dataframe, y_series,
         random_state = student_loans_constants.CONSTANT_DL_RANDOM_STATE_1)

In [10]:
logx.log_write_object(x_train_dataframe)

logx.log_write_object(x_test_dataframe)

logx.log_write_object(y_train_series)

logx.log_write_object(y_test_series)

## **2.3: Use the StandardScaler to Scale the X Variables**

### **Create a StandardScaler Instance**

In [11]:
current_standard_scalar = StandardScaler()

### **Fit the StandardScaler**

In [12]:
x_standard_scalar = current_standard_scalar.fit(x_train_dataframe)

### **Scale the Data**

In [13]:
x_train_scaled_nparray = x_standard_scalar.transform(x_train_dataframe)

logx.log_write_object(x_train_scaled_nparray)

In [14]:
x_test_scaled_nparray = x_standard_scalar.transform(x_test_dataframe)

logx.log_write_object(x_test_scaled_nparray)

# <br> **Section 3: Compile, Train, Evaluate, and Export the Model**

## **3.1: Compile Model**

### **Model Definition**

In [15]:
number_input_features_integer = len(x_train_scaled_nparray[0])

logx.print_and_log_text \
    ('\033[1m'
     + 'The number of inputs (features) in the model is {:,}.' \
         .format(number_input_features_integer)
     + '\033[0m')

[1mThe number of inputs (features) in the model is 11.[0m


In [16]:
input_layer_units_integer = 97

input_layer_activation_string = 'elu'

logx.print_and_log_text \
    ('\033[1m'
     + 'The number of nodes in the input layer is {:,}.' \
         .format(input_layer_units_integer)
     + '\033[0m')

[1mThe number of nodes in the input layer is 97.[0m


In [17]:
hidden_layer_units_integer = 46

hidden_layer_activation_string = 'elu'

logx.print_and_log_text \
    ('\033[1m'
     + 'The number of nodes in the hidden layer is {:,}.' \
         .format(hidden_layer_units_integer)
     + '\033[0m')

[1mThe number of nodes in the hidden layer is 46.[0m


In [18]:
output_layer_units_integer = 1

output_layer_activation_string = 'linear'

logx.print_and_log_text \
    ('\033[1m'
     + 'The number of nodes in the output layer is {:,}.' \
         .format(output_layer_units_integer)
     + '\033[0m')

[1mThe number of nodes in the output layer is 1.[0m


### **Instantiate the Model**

In [19]:
neural_network_sequential_model = tf.keras.models.Sequential()

### **Layers**

In [20]:
neural_network_sequential_model.add \
    (tf.keras.layers.Dense \
         (units = input_layer_units_integer,
          activation = input_layer_activation_string,
          input_dim = number_input_features_integer))

neural_network_sequential_model.add(tf.keras.layers.Dropout(0.064))

neural_network_sequential_model.add \
    (tf.keras.layers.Dense \
         (units = hidden_layer_units_integer,
          activation = hidden_layer_activation_string))

neural_network_sequential_model.add(tf.keras.layers.Dropout(0.138))

neural_network_sequential_model.add \
    (tf.keras.layers.Dense \
         (units = output_layer_units_integer,
          activation = output_layer_activation_string))

### **Model Summary**

In [21]:
neural_network_sequential_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 97)                1164      
                                                                 
 dropout (Dropout)           (None, 97)                0         
                                                                 
 dense_1 (Dense)             (None, 46)                4508      
                                                                 
 dropout_1 (Dropout)         (None, 46)                0         
                                                                 
 dense_2 (Dense)             (None, 1)                 47        
                                                                 
Total params: 5719 (22.34 KB)
Trainable params: 5719 (22.34 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


### **Compile**

In [22]:
neural_network_sequential_model.compile \
    (loss = 'mean_squared_error',
     optimizer = tf.keras.optimizers.Adam(learning_rate = 0.0024865),
     metrics = ['mse'])

## **3.2: Fit and Train Model**

In [23]:
earlystopping_callback \
    = tf.keras.callbacks.EarlyStopping  \
        (monitor = 'val_mse', mode = 'min', patience = 100, restore_best_weights = True)

neural_network_sequential_model \
    .fit \
        (x_train_scaled_nparray,
         y_train_series.values,
         epochs = 1000,
         validation_data = (x_test_scaled_nparray, y_test_series.values),
         callbacks = [earlystopping_callback])

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.src.callbacks.History at 0x7d8656021e40>

## **3.3: Evaluate Model**

In [24]:
model_loss_float, model_mse_float \
    = neural_network_sequential_model.evaluate(x_test_scaled_nparray, y_test_series.values, verbose = 2)

logx.print_and_log_text \
    (f'\nModel Loss: {round(model_loss_float * 100, 2)}%, '
     + f'Model MSE: {round(model_mse_float * 100, 2)}%')

13/13 - 0s - loss: 0.3395 - mse: 0.3395 - 47ms/epoch - 4ms/step

Model Loss: 33.95%, Model MSE: 33.95%


## **3.4: Save and Export Model**

In [25]:
neural_network_sequential_model.save(student_loans_constants.CONSTANT_NN_MODEL_FILE_PATH)

# <br> **Section 4: Predict Loan Repayment Success**

## **4.1: Reload Model**

In [26]:
reloaded_neural_network_sequential_model \
    = tf.keras.models.load_model \
        (student_loans_constants.CONSTANT_NN_MODEL_FILE_PATH)

## **4.2: Predictions**

In [27]:
predictions_nparray \
    = reloaded_neural_network_sequential_model.predict \
        (x_test_scaled_nparray).round().astype('int32')



## **4.3: Compare Predictions and Actual Values**

In [28]:
comparison_dataframe \
    = pd.DataFrame({'predictions': predictions_nparray.ravel(), 'actual': y_test_series.values})

In [29]:
pandasx.return_formatted_table \
    (comparison_dataframe, 'Table 4.3: Model Predictions vs. Actual Values')

predictions,actual
6,6
6,6
5,5
6,6
6,6
6,7
5,4
5,5
5,5
5,5


In [30]:
# logx.end_program()