<a href="https://colab.research.google.com/github/njgeorge000158/Student-Loan-Repayment-Prediction-with-Deep-Learning/blob/main/student_loans_hyperparameters_optimization_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#*******************************************************************************************
 #
 #  File Name:  student_loans_hyperparameters_optimization_colab.ipynb
 #
 #  File Description:
 #      This interactive Python notebook,
 #      student_loans_hyperparameters_optimization_colab.ipynb, reads a csv file,
 #      student_loans.csv, and uses deep learning methods to find the optimal model
 #      to process the features in the provided dataset and create a binary classifier
 #      that can predict whether student loan applicants will default or not.
 #
 #
 #  Date            Description                             Programmer
 #  ----------      ------------------------------------    ------------------
 #  04/15/2024      Initial Development                     Nicholas J. George
 #
 #******************************************************************************************/

from google.colab import drive
drive.mount('/content/gdrive/')

import sys
sys.path.insert(0,'./gdrive/MyDrive/student_loan_prediction')

import os
os.environ['HV_DOC_HTML'] = 'true'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

!apt-get update
!apt install firefox
!pip install -U geckodriver
!pip install -U dataframe_image
!pip install -U selenium
!pip install -U kaleido
!pip install -U hvplot
!pip install -U plotly
!pip install -U panel
!pip install -U bokeh
!pip install -U imblearn
!pip install -U silence-tensorflow
!pip install -U keras-tuner

import hvplot
import hvplot.pandas

import holoviews as hv
hv.extension('bokeh')

import logx
logx.set_logs_directory_path('./gdrive/MyDrive/student_loan_prediction/logs')
logx.set_images_directory_path('./gdrive/MyDrive/student_loan_prediction/images')

import pandasx
pandasx.set_google_colab(True)

import deep_learningx
import student_loans_constants

import copy
import pickle

import pandas as pd
import tensorflow as tf

import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from silence_tensorflow import silence_tensorflow
silence_tensorflow()

pd.options.mode.chained_assignment = None

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).
Hit:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:3 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:4 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:5 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:6 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:7 https://ppa.launchpadcontent.net/c2d4u.team/c2d4u4.0+/ubuntu jammy InRelease
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Reading package lists... Done
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
firef

In [2]:
CONSTANT_LOCAL_FILE_NAME = 'student_loans_hyperparameters_optimization_colab'


logx.set_log_mode(False)

logx.set_image_mode(False)


logx.begin_program('student_loans_hyperparameters_optimization_colab')

Program execution begins...



# <br> **Section 1: Extraction**

## **1.1: Read the CSV data into a Pandas DataFrame**

In [3]:
student_loan_dataframe = pd.read_csv(student_loans_constants.CONSTANT_INPUT_FILE_PATH)

logx.log_write_object(student_loan_dataframe)

## **1.2: Display Student Loan DataFrame**

In [4]:
pandasx.return_formatted_table(student_loan_dataframe, 'Table 1.2: Student Loan Table')

payment_history,location_parameter,stem_degree_score,gpa_ranking,alumni_success,study_major_code,time_to_completion,finance_workshop_score,cohort_ranking,total_loan_score,financial_aid_score,credit_ranking
7.4,0.7,0.0,1.9,0.08,11.0,34.0,1.0,3.51,0.56,9.4,5
7.8,0.88,0.0,2.6,0.1,25.0,67.0,1.0,3.2,0.68,9.8,5
7.8,0.76,0.04,2.3,0.09,15.0,54.0,1.0,3.26,0.65,9.8,5
11.2,0.28,0.56,1.9,0.07,17.0,60.0,1.0,3.16,0.58,9.8,6
7.4,0.7,0.0,1.9,0.08,11.0,34.0,1.0,3.51,0.56,9.4,5
7.4,0.66,0.0,1.8,0.07,13.0,40.0,1.0,3.51,0.56,9.4,5
7.9,0.6,0.06,1.6,0.07,15.0,59.0,1.0,3.3,0.46,9.4,5
7.3,0.65,0.0,1.2,0.07,15.0,21.0,0.99,3.39,0.47,10.0,7
7.8,0.58,0.02,2.0,0.07,9.0,18.0,1.0,3.36,0.57,9.5,7
7.5,0.5,0.36,6.1,0.07,17.0,102.0,1.0,3.35,0.8,10.5,5


# <br> **Section 2: Preprocessing**

## **2.1: Create the labels series (`y`)  from the “spam” column, and then create the features (`X`) DataFrame from the remaining columns.**

### **Separate the Y Variable, the Labels**

In [5]:
y_series = student_loan_dataframe['credit_ranking']

logx.log_write_object(y_series)

In [6]:
y_series.value_counts()

credit_ranking
5    681
6    638
7    199
4     53
8     18
3     10
Name: count, dtype: int64

### **Separate the X Variable, the Features**

In [7]:
x_dataframe = student_loan_dataframe.drop(columns = 'credit_ranking', axis = 1)

logx.log_write_object(x_dataframe)

In [8]:
pandasx.return_formatted_table(x_dataframe, 'Table 2.1: Student Loan Features DataFrame')

payment_history,location_parameter,stem_degree_score,gpa_ranking,alumni_success,study_major_code,time_to_completion,finance_workshop_score,cohort_ranking,total_loan_score,financial_aid_score
7.4,0.7,0.0,1.9,0.08,11.0,34.0,1.0,3.51,0.56,9.4
7.8,0.88,0.0,2.6,0.1,25.0,67.0,1.0,3.2,0.68,9.8
7.8,0.76,0.04,2.3,0.09,15.0,54.0,1.0,3.26,0.65,9.8
11.2,0.28,0.56,1.9,0.07,17.0,60.0,1.0,3.16,0.58,9.8
7.4,0.7,0.0,1.9,0.08,11.0,34.0,1.0,3.51,0.56,9.4
7.4,0.66,0.0,1.8,0.07,13.0,40.0,1.0,3.51,0.56,9.4
7.9,0.6,0.06,1.6,0.07,15.0,59.0,1.0,3.3,0.46,9.4
7.3,0.65,0.0,1.2,0.07,15.0,21.0,0.99,3.39,0.47,10.0
7.8,0.58,0.02,2.0,0.07,9.0,18.0,1.0,3.36,0.57,9.5
7.5,0.5,0.36,6.1,0.07,17.0,102.0,1.0,3.35,0.8,10.5


## **2.2: Split the Data into Training and Testing Datasets by Using `train_test_split`.**

In [9]:
x_train_dataframe, x_test_dataframe, \
y_train_series, y_test_series \
    = train_test_split \
        (x_dataframe, y_series,
         random_state = student_loans_constants.CONSTANT_DL_RANDOM_STATE_1)

In [10]:
logx.log_write_object(x_train_dataframe)

logx.log_write_object(x_test_dataframe)

logx.log_write_object(y_train_series)

logx.log_write_object(y_test_series)

## **2.3: Use the StandardScaler to Scale the X Variables**

### **Create a StandardScaler Instance**

In [11]:
current_standard_scalar = StandardScaler()

### **Fit the StandardScaler**

In [12]:
x_standard_scalar = current_standard_scalar.fit(x_train_dataframe)

### **Scale the Data**

In [13]:
x_train_scaled_nparray = x_standard_scalar.transform(x_train_dataframe)

logx.log_write_object(x_train_scaled_nparray)

In [14]:
x_test_scaled_nparray = x_standard_scalar.transform(x_test_dataframe)

logx.log_write_object(x_test_scaled_nparray)

# <br> **Section 3: Find the Optimal Deep Learning Model**

## **3.1: Define and Set Hyperparameter Ranges**

In [15]:
hyperparameters_dictionary \
    = {'tuner_type': 'grid_search',
       'best_model_count': 3,
       'hyperband_iterations': 2,
       'patience': 100,
       'max_epochs': 1000,
       'restore_best_weights': True,
       'activation_choice_list': ['elu'],
       'input_features': len(x_train_scaled_nparray[0]),
       'objective': 'val_mse',
       'objective_direction': 'min',
       'input_layer_units_range': (97, 97),
       'input_units_step': 1,
       'input_dropout_range': (0.064, 0.064),
       'input_dropout_step': 0.001,
       'input_dropout_sampling': 'linear',
       'hidden_layers': 1,
       'hidden_layer_units_range_list': \
           [(46, 46), (20, 29), (10, 19), (2, 9), (8, 8)],
       'hidden_units_step': 1,
       'hidden_dropout_range_list': \
           [(0.138, 0.138), (0.0, 0.0), (0.0, 0.0), (0.0, 0.0), (0.0, 0.0)],
       'hidden_dropout_step': 0.001,
       'hidden_dropout_sampling': 'linear',
       'learning_rate_range': (24865e-7, 24865e-7),
       'learning_rate_step': 1e-7,
       'learning_sampling': 'linear',
       'output_activation_choice_list': ['linear'],
       'output_layer_units': 1,
       'loss': 'mean_squared_error',
       'optimizer': 'adam',
       'metrics': 'mse'}

deep_learningx.set_hyperparameters_dictionary(hyperparameters_dictionary)

## **3.2: Find Optimal Model Hyperparameters**

In [16]:
best_models_dictionary_list \
    = deep_learningx.return_best_nn_sequential_model_hyperparameters \
        (x_train_scaled_nparray, x_test_scaled_nparray, \
         y_train_series.values, y_test_series.values)

Trial 1 Complete [00h 00m 54s]
val_mse: 0.34883788228034973

Best val_mse So Far: 0.34883788228034973
Total elapsed time: 00h 00m 54s
13/13 - 0s - loss: 0.3488 - mse: 0.3488 - 219ms/epoch - 17ms/step
0.34883788228034973 0.34883788228034973 [<keras_tuner.src.engine.hyperparameters.hyperparameters.HyperParameters object at 0x7c6dbd4aeef0>]


In [17]:
list_length_integer = len(best_models_dictionary_list)

for index, model in enumerate(best_models_dictionary_list):

    logx.print_and_log_text \
        (f'MODEL #{index + 1}\n'
         + 'objective: ' + '{:.1f}%\n'.format(model['objective'] * 100) \
         + 'loss: ' + '{:.1f}%\n'.format(model['loss'] * 100) \
         + 'hyperparameters: ' + f"{model['hyperparameters']}\n\n")

MODEL #1
objective: 34.9%
loss: 34.9%
hyperparameters: {'activation': 'elu', 'first_units': 97, 'input_dropout_rate': 0.064, 'units_1': 46, 'hidden_dropout_rate': 0.138, 'learning_rate': 0.0024865}




In [18]:
# logx.end_program()