# Student Loan Risk with Deep Learning

In [113]:
# Imports
import pandas as pd
from pathlib import Path
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

---

## Prepare the data to be used on a neural network model

### Step 1: Read the `student_loans.csv` file into a Pandas DataFrame. Review the DataFrame, looking for columns that could eventually define your features and target variables.   

In [114]:
# Source of the student loan dataset (a remote CSV addressed by URL)
file_path = "https://static.bc-edx.com/mbc/ai/m6/datasets/student_loans.csv"


# Read the CSV into a Pandas DataFrame, then display it for review
df = pd.read_csv(file_path)
df

Unnamed: 0,payment_history,location_parameter,stem_degree_score,gpa_ranking,alumni_success,study_major_code,time_to_completion,finance_workshop_score,cohort_ranking,total_loan_score,financial_aid_score,credit_ranking
0,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
1,7.8,0.880,0.00,2.6,0.098,25.0,67.0,0.99680,3.20,0.68,9.8,5
2,7.8,0.760,0.04,2.3,0.092,15.0,54.0,0.99700,3.26,0.65,9.8,5
3,11.2,0.280,0.56,1.9,0.075,17.0,60.0,0.99800,3.16,0.58,9.8,6
4,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
...,...,...,...,...,...,...,...,...,...,...,...,...
1594,6.2,0.600,0.08,2.0,0.090,32.0,44.0,0.99490,3.45,0.58,10.5,5
1595,5.9,0.550,0.10,2.2,0.062,39.0,51.0,0.99512,3.52,0.76,11.2,6
1596,6.3,0.510,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,6
1597,5.9,0.645,0.12,2.0,0.075,32.0,44.0,0.99547,3.57,0.71,10.2,5


In [115]:
# Review the data types associated with the columns
# (a quick check that every column is numeric before splitting and scaling)
df.dtypes

payment_history           float64
location_parameter        float64
stem_degree_score         float64
gpa_ranking               float64
alumni_success            float64
study_major_code          float64
time_to_completion        float64
finance_workshop_score    float64
cohort_ranking            float64
total_loan_score          float64
financial_aid_score       float64
credit_ranking              int64
dtype: object

### Step 2: Using the preprocessed data, create the features (`X`) and target (`y`) datasets. The target dataset should be defined by the preprocessed DataFrame column “credit_ranking”. The remaining columns should define the features dataset.

In [116]:
# Target vector y: the credit_ranking column extracted as a NumPy array
y = df["credit_ranking"].to_numpy()

# Peek at the first five target values
y[:5]

array([5, 5, 5, 6, 5], dtype=int64)

In [117]:
# Feature matrix x: every column except the credit_ranking target
x = df.drop(columns=["credit_ranking"]).copy()

# Preview the first rows of the features DataFrame
x.head()

Unnamed: 0,payment_history,location_parameter,stem_degree_score,gpa_ranking,alumni_success,study_major_code,time_to_completion,finance_workshop_score,cohort_ranking,total_loan_score,financial_aid_score
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4


### Step 3: Split the features and target sets into training and testing datasets.


In [118]:
# Partition the features and target into training and testing subsets.
# random_state=1 pins the shuffle so the same split is produced on every run.
xTrain, xTest, yTrain, yTest = train_test_split(
    x,
    y,
    random_state=1,
)

### Step 4: Use scikit-learn's `StandardScaler` to scale the features data.

In [119]:
# Create a StandardScaler and fit it on the training features only,
# so that no statistics from the test set influence the scaling
xScaler = StandardScaler().fit(xTrain)

# Scale the training features with the fitted scaler
xTrainScaled = xScaler.transform(xTrain)

# Scale the testing features with the same (training-fitted) scaler
xTestScaled = xScaler.transform(xTest)

---

## Compile and Evaluate a Model Using a Neural Network

### Step 1: Create a deep neural network by assigning the number of input features, the number of layers, and the number of neurons on each layer using TensorFlow’s Keras.

> **Hint** You can start with a two-layer deep neural network model that uses the `relu` activation function for both layers.


In [120]:
# Define the number of inputs (features) to the model.
# Derived from the features DataFrame instead of being hard-coded, so the
# input layer stays correct if columns are ever added to or removed from `x`.
number_inputs = len(x.columns)

# Review the number of features
number_inputs

11

In [121]:
# Define the number of neurons in the output layer.
# A single neuron is used: the output layer built from this value emits one
# numeric credit_ranking estimate per sample.
number_classes = 1

In [122]:
# Define the number of hidden nodes for the first hidden layer
hidden_nodes_layer1 = 10

# Review the number of hidden nodes in the first layer
hidden_nodes_layer1

10

In [123]:
# Define the number of hidden nodes for the second hidden layer
hidden_nodes_layer2 = 9

# Review the number of hidden nodes in the second layer
hidden_nodes_layer2

9

In [124]:
# Seed the Python, NumPy and TensorFlow random number generators before any
# layer is created, so weight initialisation and batch shuffling are as
# repeatable as the backend allows when the notebook is re-run from the top.
tf.keras.utils.set_random_seed(1)

# Create the Sequential model instance
neuron = Sequential()

In [125]:
# Add the first hidden layer; input_dim also declares how many features
# each sample fed to the model carries
neuron.add(
    Dense(
        units=hidden_nodes_layer1,
        input_dim=number_inputs,
        activation="relu",
    )
)


In [126]:
# Add the second hidden layer (ReLU activation, sized by hidden_nodes_layer2)
neuron.add(
    Dense(
        units=hidden_nodes_layer2,
        activation="relu",
    )
)


In [127]:
# Add the output layer: number_classes neurons with a linear activation,
# i.e. the raw weighted sum is emitted without squashing
neuron.add(
    Dense(
        units=number_classes,
        activation="linear",
    )
)

In [128]:
# Display the Sequential model summary (layers, output shapes, parameter counts)
neuron.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_10 (Dense)            (None, 10)                120       
                                                                 
 dense_11 (Dense)            (None, 9)                 99        
                                                                 
 dense_12 (Dense)            (None, 1)                 10        
                                                                 
Total params: 229 (916.00 Byte)
Trainable params: 229 (916.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


### Step 2: Compile and fit the model using the `mse` loss function, the `adam` optimizer, and the `mse` evaluation metric.


In [129]:
# Compile the Sequential model: mean squared error serves as both the
# training loss and the reported metric, optimised with Adam
neuron.compile(
    loss="mean_squared_error",
    optimizer="adam",
    metrics=["mse"],
)

In [130]:
# Fit the model on the scaled training data for 100 epochs.
# Note: `model` receives whatever fit() returns (the training record);
# the network itself remains available as `neuron`.
model = neuron.fit(
    xTrainScaled,
    yTrain,
    epochs=100,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100


Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
E

### Step 3: Evaluate the model using the test data to determine the model’s loss and mean squared error (MSE).


In [131]:
# Evaluate the model on the scaled test data; the two returned values are
# the loss and the compiled metric (MSE), in that order
modelLoss, modelMSE = neuron.evaluate(
    xTestScaled,
    yTest,
    verbose=2,
)

# Display the model loss and MSE results
print(f"Model Loss : {modelLoss}    Model MSE : {modelMSE}")

13/13 - 0s - loss: 0.4111 - mse: 0.4111 - 106ms/epoch - 8ms/step
Model Loss : 0.41105231642723083    Model MSE : 0.41105231642723083


### Step 4: Save and export your model to an HDF5 file, and name the file `student_loans.h5`.


In [132]:
# Destination for the exported model (an .h5 file inside saved_models/)
file_path = Path("saved_models") / "student_loans.h5"

# Write the trained network to disk at that location
neuron.save(file_path)

  saving_api.save_model(


---
## Predict Loan Repayment Success by Using Your Neural Network Model

### Step 1: Reload your saved model.

In [133]:
# Location of the previously exported .h5 model
file_path = Path("saved_models") / "student_loans.h5"

# Rebuild the network from disk into a separate object
neuron_imported = tf.keras.models.load_model(file_path)

### Step 2: Make predictions on the testing data.

In [134]:
# Predict on the scaled test features with the reloaded model, then round
# each continuous output to the nearest whole number and store it as int32
predictions = (
    neuron_imported.predict(xTestScaled)
    .round()
    .astype("int32")
)



### Step 3: Create a DataFrame to compare the predictions with the actual values.

In [135]:
# Build a side-by-side comparison table; ravel() flattens the prediction
# array to one dimension so it can sit next to the actual targets
results = pd.DataFrame(
    {
        "predictions": predictions.ravel(),
        "actual": yTest,
    }
)



### Step 4: Display a sample of the DataFrame you created in step 3.

In [136]:
# Display sample data: the first 20 prediction/actual pairs
results.head(20)

Unnamed: 0,predictions,actual
0,5,5
1,5,6
2,6,6
3,6,6
4,6,6
5,6,6
6,6,6
7,5,5
8,6,5
9,5,5
