In [1]:
# Mount Google Drive at /content/drive; later cells read the dataset from and
# write checkpoints/models to paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf

# Read the loan dataset (mainDS.csv) from Google Drive and preview the first rows.
# pandas is already imported as `pd` in the dependency block at the top of this cell.
application_df = pd.read_csv("/content/drive/MyDrive/datasets/mainDS.csv")
application_df.head()

Unnamed: 0,ID,Loan Amount,Funded Amount,Funded Amount Investor,Term,Batch Enrolled,Interest Rate,Grade,Sub Grade,Employment Duration,...,Recoveries,Collection Recovery Fee,Collection 12 months Medical,Application Type,Last week Pay,Accounts Delinquent,Total Collection Amount,Total Current Balance,Total Revolving Credit Limit,Loan Status
0,65087372,10000,32236,12329.36286,59,BAT2522922,11.135007,B,C4,MORTGAGE,...,2.498291,0.793724,0,INDIVIDUAL,49,0,31,311301,6619,0
1,1450153,3609,11940,12191.99692,59,BAT1586599,12.237563,C,D3,RENT,...,2.377215,0.974821,0,INDIVIDUAL,109,0,53,182610,20885,0
2,1969101,28276,9311,21603.22455,59,BAT2136391,12.545884,F,D4,MORTGAGE,...,4.316277,1.020075,0,INDIVIDUAL,66,0,34,89801,26155,0
3,6651430,11170,6954,17877.15585,59,BAT2428731,16.731201,C,C3,MORTGAGE,...,0.10702,0.749971,0,INDIVIDUAL,39,0,40,9189,60214,0
4,14354669,16890,13226,13539.92667,59,BAT5341619,15.0083,C,D4,MORTGAGE,...,1294.818751,0.368953,0,INDIVIDUAL,18,0,430,126029,22579,0


In [3]:
# List every column label of the raw frame
application_df.columns

Index(['ID', 'Loan Amount', 'Funded Amount', 'Funded Amount Investor', 'Term',
       'Batch Enrolled', 'Interest Rate', 'Grade', 'Sub Grade',
       'Employment Duration', 'Home Ownership', 'Verification Status',
       'Payment Plan', 'Loan Title', 'Debit to Income',
       'Delinquency - two years', 'Inquires - six months', 'Open Account',
       'Public Record', 'Revolving Balance', 'Revolving Utilities',
       'Total Accounts', 'Initial List Status', 'Total Received Interest',
       'Total Received Late Fee', 'Recoveries', 'Collection Recovery Fee',
       'Collection 12 months Medical', 'Application Type', 'Last week Pay',
       'Accounts Delinquent', 'Total Collection Amount',
       'Total Current Balance', 'Total Revolving Credit Limit', 'Loan Status'],
      dtype='object')

In [4]:
# Remove the non-beneficial identifier column 'ID' before modelling.
application_df = application_df.drop(columns=['ID'])
application_df.head()

Unnamed: 0,Loan Amount,Funded Amount,Funded Amount Investor,Term,Batch Enrolled,Interest Rate,Grade,Sub Grade,Employment Duration,Home Ownership,...,Recoveries,Collection Recovery Fee,Collection 12 months Medical,Application Type,Last week Pay,Accounts Delinquent,Total Collection Amount,Total Current Balance,Total Revolving Credit Limit,Loan Status
0,10000,32236,12329.36286,59,BAT2522922,11.135007,B,C4,MORTGAGE,176346.6267,...,2.498291,0.793724,0,INDIVIDUAL,49,0,31,311301,6619,0
1,3609,11940,12191.99692,59,BAT1586599,12.237563,C,D3,RENT,39833.921,...,2.377215,0.974821,0,INDIVIDUAL,109,0,53,182610,20885,0
2,28276,9311,21603.22455,59,BAT2136391,12.545884,F,D4,MORTGAGE,91506.69105,...,4.316277,1.020075,0,INDIVIDUAL,66,0,34,89801,26155,0
3,11170,6954,17877.15585,59,BAT2428731,16.731201,C,C3,MORTGAGE,108286.5759,...,0.10702,0.749971,0,INDIVIDUAL,39,0,40,9189,60214,0
4,16890,13226,13539.92667,59,BAT5341619,15.0083,C,D4,MORTGAGE,44234.82545,...,1294.818751,0.368953,0,INDIVIDUAL,18,0,430,126029,22579,0


In [5]:
# Determine the number of unique values in each column.
# NOTE(review): 'Payment Plan' reports a single unique value, so it carries no signal.
# NOTE(review): 'Employment Duration' holds values such as MORTGAGE/RENT while
# 'Home Ownership' holds ~70k distinct numbers — the two column labels look swapped
# in the source data; confirm against the dataset documentation.
application_df.nunique()

Loan Amount                     27871
Funded Amount                   24912
Funded Amount Investor          70341
Term                                3
Batch Enrolled                     41
Interest Rate                   70350
Grade                               7
Sub Grade                          35
Employment Duration                 3
Home Ownership                  70357
Verification Status                 3
Payment Plan                        1
Loan Title                        109
Debit to Income                 70356
Delinquency - two years             9
Inquires - six months               6
Open Account                       36
Public Record                       5
Revolving Balance               20862
Revolving Utilities             70361
Total Accounts                     69
Initial List Status                 2
Total Received Interest         70354
Total Received Late Fee         70279
Recoveries                      70282
Collection Recovery Fee         70210
Collection 1

In [6]:
# Frequency of each distinct 'Loan Title', most common first; used below to pick
# which rare titles get binned together.
app_type_count = application_df.value_counts(subset='Loan Title')
app_type_count

Loan Title
Credit card refinancing    32061
Debt consolidation         25937
Debt Consolidation          3671
Other                       2565
Home improvement            2308
                           ...  
CC                             6
Personal loan                  5
Getting Ahead                  5
bills                          4
Credit                         4
Length: 109, dtype: int64

In [7]:
# Choose a cutoff value and create a list of application types to be replaced.
# Titles that occur fewer than RARE_TITLE_CUTOFF times are folded into "Other",
# which reduces the number of dummy columns produced by the later get_dummies step.
# NOTE(review): titles that differ only by case (e.g. 'Debt consolidation' vs
# 'Debt Consolidation') are still treated as separate categories — confirm intended.
RARE_TITLE_CUTOFF = 10
application_types_to_replace = list(app_type_count[app_type_count < RARE_TITLE_CUTOFF].index)

# Replace all rare titles in one vectorized pass (instead of one full-column
# replace per rare title).
application_df['Loan Title'] = application_df['Loan Title'].replace(application_types_to_replace, "Other")

# Check to make sure binning was successful
application_df['Loan Title'].value_counts()

Credit card refinancing    32061
Debt consolidation         25937
Debt Consolidation          3671
Other                       2674
Home improvement            2308
                           ...  
CONSOLIDATE                   10
vacation                      10
conso                         10
Credit Loan                   10
cards                         10
Name: Loan Title, Length: 93, dtype: int64

In [8]:
# Frequency of each 'Sub Grade' value, most common first
class_count = application_df.value_counts(subset='Sub Grade')
class_count

Sub Grade
B4    4660
C1    4374
B3    4178
A5    3694
B2    3689
B5    3562
D1    3440
C4    3368
C2    3339
C3    3248
B1    3046
C5    2572
A4    2372
D4    2131
D2    2056
D5    2042
A2    1933
D3    1897
E2    1824
A3    1760
A1    1416
E3    1366
E1    1342
E4    1157
F2     990
F1     861
E5     810
F5     608
F3     602
F4     468
G2     466
G1     385
G5     300
G3     251
G4     159
dtype: int64

In [9]:
# One-hot encode every non-numeric column with `pd.get_dummies`;
# numeric columns pass through unchanged.
application_dummies = pd.get_dummies(data=application_df)
application_dummies.head()


Unnamed: 0,Loan Amount,Funded Amount,Funded Amount Investor,Term,Interest Rate,Home Ownership,Debit to Income,Delinquency - two years,Inquires - six months,Open Account,...,Loan Title_loan1,Loan Title_pay off bills,Loan Title_payoff,Loan Title_personal,Loan Title_refi,Loan Title_vacation,Initial List Status_f,Initial List Status_w,Application Type_INDIVIDUAL,Application Type_JOINT
0,10000,32236,12329.36286,59,11.135007,176346.6267,16.284758,1,0,13,...,0,0,0,0,0,0,0,1,1,0
1,3609,11940,12191.99692,59,12.237563,39833.921,15.412409,0,0,12,...,0,0,0,0,0,0,1,0,1,0
2,28276,9311,21603.22455,59,12.545884,91506.69105,28.137619,0,0,14,...,0,0,0,0,0,0,0,1,1,0
3,11170,6954,17877.15585,59,16.731201,108286.5759,18.04373,1,0,7,...,0,0,0,0,0,0,0,1,1,0
4,16890,13226,13539.92667,59,15.0083,44234.82545,17.209886,1,3,13,...,0,0,0,0,0,0,0,1,1,0


In [10]:
# List the column labels after one-hot encoding
application_dummies.columns

Index(['Loan Amount', 'Funded Amount', 'Funded Amount Investor', 'Term',
       'Interest Rate', 'Home Ownership', 'Debit to Income',
       'Delinquency - two years', 'Inquires - six months', 'Open Account',
       ...
       'Loan Title_loan1', 'Loan Title_pay off bills', 'Loan Title_payoff',
       'Loan Title_personal', 'Loan Title_refi', 'Loan Title_vacation',
       'Initial List Status_f', 'Initial List Status_w',
       'Application Type_INDIVIDUAL', 'Application Type_JOINT'],
      dtype='object', length=212)

In [11]:
# Separate the target column from the feature matrix (both as NumPy arrays)
target_column = 'Loan Status'
y = application_dummies[target_column].values
X = application_dummies.drop(columns=[target_column]).values

# Hold out a test set using train_test_split's default split ratio; the fixed
# random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [12]:
# Fit the StandardScaler on the training split only, so no statistics from the
# test split leak into preprocessing. `fit` returns the scaler itself.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the same fitted transformation to both splits
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [13]:
# Get the input shape: (number of training rows, number of features).
# The second entry is used as the network's input dimension in the next section.
X_train_scaled.shape

(52774, 211)

## Compile, Train and Evaluate the Model

In [14]:
from keras.layers import Dense, Dropout

# Network dimensions: one input per scaled feature, two equally sized hidden layers.
input_features = X_train_scaled.shape[1]
hidden_node1 = 100
hidden_node2 = 100

# Fully connected binary classifier, assembled in a single expression.
# (An optional third hidden layer of 256 units followed by Dropout(0.5) was
# tried in this notebook and is left disabled.)
nn_model_1 = tf.keras.models.Sequential([
    # First hidden layer (also declares the input dimension)
    tf.keras.layers.Dense(units=hidden_node1, activation='relu', input_dim=input_features),
    # Second hidden layer
    tf.keras.layers.Dense(units=hidden_node2, activation='relu'),
    # Output layer: a single sigmoid unit
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])

# Check the structure of the model
nn_model_1.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 100)               21200     
                                                                 
 dense_1 (Dense)             (None, 100)               10100     
                                                                 
 dense_2 (Dense)             (None, 1)                 101       
                                                                 
Total params: 31401 (122.66 KB)
Trainable params: 31401 (122.66 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [15]:
# Configure training: binary cross-entropy loss, Adam optimizer, accuracy metric
nn_model_1.compile(
    optimizer='Adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [16]:
import math
import os
from tensorflow.keras.callbacks import ModelCheckpoint

# Define checkpoints variables
os.makedirs('/content/drive/MyDrive/Colab_Notebooks/MyModels/checkpoints/', exist_ok=True)
checkpoint_path= '/content/drive/MyDrive/Colab_Notebooks/MyModels/checkpoints/weights.{epoch}.hdf5'

# ModelCheckpoint's `period` argument is deprecated (and rejected by newer Keras
# releases). The supported way to save every N epochs is an integer `save_freq`,
# which counts *batches*, so the number of batches per epoch is derived here.
# NOTE: the batch size and validation split below must mirror the values passed
# to nn_model_1.fit(); if they diverge, checkpoints will no longer align with
# epoch boundaries.
CHECKPOINT_EVERY_N_EPOCHS = 5
FIT_BATCH_SIZE = 128
FIT_VALIDATION_SPLIT = 0.2

# fit() trains on the rows left after holding out the validation fraction
n_fit_samples = math.floor(len(X_train_scaled) * (1 - FIT_VALIDATION_SPLIT))
steps_per_epoch = math.ceil(n_fit_samples / FIT_BATCH_SIZE)

# Create callback: save weights only, once every CHECKPOINT_EVERY_N_EPOCHS epochs
cp_callback = ModelCheckpoint(filepath=checkpoint_path,
                              verbose=1,
                              save_weights_only=True,
                              save_freq=CHECKPOINT_EVERY_N_EPOCHS * steps_per_epoch)



In [17]:
# Train for 55 epochs, holding out 20% of the training rows for validation and
# writing periodic weight checkpoints through cp_callback.
fit_model_1 = nn_model_1.fit(
    X_train_scaled,
    y_train,
    batch_size=128,
    epochs=55,
    validation_split=0.2,
    callbacks=[cp_callback],
)

Epoch 1/55
Epoch 2/55
Epoch 3/55
Epoch 4/55
Epoch 5/55
Epoch 5: saving model to /content/drive/MyDrive/Colab_Notebooks/MyModels/checkpoints/weights.5.hdf5
Epoch 6/55
Epoch 7/55
Epoch 8/55
Epoch 9/55
Epoch 10/55
Epoch 10: saving model to /content/drive/MyDrive/Colab_Notebooks/MyModels/checkpoints/weights.10.hdf5
Epoch 11/55
Epoch 12/55
Epoch 13/55
Epoch 14/55
Epoch 15/55
Epoch 15: saving model to /content/drive/MyDrive/Colab_Notebooks/MyModels/checkpoints/weights.15.hdf5
Epoch 16/55
Epoch 17/55
Epoch 18/55
Epoch 19/55
Epoch 20/55
Epoch 20: saving model to /content/drive/MyDrive/Colab_Notebooks/MyModels/checkpoints/weights.20.hdf5
Epoch 21/55
Epoch 22/55
Epoch 23/55
Epoch 24/55
Epoch 25/55
Epoch 25: saving model to /content/drive/MyDrive/Colab_Notebooks/MyModels/checkpoints/weights.25.hdf5
Epoch 26/55
Epoch 27/55
Epoch 28/55
Epoch 29/55
Epoch 30/55
Epoch 30: saving model to /content/drive/MyDrive/Colab_Notebooks/MyModels/checkpoints/weights.30.hdf5
Epoch 31/55
Epoch 32/55
Epoch 33/55
Epo

In [18]:
# Tabulate the per-epoch metrics recorded by fit(): one row per epoch,
# one column per metric.
df_training_record = pd.DataFrame(data=fit_model_1.history)
df_training_record


Unnamed: 0,loss,accuracy,val_loss,val_accuracy
0,0.320991,0.908904,0.307618,0.911322
1,0.298721,0.910396,0.305836,0.911227
2,0.291678,0.910372,0.30948,0.911416
3,0.28451,0.910325,0.312468,0.911227
4,0.277373,0.910704,0.318162,0.910848
5,0.26996,0.910988,0.326003,0.910943
6,0.260353,0.912101,0.333943,0.910469
7,0.250855,0.913309,0.348529,0.909995
8,0.23854,0.915654,0.35316,0.903932
9,0.228563,0.918544,0.361539,0.904879


In [20]:
# Persist the training history as CSV for documentation.
# NOTE(review): this path uses 'Colab Notebooks' (with a space) while the
# checkpoint and model cells use 'Colab_Notebooks' — confirm which folder is intended.
from pathlib import Path
filepath = Path('/content/drive/MyDrive/Colab Notebooks/Saved Records/df_training_record.csv')
# Create the destination folder (and any missing parents) before writing
filepath.parent.mkdir(exist_ok=True, parents=True)
df_training_record.to_csv(path_or_buf=filepath)

In [22]:
import numpy as np
from google.colab import autoviz

def value_plot(df, y, figscale=1):
  """Draw column `y` of `df` as a line chart and wrap it as a Colab autoviz chart.

  Args:
    df: DataFrame holding the series to plot.
    y: Label of the column to plot; also used as the chart title.
    figscale: Multiplier applied to the base 8x4 figure size.

  Returns:
    The chart built by `autoviz.MplChart.from_current_mpl_state()` from the
    current matplotlib state.
  """
  from matplotlib import pyplot as plt
  width, height = 8 * figscale, 4 * figscale
  df[y].plot(kind='line', figsize=(width, height), title=y)
  # Hide the top and right borders of the axes that were just drawn
  current_axes = plt.gca()
  current_axes.spines[['top', 'right']].set_visible(False)
  plt.tight_layout()
  return autoviz.MplChart.from_current_mpl_state()

# Training loss per epoch
chart = value_plot(df_training_record, 'loss')
chart

In [24]:
import numpy as np
from google.colab import autoviz

# Training accuracy per epoch.
# `value_plot` is already defined in the loss-plot cell; the identical duplicate
# definition that used to live here was removed so there is a single source of truth.
chart = value_plot(df_training_record, 'accuracy')
chart

In [25]:
# Save the full trained model to Drive in H5 format
# (the H5 file is the input of the TensorFlow.js conversion cell).
h5_model_path = '/content/drive/MyDrive/Colab_Notebooks/Saved_Models/raph-model.h5'
nn_model_1.save(h5_model_path)
print("Successfully saved H5File model to path")


  saving_api.save_model(


Successfully saved H5File model to path


In [173]:
# install tfjs if not yet installed
# !pip install tensorflowjs


In [27]:
#!mkdir MyModels
!tensorflowjs_converter --input_format keras \/content/drive/MyDrive/Colab_Notebooks/Saved_Models/raph-model.h5 \content/drive/MyDrive/Colab_Notebooks/MyModels/

2023-10-04 04:29:10.222082: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-10-04 04:29:10.222148: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-10-04 04:29:10.222193: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [28]:
# Look up the shape of X_test: (number of held-out rows, number of features).
# The row count bounds the random index drawn in the next cell.
X_test.shape

(17592, 211)

In [29]:
# Pick a random row index of X_test.
# random.randint includes BOTH endpoints, so the valid index range is
# 0 .. len(X_test) - 1; the previous bounds (1, len(X_test)) could produce an
# out-of-range index and could never select the first row.
from random import randint

randomizer = randint(0, len(X_test) - 1)
print(randomizer)

3709


In [30]:
# Make one prediction for the randomly chosen test row.
# The network was trained on StandardScaler-transformed features, so the row
# must be taken from X_test_scaled; feeding the raw X_test row gives the model
# inputs on a different scale from anything it saw during training.
prediction = nn_model_1.predict(X_test_scaled[[randomizer]])
# The raw (unscaled) feature values are printed for readability
print("X_test row %s=%s, Predicted=%s" % (randomizer, X_test[[randomizer]], prediction[0]))

X_test row 3709=[[1.76480000e+04 2.05460000e+04 1.24135571e+04 5.90000000e+01
  8.93616983e+00 5.89316180e+04 3.16112538e+01 0.00000000e+00
  0.00000000e+00 1.40000000e+01 0.00000000e+00 8.47400000e+03
  8.62960368e+01 1.60000000e+01 8.04883422e+02 6.96678860e-02
  5.56936899e+00 9.34533386e-01 0.00000000e+00 3.60000000e+01
  0.00000000e+00 3.00000000e+01 2.64020000e+05 1.74950000e+04
  0.00000000e+00 0.00000000e+00 1.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000