In [53]:
%load_ext autoreload
%autoreload 2
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Dense, LeakyReLU, PReLU
from tensorflow.keras.layers import Activation
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
import functions as f
import json

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Develop a model using Tensor Flow that will predict Loan Default. 
For your model, do the following:
Try at least three different Activation Functions
Try one and two hidden layers
Try using a Dropout Layer
Explore using a variable selection technique
For each of the models
Calculate the accuracy of the model on both the training and test data set
Create a graph that shows the ROC curves for both the training and test data set. Clearly label each curve and display the Area Under the ROC curve.
Display a ROC curve for the test data with all your models on the same graph (tree based, regression, and TF). Discuss which one is the most accurate. Which one would you recommend using?

In [2]:
# Opening JSON file and save as a dict
winners_dict = open('../Module_3/winners.json')
winners_dict = json.load(winners_dict)


In [3]:

target_a = 'TARGET_BAD_FLAG'
target_b = 'IMP_O_TARGET_LOSS_AMT'

keep_features= winners_dict['sfs_best_model']

In [4]:
df = f.clean_df(file_path= '../../data_sets/SA_clean_O_fixed_HMEQ_Loss.csv', remove_cols=['z_JOB', 'z_REASON','flag_LOAN', 'Unnamed: 0'])

In [5]:
#Pre Processing 

In [6]:
train_cols = list(df.columns.difference((target_a, target_b)))
x_train, x_test, y_train, y_test = f.split_df(df = df.copy(), train_cols=train_cols, 
test_cols = [target_a, target_b], test_size=0.2, rand_seed=1)

In [7]:
#Filtering the train and test splits only on values with amount > 0 
#in clean up steps, missing amounts were filled with 0s...
#there were no zeros in the column prior to that
reg_y_train = y_train[y_train[target_a]==1]
reg_y_test = y_test[y_test[target_a]==1]
#subsetting based on index
reg_x_train =x_train[x_train.index.isin(reg_y_train.index)]  
reg_x_test = x_test[x_test.index.isin(reg_y_test.index)] 

In [8]:
theScaler = MinMaxScaler()
theScaler.fit(x_train)

In [9]:
u_train = pd.DataFrame(theScaler.transform(reg_x_train))
u_test = pd.DataFrame(theScaler.transform(reg_x_test))
u_train.columns = list(reg_x_train.columns.values)
u_test.columns = list(reg_x_train.columns.values)
u_train_sub= u_train[keep_features]
u_test_sub= u_test[keep_features]

In [None]:
#defaults 
theShapeSize_1 = u_train_sub.shape[1] #the number of input variables 
theActivation_1 = tf.keras.activations.relu
output_activation_1 = tf.keras.activations.linear
theLossMetric_1 = tf.keras.losses.MeanAbsoluteError()
theOptimizer_1 = tf.keras.optimizers.Adam()
theEpochs_1 = 800
theUnits_1 = int(2*theShapeSize_1)


In [None]:
#Full model 
#I hidden layer
#relu activation function



In [60]:
u_train.shape

(941, 35)

In [61]:
reg_y_train[target_b].shape

(941,)

In [64]:
theShapeSize_full = u_train.shape[1] #the number of input variables 
theUnits_full = int(2*theShapeSize_full)

LAYER_01 = tf.keras.layers.Dense( units=theUnits_full, activation=theActivation_1, input_dim=theShapeSize_full )
LAYER_OUTPUT = tf.keras.layers.Dense(units=1, activation=output_activation_1 )

full_model = tf.keras.Sequential()
full_model.add( LAYER_01)
full_model.add( LAYER_OUTPUT)

full_model.compile( loss=theLossMetric_1,optimizer=theOptimizer_1)
full_model.fit( u_train, reg_y_train[target_b], epochs=theEpochs_1, verbose=False )


<keras.callbacks.History at 0x24c8e7d91e0>

In [66]:
train_acc_full = f.getAmtAccuracyScores('TF Train', full_model, u_train, reg_y_train[target_b])
test_acc_full = f.getAmtAccuracyScores('TF Test', full_model, u_test, reg_y_test[target_b])
f.print_Accuracy('TF Accuracy', [train_acc_full,test_acc_full])

TF Accuracy
TF Train  =  5893.94806635929
TF Test  =  6293.573737216157
------




In [None]:
#Model 1
#I hidden layer
#relu activation function
#feature selected from forward variable selection 

In [40]:

LAYER_01_01 = tf.keras.layers.Dense( units=theUnits_1, activation=theActivation_1, input_dim=theShapeSize_1 )
LAYER_OUTPUT_01 = tf.keras.layers.Dense( units=1, activation=output_activation_1 )

model_01 = tf.keras.Sequential()
model_01.add( LAYER_01_01)
model_01.add( LAYER_OUTPUT_01 )

model_01.compile( loss=theLossMetric_1,optimizer=theOptimizer_1)
model_01.fit( u_train_sub, reg_y_train[target_b], epochs=theEpochs_1, verbose=False )


<keras.callbacks.History at 0x24c84439c00>

In [41]:
train_acc_01 = f.getAmtAccuracyScores('TF Train', model_01, u_train_sub, reg_y_train[target_b])
test_acc_01 = f.getAmtAccuracyScores('TF Test', model_01, u_test_sub, reg_y_test[target_b])



In [42]:
f.print_Accuracy('TF Accuracy', [train_acc_01,test_acc_01])

TF Accuracy
TF Train  =  7116.658156762224
TF Test  =  7345.703117185917
------




In [65]:
#Model 2
#2 hidden layer
#relu activation function
#feature selected from forward variable selection 

In [43]:
LAYER_02_01 = tf.keras.layers.Dense( units=theUnits_1, activation=theActivation_1, input_dim=theShapeSize_1 )
LAYER_02_02 = tf.keras.layers.Dense( units=theUnits_1, activation=theActivation_1 )
LAYER_OUTPUT_02 = tf.keras.layers.Dense( units=1, activation=output_activation_1 )

model_02 = tf.keras.Sequential()
model_02.add( LAYER_02_01)
model_02.add( LAYER_02_02)
model_02.add( LAYER_OUTPUT_02 )

model_02.compile( loss=theLossMetric_1,optimizer=theOptimizer_1)
model_02.fit( u_train_sub, reg_y_train[target_b], epochs=theEpochs_1, verbose=False )



<keras.callbacks.History at 0x24c82115fc0>

In [44]:
train_acc_02= f.getAmtAccuracyScores('TF Train', model_02, u_train_sub, reg_y_train[target_b])
test_acc_02 = f.getAmtAccuracyScores('TF Test', model_02, u_test_sub, reg_y_test[target_b])
f.print_Accuracy('TF Accuracy', [train_acc_02,test_acc_02])


TF Accuracy
TF Train  =  3379.978538926695
TF Test  =  3121.0223721770053
------




In [69]:
#Model 3
#2 hidden layer
#drop layer
#relu activation function
#feature selected from forward variable selection 

In [46]:
LAYER_03_01 = tf.keras.layers.Dense( units=theUnits_1, activation=theActivation_1, input_dim=theShapeSize_1 )
LAYER_03_02 = tf.keras.layers.Dense( units=theUnits_1, activation=theActivation_1 )
LAYER_DROP = tf.keras.layers.Dropout( 0.4)
LAYER_OUTPUT_03 = tf.keras.layers.Dense( units=1, activation=output_activation_1 )

model_03 = tf.keras.Sequential()
model_03.add( LAYER_03_01)
model_03.add( LAYER_03_02)
model_03.add( LAYER_DROP)
model_03.add( LAYER_OUTPUT_03 )

model_03.compile( loss=theLossMetric_1,optimizer=theOptimizer_1)
model_03.fit( u_train_sub, reg_y_train[target_b], epochs=theEpochs_1, verbose=False )



<keras.callbacks.History at 0x24c89e150c0>

In [47]:
train_acc_03= f.getAmtAccuracyScores('TF Train', model_03, u_train_sub, reg_y_train[target_b])
test_acc_03 = f.getAmtAccuracyScores('TF Test', model_03, u_test_sub, reg_y_test[target_b])
f.print_Accuracy('TF Accuracy', [train_acc_03,test_acc_03])


TF Accuracy
TF Train  =  3134.120978978606
TF Test  =  3057.785608558624
------




In [None]:
#repeating best model () with LeakyReLU activation function 

In [54]:
theActivation_04 = LeakyReLU(alpha=0.01)
LAYER_04_01 = tf.keras.layers.Dense( units=theUnits_1, activation=theActivation_04, input_dim=theShapeSize_1 )
LAYER_04_02 = tf.keras.layers.Dense( units=theUnits_1, activation=theActivation_04 )
LAYER_DROP = tf.keras.layers.Dropout( 0.4)
LAYER_OUTPUT_04 = tf.keras.layers.Dense( units=1, activation=output_activation_1 )

model_04 = tf.keras.Sequential()
model_04.add( LAYER_04_01)
model_04.add( LAYER_04_02)
model_04.add( LAYER_DROP)
model_04.add( LAYER_OUTPUT_04 )

model_04.compile( loss=theLossMetric_1,optimizer=theOptimizer_1)
model_04.fit( u_train_sub, reg_y_train[target_b], epochs=theEpochs_1, verbose=False )

<keras.callbacks.History at 0x24c821011e0>

In [55]:
train_acc_04= f.getAmtAccuracyScores('TF Train', model_04, u_train_sub, reg_y_train[target_b])
test_acc_04 = f.getAmtAccuracyScores('TF Test', model_04, u_test_sub, reg_y_test[target_b])
f.print_Accuracy('TF Accuracy', [train_acc_04,test_acc_04])


TF Accuracy
TF Train  =  2786.499700316053
TF Test  =  2690.7944224460034
------




In [None]:
#repeating best model () with PRelue activation function 

In [56]:
theActivation_05 = PReLU()
LAYER_05_01 = tf.keras.layers.Dense( units=theUnits_1, activation=theActivation_05, input_dim=theShapeSize_1 )
LAYER_05_02 = tf.keras.layers.Dense( units=theUnits_1, activation=theActivation_05 )
LAYER_DROP = tf.keras.layers.Dropout( 0.4)
LAYER_OUTPUT_05 = tf.keras.layers.Dense( units=1, activation=output_activation_1 )

model_05 = tf.keras.Sequential()
model_05.add( LAYER_05_01)
model_05.add( LAYER_05_02)
model_05.add( LAYER_DROP)
model_05.add( LAYER_OUTPUT_05 )

model_05.compile( loss=theLossMetric_1,optimizer=theOptimizer_1)
model_05.fit( u_train_sub, reg_y_train[target_b], epochs=theEpochs_1, verbose=False )

<keras.callbacks.History at 0x24c8554b190>

In [57]:
train_acc_05= f.getAmtAccuracyScores('TF Train', model_05, u_train_sub, reg_y_train[target_b])
test_acc_05 = f.getAmtAccuracyScores('TF Test', model_05, u_test_sub, reg_y_test[target_b])
f.print_Accuracy('TF Accuracy', [train_acc_05,test_acc_05])


TF Accuracy
TF Train  =  2806.6837766267204
TF Test  =  2658.520880560739
------


