# Bringing the data in and arranging it nice and pretty like

In [1]:
# Set the seed value for the notebook so the results are reproducible
from numpy.random import seed
seed(1)

# Input a shit ton of dependencies
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import sklearn
import sklearn.datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [2]:
# Import Data
LC = pd.read_csv('~/Desktop/Project3byDefault/Loan_Stats.csv')

# And change the interest rate into an actual float, rather than an object
LC.int_rate = LC.int_rate.str.replace(r'%', '')
LC.int_rate = LC.int_rate.astype(float)
LC.int_rate = LC.int_rate/100

In [3]:
# Separate the X values (predictors) from the Y targets (classes)
LC_X = LC.drop(['loan_status', 'addr_state'], axis=1)
LC_Y = LC['loan_status']
LC_Y_cat, levels = pd.factorize(LC_Y)

# Then create the training and test data
X_train, X_test, Y_train, Y_test = train_test_split(LC_X, LC_Y_cat, random_state = 1)

In [4]:
# Transform the training and testing data using the X_scaler
X_scaler = StandardScaler().fit(X_train)

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [5]:
# One-hot encoding (i.e. transform the Y targets into numbers, rather than "Charged off" and "Fully Paid")
Y_train_categorical = to_categorical(Y_train)
Y_test_categorical = to_categorical(Y_test)

# Start with a basic neural network, tossing all 27 variables into the mix to predict our two targest

In [7]:
# first, create a normal neural network with 27 inputs, 6 hidden nodes, and 2 outputs
model_all = Sequential()
model_all.add(Dense(units=6, activation='relu', input_dim=27))
model_all.add(Dense(units=2, activation='softmax'))

# Take a look at the model summary
model_all.summary()

Instructions for updating:
Colocations handled automatically by placer.


In [9]:
# Compile the model
model_all.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [10]:
# Fit the model to the training data
model_all.fit(
    X_train_scaled,
    Y_train_categorical,
    epochs=100,
    shuffle=True,
    verbose=2)

Instructions for updating:
Use tf.cast instead.
Epoch 1/100
 - 10s - loss: 0.4899 - acc: 0.7784
Epoch 2/100
 - 10s - loss: 0.4760 - acc: 0.7848
Epoch 3/100
 - 9s - loss: 0.4750 - acc: 0.7855
Epoch 4/100
 - 10s - loss: 0.4746 - acc: 0.7858
Epoch 5/100
 - 10s - loss: 0.4743 - acc: 0.7862
Epoch 6/100
 - 10s - loss: 0.4742 - acc: 0.7856
Epoch 7/100
 - 9s - loss: 0.4740 - acc: 0.7858
Epoch 8/100
 - 9s - loss: 0.4740 - acc: 0.7857
Epoch 9/100
 - 9s - loss: 0.4738 - acc: 0.7856
Epoch 10/100
 - 10s - loss: 0.4737 - acc: 0.7858
Epoch 11/100
 - 10s - loss: 0.4737 - acc: 0.7856
Epoch 12/100
 - 10s - loss: 0.4736 - acc: 0.7854
Epoch 13/100
 - 9s - loss: 0.4736 - acc: 0.7857
Epoch 14/100
 - 9s - loss: 0.4735 - acc: 0.7859
Epoch 15/100
 - 9s - loss: 0.4735 - acc: 0.7858
Epoch 16/100
 - 9s - loss: 0.4736 - acc: 0.7858
Epoch 17/100
 - 9s - loss: 0.4735 - acc: 0.7859
Epoch 18/100
 - 10s - loss: 0.4735 - acc: 0.7860
Epoch 19/100
 - 9s - loss: 0.4734 - acc: 0.7857
Epoch 20/100
 - 10s - loss: 0.4734 - acc

<tensorflow.python.keras.callbacks.History at 0x1a3cfc5fd0>

# Deep Learning- Add an additional layer

In [6]:
# Now we add another layer for fun.
# Again, we are tossing all 27 variables into the mix
deep_model_all = Sequential()
deep_model_all.add(Dense(units=6, activation='relu', input_dim=27))
deep_model_all.add(Dense(units=6, activation='relu'))
deep_model_all.add(Dense(units=2, activation='softmax'))

# Print your model summary
deep_model_all.summary()

Instructions for updating:
Colocations handled automatically by placer.


In [None]:
# Compile
deep_model_all.compile(optimizer='adam',
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])

# And run
deep_model_all.fit(
    X_train_scaled,
    Y_train_categorical,
    epochs=100,
    shuffle=True,
    verbose=2)

# Deep Learning, limited model

# Included only 10 different variables 
#### 1- Interest rate 
#### 2- loan amount 
#### 3- annual income 
#### 4- dti 
#### 5- delinquencies in 2 years 
#### 6- total accounts
#### 7- open accounts 
#### 8- percent total never delinquent
#### 9- bankrupcies 
#### 10- total balance ex mort

In [8]:
# NEED TO REMAKE THE X-MATRIX
LC_X_remake = LC[['int_rate', 'loan_amnt', 'annual_inc', 'dti', 'delinq_2yrs', 'total_acc', 'open_acc',
           'pct_tl_nvr_dlq', 'pub_rec_bankruptcies', 'total_bal_ex_mort']].copy()

# Then create the training and text data
X_train_remake, X_test_remake, Y_train_remake, Y_test_remake = train_test_split(LC_X_remake, LC_Y_cat, 
                                                                                random_state=1)

In [9]:
# Transform the training and testing data using the X_scaler
X_scaler_remake = StandardScaler().fit(X_train_remake)

X_train_scaled_remake = X_scaler_remake.transform(X_train_remake)
X_test_scaled_remake = X_scaler_remake.transform(X_test_remake)

Y_train_categorical_remake = to_categorical(Y_train_remake)
Y_test_categorical_remake = to_categorical(Y_test_remake)

In [10]:
# Remaking the deep learning model, just with fewer inputs
deep_model_remake = Sequential()
deep_model_remake.add(Dense(units=6, activation='relu', input_dim=10))
deep_model_remake.add(Dense(units=6, activation='relu'))
deep_model_remake.add(Dense(units=2, activation='softmax'))

# And summarize
deep_model_remake.summary()

In [12]:
# RUN RUN RUN!
deep_model_remake.compile(optimizer='adam',
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])

deep_model_remake.fit(
    X_train_scaled_remake,
    Y_train_categorical_remake,
    epochs=100,
    shuffle=True,
    verbose=2)

Instructions for updating:
Use tf.cast instead.
Epoch 1/100
 - 11s - loss: 0.4938 - acc: 0.7758
Epoch 2/100
 - 10s - loss: 0.4814 - acc: 0.7832
Epoch 3/100
 - 10s - loss: 0.4810 - acc: 0.7834
Epoch 4/100
 - 10s - loss: 0.4808 - acc: 0.7835
Epoch 5/100
 - 11s - loss: 0.4806 - acc: 0.7837
Epoch 6/100
 - 11s - loss: 0.4805 - acc: 0.7838
Epoch 7/100
 - 11s - loss: 0.4803 - acc: 0.7836
Epoch 8/100
 - 11s - loss: 0.4801 - acc: 0.7840
Epoch 9/100
 - 11s - loss: 0.4799 - acc: 0.7837
Epoch 10/100
 - 10s - loss: 0.4798 - acc: 0.7838
Epoch 11/100
 - 10s - loss: 0.4796 - acc: 0.7839
Epoch 12/100
 - 10s - loss: 0.4796 - acc: 0.7840
Epoch 13/100
 - 10s - loss: 0.4794 - acc: 0.7840
Epoch 14/100
 - 10s - loss: 0.4794 - acc: 0.7836
Epoch 15/100
 - 11s - loss: 0.4794 - acc: 0.7838
Epoch 16/100
 - 10s - loss: 0.4793 - acc: 0.7841
Epoch 17/100
 - 10s - loss: 0.4793 - acc: 0.7840
Epoch 18/100
 - 10s - loss: 0.4792 - acc: 0.7840
Epoch 19/100
 - 11s - loss: 0.4792 - acc: 0.7840
Epoch 20/100
 - 10s - loss: 0.

<tensorflow.python.keras.callbacks.History at 0x1a52448f28>

# Yet another model
#### let's add another layer to try to get the accuracy up and the loss down
#### This uses the same limited dataset, but adds another layer to the deep learning model with the hopes of getting the data to do better 

In [13]:
deep_model_remake2 = Sequential()
deep_model_remake2.add(Dense(units=6, activation='relu', input_dim=10))
deep_model_remake2.add(Dense(units=6, activation='relu'))
deep_model_remake2.add(Dense(units=6, activation='relu'))
deep_model_remake2.add(Dense(units=2, activation='softmax'))

deep_model_remake2.summary()

In [15]:
deep_model_remake2.compile(optimizer='adam',
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])

deep_model_remake2.fit(
    X_train_scaled_remake,
    Y_train_categorical_remake,
    epochs=100,
    shuffle=True,
    verbose=2)

Epoch 1/100
 - 13s - loss: 0.4907 - acc: 0.7818
Epoch 2/100
 - 13s - loss: 0.4814 - acc: 0.7834
Epoch 3/100
 - 13s - loss: 0.4806 - acc: 0.7835
Epoch 4/100
 - 12s - loss: 0.4802 - acc: 0.7834
Epoch 5/100
 - 13s - loss: 0.4799 - acc: 0.7837
Epoch 6/100
 - 13s - loss: 0.4799 - acc: 0.7837
Epoch 7/100
 - 12s - loss: 0.4798 - acc: 0.7836
Epoch 8/100
 - 12s - loss: 0.4796 - acc: 0.7837
Epoch 9/100
 - 12s - loss: 0.4794 - acc: 0.7836
Epoch 10/100
 - 12s - loss: 0.4793 - acc: 0.7838
Epoch 11/100
 - 12s - loss: 0.4792 - acc: 0.7836
Epoch 12/100
 - 12s - loss: 0.4791 - acc: 0.7838
Epoch 13/100
 - 12s - loss: 0.4790 - acc: 0.7836
Epoch 14/100
 - 12s - loss: 0.4790 - acc: 0.7834
Epoch 15/100
 - 12s - loss: 0.4789 - acc: 0.7834
Epoch 16/100
 - 12s - loss: 0.4789 - acc: 0.7838
Epoch 17/100
 - 12s - loss: 0.4789 - acc: 0.7837
Epoch 18/100
 - 12s - loss: 0.4789 - acc: 0.7838
Epoch 19/100
 - 12s - loss: 0.4789 - acc: 0.7836
Epoch 20/100
 - 12s - loss: 0.4788 - acc: 0.7837
Epoch 21/100
 - 13s - loss: 0

<tensorflow.python.keras.callbacks.History at 0x1a53e321d0>

# And another model!
This one changes the number of units in each layer. Maybe that'll help?

In [None]:
deep_model_remake3 = Sequential()
deep_model_remake3.add(Dense(units=30, activation='relu', input_dim=10))
deep_model_remake3.add(Dense(units=30, activation='relu'))
deep_model_remake3.add(Dense(units=30, activation='relu'))
deep_model_remake3.add(Dense(units=2, activation='softmax'))

deep_model_remake3.summary()

In [None]:
deep_model_remake3.compile(optimizer='adam',
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])

deep_model_remake3.fit(
    X_train_scaled_remake,
    Y_train_categorical_remake,
    epochs=100,
    shuffle=True,
    verbose=2)

# Let's try a random forest so we can get some information on which variables may be of the greatest importance.

In [10]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn.datasets import load_iris
from sklearn import tree

In [21]:
rf = RandomForestClassifier(n_estimators=200)
rf = rf.fit(X_train_scaled_remake, Y_train_categorical_remake)
rf.score(X_train_scaled_remake, Y_train_categorical_remake)

1.0

In [25]:
puppy = tree.DecisionTreeClassifier()
puppy = puppy.fit(X_train_scaled_remake, Y_train_categorical_remake)
puppy.score(X_train_scaled_remake, Y_train_categorical_remake)

1.0

In [26]:
# Random Forests in sklearn will automatically calculate feature importance
importances = puppy.feature_importances_
importances

array([0.13894127, 0.10153158, 0.12854172, 0.16441935, 0.01920979,
       0.09915146, 0.08813683, 0.07247583, 0.01546972, 0.17212243])

In [27]:
# We can sort the features by their importance
sorted(zip(puppy.feature_importances_, LC_X_remake.columns), reverse=True)

[(0.17212243419365747, 'total_bal_ex_mort'),
 (0.16441935464494556, 'dti'),
 (0.13894127229138456, 'int_rate'),
 (0.12854172332202599, 'annual_inc'),
 (0.10153158194982906, 'loan_amnt'),
 (0.09915146153869298, 'total_acc'),
 (0.08813682766613534, 'open_acc'),
 (0.07247583030105688, 'pct_tl_nvr_dlq'),
 (0.01920978979316624, 'delinq_2yrs'),
 (0.015469724299105907, 'pub_rec_bankruptcies')]

In [1]:
# Only run the cell below IF YOU HAVE 45-60 Minutes to wait for a useless picture!!!

In [2]:
# import graphviz 
# dot_data = tree.export_graphviz(puppy, out_file=None, 
#                      feature_names=LC_X_remake.columns,  
#                      class_names=levels,  
#                      filled=True, rounded=True,  
#                      special_characters=True)  
# graph = graphviz.Source(dot_data)  
# graph.render("iris")

# The random forest plot did give us some information that we can use. Let's start to widdle our predictors down based on how useful they are in the actual model for explaining variance

# Let's see what happens if we only do 8 variables in the model
#### it could be that we have hit a performance plateau since both our models were able to get 78% accuracy

In [25]:
# NEED TO REMAKE THE X-MATRIX
LC_X_remake8 = LC[['int_rate', 'loan_amnt', 'annual_inc', 'dti', 'total_acc', 'open_acc',
           'pct_tl_nvr_dlq', 'total_bal_ex_mort']].copy()

# Then create the training and text data
X_train_remake8, X_test_remake8, Y_train_remake8, Y_test_remake8 = train_test_split(LC_X_remake8, 
                                                                                    LC_Y_cat, random_state=1)


In [26]:
# Transform the training and testing data using the X_scaler
X_scaler_remake8 = StandardScaler().fit(X_train_remake8)

X_train_scaled_remake8 = X_scaler_remake8.transform(X_train_remake8)
X_test_scaled_remake8 = X_scaler_remake8.transform(X_test_remake8)

Y_train_categorical_remake8 = to_categorical(Y_train_remake8)
Y_test_categorical_remake8 = to_categorical(Y_test_remake8)

In [30]:
deep_model_remake8 = Sequential()
deep_model_remake8.add(Dense(units=6, activation='relu', input_dim=8))
deep_model_remake8.add(Dense(units=6, activation='relu'))
deep_model_remake8.add(Dense(units=6, activation='relu'))
deep_model_remake8.add(Dense(units=2, activation='softmax'))

deep_model_remake8.summary()

In [32]:
deep_model_remake8.compile(optimizer='adam',
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])

deep_model_remake8.fit(
    X_train_scaled_remake8,
    Y_train_categorical_remake8,
    epochs=100,
    shuffle=True,
    verbose=2)

Epoch 1/100
 - 13s - loss: 0.4926 - acc: 0.7804
Epoch 2/100
 - 12s - loss: 0.4821 - acc: 0.7827
Epoch 3/100
 - 13s - loss: 0.4811 - acc: 0.7827
Epoch 4/100
 - 12s - loss: 0.4806 - acc: 0.7827
Epoch 5/100
 - 13s - loss: 0.4803 - acc: 0.7827
Epoch 6/100
 - 13s - loss: 0.4801 - acc: 0.7835
Epoch 7/100
 - 12s - loss: 0.4799 - acc: 0.7838
Epoch 8/100
 - 14s - loss: 0.4798 - acc: 0.7838
Epoch 9/100
 - 12s - loss: 0.4797 - acc: 0.7836
Epoch 10/100
 - 12s - loss: 0.4796 - acc: 0.7836
Epoch 11/100
 - 12s - loss: 0.4795 - acc: 0.7837
Epoch 12/100
 - 12s - loss: 0.4795 - acc: 0.7839
Epoch 13/100
 - 12s - loss: 0.4795 - acc: 0.7836
Epoch 14/100
 - 13s - loss: 0.4793 - acc: 0.7839
Epoch 15/100
 - 16s - loss: 0.4793 - acc: 0.7838
Epoch 16/100
 - 15s - loss: 0.4793 - acc: 0.7839
Epoch 17/100
 - 14s - loss: 0.4794 - acc: 0.7840
Epoch 18/100
 - 13s - loss: 0.4793 - acc: 0.7837
Epoch 19/100
 - 13s - loss: 0.4794 - acc: 0.7836
Epoch 20/100
 - 13s - loss: 0.4793 - acc: 0.7837
Epoch 21/100
 - 13s - loss: 0

<tensorflow.python.keras.callbacks.History at 0x1a5b9b3400>

### Let's also look at the probabilities for each instance. How likely are they to be be charged off or a paid fully?
#### the first number will be probability of "Charged Off" and the second will be the probability of "Fully Paid"

In [33]:
deep_model_remake8.predict_proba(X_test_scaled_remake8,
    batch_size=32,
    verbose=2)

 - 1s


array([[0.22897516, 0.7710248 ],
       [0.2535477 , 0.7464523 ],
       [0.37220138, 0.6277986 ],
       ...,
       [0.3171849 , 0.6828151 ],
       [0.31307566, 0.68692434],
       [0.13353716, 0.8664628 ]], dtype=float32)

# OKAY! 
### That did nothing for our model performance. Going down to 6 predictors just to see

In [35]:
# NEED TO REMAKE THE X-MATRIX
LC_X_remake6= LC[['int_rate', 'loan_amnt', 'annual_inc', 'dti', 'total_acc', 'total_bal_ex_mort']].copy()

# Then create the training and text data
X_train_remake6, X_test_remake6, Y_train_remake6, Y_test_remake6 = train_test_split(LC_X_remake6, 
                                                                                    LC_Y_cat, random_state=1)


In [36]:
# Transform the training and testing data using the X_scaler
X_scaler_remake6 = StandardScaler().fit(X_train_remake6)

X_train_scaled_remake6 = X_scaler_remake6.transform(X_train_remake6)
X_test_scaled_remake6 = X_scaler_remake6.transform(X_test_remake6)

Y_train_categorical_remake6 = to_categorical(Y_train_remake6)
Y_test_categorical_remake6 = to_categorical(Y_test_remake6)

In [61]:
deep_model_remake6 = Sequential()
deep_model_remake6.add(Dense(units=18, activation='relu', input_dim=6))
deep_model_remake6.add(Dense(units=18, activation='relu'))
deep_model_remake6.add(Dense(units=2, activation='softmax'))

deep_model_remake6.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_28 (Dense)             (None, 18)                126       
_________________________________________________________________
dense_29 (Dense)             (None, 18)                342       
_________________________________________________________________
dense_30 (Dense)             (None, 2)                 38        
Total params: 506
Trainable params: 506
Non-trainable params: 0
_________________________________________________________________


In [None]:
deep_model_remake6.compile(optimizer='adam',
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])

deep_model_remake6.fit(
    X_train_scaled_remake6,
    Y_train_categorical_remake6,
    epochs=100,
    shuffle=True,
    verbose=2)

In [None]:
# And again, let's predict each outcome
deep_model_remake6.predict_proba(X_test_scaled_remake6,
    batch_size=32,
    verbose=2)

# 3 Predictor model?

In [55]:
# NEED TO REMAKE THE X-MATRIX
LC_X_remake3p= LC[['int_rate', 'dti','total_bal_ex_mort']].copy()

# Then create the training and text data
X_train_remake3p, X_test_remake3p, Y_train_remake3p, Y_test_remake3p = train_test_split(LC_X_remake3p, 
                                                                                    LC_Y_cat, random_state=1)


In [58]:
# Transform the training and testing data using the X_scaler
X_scaler_remake3p = StandardScaler().fit(X_train_remake3p)

X_train_scaled_remake3p = X_scaler_remake3p.transform(X_train_remake3p)
X_test_scaled_remake3p = X_scaler_remake3p.transform(X_test_remake3p)

Y_train_categorical_remake3p = to_categorical(Y_train_remake3p)
Y_test_categorical_remake3p = to_categorical(Y_test_remake3p)

In [68]:
deep_model_remake3p = Sequential()
deep_model_remake3p.add(Dense(units=9, activation='relu', input_dim=3))
deep_model_remake3p.add(Dense(units=9, activation='relu'))
deep_model_remake3p.add(Dense(units=2, activation='softmax'))

deep_model_remake3p.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_40 (Dense)             (None, 9)                 36        
_________________________________________________________________
dense_41 (Dense)             (None, 9)                 90        
_________________________________________________________________
dense_42 (Dense)             (None, 2)                 20        
Total params: 146
Trainable params: 146
Non-trainable params: 0
_________________________________________________________________


In [70]:
deep_model_remake3p.compile(optimizer='adam',
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])

deep_model_remake3p.fit(
    X_train_scaled_remake3p,
    Y_train_categorical_remake3p,
    epochs=50,
    shuffle=True,
    verbose=2)

Epoch 1/50
 - 12s - loss: 0.4910 - acc: 0.7822
Epoch 2/50
 - 11s - loss: 0.4866 - acc: 0.7830
Epoch 3/50
 - 11s - loss: 0.4864 - acc: 0.7829
Epoch 4/50
 - 11s - loss: 0.4864 - acc: 0.7830
Epoch 5/50
 - 11s - loss: 0.4862 - acc: 0.7831
Epoch 6/50
 - 11s - loss: 0.4862 - acc: 0.7829
Epoch 7/50
 - 11s - loss: 0.4862 - acc: 0.7830
Epoch 8/50
 - 11s - loss: 0.4861 - acc: 0.7829
Epoch 9/50
 - 13s - loss: 0.4862 - acc: 0.7834
Epoch 10/50
 - 14s - loss: 0.4861 - acc: 0.7829
Epoch 11/50
 - 12s - loss: 0.4861 - acc: 0.7829
Epoch 12/50
 - 11s - loss: 0.4861 - acc: 0.7830
Epoch 13/50
 - 11s - loss: 0.4860 - acc: 0.7829
Epoch 14/50
 - 11s - loss: 0.4860 - acc: 0.7833
Epoch 15/50
 - 12s - loss: 0.4860 - acc: 0.7830
Epoch 16/50
 - 13s - loss: 0.4859 - acc: 0.7829
Epoch 17/50
 - 12s - loss: 0.4860 - acc: 0.7833
Epoch 18/50
 - 11s - loss: 0.4860 - acc: 0.7832
Epoch 19/50
 - 11s - loss: 0.4859 - acc: 0.7831
Epoch 20/50
 - 11s - loss: 0.4860 - acc: 0.7833
Epoch 21/50
 - 11s - loss: 0.4859 - acc: 0.7830
E

<tensorflow.python.keras.callbacks.History at 0x1a6cf24940>

In [71]:
deep_model_remake6.predict_proba(X_test_scaled_remake6,
    batch_size=32,
    verbose=2)

 - 1s


array([[0.6541528 , 0.34584722],
       [0.29476583, 0.70523417],
       [0.62546283, 0.37453717],
       ...,
       [0.48484823, 0.51515174],
       [0.5791336 , 0.42086634],
       [0.38080683, 0.6191932 ]], dtype=float32)

# Compare the models below

In [9]:
model_loss, model_accuracy = model_all.evaluate(
    X_test_scaled, Y_test_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

NameError: name 'model_all' is not defined

In [16]:
deep_model_loss, deep_model_accuracy = deep_model_all.evaluate(
    X_test_scaled, Y_test_categorical_remake, verbose=2)
print(f"Deep Neural Network with limited columns - Loss: {deep_model_remake_loss}, Accuracy: {deep_model_remake_accuracy}")

Deep Neural Network - Loss: 0.4753891135798912, Accuracy: 0.7852221131324768


In [53]:
deep_model_remake_loss, deep_model_remake_accuracy = deep_model_remake.evaluate(
    X_test_scaled_remake, Y_test_categorical_remake, verbose=2)
print(f"Deep Neural Network - Loss: {deep_model_remake_loss}, Accuracy: {deep_model_remake_accuracy}")

Deep Neural Network - Loss: 0.4810933615025697, Accuracy: 0.784142255783081


In [54]:
deep_model_remake2_loss, deep_model_remake2_accuracy = deep_model_remake2.evaluate(
    X_test_scaled_remake, Y_test_categorical_remake, verbose=2)
print(f"Deep Neural Network - Loss: {deep_model_remake2_loss}, Accuracy: {deep_model_remake2_accuracy}")

Deep Neural Network - Loss: 0.4805595624580498, Accuracy: 0.7838380336761475


In [42]:
deep_model_remake3_loss, deep_model_remake3_accuracy = deep_model_remake3.evaluate(
    X_test_scaled_remake, Y_test_categorical_remake, verbose=2)
print(f"Deep Neural Network - Loss: {deep_model_remake3_loss}, Accuracy: {deep_model_remake3_accuracy}")

Deep Neural Network - Loss: 0.48323890313483236, Accuracy: 0.7824843525886536


In [55]:
# Save the model
# Not all the models above are saved here. but you can save whichever ones you want!
model_all.save("All27_model_trained.h5")
deep_model_all.save("All27_Deepmodel_trained.h5")
deep_model_remake.save("10Columns_DeepModel_trained.h5")
deep_model_remake2.save("10Columns_DeepModel2_trained.h5")
deep_model_remake8.save("8Columns_DeepModel8_Trained.h5")

In [72]:
deep_model_remake8.save("8Columns_DeepModel8_Trained.h5")

combine the decision tree with logistic regression. use decision tree as a clustering algorithm. or you can do it with k means. 
GY for git source tree