## Deep Learning Classifier 

### Melinda Xiao-Devins

Implement a deep learning (neural network) binary classifier with Keras that predicts `action_taken` from the 2015 New York HMDA dataset.


In [2]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import RobustScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from keras.models import model_from_json
import os

Using TensorFlow backend.
  return f(*args, **kwds)


In [3]:
from distutils.version import LooseVersion
import warnings
import tensorflow as tf
#from tensorflow.python.layers.core import Dense
# Refuse to continue on a TensorFlow release older than 1.1.
assert LooseVersion(tf.__version__) >= LooseVersion('1.1'), 'Please use TensorFlow version 1.1 or newer'
print('TensorFlow Version: {}'.format(tf.__version__))

# Report the default GPU when TensorFlow can see one; a missing GPU only
# produces a warning, not an error.
gpu_device = tf.test.gpu_device_name()
if gpu_device:
    print('Default GPU Device: {}'.format(gpu_device))
else:
    warnings.warn('No GPU found. Please use a GPU to train your neural network.')

TensorFlow Version: 1.4.0


  # Remove the CWD from sys.path while we load stuff.


In [4]:
# Seed NumPy's global random number generator with a fixed value so that
# repeated runs of the notebook draw the same random numbers.
seed = 7
np.random.seed(seed=seed)

In [5]:
# Load the HMDA CSV (first row is the header, comma separated).
# low_memory=False makes pandas read the file in one pass rather than in chunks.
df = pd.read_csv('./data/ny_hmda_2015_normalize.csv', low_memory=False, header=0, delimiter=",")

# NOTE: num_col counts every column in the file, including the 'action_taken'
# target, so the number of model inputs is num_col - 1.
num_rows = df.shape[0]
num_col = df.shape[1]
print("Total number of records: {}".format(num_rows))
print("Toatl numver of features: {}".format(num_col))

# Features: every column except the target. Labels: the target column.
# `axis` is passed by keyword; the positional form is deprecated in pandas 1.x
# and rejected by pandas 2.x.
X = np.array(df.drop(['action_taken'], axis=1))
Y = np.array(df['action_taken'])

# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

Total number of records: 376516
Toatl numver of features: 61


In [6]:
# Fully connected feed-forward network: four ReLU hidden layers
# (60 -> 32 -> 16 -> 8 units) followed by a single sigmoid output unit.
# Every layer draws its initial weights from the 'normal' initializer.
model = Sequential([
    # First hidden layer (60 units). input_dim must match the number of
    # feature columns, i.e. all columns except the target.
    Dense(60, input_dim=num_col-1, kernel_initializer='normal', activation='relu'),
    # Progressively narrower hidden layers.
    Dense(32, kernel_initializer='normal', activation='relu'),
    Dense(16, kernel_initializer='normal', activation='relu'),
    Dense(8, kernel_initializer='normal', activation='relu'),
    # Output layer: one unit; the sigmoid keeps the prediction between 0 and 1.
    Dense(1, kernel_initializer='normal', activation='sigmoid'),
])

In [7]:
# Compile the model for training:
#   loss      - binary cross-entropy (log loss), the usual choice for a
#               two-class problem with a sigmoid output
#   optimizer - 'adam' gradient-descent variant
#   metrics   - report classification accuracy while training / evaluating
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [8]:
# Fit the network on the training split.
#   epochs     - number of full passes over the training data
#   batch_size - number of samples processed before each weight update
model.fit(
    X_train,
    Y_train,
    batch_size=100,
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x11a840860>

In [9]:
# Score the trained model on the held-out test split. `scores` is ordered like
# model.metrics_names, so index 1 is the accuracy requested in compile().
scores = model.evaluate(X_test, Y_test)
metric_name = model.metrics_names[1]
metric_percent = scores[1]*100
print("\n%s: %.2f%%" % (metric_name, metric_percent))


acc: 83.58%


In [10]:
# Run the network over the complete feature matrix (train and test rows);
# each prediction is a one-element row holding the sigmoid output.
predictions = model.predict(X)
print("the prediction bw. 0 and 1")
print(predictions)

# Turn each probability into a hard 0/1 label by rounding, then show the first ten.
rounded = [round(prob) for prob in predictions[:, 0]]
print("the rounded prediction")
print(rounded[:10])


the prediction bw. 0 and 1
[[ 0.55178559]
 [ 1.        ]
 [ 0.58622521]
 ..., 
 [ 1.        ]
 [ 1.        ]
 [ 1.        ]]
the rounded prediction
[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0]


In [11]:
from sklearn.metrics import precision_recall_fscore_support as score

# Round the test-set probabilities to hard labels, then compute the
# macro-averaged precision, recall and F-score against the true labels.
# With average="macro" the returned support is None.
test_labels = [round(prob[0]) for prob in model.predict(X_test)]
precision, recall, fscore, support = score(Y_test, test_labels, average="macro")
print("precision={}, recall={}, fscore={}, support={}".format(precision, recall, fscore, support))


precision=0.8129603280663558, recall=0.8097893105578868, fscore=0.811331497783764, support=None


## Save Trained Model
Save the trained model to disk

In [14]:
 
# Make sure the output directory exists first: open() does not create missing
# parent directories and would raise FileNotFoundError.
os.makedirs("models", exist_ok=True)

# Serialize the network architecture (no weights) to JSON.
model_json = model.to_json()
with open("models/model.json", "w") as json_file:
    json_file.write(model_json)

# Serialize the trained weights to HDF5.
model.save_weights("models/model.h5")
print("Saved model to disk")

Saved model to disk


## Load Saved Model
Load the saved model from disk and use it for prediction. Reusing a saved model avoids having to retrain it.

In [16]:
# Rebuild the network architecture from its JSON description. The context
# manager closes the file even if reading fails.
with open('models/model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)

# Restore the trained weights into the rebuilt model.
loaded_model.load_weights("models/model.h5")
print("Loaded model from disk")

print("Use loaded model to predict")
# Compile the restored model with the same loss/optimizer/metrics as the
# original so that it can be evaluated later.
loaded_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Calculate predictions with the model restored from disk (not the in-memory
# `model`), so this cell really exercises the saved artefacts.
predictions = loaded_model.predict(X)
print("the prediction bw. 0 and 1")
print(predictions)

# Round each probability to a hard 0/1 label and show the first ten.
rounded = [round(x[0]) for x in predictions]
print("the rounded prediction")
print(rounded[0:10])



Loaded model from disk
Use loaded model to predict
the prediction bw. 0 and 1
[[ 0.55178559]
 [ 1.        ]
 [ 0.58622521]
 ..., 
 [ 1.        ]
 [ 1.        ]
 [ 1.        ]]
the rounded prediction
[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0]


In [17]:
# Evaluate the restored model on the held-out test split (index 1 is the
# accuracy metric). The result is deliberately not called `score`: this
# notebook imports precision_recall_fscore_support under that alias, and
# rebinding it to a list would make `score(...)` calls fail.
loaded_scores = loaded_model.evaluate(X_test, Y_test, verbose=0)
print("From saved model,  %s: %.2f%%" % (loaded_model.metrics_names[1], loaded_scores[1]*100))



From saved model,  acc: 83.58%


In [18]:
# Imported in this cell so that `score` is guaranteed to be the metrics
# function here, whatever the name was bound to earlier in the session.
from sklearn.metrics import precision_recall_fscore_support as score

# Macro-averaged precision / recall / F-score of the model restored from disk
# (previously this used the in-memory `model`, so the saved model was not
# actually being checked). With average="macro" the returned support is None.
loaded_test_labels = [round(x[0]) for x in loaded_model.predict(X_test)]
precision, recall, fscore, support = score(Y_test, loaded_test_labels, average="macro")
print("precision={}, recall={}, fscore={}, support={}".format(precision, recall, fscore, support))


precision=0.8129603280663558, recall=0.8097893105578868, fscore=0.811331497783764, support=None
