# Custom model

In [78]:
# Import dependencies
import os
import time
import random
import numpy as np
import tensorflow as tf
from numpy.random import seed
import matplotlib.pyplot as plt
from keras.optimizers import SGD
from keras.models import Sequential
from tensorflow import set_random_seed
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, roc_curve, classification_report
from keras.layers import Dense, MaxPooling1D, Convolution1D, Dropout, Flatten, BatchNormalization

# fix random seed for reproducibility
seeds=42
random.seed(seeds)
seed(seeds)
set_random_seed(seeds)

In [79]:
# Load 3140 data that excludes the body
X = np.load('../2_pipeline/3140-xyzl.npy')
y = np.load('../2_pipeline/labels.npy')

#split data into 1: train+validation set and 2: test set 
X_train_val, X_test, y_train_val, y_test = \
train_test_split(X, y, random_state=0, test_size=0.2)

# split train+validation set into 1a) training and 1b) validation sets
X_train, X_val, y_train, y_val = \
train_test_split(X_train_val, y_train_val, random_state=1, test_size=0.2)

#from keras.utils import to_categorical
#y_test = to_categorical(y_test)
#y_train = to_categorical(y_train)

print('Training shape is: ', X_train.shape)
print('Validation shape is: ', X_val.shape)
print('Test shape is: ', X_test.shape)

Training shape is:  (182, 3140, 4)
Validation shape is:  (46, 3140, 4)
Test shape is:  (58, 3140, 4)


In [80]:
num_points=3140
max_epochs=20
batch_size=10
dropout_rate = 0.5
opt = 'adam'
#opt = SGD(lr=0.0001, momentum=0.9)

# Class weights
class_weight = {0: 0.2,
                1: 0.8}

In [84]:
392/2

196.0

## Build model

In [87]:
model = Sequential()
model.add(Dense(1570, input_shape=(num_points,4), activation='relu'))
model.add(MaxPooling1D(pool_size=num_points))
model.add(Dense(392, activation='relu'))
model.add(Dense(196, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.add(Flatten())

# Compile model
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_58 (Dense)             (None, 3140, 1570)        7850      
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 1, 1570)           0         
_________________________________________________________________
dense_59 (Dense)             (None, 1, 392)            615832    
_________________________________________________________________
dense_60 (Dense)             (None, 1, 196)            77028     
_________________________________________________________________
dense_61 (Dense)             (None, 1, 1)              197       
_________________________________________________________________
flatten_10 (Flatten)         (None, 1)                 0         
Total params: 700,907
Trainable params: 700,907
Non-trainable params: 0
_________________________________________________________________


In [88]:
# Fit the model
history = model.fit(X_train, y_train, batch_size=batch_size, epochs=max_epochs,\
                    shuffle=True, verbose=0, validation_data=(X_val, y_val),\
                    class_weight=class_weight)

Instructions for updating:
Use tf.cast instead.


## Performance
Classification Report

In [None]:
################################################################################
# Classification Report

# make predictions on the test set
y_pred = model.predict(X_test)
 
# show a nicely formatted classification report
print("[INFO] evaluating network...")
print(classification_report(y_true=y_test, y_pred=y_pred.round()))

# evaluate the model
_, train_acc = model.evaluate(X_train, y_train, verbose=0)
_, test_acc = model.evaluate(X_test, y_test, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

################################################################################

# plot loss during training
plt.subplot(211)
plt.title('Loss')
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()

################################################################################

# plot accuracy during training
plt.subplot(212)
plt.title('Accuracy')
plt.plot(history.history['acc'], label='train')
plt.plot(history.history['val_acc'], label='test')
plt.legend()
plt.show()

################################################################################
from sklearn.metrics import accuracy_score, confusion_matrix
print("\n###################### Model Performance ############################")
print("\n#####################################################################")
# evaluate the model
_, train_acc = model.evaluate(X_train, y_train, verbose=0)
_, test_acc = model.evaluate(X_test, y_test, verbose=0)
print('\nTrain: %.3f, Test: %.3f' % (train_acc, test_acc))
print("\n#####################################################################")
################################################################################
import matplotlib.pyplot as plt
# plot loss during training
plt.subplot(211)
plt.title('Loss')
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()

################################################################################

# plot accuracy during training
plt.subplot(212)
plt.title('Accuracy')
plt.plot(history.history['acc'], label='train')
plt.plot(history.history['val_acc'], label='test')
plt.legend()
plt.show()

################################################################################
print("\n#####################################################################")
# Create the confusion matrix
ann_cm = confusion_matrix(y_true = y_test, y_pred = y_pred.round())
print("\nOur test confusion matrix yields: ")
print(ann_cm)
print("\n#####################################################################")

#Classification report
ann_report = classification_report(y_test, y_pred.round())
print("\nClassfication Report for test:\n", ann_report)
print("\n#####################################################################")

#Calculate AUC score
ann_auc = roc_auc_score(y_test, y_pred.round())
print("\nOur testing AUC for ann is: ", ann_auc)

# Calculate false positive and true positive rates
fpr_ann, tpr_ann, thresholds_ann = roc_curve(y_test, y_pred.round())

# Plot AUC 
plt.figure()
plt.plot(fpr_ann, tpr_ann, color='purple', lw=2, label='ANN (area = {:.3f})'.format(ann_auc))
plt.plot([0, 1], [0, 1], color='blue', lw=2, linestyle='--')
plt.xlabel('FPR')
plt.ylabel('TPR')
plt.title('ROC curve')
plt.legend(loc='best')
plt.show()
print("\n#####################################################################")

[INFO] evaluating network...
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00        36
         1.0       0.38      1.00      0.55        22

   micro avg       0.38      0.38      0.38        58
   macro avg       0.19      0.50      0.27        58
weighted avg       0.14      0.38      0.21        58



  'precision', 'predicted', average, warn_for)
