In [None]:
%load_ext autoreload
%autoreload

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import OneHotEncoder
import re
from keras.layers import Input, Dense
from keras.models import Model, load_model
from keras.callbacks import EarlyStopping, TensorBoard
from keras import metrics
import json
from helper_functions import import_data, neuralnet_model, batch_generator, count_stars, test_creator
from sklearn.metrics import confusion_matrix
from os import listdir
import seaborn as sn
from sklearn.metrics import roc_curve

# matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8*" and
# later releases dropped the old names, so pick whichever one this install has.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

In [None]:
# Restore the previously saved Keras model from disk (path is relative to the
# notebook's working directory).
model = load_model(filepath='models/first_one')

In [None]:
# Build the evaluation features and labels with the project helper.
# NOTE(review): 6000 is presumably the number of test samples to draw from
# 'test_data' -- confirm against helper_functions.test_creator.
X_test, y_test = test_creator('test_data', 6000)

# Report the in-memory footprint of the feature array in decimal gigabytes
# (10**9 bytes).
size_gb = X_test.nbytes / 10**9
print("Test file size: {:0.2f} Gb".format(size_gb))

In [None]:
# Score every test example with the loaded model; the raw outputs are
# thresholded and fed to the ROC computation in later cells.
y_predictions = model.predict(x=X_test)

In [None]:
# Turn the model scores into hard labels with a 0.5 cut-off and tabulate them
# against the ground truth. For a binary problem scikit-learn lays the matrix
# out as [[TN, FP], [FN, TP]] (rows = actual class, columns = predicted class),
# so flattening it yields the four counts in exactly that order.
confusions = confusion_matrix(y_test, y_predictions > 0.5)
true_negative, false_positive, false_negative, true_positive = confusions.ravel()

# Share of all test examples that were classified correctly.
accuracy = (true_positive + true_negative) / (true_positive + true_negative + false_negative + false_positive)
print('accuracy:', accuracy)
print('true_positive:', true_positive)
print('true_negatives:', true_negative)
print('false_positive:', false_positive)
print('false_negative:', false_negative)

# Precision: of everything predicted positive, how much really is positive.
# Recall: of everything that really is positive, how much was found.
# (These two assignments used to be swapped; the printed figures were only
# right because the formulas were re-typed inside the print calls.)
precision = true_positive / (true_positive + false_positive)
recall = true_positive / (true_positive + false_negative)
# Harmonic mean of the two; symmetric, so unaffected by the earlier mix-up.
f1 = (2 * precision * recall) / (precision + recall)
print('*'*50)
print('accuracy: {:0.4f}'.format(accuracy))
print('Recall: {:0.4f}'.format(recall))
print('Precision: {:0.4f}'.format(precision))
print('f1: {:0.4f}'.format(f1))

In [None]:
# Sweep the decision threshold over the raw model scores to get the
# false-positive / true-positive rate pairs that trace out the ROC curve.
fpr, tpr, thresholds = roc_curve(y_true=y_test, y_score=y_predictions)

In [None]:
fig, ax = plt.subplots(figsize=(8, 6))

# Dashed diagonal first (TPR == FPR) as the reference line, then the model's
# own curve on top of it.
ax.plot((0, 1), (0, 1), ls='--')
ax.plot(fpr, tpr)

ax.set(xlabel='False Positive Rate', ylabel='True Positive Rate')
ax.set_title('ROC curve')

In [None]:
# Confusion matrix of true labels versus predictions thresholded at 0.5; this
# is the input for the heatmap table built in the following cells.
confusions = confusion_matrix(y_true=y_test, y_pred=y_predictions > 0.5)

In [None]:
# Name the four cells of the 2x2 matrix: row 0 holds the actual negatives,
# row 1 the actual positives; column 0 is predicted negative, column 1 is
# predicted positive.
true_negative, false_positive = confusions[0, 0], confusions[0, 1]
false_negative, true_positive = confusions[1, 0], confusions[1, 1]

In [None]:
# Re-arrange the counts into a labelled table for display: rows are what the
# model predicted, columns are the actual condition, positives listed first.
conf_df = pd.DataFrame(
    data=[[true_positive, false_positive],
          [false_negative, true_negative]],
    index=['predicted positive', 'predicted negative'],
    columns=['condition positive', 'condition negative'],
)


In [None]:
# Draw the labelled confusion table as an annotated heatmap on a large canvas.
plt.figure(figsize=(15, 10))
# Enlarges seaborn's font scaling so tick and axis labels stay readable at
# this figure size; the cell annotations get their own explicit size below.
sn.set(font_scale=3)
# fmt='g' prints the counts as plain numbers rather than in scientific notation.
sn.heatmap(data=conf_df, annot=True, fmt='g', annot_kws=dict(size=26))