In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import confusion_matrix

In [2]:
# Run the project's logistic-model helper and unpack its two return values
from logisticmodel import logisticmodel

model_outputs = logisticmodel()
y_test, y_pred = model_outputs



In [3]:
# Cast both the predictions and the test labels to float
y_pred, y_test = y_pred.astype('float'), y_test.astype('float')

In [4]:
### Evaluating the Model using the Testing Dataset
from plot_conf_matrix import plot_confusion_matrix

# Class names handed to the confusion-matrix plot (one string per bin)
labels = [
    '50-60', '60-70', '70-80', '80-90', '90-100',
    '100-110', '110-120', '120-130', '130-140', '140-150',
    '150-160', '160-170', '170+',
]

# Print NumPy arrays with two decimal places
np.set_printoptions(precision=2)

# Confusion matrix of test labels against predictions; plotted in a later cell
cnf_matrix = confusion_matrix(y_test, y_pred)

In [33]:
# Draw the normalized confusion matrix on a fresh 8x8 figure, write it to
# disk, then close the figure so it is released.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 8))
plot_confusion_matrix(
    cnf_matrix,
    normalize=True,
    classes=labels,
    title='Normalized Confusion Matrix',
)
fig.savefig('norm_conf_matrix.png')
plt.close(fig)

Normalized confusion matrix
[[0.26 0.   0.05 0.23 0.05 0.15 0.13 0.08 0.05 0.   0.   0.   0.  ]
 [0.21 0.   0.08 0.31 0.01 0.14 0.08 0.11 0.06 0.   0.   0.   0.  ]
 [0.02 0.   0.07 0.19 0.08 0.17 0.31 0.12 0.03 0.   0.   0.   0.  ]
 [0.01 0.   0.08 0.22 0.04 0.21 0.22 0.13 0.1  0.   0.   0.   0.  ]
 [0.01 0.   0.03 0.16 0.03 0.21 0.28 0.18 0.09 0.   0.   0.   0.  ]
 [0.   0.   0.02 0.06 0.02 0.17 0.31 0.28 0.13 0.   0.   0.   0.  ]
 [0.   0.   0.   0.04 0.01 0.09 0.34 0.35 0.16 0.01 0.   0.   0.  ]
 [0.   0.   0.   0.02 0.   0.07 0.27 0.4  0.23 0.   0.   0.   0.  ]
 [0.   0.   0.   0.01 0.   0.04 0.18 0.37 0.4  0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.05 0.16 0.34 0.45 0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.01 0.13 0.27 0.59 0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.11 0.18 0.68 0.04 0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.05 0.15 0.3  0.5  0.   0.   0.   0.  ]]


In [24]:
# Collapse the test-set confusion matrix into three aggregate counts.
# The result is kept as a plain ndarray: NumPy no longer recommends np.matrix,
# and trace/triu/tril give the same numbers on a regular array.
cm = confusion_matrix(y_test, y_pred)

# Diagonal entries: predicted class equals the true class.
tp = np.trace(cm)

# Entries strictly above the diagonal (column index > row index).
# k=1 skips the diagonal, which equals "upper triangle sum minus trace".
fn = np.triu(cm, k=1).sum()

# Entries strictly below the diagonal (column index < row index).
fp = np.tril(cm, k=-1).sum()

# NOTE(review): splitting off-diagonal errors into "fn" (above) and "fp"
# (below) is not the usual per-class definition of false negatives/positives
# for a multi-class problem -- confirm this is the intended metric.

In [25]:
# Precision: diagonal count divided by (diagonal count + fp count).
# This is the figure to watch when false positives should be minimized.
precision_denominator = tp + fp
precision = tp / precision_denominator
print("Precision {:0.2f}".format(precision))

# Recall: diagonal count divided by (diagonal count + fn count).
# This is the figure to watch when false negatives should be minimized.
recall_denominator = tp + fn
recall = tp / recall_denominator
print("Recall {:0.2f}".format(recall))

Precision 0.38
Recall 0.36


In [26]:
# F1 Score
# Harmonic mean of precision and recall, giving both equal weight.
# It ranges from 0 (worst) to 1 (perfect precision and recall).
# When precision and recall are both 0 the ratio would be 0/0, so F1 is
# reported as 0 by convention instead of NaN.
pr_sum = precision + recall
f1 = (2 * precision * recall) / pr_sum if pr_sum else 0.0
print("F1 Score {:0.2f}".format(f1))

### Calculate RMSE
from sklearn.metrics import mean_squared_error
from math import sqrt
# Use the builtin float here: the np.float alias was deprecated in NumPy 1.20
# and removed in NumPy 1.24, where np.float raises AttributeError.
rmse = sqrt(mean_squared_error(y_test.astype(float), y_pred.astype(float)))
print("RMSE {:0.2f}".format(rmse))

F1 Score 0.37
RMSE 19.94
