In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
from matplotlib import style
style.use("ggplot")
from sklearn.model_selection import GridSearchCV
from sklearn import svm, metrics
import pickle
from sklearn.model_selection import train_test_split

# environment settings
cwd = os.getcwd()
data = os.path.join(cwd, 'data', '1.0-ag-data-exploration.csv')

# read in data
df = pd.read_csv(data)

# drop not needed columns
df = df.drop('Data Year - Fiscal', axis = 1)
df = df.drop('CompanyID', axis = 1)
df = df.drop('Return on Equity', axis = 1)

min = 0
max = len(df)

X = np.array(df.iloc[min:, df.columns != "BK"])
y = df.iloc[min:, df.columns == "BK"].values.reshape(-1,)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

In [None]:
with open(os.path.join(cwd, 'classifiers', '3.0-sh-svm-linear.pkl'), 'rb') as f:
    clf = pickle.load(f)

predicted = clf.predict(X_test)

In [14]:
print(metrics.classification_report(y_test, predicted))

             precision    recall  f1-score   support

          0       0.99      1.00      1.00     17861
          1       1.00      0.09      0.16       101

avg / total       0.99      0.99      0.99     17962



In [15]:
print('Accuracy: ', metrics.accuracy_score(y_test, predicted))

Accuracy:  0.9948780759380915


In [16]:
cm = metrics.confusion_matrix(y_test, predicted)
print('Confusion matrix: ', cm)

Confusion matrix:  [[17861     0]
 [   92     9]]


In [17]:
print('True positive: ', cm[0][0])

True positive:  17861


In [18]:
print('False positive: ', cm[1][0])

False positive:  92


In [19]:
print('True negative: ', cm[1][1])

True negative:  9


In [20]:
print('False negative: ', cm[0][1])

False negative:  0


In [21]:
print('F1 Score: ', metrics.f1_score(y_test, predicted))

F1 Score:  0.1636363636363636


In [22]:
print('Recall Score (aka sensitivity): ', metrics.recall_score(y_test, predicted))

Recall Score (aka sensitivity):  0.0891089108910891


In [23]:
print('Precision Score: ', metrics.precision_score(y_test, predicted))

Precision Score:  1.0


In [24]:
print('Hamming loss: ', metrics.hamming_loss(y_test, predicted))

Hamming loss:  0.005121924061908473


In [27]:
print("Summary:")
col_names =  ['Classifier', 'TP', 'TN', 'FP', 'FN', 'Accuracy', 'Precision', 'Recall', 'F1']
table = pd.DataFrame(columns = col_names)
table.loc[0] = ['SVM - RBF', cm[0][0], cm[1][1], cm[1][0], cm[0][1], metrics.accuracy_score(y_test, predicted), metrics.precision_score(y_test, predicted), metrics.recall_score(y_test, predicted), metrics.f1_score(y_test, predicted)]
table

Summary:


Unnamed: 0,Classifier,TP,TN,FP,FN,Accuracy,Precision,Recall,F1
0,SVM - RBF,17861,9,92,0,0.994878,1.0,0.089109,0.163636
