# Model Comparison

In [None]:
from pickle import load
from tensorflow.keras.models import load_model
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn import metrics
import numpy as np
from sklearn.metrics import classification_report

In [None]:
# Dependencies for the plots and the cross-validation code further down
import matplotlib.pyplot as plt
from sklearn import model_selection
# Switch matplotlib to its 'ggplot' style sheet for every figure drawn afterwards.
plt.style.use('ggplot')

## Load training sets

In [None]:
# Read every training CSV in one pass; the names on the left line up
# positionally with the paths on the right.
(
    X_train_scaled,
    X_train_scaled_sk,
    X_train_scaled_cs,
    y_train_categorical,
    encoded_y_train,
) = map(
    pd.read_csv,
    (
        "test_train_data/X_train_scaled.csv",
        "test_train_data/X_train_scaled_sk.csv",
        "test_train_data/X_train_scaled_cs.csv",
        "test_train_data/y_train_categorical.csv",
        "test_train_data/encoded_y_train.csv",
    ),
)

## Load testing set

In [None]:
# Read every test CSV; targets and paths are matched by position.
(
    X_test_scaled,
    X_test_scaled_sk,
    X_test_scaled_cs,
    encoded_y_test,
    y_test_categorical,
) = [
    pd.read_csv(csv_path)
    for csv_path in (
        "test_train_data/X_test_scaled.csv",
        "test_train_data/X_test_scaled_sk.csv",
        "test_train_data/X_test_scaled_cs.csv",
        "test_train_data/encoded_y_test.csv",
        "test_train_data/y_test_categorical.csv",
    )
]

## Load all Models

In [None]:
# knn = load(open('knn_trained.pkl', 'rb'))
# Load the three pickled KNN models. Each file is opened in a `with` block so
# its handle is closed as soon as the model has been deserialised.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
with open('knn_trained_all.pkl', 'rb') as model_file:
    knn_all = load(model_file)
with open('knn_trained_sk.pkl', 'rb') as model_file:
    knn_sk = load(model_file)
with open('knn_trained_cs.pkl', 'rb') as model_file:
    knn_cs = load(model_file)

In [None]:
# log_reg = load(open('logistic_model_trained.pkl', 'rb'))
# Load the three pickled logistic-regression models. Each file is opened in a
# `with` block so its handle is closed as soon as the model is deserialised.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
with open('logistic_model_trained_all.pkl', 'rb') as model_file:
    log_reg_all = load(model_file)
with open('logistic_model_trained_sk.pkl', 'rb') as model_file:
    log_reg_sk = load(model_file)
with open('logistic_model_trained_cs.pkl', 'rb') as model_file:
    log_reg_cs = load(model_file)

In [None]:
# deep_model = load_model("deep_model_trained.h5")
# Restore the three saved Keras models, one per file suffix (_all, _sk, _cs).
deep_model_all, deep_model_sk, deep_model_cs = (
    load_model(h5_path)
    for h5_path in (
        "deep_model_trained_all.h5",
        "deep_model_trained_sk.h5",
        "deep_model_trained_cs.h5",
    )
)

In [None]:
# random_forest = load(open('randomforest_trained.pkl', 'rb'))
# Load the three pickled random-forest models. Each file is opened in a `with`
# block so its handle is closed as soon as the model has been deserialised.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
with open('randomforest_trained_all.pkl', 'rb') as model_file:
    random_forest_all = load(model_file)
with open('randomforest_trained_sk.pkl', 'rb') as model_file:
    random_forest_sk = load(model_file)
with open('randomforest_trained_cs.pkl', 'rb') as model_file:
    random_forest_cs = load(model_file)

In [None]:
# svm = load(open('svm_model_trained.pkl', 'rb'))
# Load the three pickled SVM models. Each file is opened in a `with` block so
# its handle is closed as soon as the model has been deserialised.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
with open('svm_model_trained_all.pkl', 'rb') as model_file:
    svm_all = load(model_file)
with open('svm_model_trained_sk.pkl', 'rb') as model_file:
    svm_sk = load(model_file)
with open('svm_model_trained_cs.pkl', 'rb') as model_file:
    svm_cs = load(model_file)

## Evaluate all tuned models against the same test set

### KNN

In [None]:
# Test-set accuracy of the KNN model loaded from 'knn_trained_all.pkl'.
knn_accuracy_all = metrics.accuracy_score(
    y_true=encoded_y_test,
    y_pred=knn_all.predict(X_test_scaled),
)
knn_accuracy_all

In [None]:
# Test-set accuracy of the `_sk` KNN model on the matching `_sk` test features.
knn_accuracy_sk = metrics.accuracy_score(
    y_true=encoded_y_test,
    y_pred=knn_sk.predict(X_test_scaled_sk),
)
knn_accuracy_sk

In [None]:
# Test-set accuracy of the `_cs` KNN model on the matching `_cs` test features.
knn_accuracy_cs = metrics.accuracy_score(
    y_true=encoded_y_test,
    y_pred=knn_cs.predict(X_test_scaled_cs),
)
knn_accuracy_cs

### Logistic Regression

In [None]:
# Predict first, then score against the encoded test labels.
log_reg_predictions_all = log_reg_all.predict(X_test_scaled)
log_reg_accuracy_all = accuracy_score(encoded_y_test, log_reg_predictions_all)
log_reg_accuracy_all

In [None]:
# Predict on the `_sk` test features, then score against the encoded labels.
log_reg_predictions_sk = log_reg_sk.predict(X_test_scaled_sk)
log_reg_accuracy_sk = accuracy_score(encoded_y_test, log_reg_predictions_sk)
log_reg_accuracy_sk

In [None]:
# Predict on the `_cs` test features, then score against the encoded labels.
log_reg_predictions_cs = log_reg_cs.predict(X_test_scaled_cs)
log_reg_accuracy_cs = accuracy_score(encoded_y_test, log_reg_predictions_cs)
log_reg_accuracy_cs

### Neural Network

In [None]:
# evaluate() is unpacked into (loss, accuracy); this presumes the saved model
# was compiled with exactly one metric -- confirm against the training notebook.
deep_model_loss_all, deep_model_accuracy_all = deep_model_all.evaluate(
    x=X_test_scaled,
    y=y_test_categorical,
    verbose=2,
)

In [None]:
# evaluate() is unpacked into (loss, accuracy); this presumes the saved model
# was compiled with exactly one metric -- confirm against the training notebook.
deep_model_loss_sk, deep_model_accuracy_sk = deep_model_sk.evaluate(
    x=X_test_scaled_sk,
    y=y_test_categorical,
    verbose=2,
)

In [None]:
# evaluate() is unpacked into (loss, accuracy); this presumes the saved model
# was compiled with exactly one metric -- confirm against the training notebook.
deep_model_loss_cs, deep_model_accuracy_cs = deep_model_cs.evaluate(
    x=X_test_scaled_cs,
    y=y_test_categorical,
    verbose=2,
)

### Random Forest

In [None]:
# Unlike the KNN / LR / SVM cells, the forest is scored against
# `y_test_categorical` rather than `encoded_y_test`.
random_forest_accuracy_all = random_forest_all.score(
    X=X_test_scaled,
    y=y_test_categorical,
)
random_forest_accuracy_all

In [None]:
# Score the `_sk` forest on the `_sk` test features against `y_test_categorical`.
random_forest_accuracy_sk = random_forest_sk.score(
    X=X_test_scaled_sk,
    y=y_test_categorical,
)
random_forest_accuracy_sk

In [None]:
# Score the `_cs` forest on the `_cs` test features against `y_test_categorical`.
random_forest_accuracy_cs = random_forest_cs.score(
    X=X_test_scaled_cs,
    y=y_test_categorical,
)
random_forest_accuracy_cs

### SVM

In [None]:
# Test-set accuracy of the SVM model loaded from 'svm_model_trained_all.pkl'.
svm_accuracy_all = metrics.accuracy_score(
    y_true=encoded_y_test,
    y_pred=svm_all.predict(X_test_scaled),
)
svm_accuracy_all

In [None]:
# Test-set accuracy of the `_sk` SVM model on the matching `_sk` test features.
svm_accuracy_sk = metrics.accuracy_score(
    y_true=encoded_y_test,
    y_pred=svm_sk.predict(X_test_scaled_sk),
)
svm_accuracy_sk

In [None]:
# Test-set accuracy of the `_cs` SVM model on the matching `_cs` test features.
svm_accuracy_cs = metrics.accuracy_score(
    y_true=encoded_y_test,
    y_pred=svm_cs.predict(X_test_scaled_cs),
)
svm_accuracy_cs

In [None]:
# Tick labels for the accuracy bar chart: five models for each of the three
# suffixes, grouped by suffix ('all' first, then 'sk', then 'cs').
model_labels = [
    f'{model_key}_accuracy_{suffix}'
    for suffix in ('all', 'sk', 'cs')
    for model_key in ('knn', 'log_reg', 'deep_model', 'random_forest', 'svm')
]


In [None]:
# Bar heights for the accuracy chart, in the same order as `model_labels`.
model_accuracy = [
    # *_all models
    knn_accuracy_all,
    log_reg_accuracy_all,
    deep_model_accuracy_all,
    random_forest_accuracy_all,
    svm_accuracy_all,
    # *_sk models
    knn_accuracy_sk,
    log_reg_accuracy_sk,
    deep_model_accuracy_sk,
    random_forest_accuracy_sk,
    svm_accuracy_sk,
    # *_cs models
    knn_accuracy_cs,
    log_reg_accuracy_cs,
    deep_model_accuracy_cs,
    random_forest_accuracy_cs,
    svm_accuracy_cs,
]

In [None]:

# One bar per entry of `model_labels`, in list order.
x_pos = np.arange(len(model_labels))

# Widen the figure: the long labels do not fit the default figure size.
plt.figure(figsize=(12, 6))

# Create bars
plt.bar(x_pos, model_accuracy, color=(0.2, 0.4, 0.6, 0.6))

# Create names on the x-axis, rotated so the long labels do not overlap
plt.xticks(x_pos, model_labels, rotation=90)
plt.ylabel('Accuracy')

# Keep the rotated labels inside the figure area, then show the graph
plt.tight_layout()
plt.show()

## Comparison using KFold Cross Validation with All Features

In [None]:
# (label, loaded model) pairs that the cross-validation loop iterates over.
models = [
    ('KNN', knn_all),
    ('LR', log_reg_all),
    # ('RF', random_forest_all),
    ('SVM', svm_all),
]

In [None]:
# Display the (label, model) pairs collected for cross-validation.
models

In [None]:
# Display the training labels as loaded from 'encoded_y_train.csv'.
encoded_y_train

In [None]:
# Flatten the label DataFrame into a 1-D NumPy array.
encoded_y_train_array = encoded_y_train.to_numpy().ravel()

In [None]:
# 10-fold cross-validation of every (name, model) pair in `models` on the
# all-features training data. `results` collects one array of per-fold
# accuracies per model and `names` the matching labels.
results = []
names = []
scoring = 'accuracy'
# The splitter is unshuffled and holds no per-model state, so build it once.
kfold = model_selection.KFold(n_splits=10, random_state=None)
for name, model in models:
    print(model)

    if model is random_forest_all:
        # Identity check: `model in (random_forest_all)` was a membership test
        # against the forest object itself, not a comparison with it.
        # The forest is validated against `y_train_categorical`.
        print(f'{model} - Model found')
        y_train = y_train_categorical
    else:
        # Use the flattened 1-D labels; passing the single-column DataFrame
        # makes scikit-learn warn about a column-vector y on every fit.
        y_train = encoded_y_train_array

    # Exactly one cross-validation run per model (the old code ran a second,
    # unconditional one that overwrote the branch result).
    cv_results = model_selection.cross_val_score(model, X_train_scaled, y_train, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    msg = f"{name}: {cv_results.mean():f} ({cv_results.std():f})"
    print(msg)

In [None]:
# Notched box plot of the per-fold cross-validation scores, one box per model.
figure, ax = plt.subplots(figsize=(10, 10))
figure.suptitle('Model Comparison using Kfold Cross Validation')
box = ax.boxplot(results, notch=True, vert=True, patch_artist=True, labels=names)

# Fill each box with its own colour; zip() stops at the shorter sequence, so
# any boxes beyond the colour list keep the default fill.
colors = ['lightyellow', 'lightblue', 'lightgreen']
for box_patch, fill_color in zip(box['boxes'], colors):
    box_patch.set_facecolor(fill_color)

plt.show()