In [2]:
from time import perf_counter, time

import joblib
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score

In [19]:
# Load the validation and test feature/label CSV files.
# Forward slashes are used so the paths resolve on every OS and to avoid the
# invalid "\X" / "\y" escape sequences the backslash-separated literals contained.
X_validation = pd.read_csv("Data/X_validation.csv")
y_validation = pd.read_csv("Data/y_validation.csv")

X_test = pd.read_csv("Data/X_test.csv")
y_test = pd.read_csv("Data/y_test.csv")

In [4]:
# Load each persisted classifier from disk, keyed by its short name.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code, so
# only load model files that come from a trusted source.
models = {}

for m in ["LR", "SVM", "MLP", "RF", "GB"]:
    # The f-string already interpolates m; the former trailing .format(m) was a
    # no-op. A forward slash keeps the path portable across operating systems.
    models[m] = joblib.load(f"Models/{m}_model.pkl")

In [5]:
# Show the loaded estimators and their parameters as a sanity check.
print(models)

{'LR': LogisticRegression(C=1), 'SVM': SVC(C=0.1, gamma=0.01, kernel='poly'), 'MLP': MLPClassifier(activation='tanh'), 'RF': RandomForestClassifier(max_depth=16, n_estimators=50), 'GB': GradientBoostingClassifier(learning_rate=100, n_estimators=250)}


In [17]:
def evaluate_model(name, model, features, labels):
    """Print accuracy, precision, recall and prediction latency for a model.

    Parameters
    ----------
    name : str
        Label printed at the start of the report line.
    model : object
        Fitted estimator exposing ``predict(features)``.
    features : array-like
        Inputs passed to ``model.predict``.
    labels : array-like
        True labels the predictions are scored against.
        NOTE(review): precision/recall are called with their default
        arguments, which presumably requires binary labels — confirm.

    Returns
    -------
    None
        The report line is printed, not returned.
    """
    # perf_counter is a monotonic, high-resolution clock, so it is better
    # suited to timing a short predict() call than wall-clock time().
    start = perf_counter()
    pred = model.predict(features)
    elapsed_ms = round((perf_counter() - start) * 1000, 3)

    accuracy = round(accuracy_score(labels, pred), 3)
    precision = round(precision_score(labels, pred), 3)
    recall = round(recall_score(labels, pred), 3)
    print(f"{name}\t Accuracy: {accuracy} / Precision: {precision} / Recall: {recall} / Latency: {elapsed_ms}ms")

In [18]:
# Report the metrics of every loaded model on the validation data,
# one line per model, in the order the models were loaded.
for name in models:
    model = models[name]
    evaluate_model(name, model, X_validation, y_validation)

LR	 Accuracy: 0.855 / Precision: 0.862 / Recall: 0.98 / Latency: 4.109ms
SVM	 Accuracy: 0.871 / Precision: 0.877 / Recall: 0.98 / Latency: 4.002ms
MLP	 Accuracy: 0.823 / Precision: 0.823 / Recall: 1.0 / Latency: 2.85ms
RF	 Accuracy: 0.871 / Precision: 0.877 / Recall: 0.98 / Latency: 24.374ms
GB	 Accuracy: 0.839 / Precision: 0.86 / Recall: 0.961 / Latency: 4.052ms


### Evaluate the best model on the test dataset

In [20]:
# Final check of the selected model on the test data. "SVM" is hardcoded here.
# NOTE(review): re-check this choice against the validation report if the
# models are retrained.
evaluate_model("SVM", models["SVM"], X_test, y_test)

SVM	 Accuracy: 0.839 / Precision: 0.875 / Recall: 0.942 / Latency: 4.883ms
