In [1]:
import joblib
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score
from time import time

In [2]:
X_validation = pd.read_csv("Data\X_validation.csv")
y_validation = pd.read_csv("Data\y_validation.csv")

X_test = pd.read_csv("Data\X_test.csv")
y_test = pd.read_csv("Data\y_test.csv")

In [3]:
models = dict()

for m in ["LR", "SVM", "MLP", "RF", "GB"]:
    models[m] = joblib.load(f"Models\\{m}_model.pkl".format(m))

In [4]:
print(models)

{'LR': LogisticRegression(C=10), 'SVM': SVC(C=10, gamma=0.001, kernel='poly'), 'MLP': MLPClassifier(activation='tanh'), 'RF': RandomForestClassifier(max_depth=8, n_estimators=5), 'GB': GradientBoostingClassifier(learning_rate=1, max_depth=1, n_estimators=50)}


In [5]:
#evaluate accuracy, precision, and recall

def evaluate_model (name, model, features, labels): 
    start = time()
    pred = model.predict(features)
    end = time()
    totalTime = round((end - start) * 1000, 3)
    accuracy = round(accuracy_score(labels, pred), 3)
    precision = round(precision_score(labels, pred), 3)
    recall = round(recall_score(labels, pred), 3)
    print(f"{name}\t Accuracy: {accuracy} / Precision: {precision} / Recall: {recall} / Latency: {totalTime}ms")

In [6]:
for name, model in models.items():
    evaluate_model(name, model, X_validation, y_validation)

LR	 Accuracy: 0.984 / Precision: 0.983 / Recall: 1.0 / Latency: 2.519ms
SVM	 Accuracy: 0.968 / Precision: 0.967 / Recall: 1.0 / Latency: 2.335ms
MLP	 Accuracy: 0.952 / Precision: 0.951 / Recall: 1.0 / Latency: 25.921ms
RF	 Accuracy: 0.919 / Precision: 0.949 / Recall: 0.966 / Latency: 1.041ms
GB	 Accuracy: 0.968 / Precision: 0.983 / Recall: 0.983 / Latency: 0.0ms


### Evaluate best model on test dataset

In [7]:
evaluate_model("LR", models["LR"], X_test, y_test)

LR	 Accuracy: 0.935 / Precision: 0.932 / Recall: 1.0 / Latency: 3.001ms
