In [None]:
import pandas as pd
import pickle
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from sklearn import metrics
import csv
from tensorflow.keras.models import load_model
from sklearn.metrics import (
    recall_score,
    precision_score,
    f1_score,
    accuracy_score,
    plot_confusion_matrix,
    confusion_matrix,
)
import warnings

# Silence every FutureWarning for the rest of the session.
# NOTE(review): this is a process-wide filter, so it also hides any
# deprecation notice that the libraries used below raise as FutureWarning
# (presumably the reason it was added) - confirm nothing important is masked.
warnings.filterwarnings("ignore", category=FutureWarning)

In [None]:
def calculateMetrics(y_test, pred):
    """
    Score predictions of the dte, logreg and svm models.

    Computes accuracy, precision, recall and F1 of `pred` against `y_test`,
    prints each score with eight decimals and returns them as the tuple
    (accuracy, precision, recall, f1).
    """
    # Each report line is paired with the scorer that fills it in; the order
    # drives both the printed output and the returned tuple.
    report = (
        ("Accuracy: %.8f", accuracy_score),
        ("Precision: %.8f", precision_score),
        ("Recall: %.8f", recall_score),
        ("F1: %.8f", f1_score),
    )

    # Compute everything first, then print, so a failing scorer produces no
    # partial report.
    scores = tuple(scorer(y_test, pred) for _, scorer in report)
    for (template, _), value in zip(report, scores):
        print(template % value)

    return scores


def calculateMetricsNN(clf, X_test, y_test, threshold=0.5):
    """
    Score the nn model on a test set.

    Runs one silent forward pass, turns the raw outputs into class labels by
    comparing them with `threshold`, and prints/returns the same four scores
    as calculateMetrics.

    Parameters
    ----------
    clf : model exposing ``predict(X, verbose=...)``; its output is indexed
        as ``[:, 0]``, so a 2-D array is expected.
    X_test : features passed straight to ``clf.predict``.
    y_test : true labels the thresholded predictions are compared against.
    threshold : cut-off above which an output counts as class 1
        (default 0.5, the value that used to be hard-coded).

    Returns
    -------
    (accuracy, precision, recall, f1, predicted, predicted_classes) where
    `predicted` is the first output column and `predicted_classes` the
    int32 labels derived from it.
    """
    # Predict once and reuse the result. The previous version called
    # clf.predict a second time (without verbose=0), doubling inference cost
    # and printing a progress bar.
    raw_output = clf.predict(X_test, verbose=0)

    # reduce to 1d arrays
    predicted = raw_output[:, 0]
    predicted_classes = (raw_output > threshold).astype("int32")[:, 0]

    # Delegate scoring/printing to the shared helper instead of duplicating it.
    accuracy, precision, recall, f1 = calculateMetrics(y_test, predicted_classes)

    return accuracy, precision, recall, f1, predicted, predicted_classes


def featureImportance(clf, csv_path="featureImportance_dte.csv"):
    """
    Plot and export the feature importances of a fitted model.

    Reads ``clf.feature_importances_``, draws one bar per feature (labelled
    by its positional index) and writes the same table to `csv_path`.

    Parameters
    ----------
    clf : estimator exposing a ``feature_importances_`` sequence.
    csv_path : destination of the exported table; defaults to the file name
        that used to be hard-coded.

    Returns
    -------
    None
    """
    importance = clf.feature_importances_

    # Build the table in one step. Appending row by row was quadratic and
    # relied on DataFrame.append, which no longer exists in pandas 2.x.
    # Feature indices are kept as integers here.
    importance_table = pd.DataFrame(
        {"Feature": range(len(importance)), "Score": importance},
        columns=["Feature", "Score"],
    )

    plt.figure(figsize=(20, 5))
    sns.barplot(x="Feature", y="Score", data=importance_table, color="#00338d")

    # save to csv
    importance_table.to_csv(csv_path)


def confusionMatrix(y_test, predicted, clf, X_test):
    """
    Print and plot the confusion matrix of a fitted classifier.

    Parameters
    ----------
    y_test : true labels.
    predicted : labels predicted for `X_test`; used for the printed matrix.
    clf : fitted estimator; the plot is drawn from `clf` and `X_test`.
    X_test : features the plot's predictions are computed from.

    Returns
    -------
    None
    """
    print(metrics.confusion_matrix(y_test, predicted))

    # plot_confusion_matrix was deprecated in scikit-learn 1.0 and removed in
    # 1.2; ConfusionMatrixDisplay.from_estimator is its documented
    # replacement. Fall back to the legacy helper only where the new API is
    # not available.
    # NOTE(review): the notebook's import cell still imports
    # plot_confusion_matrix, which fails on scikit-learn >= 1.2 - drop that
    # import when upgrading.
    display_cls = getattr(metrics, "ConfusionMatrixDisplay", None)
    if display_cls is not None and hasattr(display_cls, "from_estimator"):
        display_cls.from_estimator(clf, X_test, y_test)
    else:
        plot_confusion_matrix(clf, X_test, y_test)
    plt.show()


def predict_y(clf, X_test):
    """
    Return whatever ``clf.predict`` yields for `X_test`.
    """
    return clf.predict(X_test)


def getProbability(clf, X_test):
    """
    Return the first ``predict_proba`` column as a one-column DataFrame.

    NOTE(review): only column 0 is kept. For scikit-learn classifiers that
    is the probability of ``clf.classes_[0]`` - confirm this is the class
    the downstream plots are meant to show.
    """
    # Slice with :1 (not 0) so the result stays two-dimensional and the
    # resulting frame has a single column labelled 0.
    first_column = clf.predict_proba(X_test)[:, :1]
    return pd.DataFrame(first_column.tolist())


def precision_recall_curve(y_test, df_prob):
    """
    Plot the precision-recall curve together with a no-skill baseline.

    Parameters
    ----------
    y_test : true binary labels; the share of entries equal to 1 gives the
        height of the no-skill line.
    df_prob : scores/probabilities forwarded to
        ``sklearn.metrics.precision_recall_curve``.

    Returns
    -------
    None
    """
    # This function shadows the scikit-learn name, so the computation has to
    # go through the `metrics` module; the unqualified call used before
    # resolved to this function itself and recursed until RecursionError.
    lr_precision, lr_recall, _ = metrics.precision_recall_curve(y_test, df_prob)

    # plot the precision-recall curves
    # np.asarray lets plain sequences work as well as arrays/Series.
    labels = np.asarray(y_test)
    no_skill = np.count_nonzero(labels == 1) / len(labels)
    plt.plot([0, 1], [no_skill, no_skill], linestyle="--", label="No Skill")
    plt.plot(lr_recall, lr_precision, marker=".")
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.legend()
    plt.show()

Decision Tree

In [None]:
# load
# NOTE: pickle.load can execute arbitrary code - only load artefacts that
# were produced by this project. The `with` blocks close each file handle
# instead of leaving it to the garbage collector.
with open("../01_decision_tree/Decisiontreemodel_3months.pkl", "rb") as fh:
    DTE_model = pickle.load(fh)
with open("../01_decision_tree/X_test_3months.pkl", "rb") as fh:
    X_test_DTE = pickle.load(fh)
with open("../01_decision_tree/y_test_3months.pkl", "rb") as fh:
    y_test_DTE = pickle.load(fh)

# get scores and prediction
pred = predict_y(DTE_model, X_test_DTE)
accuracy, precision, recall, f1 = calculateMetrics(y_test_DTE, pred)

# plot confusion matrix
confusionMatrix(y_test_DTE, pred, DTE_model, X_test_DTE)

# plot feature importance (also written to featureImportance_dte.csv)
featureImportance(DTE_model)

Logistic Regression

In [None]:
# load
# NOTE: pickle.load can execute arbitrary code - only load artefacts that
# were produced by this project. The `with` blocks close each file handle
# instead of leaving it to the garbage collector.
with open("../02_logistic_regression/Logregmodel_3months.pkl", "rb") as fh:
    logreg_model = pickle.load(fh)
with open("../02_logistic_regression/X_test_3months.pkl", "rb") as fh:
    X_test_logreg = pickle.load(fh)
with open("../02_logistic_regression/y_test_3months.pkl", "rb") as fh:
    y_test_logreg = pickle.load(fh)

# get scores and prediction
pred = predict_y(logreg_model, X_test_logreg)
accuracy, precision, recall, f1 = calculateMetrics(y_test_logreg, pred)

# plot confusion matrix
confusionMatrix(y_test_logreg, pred, logreg_model, X_test_logreg)

# plot histogram with probabilities
with open("../02_logistic_regression/df_prob_3months.pkl", "rb") as fh:
    df_prob = pickle.load(fh)
sns.histplot(data=df_prob, x=df_prob[0], palette="dark:#5A9_r")

SVM

In [None]:
# load
# NOTE: pickle.load can execute arbitrary code - only load artefacts that
# were produced by this project. The `with` blocks close each file handle
# instead of leaving it to the garbage collector.
with open("../03_svm/SVMmodel_3months.pkl", "rb") as fh:
    svm_model = pickle.load(fh)
with open("../03_svm/X_test_3months.pkl", "rb") as fh:
    X_test_svm = pickle.load(fh)
with open("../03_svm/y_test_3months.pkl", "rb") as fh:
    y_test_svm = pickle.load(fh)

# get scores and prediction
pred = predict_y(svm_model, X_test_svm)
accuracy, precision, recall, f1 = calculateMetrics(y_test_svm, pred)

# plot confusion matrix
confusionMatrix(y_test_svm, pred, svm_model, X_test_svm)

Neural Network

In [None]:
# load
nn_model = load_model("../04_nn/nn/")
# NOTE: pickle.load can execute arbitrary code - only load artefacts that
# were produced by this project. The `with` blocks close each file handle
# instead of leaving it to the garbage collector.
with open("../04_nn/X_test_3months.pkl", "rb") as fh:
    X_test_nn = pickle.load(fh)
with open("../04_nn/y_test_3months.pkl", "rb") as fh:
    y_test_nn = pickle.load(fh)

# get scores and prediction
accuracy, precision, recall, f1, predicted, predicted_classes = calculateMetricsNN(
    nn_model, X_test_nn, y_test_nn
)

# plot confusion matrix
# Drawn as a heatmap from the thresholded labels returned above, rather than
# through confusionMatrix().
cm = metrics.confusion_matrix(y_test_nn, predicted_classes)
f = sns.heatmap(cm, annot=True, fmt="d")