Import Libraries

In [18]:
import joblib
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay

Model Evaluation

In [None]:
# Load every trained classifier from its saved .pkl file.
# NOTE(review): joblib.load unpickles arbitrary objects - only load model files from a trusted source.
lr_model = joblib.load("../models/lr_model.pkl")
# The KNN model gets its own name; binding it to `gb_model` would be
# overwritten by the gradient boosting load below and the KNN model lost.
knn_model = joblib.load("../models/knn_model.pkl")
dt_model = joblib.load("../models/dt_model.pkl")
rf_model = joblib.load("../models/rf_model.pkl")
gb_model = joblib.load("../models/gb_model.pkl")
svm_model = joblib.load("../models/svm_model.pkl")

# Load the held-out test features and labels from the stored processed files.
X_test = pd.read_csv("../data/processed/X_test.csv")
y_test = pd.read_csv("../data/processed/y_test.csv")

Logistic Regression

In [None]:
# Evaluate the logistic regression model on the held-out test set.
y_pred_lr = lr_model.predict(X_test)

accuracy_lr = accuracy_score(y_test, y_pred_lr)
print(f"accuracy : {accuracy_lr*100:.4f}%")
# Accuracy gives an overall performance snapshot, but it is not a good metric
# on its own because our dataset involves imbalanced classes.
# Baseline accuracy for our model: the accuracy obtained without learning anything.
# In our case, if we just predict the majority class -> 0 -> the accuracy of the model = 44.68%.
# Therefore we need to analyse other metrics as well.

# Per-class precision / recall / F1, which are more informative than accuracy
# when the classes are imbalanced.
print(classification_report(y_test, y_pred_lr))

# These scores are expected, as logistic regression is a linear model and
# medical data in general shows non-linear behaviour.

cm = confusion_matrix(y_test, y_pred_lr)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0, 1, 2, 3, 4])
# Create the axes up front and hand them to disp.plot(); without `ax`,
# disp.plot() opens its own figure, leaving an empty 6x6 figure behind
# and ignoring the requested size.
fig, ax = plt.subplots(figsize=(6, 6))
disp.plot(ax=ax, cmap='Purples', values_format='d')
ax.set_title('Confusion Matrix for Logistic Regression Model')
plt.show()

In [None]:
# Evaluate the k-nearest neighbours model on the held-out test set.
# Bound to `knn_model` (not `gb_model`) so running this cell never leaves the
# gradient boosting variable pointing at the KNN model.
knn_model = joblib.load("../models/knn_model.pkl")

y_pred_knn = knn_model.predict(X_test)

cm = confusion_matrix(y_test, y_pred_knn)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0, 1, 2, 3, 4])
# Pass explicit axes: without `ax`, disp.plot() opens its own figure,
# leaving an empty 6x6 figure behind and ignoring the requested size.
fig, ax = plt.subplots(figsize=(6, 6))
disp.plot(ax=ax, cmap='Purples', values_format='d')
ax.set_title('Confusion Matrix for k-nearest neighbours Model')
plt.show()

In [None]:
# Evaluate the decision tree model on the held-out test set.
# The model is (re)loaded here so the cell can be run on its own.
dt_model = joblib.load("../models/dt_model.pkl")

y_pred_dt = dt_model.predict(X_test)

cm = confusion_matrix(y_test, y_pred_dt)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0, 1, 2, 3, 4])
# Pass explicit axes: without `ax`, disp.plot() opens its own figure,
# leaving an empty 6x6 figure behind and ignoring the requested size.
fig, ax = plt.subplots(figsize=(6, 6))
disp.plot(ax=ax, cmap='Purples', values_format='d')
ax.set_title('Confusion Matrix for decision trees Model')
plt.show()

In [None]:
# Evaluate the gradient boosting model on the held-out test set.
# The model is (re)loaded here so the cell can be run on its own.
gb_model = joblib.load("../models/gb_model.pkl")

y_pred_gb = gb_model.predict(X_test)

cm = confusion_matrix(y_test, y_pred_gb)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0, 1, 2, 3, 4])
# Pass explicit axes: without `ax`, disp.plot() opens its own figure,
# leaving an empty 6x6 figure behind and ignoring the requested size.
fig, ax = plt.subplots(figsize=(6, 6))
disp.plot(ax=ax, cmap='Purples', values_format='d')
ax.set_title('Confusion Matrix for gradient boosting Model')
plt.show()

In [None]:
# Evaluate the random forest model on the held-out test set.
# The model is (re)loaded here so the cell can be run on its own.
rf_model = joblib.load("../models/rf_model.pkl")

y_pred_rf = rf_model.predict(X_test)

cm = confusion_matrix(y_test, y_pred_rf)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0, 1, 2, 3, 4])
# Pass explicit axes: without `ax`, disp.plot() opens its own figure,
# leaving an empty 6x6 figure behind and ignoring the requested size.
fig, ax = plt.subplots(figsize=(6, 6))
disp.plot(ax=ax, cmap='Purples', values_format='d')
ax.set_title('Confusion Matrix for Random Forest Model')
plt.show()