In [5]:
import os
import glob
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

In [None]:
def get_tunning_reports(classifier_name: str) -> pd.DataFrame:
    """Load every tuning report of a classifier into a single DataFrame.

    Reads all ``*.csv`` files found in
    ``../data/results/<classifier_name>/tunning`` (relative to the current
    working directory), using the first column of each file as its index,
    and stacks them row-wise with a fresh ``0..n-1`` index.

    Parameters
    ----------
    classifier_name : str
        Name of the results sub-directory, e.g. ``"random_forest"``.

    Returns
    -------
    pd.DataFrame
        Rows of all reports, concatenated in alphabetical order of the
        report file paths.

    Raises
    ------
    FileNotFoundError
        If no CSV report matches the search pattern (missing directory or
        empty directory).
    """
    pattern = os.path.join(f"../data/results/{classifier_name}/tunning", "*.csv")

    # glob returns paths in arbitrary filesystem order; sort them so the row
    # order of the result is the same on every run and every machine.
    tunning_reports = sorted(glob.glob(pattern))

    # pd.concat on an empty sequence fails with an unhelpful
    # "No objects to concatenate"; report what was actually searched instead.
    if not tunning_reports:
        raise FileNotFoundError(f"No tuning reports found matching {pattern!r}")

    df = pd.concat((pd.read_csv(f, index_col=0) for f in tunning_reports), ignore_index=True)
    return df

In [None]:
# Box plot of the KNN train/validation metrics stored in validation.csv,
# saved as a JPEG under ../images.
knn = pd.read_csv("../data/results/k-nearest_neighbors/validation.csv", index_col=0)

# Column -> tick label. Renaming the columns ties each label to its own
# column, instead of overwriting the tick labels by position afterwards.
metric_labels = {
    "train_accuracy": "Train Accuracy",
    "validation_accuracy": "Validation Accuracy",
    "train_precision": "Train Precision",
    "validation_precision": "Validation Precision",
    "train_recall": "Train Recall",
    "validation_recall": "Validation Recall",
    "train_f1_score": "Train F1 score",
    "validation_f1_score": "Validation F1 score",
}

fig, ax = plt.subplots(figsize=(15, 5), dpi=200)
# Pass the frame as `data=` explicitly: the meaning of the first positional
# argument of sns.boxplot is not the same across seaborn releases.
g = sns.boxplot(data=knn[list(metric_labels)].rename(columns=metric_labels), ax=ax)
g.set_title("KNN: train and validation metrics for balanced dataset (1/1)")
fig.savefig('../images/k-nearest_neighbors_validation_balanced.jpg', bbox_inches='tight');

In [None]:
# KNN hyper-parameter search: mean F1 score per number of neighbours,
# one facet per value of the "metric" column.
knn_tunning = get_tunning_reports("k-nearest_neighbors")

g = sns.catplot(
    kind="box",
    data=knn_tunning,
    x="n_neighbors",
    y="f1_mean_score",
    col="metric",
    dodge=False,
)

# Per-facet titles and axis labels.
g.set_titles("Distance Metric = {col_name}")
g.set(xlabel="Number of Neighbors", ylabel="mean F1 score")

# Leave headroom above the facets (top=0.85) for the figure-level title.
g.fig.subplots_adjust(top=0.85)
g.fig.suptitle('KNN tunning for balanced dataset (1/1)')

plt.savefig('../images/k-nearest_neighbors_tunning_balanced.jpg', bbox_inches='tight')

In [None]:
# Box plot of the logistic-regression train/validation metrics stored in
# validation.csv, saved as a JPEG under ../images.
logistic = pd.read_csv("../data/results/logistic_regression/validation.csv", index_col=0)

# Column -> tick label. Renaming the columns ties each label to its own
# column, instead of overwriting the tick labels by position afterwards.
metric_labels = {
    "train_accuracy": "Train Accuracy",
    "validation_accuracy": "Validation Accuracy",
    "train_precision": "Train Precision",
    "validation_precision": "Validation Precision",
    "train_recall": "Train Recall",
    "validation_recall": "Validation Recall",
    "train_f1_score": "Train F1 score",
    "validation_f1_score": "Validation F1 score",
}

fig, ax = plt.subplots(figsize=(15, 5), dpi=200)
# Pass the frame as `data=` explicitly: the meaning of the first positional
# argument of sns.boxplot is not the same across seaborn releases.
g = sns.boxplot(data=logistic[list(metric_labels)].rename(columns=metric_labels), ax=ax)
g.set_title("Logistic Regression: train and validation metrics for balanced dataset (1/1)")
fig.savefig('../images/logistic_regression_validation_balanced.jpg', bbox_inches='tight');

In [None]:
# Logistic-regression hyper-parameter search: mean F1 score per value of C,
# one facet per value of the "penalty" column.
logistic_tunning = get_tunning_reports("logistic_regression")
g = sns.catplot(
    data=logistic_tunning, x="C", y="f1_mean_score", col="penalty",
    kind="box", dodge=False,
)
g.fig.suptitle('Logistic Regression tunning for balanced dataset (1/1)')
# Leave headroom above the facets (top=0.85) for the figure-level title.
g.fig.subplots_adjust(top=0.85)
# Apply the y-range to every facet through the grid itself. plt.ylim only
# touches pyplot's "current" axes and reaches the other facets solely via
# the shared-y default of catplot.
# NOTE(review): the 0.69-0.76 window is hard-coded; scores outside it are
# not shown — confirm it still fits the data when the reports change.
g.set(xlabel="C", ylabel="mean F1 score", ylim=(0.69, 0.76))
g.set_titles("Penalty = {col_name}")
plt.savefig('../images/logistic_regression_tunning_balanced.jpg', bbox_inches='tight')

In [None]:
# Box plot of the random-forest train/validation metrics stored in
# validation.csv, saved as a JPEG under ../images.
random_forest = pd.read_csv("../data/results/random_forest/validation.csv", index_col=0)

# Column -> tick label. Renaming the columns ties each label to its own
# column, instead of overwriting the tick labels by position afterwards.
metric_labels = {
    "train_accuracy": "Train Accuracy",
    "validation_accuracy": "Validation Accuracy",
    "train_precision": "Train Precision",
    "validation_precision": "Validation Precision",
    "train_recall": "Train Recall",
    "validation_recall": "Validation Recall",
    "train_f1_score": "Train F1 score",
    "validation_f1_score": "Validation F1 score",
}

fig, ax = plt.subplots(figsize=(15, 5), dpi=200)
# Pass the frame as `data=` explicitly: the meaning of the first positional
# argument of sns.boxplot is not the same across seaborn releases.
g = sns.boxplot(data=random_forest[list(metric_labels)].rename(columns=metric_labels), ax=ax)
g.set_title("Random Forest: train and validation metrics for balanced dataset (1/1)")
fig.savefig('../images/random_forest_validation_balanced.jpg', bbox_inches='tight');

In [None]:
# Random-forest hyper-parameter search: mean F1 score per number of
# estimators, one facet per value of the "criterion" column.
random_forest_tunning = get_tunning_reports("random_forest")

g = sns.catplot(
    kind="box",
    data=random_forest_tunning,
    x="n_estimators",
    y="f1_mean_score",
    col="criterion",
    dodge=False,
)

# Per-facet titles and axis labels.
g.set_titles("Criterion = {col_name}")
g.set(xlabel="Number of estimators", ylabel="mean F1 score")

# Leave headroom above the facets (top=0.85) for the figure-level title.
g.fig.subplots_adjust(top=0.85)
g.fig.suptitle('Random Forest tunning for balanced dataset (1/1)')

plt.savefig('../images/random_forest_tunning_balanced.jpg', bbox_inches='tight')