# Classification Model Metrics - Tables and Visualizations

## Implementation

The purpose of this notebook is to generate the tables and visualizations for classification metrics results from each scenario based on the .csv files.

This notebook covers the whole of Section 4.2 of the thesis.

## Step 1 - Importing Dependencies

- Importing the necessary libraries to execute the code.

In [2]:
import pandas as pd
import glob
import matplotlib.pyplot as plt
import seaborn as sns

## Step 2 - Data Loading

- Loading multiple csv files for the desired analysis.

In [63]:
# Directory containing the .csv files to analyse.
path = r'/kaggle/input/422-analysis-10'

# glob returns matches in arbitrary, filesystem-dependent order; sorting keeps
# `all_files` (and therefore `dfs`) in a reproducible order across runs.
all_files = sorted(glob.glob(path + "/*.csv"))

# One DataFrame per file, in the same order as `all_files`.
dfs = [pd.read_csv(filename) for filename in all_files]

## Step 3 - Defining the Table Builder Function

- Builds the table with the metric results for the thesis analysis.

In [64]:
def table_builder(f1_type, precision_type, recall_type, df: pd.DataFrame) -> pd.DataFrame:
    """Build a one-row table describing the step where the F1 metric peaks.

    The row of ``df`` whose ``key`` equals ``f1_type`` and whose
    ``value_mean`` is largest is selected. The precision and recall values
    are then read from the rows with the same ``step`` (and the same
    ``scenario``) as that best F1 row.

    Parameters
    ----------
    f1_type : str
        Value of the ``key`` column identifying the F1 metric rows.
    precision_type : str
        Value of the ``key`` column identifying the precision rows.
    recall_type : str
        Value of the ``key`` column identifying the recall rows.
    df : pandas.DataFrame
        Frame with the columns ``key``, ``step``, ``scenario``,
        ``value_mean`` and ``value_std``.

    Returns
    -------
    pandas.DataFrame
        A single-row frame with the columns ``scenario``, ``<f1_type>``,
        ``std_f1``, ``<precision_type>``, ``std_precision``,
        ``<recall_type>``, ``std_recall`` and ``step``.

    Raises
    ------
    ValueError
        If ``df`` has no row whose ``key`` equals ``f1_type``.
    IndexError
        If the precision or recall key has no row at the selected step.
    """
    filtered_result = df[df["key"] == f1_type]
    if filtered_result.empty:
        raise ValueError(f"no rows found with key == {f1_type!r}")

    # Index label of the row holding the highest mean F1 value.
    max_index = filtered_result["value_mean"].idxmax()

    max_step = filtered_result.loc[max_index, "step"]
    scenario = filtered_result.loc[max_index, "scenario"]
    max_f1_score = filtered_result.loc[max_index, "value_mean"]
    max_std = filtered_result.loc[max_index, "value_std"]

    # Restrict the companion metrics to the best step AND the best row's
    # scenario; filtering on the step alone could pick up another scenario's
    # values when `df` contains more than one scenario.
    at_best_step = df["step"] == max_step
    if pd.notna(scenario):
        at_best_step = at_best_step & (df["scenario"] == scenario)
    same_step_df = df[at_best_step]

    def metric_stats(metric_key):
        """Return (mean, std) of the first `metric_key` row at the best step."""
        rows = same_step_df[same_step_df["key"] == metric_key]
        if rows.empty:
            raise IndexError(
                f"no rows found with key == {metric_key!r} at step {max_step!r}"
            )
        return rows["value_mean"].values[0], rows["value_std"].values[0]

    max_precision, std_precision = metric_stats(precision_type)
    max_recall, std_recall = metric_stats(recall_type)

    f1_result = pd.DataFrame({"scenario": [scenario],
                              f1_type: [max_f1_score],
                              "std_f1": [max_std],
                              precision_type: [max_precision],
                              "std_precision": [std_precision],
                              recall_type: [max_recall],
                              "std_recall": [std_recall],
                              "step": [max_step]})

    return f1_result

- Macro F1-Score table

In [None]:
# One single-row table per loaded DataFrame, built from the macro-averaged keys.
f1_macro = [
    table_builder(
        f1_type="test_f1_score_macro",
        precision_type="test_precision_macro",
        recall_type="test_recall_macro",
        df=loaded_df,
    )
    for loaded_df in dfs
]

# Stack the single-row tables; the sorted view below is only displayed,
# `macro_df` itself keeps the loading order.
macro_df = pd.concat(f1_macro, axis=0, ignore_index=True)
macro_df.sort_values('scenario', ascending=True)

- Weighted F1-Score table

In [None]:
# One single-row table per loaded DataFrame, built from the weighted keys.
f1_weight = [
    table_builder(
        f1_type="test_f1_score_weight",
        precision_type="test_precision_weight",
        recall_type="test_recall_weight",
        df=loaded_df,
    )
    for loaded_df in dfs
]

# Stack the single-row tables; the sorted view below is only displayed,
# `weight_df` itself keeps the loading order.
weight_df = pd.concat(f1_weight, axis=0, ignore_index=True)
weight_df.sort_values('scenario', ascending=True)

## Step 4 - Defining the Chart Builder Data and Function

- Transforming the data for the charts

In [None]:
# Stack every loaded file into one frame with a fresh index, then discard
# the 'Unnamed: 0' column.
df = (
    pd.concat(dfs, ignore_index=True)
    .drop(columns='Unnamed: 0')
)

- Defining the function to plot the charts visualizations for the thesis.

In [80]:
def plot_chart(key, df, axis):
    """Plot one metric's mean value per step for every scenario and save it.

    For each distinct ``scenario`` among the rows of ``df`` whose ``key``
    column equals ``key``, a line of ``value_mean`` over ``step`` is drawn,
    surrounded by a translucent band of ``value_mean`` +/- ``value_std``.
    The figure is written to ``<key>.png`` and ``<key>.pdf`` in the current
    working directory and then shown.

    Parameters
    ----------
    key : str
        Value of the ``key`` column selecting the metric to plot; also used
        as the base name of the saved files.
    df : pandas.DataFrame
        Frame with the columns ``key``, ``scenario``, ``step``,
        ``value_mean`` and ``value_std``.
    axis : str
        Text placed in front of ``' test F1-Score'`` in the y-axis label.

    Returns
    -------
    None
    """
    value_col = 'value_mean'
    std_col = 'value_std'
    scenario_col = 'scenario'

    filtered_result = df[df['key'] == key]

    # Alphabetical order fixes both the legend order and the colour assignment.
    scenarios = sorted(filtered_result[scenario_col].unique().tolist())

    # NOTE: these calls change seaborn/matplotlib global styling.
    sns.set_style("whitegrid")
    sns.set_palette("bright")

    fig, ax = plt.subplots()
    for scenario in scenarios:
        # Sort by step so the line and its band are drawn left to right even
        # when the rows of `df` are not already ordered by step.
        scenario_result = (
            filtered_result[filtered_result[scenario_col] == scenario]
            .sort_values('step', kind='stable')
        )

        # Scenarios 3 and 4 get their own line styles; all others are solid.
        if "scenario 3" in scenario:
            linestyle = "dotted"
        elif "scenario 4" in scenario:
            linestyle = "--"
        else:
            linestyle = "-"

        steps = scenario_result['step']
        mean = scenario_result[value_col]
        std = scenario_result[std_col]

        ax.plot(steps, mean, linewidth=1,
                label=scenario.upper(), linestyle=linestyle)
        # Band of one standard deviation around the mean.
        ax.fill_between(steps, mean - std, mean + std, alpha=0.15)

    # Force black text, ticks and frame regardless of the active style.
    ax.xaxis.label.set_color('black')
    ax.yaxis.label.set_color('black')
    ax.title.set_color('black')
    ax.tick_params(axis='x', colors='black')
    ax.tick_params(axis='y', colors='black')

    # Re-applies the axes' current bounding box unchanged; kept so the layout
    # behaviour matches the original figures.
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width, box.height])
    ax.set_xlabel('Training epochs')
    ax.set_ylabel(f'{axis} test F1-Score')
    # Legend sits outside the axes, to the right and vertically centred.
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), frameon=False)

    for spine in ax.spines.values():
        spine.set_edgecolor('black')

    # Save through the figure object rather than pyplot's "current figure".
    # bbox_inches='tight' keeps the outside legend inside the saved image.
    fig.savefig(f'{key}.png', dpi=500, bbox_inches='tight')
    fig.savefig(f'{key}.pdf', format='pdf', dpi=800, bbox_inches='tight')
    plt.show()

- Macro F1-Score chart

In [None]:
# Draw and save the macro-averaged test F1-Score chart.
plot_chart(
    key="test_f1_score_macro",
    df=df,
    axis="Macro",
)

- Weighted F1-Score chart

In [None]:
# Draw and save the weighted test F1-Score chart.
plot_chart(
    key="test_f1_score_weight",
    df=df,
    axis="Weighted",
)

## Important!

- All the results of the previously defined tables and charts can be found in Section 4.2 of the thesis.