In [None]:
%load_ext autoreload
%autoreload 2

import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from tweet_classification.constants import (
    GRAPHS_PATH,
    HUMAN_EXPERIMENT_NR,
    INFO_EXPERIMENT_NR,
    RESULTS_PATH,
)

# Per-experiment sub-directory names, shared by the results and graphs trees.
_human_exp_dir = f"exp_{HUMAN_EXPERIMENT_NR}"
_info_exp_dir = f"exp_{INFO_EXPERIMENT_NR}"

# Where the "human" experiment's results are read from / graphs belong.
HUMAN_RESULTS_PATH = RESULTS_PATH / "human_results" / _human_exp_dir
HUMAN_GRAPHS_PATH = GRAPHS_PATH / "human" / _human_exp_dir

# Same layout for the "info" experiment.
INFO_RESULTS_PATH = RESULTS_PATH / "info_results" / _info_exp_dir
INFO_GRAPHS_PATH = GRAPHS_PATH / "info" / _info_exp_dir

In [None]:
def plot_performance_comparison(
    df: pd.DataFrame,
    title: str = "Model Performance Comparison",
    ylim=None,
) -> None:
    """Show a grouped bar chart comparing the first two rows of ``df``.

    Each column of ``df`` is treated as one metric and becomes one group on
    the x axis. The first row is drawn as the "Paper" bars and the second row
    as the "Bert" bars. Any further rows are ignored.

    Args:
        df: Frame with one column per metric and at least two rows.
        title: Title placed above the axes.
        ylim: Optional ``(bottom, top)`` pair for the y axis. ``None`` (the
            default) keeps matplotlib's automatic limits.

    Raises:
        IndexError: If ``df`` has fewer than two rows.
    """
    metrics = df.columns
    x = np.arange(len(metrics))
    width = 0.4

    # Positional access: a label lookup via ``df.loc[df.index[i]]`` returns a
    # DataFrame (not a row) when index labels are duplicated.
    paper_results = df.iloc[0].tolist()
    bert_results = df.iloc[1].tolist()

    fig, ax = plt.subplots(layout="constrained")

    # Two bars per metric, centred on the tick: paper on the left, BERT on the right.
    ax.bar(x - width / 2, paper_results, width=width, label="Paper", color="#4C72B0")
    ax.bar(x + width / 2, bert_results, width=width, label="Bert", color="#55A868")

    ax.set_xticks(x)
    ax.set_xticklabels(metrics)
    ax.set_ylabel("Metric Value")
    ax.set_title(title)
    ax.legend()

    if ylim is not None:
        ax.set_ylim(*ylim)

    plt.show()

In [None]:
# Reference metrics, labelled as the CNN results from the paper.
paper_human_perf = pd.DataFrame(
    {"accuracy": 0.835, "precision": 0.827, "recall": 0.840, "f1": 0.829},
    index=["cnn_paper_results"],
)

# Metric columns kept from the results CSV (also reused by a later cell).
perf_columns = ["accuracy", "precision", "recall", "f1"]

# Load the fine-tuned BERT metrics, round every value to 3 decimals with
# Python's built-in ``round``, and label the single row.
human_perf = (
    pd.read_csv(HUMAN_RESULTS_PATH / "performance_metrics.csv")[perf_columns]
    .apply(lambda column: column.map(lambda value: round(value, 3)))
    .set_axis(["bert_fine_tuned_results"], axis="index")
)

# Row 0 = paper, row 1 = BERT.
human_results_comparison = pd.concat([paper_human_perf, human_perf])
print(human_results_comparison)

In [None]:
# Grouped bar chart of the paper's CNN metrics vs. the fine-tuned BERT metrics
# for the human experiment (rows of ``human_results_comparison``).
plot_performance_comparison(human_results_comparison)

In [None]:
# Reference metrics, labelled as the CNN results from the paper.
paper_info_perf = pd.DataFrame(
    {"accuracy": 0.872, "precision": 0.866, "recall": 0.870, "f1": 0.866},
    index=["cnn_paper_results"],
)

# Load the fine-tuned BERT metrics (columns listed in ``perf_columns``, defined
# in an earlier cell), round every value to 3 decimals with Python's built-in
# ``round``, and label the single row.
info_perf = (
    pd.read_csv(INFO_RESULTS_PATH / "performance_metrics.csv")[perf_columns]
    .apply(lambda column: column.map(lambda value: round(value, 3)))
    .set_axis(["bert_fine_tuned_results"], axis="index")
)

# Row 0 = paper, row 1 = BERT.
info_results_comparison = pd.concat([paper_info_perf, info_perf])
print(info_results_comparison)