In [None]:
%load_ext autoreload
%autoreload 2

from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics

from tweet_classification.constants import (
    GRAPHS_PATH,
    HUMAN_EXPERIMENT_NR,
    INFO_EXPERIMENT_NR,
    RESULTS_PATH,
)
from tweet_classification.utils import read_en_humanitarian_data as read_human_data
from tweet_classification.utils import read_en_informativeness_data as read_info_data

# Per-experiment locations for the humanitarian task: where result files are read from
# and where generated graphs are written. The experiment number selects the sub-folder.
HUMAN_RESULTS_PATH = RESULTS_PATH / "human_results" / f"exp_{HUMAN_EXPERIMENT_NR}"
HUMAN_GRAPHS_PATH = GRAPHS_PATH / "human" / f"exp_{HUMAN_EXPERIMENT_NR}"

# Same layout for the informativeness task, keyed by its own experiment number.
INFO_RESULTS_PATH = RESULTS_PATH / "info_results" / f"exp_{INFO_EXPERIMENT_NR}"
INFO_GRAPHS_PATH = GRAPHS_PATH / "info" / f"exp_{INFO_EXPERIMENT_NR}"

# Comparison with the paper's CNN results

In [None]:
def plot_performance_comparison(
    df: pd.DataFrame, title: str, savepath: Path | None = None
) -> None:
    """Draw a grouped bar chart comparing two rows of metric values.

    Args:
        df: Frame whose columns are the metric names. The first row is drawn as
            "CNN Results (Paper)" and the second row as
            "Pretrained Bert + Fine-tuning"; any further rows are ignored.
        title: Title placed above the chart.
        savepath: If given, the figure is also written to this path. Missing
            parent directories are created first.

    Raises:
        IndexError: If ``df`` has fewer than two rows.
    """
    # Named `metric_names` so the `metrics` module imported from sklearn is not shadowed.
    metric_names = df.columns
    x = np.arange(len(metric_names))
    width = 0.4

    # Positional row access: a label lookup such as `df.loc[df.index[0]]` returns a
    # DataFrame instead of a single row when index labels repeat.
    paper_results = df.iloc[0].tolist()
    bert_results = df.iloc[1].tolist()

    fig, ax = plt.subplots(layout="constrained", figsize=(10, 6))

    # Keep the dotted grid behind the bars.
    ax.set_axisbelow(True)
    ax.grid(True, linestyle="dotted")

    # Two bars per metric, centred on the tick position.
    ax.bar(x - width / 2, paper_results, width=width, label="CNN Results (Paper)", color="#4C72B0")
    ax.bar(
        x + width / 2,
        bert_results,
        width=width,
        label="Pretrained Bert + Fine-tuning",
        color="#55A868",
    )

    # Print each bar's value above it.
    for container in ax.containers:
        ax.bar_label(container, padding=3, fontsize=12, fontweight="bold")

    ax.set_xticks(x)
    ax.set_xticklabels(metric_names, fontsize=13, fontweight="bold")
    # NOTE(review): the label says "%", but the 0.05 margin used for the y-limits
    # below only makes sense for fractional values — confirm the intended units.
    ax.set_ylabel("Metric Value (%)", fontsize=16, fontweight="bold")
    ax.set_title(title, fontsize=18, fontweight="bold", pad=15)
    ax.legend(fontsize=16)

    # Zoom the y-axis onto the plotted range so small differences stay visible.
    all_results = paper_results + bert_results
    ax.set_ylim(min(all_results) - 0.05, max(all_results) + 0.05)

    if savepath:
        # Save before `plt.show()` so the written file does not depend on how the
        # active backend treats a figure after it has been displayed.
        Path(savepath).parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(savepath)

    plt.show()

In [None]:
# Hard-coded CNN baseline metrics for the humanitarian task, labelled as the paper's results.
paper_human_perf = pd.DataFrame(
    {"accuracy": [0.835], "precision": [0.827], "recall": [0.840], "f1": [0.829]},
    index=["cnn_paper_results"],
)

# Metric columns shared by both comparison tables in this notebook.
perf_columns = ["accuracy", "precision", "recall", "f1"]

# Select the metric columns and round them in one vectorized step instead of
# mapping a Python lambda over every column.
human_perf = pd.read_csv(HUMAN_RESULTS_PATH / "performance_metrics.csv")[perf_columns].round(3)
# Assigning a one-element index fails loudly if the CSV does not hold exactly one row.
human_perf.index = ["bert_fine_tuned_results"]

human_results_comparison = pd.concat([paper_human_perf, human_perf])
# Trailing expression so the notebook renders the frame as a table.
human_results_comparison

In [None]:
# Draw the humanitarian comparison chart and write it next to the other humanitarian graphs.
plot_performance_comparison(
    df=human_results_comparison,
    title="Model Performance Comparison (Humanitarian)",
    savepath=HUMAN_GRAPHS_PATH / "performance_comparison.png",
)

In [None]:
# Hard-coded CNN baseline metrics for the informativeness task, labelled as the paper's results.
paper_info_perf = pd.DataFrame(
    {"accuracy": [0.872], "precision": [0.866], "recall": [0.870], "f1": [0.866]},
    index=["cnn_paper_results"],
)

# Select the metric columns and round them in one vectorized step instead of
# mapping a Python lambda over every column. `perf_columns` comes from the
# humanitarian comparison cell.
info_perf = pd.read_csv(INFO_RESULTS_PATH / "performance_metrics.csv")[perf_columns].round(3)
# Assigning a one-element index fails loudly if the CSV does not hold exactly one row.
info_perf.index = ["bert_fine_tuned_results"]

info_results_comparison = pd.concat([paper_info_perf, info_perf])
# Trailing expression so the notebook renders the frame as a table.
info_results_comparison

In [None]:
# Draw the informativeness comparison chart and write it next to the other informativeness graphs.
plot_performance_comparison(
    df=info_results_comparison,
    title="Model Performance Comparison (Informativeness)",
    savepath=INFO_GRAPHS_PATH / "performance_comparison.png",
)

# Confusion Matrices

In [None]:
# Keep only the first of the three values returned by the reader; the other two are discarded.
human_train_df, _, _ = read_human_data()

# Give every distinct class label an integer id, numbered in order of first appearance.
human_labels_to_nrs = dict(
    (label, nr) for nr, label in enumerate(human_train_df["class_label"].unique().tolist())
)

In [None]:
# Predicted and true labels stored for the humanitarian experiment.
human_pred_vs_label = pd.read_csv(HUMAN_RESULTS_PATH / "predictions.csv")
human_preds = human_pred_vs_label["predictions"].tolist()
human_actual = human_pred_vs_label["true_labels"].tolist()

human_confusion_matrix = metrics.confusion_matrix(human_actual, human_preds)
# NOTE(review): tick labels follow the insertion order of `human_labels_to_nrs`; this
# presumes the ids in predictions.csv were assigned in that same order — confirm.
human_labels = list(human_labels_to_nrs.keys())

human_cm_display = metrics.ConfusionMatrixDisplay(
    confusion_matrix=human_confusion_matrix, display_labels=human_labels
)

fig, ax = plt.subplots(figsize=(12, 8))
human_cm_display.plot(
    ax=ax, cmap="Blues", colorbar=True, values_format="d", xticks_rotation=45, include_values=True
)

# Restyle the class-name ticks: bold, larger, and right-aligned under their cells.
ax.set_xticks(np.arange(len(human_labels)))
ax.set_yticks(np.arange(len(human_labels)))
ax.set_xticklabels(human_labels, rotation=45, fontsize=13, ha="right", fontweight="bold")
ax.set_yticklabels(human_labels, fontsize=13, fontweight="bold")
ax.set_title("(Humanitarian) Predictions vs True Labels", fontsize=20, fontweight="bold", pad=20)
ax.grid(False)
ax.set_ylabel("True Label", fontsize=20, fontweight="bold")
ax.set_xlabel("Predicted Label", fontsize=20, fontweight="bold")

# Make sure the output folder exists before writing the image.
HUMAN_GRAPHS_PATH.mkdir(parents=True, exist_ok=True)
# The rotated class names and large axis labels can extend past the fixed 12x8
# canvas; a tight bounding box keeps them in the saved PNG (the notebook's inline
# rendering crops tightly by default, so the on-screen figure hides the problem).
fig.savefig(HUMAN_GRAPHS_PATH / "conf_matrix.png", bbox_inches="tight")
plt.show()

In [None]:
# Keep only the first of the three values returned by the reader; the other two are discarded.
info_train_df, _, _ = read_info_data()

# Give every distinct class label an integer id, numbered in order of first appearance.
info_labels_to_nrs = dict(
    (label, nr) for nr, label in enumerate(info_train_df["class_label"].unique().tolist())
)

In [None]:
# Predicted and true labels stored for the informativeness experiment.
info_pred_vs_label = pd.read_csv(INFO_RESULTS_PATH / "predictions.csv")
info_preds = info_pred_vs_label["predictions"].tolist()
info_actual = info_pred_vs_label["true_labels"].tolist()

info_conf_matrix = metrics.confusion_matrix(info_actual, info_preds)
# NOTE(review): the axes are labelled with the raw ids 0/1; `info_labels_to_nrs` is
# built earlier in the notebook but not used here — confirm whether class names
# should be shown instead.
info_cm_display = metrics.ConfusionMatrixDisplay(
    confusion_matrix=info_conf_matrix, display_labels=[0, 1]
)

fig, ax = plt.subplots(figsize=(8, 6))
info_cm_display.plot(ax=ax, cmap="Blues", colorbar=True, values_format="d")
ax.set_title("(Informativeness) Predictions vs True Labels", fontsize=20, fontweight="bold", pad=20)
# Enlarge and embolden the existing tick labels on both axes.
plt.setp(ax.get_xticklabels() + ax.get_yticklabels(), fontsize=15, fontweight="bold")
ax.grid(False)
ax.set_ylabel("True Label", fontsize=20, fontweight="bold", labelpad=15)
ax.set_xlabel("Predicted Label", fontsize=20, fontweight="bold", labelpad=15)

# Make sure the output folder exists before writing the image.
INFO_GRAPHS_PATH.mkdir(parents=True, exist_ok=True)
# A tight bounding box keeps the large title and padded axis labels inside the
# saved PNG even if they extend past the fixed 8x6 canvas.
fig.savefig(INFO_GRAPHS_PATH / "conf_matrix.png", bbox_inches="tight")
plt.show()