In [1]:
import json
import zipfile
from pathlib import Path
from random import shuffle

import baycomp
import numpy as np
import polars as pl

from config import ModelConfig
from util import create_compute_metrics



# Root folder of the per-model result directories; it resolves to the same
# relative path ("Models") that the evaluation-loading cell walks.
model_dir = Path("./Models/")

In [2]:
def select_metric(metric_name, metrics, int2model):
    """Collect one metric for every model into a 2-D array.

    Args:
        metric_name: key to read from each per-subset metrics mapping.
        metrics: mapping model name -> sequence of metric mappings.
        int2model: mapping index -> model name; its iteration order fixes the
            column order of the result.

    Returns:
        Float array with one row per subset and one column per model
        (the per-model lists are stacked and then transposed).
    """
    per_model = []
    for model_idx in int2model:
        runs = metrics[int2model[model_idx]]
        per_model.append([float(run[metric_name]) for run in runs])
    return np.array(per_model).T

In [3]:
def sample(evals, num_samples, seed=42):
    """Split every model's evaluation table into disjoint random subsets.

    A single seeded permutation of the row indices is drawn and reused for
    every entry of ``evals``, so subset ``k`` of each model is built from the
    same row positions.

    Args:
        evals: mapping name -> table. Each table must expose ``.shape[0]`` and
            accept a list of row indices in ``[]`` (the notebook passes polars
            DataFrames). The row count is read from the first entry only.
            NOTE(review): all tables are assumed to have that many rows —
            confirm for new models.
        num_samples: number of subsets per model. Each subset holds
            ``n // num_samples`` rows; leftover rows are dropped.
        seed: seed for the NumPy random generator.

    Returns:
        Dict with the same keys as ``evals``; each value is a list of
        ``num_samples`` subsets.

    Raises:
        ValueError: if ``evals`` is empty, or ``num_samples`` is not between 1
            and the number of rows.
    """
    if not evals:
        raise ValueError("evals must contain at least one table")
    n = next(iter(evals.values())).shape[0]
    # Without this guard a too-large num_samples yields sample_size == 0 and an
    # obscure "range() arg 3 must not be zero"; zero yields ZeroDivisionError.
    if not 0 < num_samples <= n:
        raise ValueError(
            f"num_samples must be between 1 and the number of rows ({n}), got {num_samples}"
        )
    idx = list(range(n))
    rng = np.random.default_rng(seed)
    rng.shuffle(idx)
    sample_size = n // num_samples
    starts = range(0, sample_size * num_samples, sample_size)
    return {
        name: [table[idx[start:start + sample_size]] for start in starts]
        for name, table in evals.items()
    }
    

In [4]:
# Metric function used for every evaluation below. It is called as
# compute_metrics(y_pred, y_true) and its result is indexed by metric name
# ('f1_micro' and 'cm' are read later in this notebook).
# NOTE(review): the 5 is presumably the number of classes and
# argmax_first=False presumably means predictions are already class ids —
# confirm in util.create_compute_metrics.
compute_metrics = create_compute_metrics(5, argmax_first=False)

In [5]:
# Load every model's evaluation table, keyed by the model directory name.
# Layout walked here: <model_dir>/<group>/<model>/eval.csv.
# Non-directory entries (e.g. .DS_Store, README files) are skipped so they do
# not abort the load; a model directory without eval.csv still raises.
# Path.iterdir() yields entries in arbitrary order, so the model order may
# differ between machines.
evals = {}
for root in model_dir.iterdir():
    if not root.is_dir():
        continue
    for model in root.iterdir():
        if not model.is_dir():
            continue
        evals[model.name] = pl.read_csv(model / 'eval.csv')

In [6]:
# Split each model's predictions into 50 subsets and score every subset.
s = sample(evals, 50)

# Keys are shortened to the first two dash-separated tokens of the directory
# name (e.g. 'bert-large-portuguese-cased' -> 'bert-large'); two directories
# sharing those tokens would overwrite each other.
metrics = {}
for full_name, subsets in s.items():
    short_name = '-'.join(full_name.split('-')[:2])
    metrics[short_name] = [
        compute_metrics(subset['y_pred'].to_numpy(), subset['y_true'].to_numpy())
        for subset in subsets
    ]

In [7]:
# Bidirectional index <-> model-name maps, following the key order of `metrics`.
int2model = dict(enumerate(metrics))
models2int = {name: idx for idx, name in int2model.items()}

N = len(metrics[int2model[0]])  # number of scored subsets for the first model
M = len(int2model)              # number of models
ALPHA = 0.05  # NOTE(review): presumably a significance level; not used in the cells shown here

In [8]:
# For each selected metric keep [mean per model, std per model], both taken
# over the subsets (axis 0 of the array returned by select_metric).
selected_metrics = ['f1_micro']
computed_metrics = {}
for m in selected_metrics:
    ys = select_metric(m, metrics, int2model)
    mean_per_model = np.mean(ys, axis=0)
    std_per_model = np.std(ys, axis=0)
    computed_metrics[m] = [mean_per_model, std_per_model]


In [9]:
def _square_confusion_matrix(raw_cm):
    """Return ``raw_cm`` as a square 2-D array, inferring the side from its size.

    The side length is the integer square root of the number of entries, so the
    result does not depend on how many models are being compared. ``reshape``
    raises ValueError if the number of entries is not a perfect square.
    """
    cm = np.array(raw_cm)
    n_classes = int(round(cm.size ** 0.5))
    return cm.reshape(n_classes, n_classes)


# Confusion matrix of each model over its full evaluation table, keyed by the
# model directory name. The matrix side is the number of classes; it was
# previously reshaped with M (the number of models), which only worked because
# both happened to be equal.
cms = {
    k: _square_confusion_matrix(
        compute_metrics(v['y_pred'].to_numpy(), v['y_true'].to_numpy())['cm']
    )
    for k, v in evals.items()
}

In [10]:
from sklearn.metrics import ConfusionMatrixDisplay

In [11]:
import matplotlib.pyplot as plt

In [12]:
# Class names used as confusion-matrix tick labels, in alphabetical order.
# NOTE(review): this assumes class id k corresponds to the k-th name in
# alphabetical order — confirm against the label encoding used in training.
labels = sorted(["Entertainment", "Juridical", "Instructional", "Journalistic", "Virtual"])

In [13]:
# Show f1_micro per model: first array is the mean, second the standard deviation.
computed_metrics['f1_micro']

[array([0.7313722 , 0.88504933, 0.87902018, 0.8761491 , 0.87884417]),
 array([0.00303537, 0.00223536, 0.00215734, 0.002282  , 0.00217145])]

In [14]:
def plot_confusion_matrix(model_name, title):
    """Draw one model's confusion matrix and save it as ``<model_name>.pdf``.

    Args:
        model_name: key into the notebook-level ``cms`` dict; also used as the
            stem of the output file name.
        title: text placed above the plot.

    The matrix is taken from ``cms`` and the tick labels from the
    notebook-level ``labels``. The file is written to the current working
    directory.
    """
    tick_size, axis_label_size, title_size = 14, 16, 18

    display = ConfusionMatrixDisplay(cms[model_name], display_labels=labels)
    display.plot(values_format='d', cmap='terrain')

    # The pyplot calls below style whatever figure is current after plot().
    plt.xticks(fontsize=tick_size, rotation=45)
    plt.yticks(fontsize=tick_size)
    plt.xlabel('Domínio Previsto', fontsize=axis_label_size)
    plt.ylabel('Domínio Verdadeiro', fontsize=axis_label_size)
    plt.title(title, fontsize=title_size)

    plt.savefig(f'{model_name}.pdf', bbox_inches="tight")

In [15]:
%%capture
# (model directory name, plot title) for every confusion matrix to export.
confusion_plots = [
    ('naive-bayes', 'Naive Bayes'),
    ('bert-large-portuguese-cased', 'BERT Large'),
    ('bert-base-portuguese-cased', 'BERT Base'),
    ('albertina-900m-portuguese-ptbr-encoder', 'Albertina 900m'),
    ('albertina-100m-portuguese-ptbr-encoder', 'Albertina 100m'),
]

pdf_paths = []
for model_name, title in confusion_plots:
    plot_confusion_matrix(model_name, title)
    pdf_paths.append(Path(f'{model_name}.pdf'))  # file name written by plot_confusion_matrix

# Archive and then delete only the PDFs generated above. The previous shell
# line (`zip ... *.pdf && rm *.pdf`) also swept up and removed any unrelated
# PDF in the working directory, and needed `zip`/`rm` on the PATH.
# The archive is rewritten from scratch on every run.
with zipfile.ZipFile('confusao.zip', 'w', compression=zipfile.ZIP_DEFLATED) as archive:
    for pdf_path in pdf_paths:
        archive.write(pdf_path, arcname=pdf_path.name)
# Reached only if the archive was written without error.
for pdf_path in pdf_paths:
    pdf_path.unlink()

In [16]:
def stringfy(metrics):
    """Format per-model statistics as ``"mean (std)"`` strings with 3 decimals.

    Args:
        metrics: pair ``[means, stds]`` of equally long sequences, one value
            per model.

    Returns:
        List of strings, one per model, in the order of the inputs.
    """
    means, stds = metrics[0], metrics[1]
    # The length comes from the data itself rather than the notebook-level M,
    # so the function works on its own and cannot index past the arrays.
    return [f"{mean:.3f} ({std:.3f})" for mean, std in zip(means, stds)]

# Columns of the summary table: model names first, then one formatted
# "mean (std)" column per computed metric.
model_names = [int2model[i] for i in range(M)]
formatted_stats = {name: stringfy(stats) for name, stats in computed_metrics.items()}
cols = dict(models=model_names, **formatted_stats)

In [17]:
# Summary table built from `cols`: a 'models' column plus one text column per metric.
avgs = pl.DataFrame(cols)

In [18]:
# Save the summary table as avgs.csv in the current working directory.
avgs.write_csv("avgs.csv")

In [19]:
from baycomp import SignedRankTest, SignTest

In [21]:
%%capture
def baycomp_latex_table(metric_name, *, highlight_color="EF8C40"):
    """Build a LaTeX ``tabular`` with pairwise signed-rank comparisons of all models.

    For every pair of models (i < j) the per-subset values of ``metric_name``
    are passed to ``SignedRankTest.probs`` with ``rope=0.001`` and the three
    returned probabilities fill the middle columns of one row.
    NOTE(review): the three values are presumably P(model 1 better),
    P(practically equivalent) and P(model 2 better), matching the
    '>', '=', '<' header — confirm against the baycomp documentation.

    Args:
        metric_name: metric key read via ``select_metric`` from the
            notebook-level ``metrics``; also shown (title-cased, underscores
            replaced by spaces) as the table heading.
        highlight_color: HTML colour used in ``\\cellcolor`` for the largest
            probability of each row (all of them on ties).

    Returns:
        The complete table as a string.
    """
    scores = select_metric(metric_name, metrics, int2model)

    def cell(align, content):
        # Single-column \multicolumn wrapper that overrides the column alignment.
        return f"\\multicolumn{{1}}{{{align}}}{{{content}}}"

    def prob_cell(probs, pos):
        # Two-decimal probability, coloured when it is the row maximum.
        text = f"{probs[pos]:.2f}"
        if max(probs) == probs[pos]:
            text = f"\\cellcolor[HTML]{{{highlight_color}}}" + text
        return cell("c|", text)

    heading = metric_name.title().replace('_', ' ')
    header_cells = [
        cell("r|", 'Modelo 1'),
        cell("c|", '\\textgreater{}'),
        cell("c|", '='),
        cell("c|", '\\textless{}'),
        cell("l", 'Modelo 2'),
    ]

    pieces = [
        "\\begin{tabular}{@{}lllll@{}}\\hline\n\\toprule\n",
        f"\\multicolumn{{5}}{{c}}{{{heading}}}\\\\ \\midrule\n",
        ' & '.join(header_cells) + '\\\\ \\midrule\n',
    ]

    for i in range(M):
        for j in range(i + 1, M):
            probs = SignedRankTest.probs(scores[:, i], scores[:, j], rope=0.001)
            middle = ' & '.join(prob_cell(probs, pos) for pos in range(3))
            # Model names are substituted into the %s slots after the numeric
            # cells have been rendered.
            row_template = f"%s & {middle} & %s\\\\\n"
            pieces.append(row_template % (cell("r|", int2model[i]), cell("l", int2model[j])))

    pieces.append("\n\\end{tabular}\n")
    return ''.join(pieces)

# Write one LaTeX table per computed metric, remembering exactly which files
# were produced.
tex_paths = []
for metric_name in computed_metrics:
    tex_path = Path(f'{metric_name}.tex')
    tex_path.write_text(baycomp_latex_table(metric_name))
    tex_paths.append(tex_path)

# Archive and then delete only the tables generated above. The previous shell
# line (`zip ... *.tex && rm *.tex`) also swept up and removed any unrelated
# .tex file in the working directory, and needed `zip`/`rm` on the PATH.
# The archive is rewritten from scratch on every run.
with zipfile.ZipFile('tables.zip', 'w', compression=zipfile.ZIP_DEFLATED) as archive:
    for tex_path in tex_paths:
        archive.write(tex_path, arcname=tex_path.name)
# Reached only if the archive was written without error.
for tex_path in tex_paths:
    tex_path.unlink()