In [None]:
from pathlib import Path
import json
from matplotlib import pyplot as plt
import numpy as np

In [None]:
# Location of the `_FINAL` results file of the SumeCzech run evaluated below.
resfile = Path("results/sumec/230111T152922/_FINAL")
# Fail early if the results are missing instead of erroring later while parsing.
assert resfile.exists()

In [None]:
def load_resfile(pth):
    """Load a results file and normalize its cumulative metrics.

    The file is read as two lines of JSON: the first line is an object of
    cumulative results, the second an object with run information that must
    contain the key ``"processed_points"``.

    Parameters
    ----------
    pth : str or os.PathLike
        Path of the results file to read. The file is always read from this
        argument, never from a module-level variable.

    Returns
    -------
    tuple(dict, dict)
        ``(res, info)`` where ``res`` maps every key of the first-line object
        to ``np.array(value) / info["processed_points"]`` (presumably turning
        sums into per-point averages) and ``info`` is the second-line object
        unchanged.
    """
    # Path(...) makes plain strings acceptable as well as Path objects.
    with Path(pth).open('rt') as fin:
        cum_res = json.loads(next(fin))
        info = json.loads(next(fin))
    n_points = info["processed_points"]
    return {k: np.array(v) / n_points for k, v in cum_res.items()}, info
    
res, info = load_resfile(resfile)
# Cell output: the run info plus the share of failed points.
# NOTE(review): the ratio only works if info['failed_points_ids'] is a number;
# the key name suggests it could be a list of ids -- confirm against the file writer.
(
    info,
    f"failed percentage: {info['failed_points_ids']/info['processed_points']*100:.2f}%",
)

# Evaluation on SumeCzech


* each subplot (bluish matrix) evaluates a single experiment
* **within** the subplot:
    * the rows correspond to a ROUGE metric type: unigram / bigram / longest common subsequence
    * the columns correspond to: precision / recall / F measure



## Baseline results
reported here: https://ufal.mff.cuni.cz/sumeczech (text -> abstract, test set)

In [None]:
%matplotlib inline
# Baseline scores as listed in the notebook, nine values per baseline in
# row-major order; they are divided by 100 so they live on a 0-1 scale.
# Grid layout: rows = 1grm / 2grm / Long, columns = Prec / Rec / F1.
reported_results = {
    name: np.array(scores).reshape(3, 3) / 100
    for name, scores in {
        "first": [13.1, 17.9, 14.4, 1.9, 2.8, 2.1, 8.8, 12.0, 9.6],
        "random": [11.7, 15.5, 12.7, 1.2, 1.7, 1.3, 7.7, 10.3, 8.4],
        "textrank": [11.1, 20.8, 13.8, 1.6, 3.1, 2.0, 7.1, 13.4, 8.9],
        "tensor2tensor": [13.2, 10.5, 11.3, 1.2, 0.9, 1.0, 10.2, 8.1, 8.7],
    }.items()
}

# One heatmap per baseline, side by side.
fig, axs = plt.subplots(1, 4, figsize=(16, 4))
for col_id, (baseline_type, cell) in enumerate(reported_results.items()):
    ax = axs[col_id]
    # Fixed color range [0, 1] so every panel uses the same scale.
    im = ax.imshow(cell, vmin=0, vmax=1)
    # Print the numeric value in the middle of every matrix cell.
    for rouge_id, type_id in np.ndindex(3, 3):
        ax.text(type_id, rouge_id, f"{cell[rouge_id, type_id]:.2f}", ha="center", va="center", color="w")
    ax.set(
        xticks=[0, 1, 2],
        xticklabels=["Prec", "Rec", "F1"],
        yticks=[0, 1, 2],
        yticklabels=["1grm", "2grm", "Long"],
    )
    ax.set_title(baseline_type)


## This method

Each subplot of the grid below shows the evaluation for one combination of two hyperparameters:
* number of clusters (rows of the subplot grid)
* number of sentences per cluster (columns of the subplot grid)

In [None]:
%matplotlib inline

def dictkey_to_cellcoord(k, is_multinews):
    """Translate a result key into its (row, column) position in the 3x3 grid.

    Parameters
    ----------
    k : str
        Key of the form ``"<rouge type>_<measure type>"``, containing exactly
        one underscore (e.g. ``"1_p"``).
    is_multinews : bool
        Selects the row order: ``('1', '2', 'su4')`` when true,
        ``('1', '2', 'L')`` otherwise.

    Returns
    -------
    tuple(int, int)
        Row index of the ROUGE type and column index of the measure type,
        the columns being ordered ``('p', 'r', 'f')``.

    Raises
    ------
    ValueError
        If ``k`` does not split into exactly two parts or a part is not one
        of the expected options.
    """
    rouge_type, measure_type = k.split("_")
    if is_multinews:
        row_order = ('1', '2', 'su4')
    else:
        row_order = ('1', '2', 'L')
    column_order = ('p', 'r', 'f')
    return row_order.index(rouge_type), column_order.index(measure_type)


def cell_from_hparams(n_clust_id, n_sent_id, is_multinews):
    """Assemble the 3x3 score grid for one hyperparameter combination.

    Reads the module-level ``res`` mapping, so the result depends on which
    results are currently loaded into that variable.

    Parameters
    ----------
    n_clust_id : int
        First index into every value of ``res``.
    n_sent_id : int
        Second index into every value of ``res``.
    is_multinews : bool
        Forwarded to ``dictkey_to_cellcoord`` to pick the row order.

    Returns
    -------
    numpy.ndarray
        A ``(3, 3)`` array; positions for which ``res`` has no key stay 0.
    """
    grid = np.zeros((3, 3))
    for key, scores in res.items():
        coord = dictkey_to_cellcoord(key, is_multinews)
        grid[coord] = scores[n_clust_id][n_sent_id]
    return grid

# 5x5 grid of heatmaps: subplot [i, j] shows the scores stored at index
# [i][j] of every entry of `res` (see the axis titles for the labelling).
fig, axs = plt.subplots(5, 5, figsize=(20, 20))
for n_clust_id in range(5):
    for n_sent_id in range(5):
        ax = axs[n_clust_id, n_sent_id]
        cell = cell_from_hparams(n_clust_id, n_sent_id, is_multinews=False)
        # Fixed color range [0, 1] so all subplots are directly comparable.
        im = ax.imshow(cell, vmin=0, vmax=1)
        # Print the numeric value in the middle of every matrix cell.
        for rouge_id, type_id in np.ndindex(3, 3):
            ax.text(type_id, rouge_id, f"{cell[rouge_id, type_id]:.2f}", ha="center", va="center", color="w")
        ax.set(
            xticks=[0, 1, 2],
            xticklabels=["Prec", "Rec", "F1"],
            yticks=[0, 1, 2],
            yticklabels=["1grm", "2grm", "Long"],
        )
        # Column headers only on the top row of the grid.
        if n_clust_id == 0:
            ax.set(title=f"sents per cluster = {n_sent_id+1}")
    # Row label on the left-most subplot of each row.
    axs[n_clust_id, 0].set(ylabel=f"clusters = {n_clust_id+1}")

# Evaluation on Multi-News

## Baseline results

| Method | ROUGE - unigrams | ROUGE - bigrams | ROUGE - SU |
|--------| -----------------|-----------------|------------|
|First-3 |             39.41|            11.77| 14.51 |
|LexRank |             38.27|            12.70| 13.20 |
|TextRank|             38.44|            13.10| 13.50 |


reported here: https://arxiv.org/pdf/1906.01749v3.pdf

## This method

In [None]:
# Switch to the Multi-News run: `resfile`, `res` and `info` are rebound here,
# so everything below (including cell_from_hparams) sees these results.
resfile = Path("results/multinews/230111T133039/_FINAL")
assert resfile.exists()

res, info = load_resfile(resfile)
# Cell output: the run info plus the share of failed points.
# NOTE(review): the ratio only works if info['failed_points_ids'] is a number;
# the key name suggests it could be a list of ids -- confirm against the file writer.
(
    info,
    f"failed percentage: {info['failed_points_ids']/info['processed_points']*100:.2f}%",
)

In [None]:
# 5x5 grid of heatmaps for the Multi-News results: subplot [i, j] shows the
# scores stored at index [i][j] of every entry of `res`.
fig, axs = plt.subplots(5, 5, figsize=(20, 20))
for n_clust_id in range(5):
    for n_sent_id in range(5):
        ax = axs[n_clust_id, n_sent_id]
        cell = cell_from_hparams(n_clust_id, n_sent_id, is_multinews=True)
        # Fixed color range [0, 1] so all subplots are directly comparable.
        im = ax.imshow(cell, vmin=0, vmax=1)
        # Print the numeric value in the middle of every matrix cell.
        for rouge_id, type_id in np.ndindex(3, 3):
            ax.text(type_id, rouge_id, f"{cell[rouge_id, type_id]:.2f}", ha="center", va="center", color="w")
        # Third row is labelled "SU" here (the 'su4' keys) instead of "Long".
        ax.set(
            xticks=[0, 1, 2],
            xticklabels=["Prec", "Rec", "F1"],
            yticks=[0, 1, 2],
            yticklabels=["1grm", "2grm", "SU"],
        )
        # Column headers only on the top row of the grid.
        if n_clust_id == 0:
            ax.set(title=f"sents per cluster = {n_sent_id+1}")
    # Row label on the left-most subplot of each row.
    axs[n_clust_id, 0].set(ylabel=f"clusters = {n_clust_id+1}")