In [1]:
import numpy as np
import collections
import matplotlib.pyplot as plt
import regex as re

In [2]:
from result_utils import read_results_task

In [3]:
# Human-readable names of the three evaluation splits; the table-building
# loops look these up by position (row_labels[idx]).
row_labels = [
    'Overall',
    'Familiar',
    'Unfamiliar',
]

##### Generation

In [3]:
# Models evaluated on the generation task.
models = ['T5-base', 'T5-small', 'BART']

# Metric keys for the generation table: the three base metrics first, then the
# same three with the ':Top' suffix, then with the ':MRR' suffix (9 in total).
gen_metrics = [
    f'{metric}{suffix}'
    for suffix in ('', ':Top', ':MRR')
    for metric in ('bleu_precision', 'rougeL_precision', 'bert_precision')
]

In [5]:
# Load the generation-task ('gen') results for every entry of `models`.
# NOTE(review): read_results_task is imported from result_utils; the value is
# indexed below as results[model][metric][dim][class_num] -- confirm that
# schema against the helper.
results = read_results_task('gen', models)

In [7]:
# Print the LaTeX body rows of the generation table: for every model, one row
# per (split, class) pair, with one cell per entry of `gen_metrics`.
# Raw strings are used for the LaTeX snippets so that backslashes such as the
# one in "\midrule" are not parsed as (invalid) Python escape sequences.
for model in models:
    print(r'\midrule')
    for idx, dim in enumerate(['overall', 'known', 'unknown']):
        for class_num, class_name in enumerate(['Accurate', 'Incongruous', 'Nonfactual']):
            # First column: the rotated model name, emitted once and spanning
            # all 9 rows (3 splits x 3 classes) of this model's group.
            if idx == 0 and class_num == 0:
                line = rf'\multirow{{9}}{{*}}{{\rotatebox[origin=c]{{90}}{{{model}}}}} & '
            else:
                line = '& '

            # Second column: the split label, emitted once per 3-row group.
            if class_num == 0:
                line += rf'\multirow{{3}}{{*}}{{{row_labels[idx]}}} & '
            else:
                line += '& '

            line += f'{class_name} & '
            # NOTE: round() does not pad with zeros, so 0.620 is printed as 0.62.
            line += ' & '.join(
                f'${round(results[model][metric][dim][class_num], 3)}$'
                for metric in gen_metrics
            )

            # LaTeX row terminator: a space followed by two backslashes.
            line += r' \\'
            print(line)

\midrule
\multirow{9}{*}{\rotatebox[origin=c]{90}{T5-base}} & \multirow{3}{*}{Overall} & Accurate & $0.483$ & $0.456$ & $0.662$ & $0.508$ & $0.517$ & $0.658$ & $0.706$ & $0.714$ & $0.795$ \\
& & Incongruous & $0.362$ & $0.337$ & $0.462$ & $0.189$ & $0.185$ & $0.091$ & $0.62$ & $0.635$ & $0.463$ \\
& & Nonfactual & $0.463$ & $0.436$ & $0.503$ & $0.303$ & $0.298$ & $0.251$ & $0.745$ & $0.752$ & $0.576$ \\
& \multirow{3}{*}{Familiar} & Accurate & $0.629$ & $0.596$ & $0.738$ & $0.619$ & $0.614$ & $0.748$ & $0.766$ & $0.766$ & $0.845$ \\
& & Incongruous & $0.523$ & $0.502$ & $0.483$ & $0.238$ & $0.252$ & $0.104$ & $0.696$ & $0.706$ & $0.485$ \\
& & Nonfactual & $0.393$ & $0.383$ & $0.485$ & $0.142$ & $0.134$ & $0.148$ & $0.631$ & $0.639$ & $0.503$ \\
& \multirow{3}{*}{Unfamiliar} & Accurate & $0.398$ & $0.376$ & $0.618$ & $0.444$ & $0.461$ & $0.605$ & $0.67$ & $0.684$ & $0.766$ \\
& & Incongruous & $0.269$ & $0.242$ & $0.449$ & $0.16$ & $0.146$ & $0.084$ & $0.576$ & $0.594$ & $0.449$ \\
& &

In [8]:
### Without known and unknown

# Compact variant of the generation table: only the 'overall' split is
# printed, so the split-label column is omitted and each model spans 3 rows
# (one per class). Raw strings keep the LaTeX backslashes from being parsed as
# (invalid) Python escape sequences.
dim = 'overall'
for model in models:
    print(r'\midrule')
    for class_num, class_name in enumerate(['Accurate', 'Incongruous', 'Nonfactual']):
        # First column: the rotated model name, emitted on the first row only.
        if class_num == 0:
            line = rf'\multirow{{3}}{{*}}{{\rotatebox[origin=c]{{90}}{{{model}}}}} & '
        else:
            line = '& '

        line += f'{class_name} & '
        # NOTE: round() does not pad with zeros, so 0.620 is printed as 0.62.
        line += ' & '.join(
            f'${round(results[model][metric][dim][class_num], 3)}$'
            for metric in gen_metrics
        )

        # LaTeX row terminator: a space followed by two backslashes.
        line += r' \\'
        print(line)

\midrule
\multirow{3}{*}{\rotatebox[origin=c]{90}{T5-base}} & Accurate & $0.483$ & $0.456$ & $0.662$ & $0.508$ & $0.517$ & $0.658$ & $0.706$ & $0.714$ & $0.795$ \\
& Incongruous & $0.362$ & $0.337$ & $0.462$ & $0.189$ & $0.185$ & $0.091$ & $0.62$ & $0.635$ & $0.463$ \\
& Nonfactual & $0.463$ & $0.436$ & $0.503$ & $0.303$ & $0.298$ & $0.251$ & $0.745$ & $0.752$ & $0.576$ \\
\midrule
\multirow{3}{*}{\rotatebox[origin=c]{90}{T5-small}} & Accurate & $0.35$ & $0.324$ & $0.584$ & $0.349$ & $0.357$ & $0.508$ & $0.609$ & $0.616$ & $0.699$ \\
& Incongruous & $0.398$ & $0.367$ & $0.46$ & $0.203$ & $0.213$ & $0.105$ & $0.626$ & $0.645$ & $0.477$ \\
& Nonfactual & $0.58$ & $0.543$ & $0.523$ & $0.447$ & $0.429$ & $0.388$ & $0.83$ & $0.839$ & $0.657$ \\
\midrule
\multirow{3}{*}{\rotatebox[origin=c]{90}{BART}} & Accurate & $0.058$ & $0.064$ & $0.362$ & $0.006$ & $0.009$ & $0.024$ & $0.356$ & $0.362$ & $0.366$ \\
& Incongruous & $0.374$ & $0.27$ & $0.533$ & $0.148$ & $0.195$ & $0.163$ & $0.561$ & $0.5

##### Identification

In [4]:
# Settings for the identification tables. Note that `models` is rebound here,
# replacing the generation models assigned earlier in the notebook.
models = [
    'BERT',
    'RoBERTa',
    'Electra',
]
# Metric keys; each contributes one table column per task.
ident_metrics = [
    'Top',
    'MRR',
]
# Task names handed to read_results_task and used as keys of `results`.
tasks = [
    'claim',
    'desc',
]

In [5]:
# Load the results of every identification task, keyed by task name.
# `results` is rebound here and no longer holds the generation results.
results = {task: read_results_task(task, models) for task in tasks}

In [6]:
# Print the LaTeX body rows of the identification table: for every model, one
# row per (split, class) pair, with one cell per (task, metric) combination.
# Raw strings are used for the LaTeX snippets so that backslashes such as the
# one in "\midrule" are not parsed as (invalid) Python escape sequences.
for model in models:
    print(r'\midrule')
    for idx, dim in enumerate(['overall', 'known', 'unknown']):
        for class_num, class_name in enumerate(['Accurate', 'Incongruous', 'Nonfactual', 'Both']):
            # First column: the rotated model name, emitted once and spanning
            # all 12 rows (3 splits x 4 classes) of this model's group.
            if idx == 0 and class_num == 0:
                line = rf'\multirow{{12}}{{*}}{{\rotatebox[origin=c]{{90}}{{{model}}}}} & '
            else:
                line = '& '

            # Second column: the split label, emitted once per 4-row group.
            if class_num == 0:
                line += rf'\multirow{{4}}{{*}}{{{row_labels[idx]}}} & '
            else:
                line += '& '

            line += f'{class_name} & '

            # All task/metric cells are joined with the same separator, so the
            # row stays well-formed for any number of tasks (not just two).
            # NOTE: round() does not pad with zeros, so 0.620 is printed as 0.62.
            line += ' & '.join(
                f'${round(results[task][model][metric][dim][class_num], 3)}$'
                for task in tasks
                for metric in ident_metrics
            )

            # LaTeX row terminator: a space followed by two backslashes.
            line += r' \\'
            print(line)

\midrule
\multirow{12}{*}{\rotatebox[origin=c]{90}{BERT}} & \multirow{4}{*}{Overall} & Accurate & $0.783$ & $0.883$ & $0.667$ & $0.809$ \\
& & Incongruous & $0.089$ & $0.462$ & $0.247$ & $0.576$ \\
& & Nonfactual & $0.115$ & $0.404$ & $0.049$ & $0.373$ \\
& & Both & $0.013$ & $0.335$ & $0.037$ & $0.325$ \\
& \multirow{4}{*}{Familiar} & Accurate & $0.879$ & $0.933$ & $0.661$ & $0.809$ \\
& & Incongruous & $0.091$ & $0.482$ & $0.271$ & $0.589$ \\
& & Nonfactual & $0.03$ & $0.348$ & $0.051$ & $0.383$ \\
& & Both & $0.0$ & $0.319$ & $0.017$ & $0.302$ \\
& \multirow{4}{*}{Unfamiliar} & Accurate & $0.714$ & $0.846$ & $0.682$ & $0.807$ \\
& & Incongruous & $0.088$ & $0.407$ & $0.152$ & $0.502$ \\
& & Nonfactual & $0.176$ & $0.444$ & $0.105$ & $0.459$ \\
& & Both & $0.022$ & $0.346$ & $0.061$ & $0.326$ \\
\midrule
\multirow{12}{*}{\rotatebox[origin=c]{90}{RoBERTa}} & \multirow{4}{*}{Overall} & Accurate & $0.809$ & $0.901$ & $0.711$ & $0.839$ \\
& & Incongruous & $0.102$ & $0.457$ & $0.224$ & $

In [7]:
### Without known and unknown

# Compact variant of the identification table: only the 'overall' split is
# printed, so the split-label column is omitted and each model spans 4 rows
# (one per class). Raw strings keep the LaTeX backslashes from being parsed as
# (invalid) Python escape sequences.
dim = 'overall'
for model in models:
    print(r'\midrule')
    for class_num, class_name in enumerate(['Accurate', 'Incongruous', 'Nonfactual', 'Both']):
        # First column: the rotated model name, emitted on the first row only.
        if class_num == 0:
            line = rf'\multirow{{4}}{{*}}{{\rotatebox[origin=c]{{90}}{{{model}}}}} & '
        else:
            line = '& '

        line += f'{class_name} & '

        # All task/metric cells are joined with the same separator, so the row
        # stays well-formed for any number of tasks (not just two).
        # NOTE: round() does not pad with zeros, so 0.620 is printed as 0.62.
        line += ' & '.join(
            f'${round(results[task][model][metric][dim][class_num], 3)}$'
            for task in tasks
            for metric in ident_metrics
        )

        # LaTeX row terminator: a space followed by two backslashes.
        line += r' \\'
        print(line)

\midrule
\multirow{4}{*}{\rotatebox[origin=c]{90}{BERT}} & Accurate & $0.783$ & $0.883$ & $0.667$ & $0.809$ \\
& Incongruous & $0.089$ & $0.462$ & $0.247$ & $0.576$ \\
& Nonfactual & $0.115$ & $0.404$ & $0.049$ & $0.373$ \\
& Both & $0.013$ & $0.335$ & $0.037$ & $0.325$ \\
\midrule
\multirow{4}{*}{\rotatebox[origin=c]{90}{RoBERTa}} & Accurate & $0.809$ & $0.901$ & $0.711$ & $0.839$ \\
& Incongruous & $0.102$ & $0.457$ & $0.224$ & $0.579$ \\
& Nonfactual & $0.083$ & $0.397$ & $0.066$ & $0.355$ \\
& Both & $0.006$ & $0.329$ & $0.0$ & $0.31$ \\
\midrule
\multirow{4}{*}{\rotatebox[origin=c]{90}{Electra}} & Accurate & $0.841$ & $0.904$ & $0.368$ & $0.599$ \\
& Incongruous & $0.076$ & $0.434$ & $0.276$ & $0.538$ \\
& Nonfactual & $0.076$ & $0.419$ & $0.224$ & $0.511$ \\
& Both & $0.006$ & $0.326$ & $0.132$ & $0.435$ \\
