In [None]:
import matplotlib as mpl
# Use the pgf backend (must be set before pyplot imported)
mpl.use('pgf')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
from pathlib import Path

In [ ]:
# Apply the seaborn "paper" style sheet (the pgf backend itself is selected in
# the import cell). Matplotlib 3.6 renamed the bundled seaborn styles to
# "seaborn-v0_8-*" and 3.8 removed the old names, so use whichever name this
# installation actually provides.
_paper_style = 'seaborn-paper' if 'seaborn-paper' in plt.style.available else 'seaborn-v0_8-paper'
plt.style.use(_paper_style)


In [None]:
# Folders holding the CSV inputs that the loading cell globs over.
sumy_individual_path = Path('output/summaries/sumy_individual/')
ours_individual_path = Path('output/summaries/methods_reviews_individual/')

# Destinations for the generated LaTeX table and the saved figure.
TABLE_PATH = Path("../../../EMIRR/papers/rsa_multi_document/tables/")
FIGURE = Path("../../../EMIRR/papers/rsa_multi_document/figures/")

# Create both output folders (and any missing parents) up front.
for _out_dir in (TABLE_PATH, FIGURE):
    _out_dir.mkdir(parents=True, exist_ok=True)

In [None]:

# Load every result CSV from both input folders into one long DataFrame.
dfs = []

# First folder: the last '-_-' field of the file name may end in "_<int>";
# when it does, that integer is recorded as the sentence count of the file.
for file in sumy_individual_path.glob('*.csv'):
    df = pd.read_csv(file)

    last_field = file.stem.split('-_-')[-1].split('_')
    if len(last_field) > 1:
        df['metadata/sentence_count'] = int(last_field[-1])

    dfs.append(df)

# Second folder: the file name carries exactly seven '-_-'-separated fields.
for file in ours_individual_path.glob('*.csv'):
    # Read the file FIRST. Previously the annotations below were written to the
    # frame left over from the previous iteration and `df` was only replaced
    # afterwards, so each file was tagged with the metadata of the next one and
    # the last file received none at all.
    df = pd.read_csv(file)

    # Unpacking enforces the seven-field naming scheme (ValueError otherwise);
    # only the first and last fields are used.
    (generation_method, _dataset, _generation_params, _date,
     _rsa_param, _rsa_ranking_model, method_field) = file.stem.split('-_-')

    # method_field looks like "<method>_<n>": keep everything before the last '_'
    # (empty string when there is no underscore, as before).
    method = method_field.rpartition('_')[0]

    # Only fall back to the file-name method when the CSV does not provide one.
    if "metadata/method" not in df.columns:
        df['metadata/method'] = method

    df['Generation Method'] = generation_method
    dfs.append(df)

if not dfs:
    raise FileNotFoundError(
        f"No CSV files found in {sumy_individual_path} or {ours_individual_path}"
    )

df = pd.concat(dfs)
del dfs

# Drop the "Unnamed: N" columns that appear when an index was saved to the CSV.
df = df.drop(columns=[c for c in df.columns if "Unnamed" in c])



In [None]:

# Give rows without a method an explicit label so the string test below is
# defined for every row, then drop every row whose method mentions
# "lead" or "Lead".
_method_col = df['metadata/method'].fillna('N/A')
df['metadata/method'] = _method_col
df = df[~_method_col.str.contains('[lL]ead')]



In [None]:
def fix_generation(x):
    """Relabel the "abstractive_sentences" generation method.

    Returns "extractive_sentences" when ``x`` equals "abstractive_sentences";
    any other value (including missing values) is returned unchanged.
    """
    return "extractive_sentences" if x == "abstractive_sentences" else x


# Normalise the generation-method labels element by element.
df['Generation Method'] = df["Generation Method"].map(fix_generation)

In [None]:
# Single integer size column: the sum of the two count columns, with a
# missing value in either one counted as 0.
_n_sentences = df['metadata/n_sentences'].fillna(0)
_sentence_count = df['metadata/sentence_count'].fillna(0)
df['N'] = (_n_sentences + _sentence_count).apply(int)

def fix_methods(x):
    """Map a raw method identifier to the name shown in tables and plots.

    Anything whose string form contains "consensus" becomes "Agreement";
    otherwise anything containing "rsa" becomes "Speaker+Agreement".
    "consensus" is checked first, so it wins when both substrings occur.
    Every other value is returned unchanged.
    """
    text = str(x)
    for needle, display_name in (("consensus", "Agreement"), ("rsa", "Speaker+Agreement")):
        if needle in text:
            return display_name
    return x
    
# Replace raw method identifiers with their display names, element by element.
df['metadata/method'] = df['metadata/method'].map(fix_methods)



In [None]:
# Inspection only: list the distinct sentence-count values present in the data
# (NaN appears for rows whose source file did not set the column).
df['metadata/sentence_count'].unique()

In [None]:

# Build the SEAHORSE summary table (mean ± std per method) and export it to LaTeX.

# Metric used to pick the best reranking model for each (method, id, generation method).
metric = 'SHMetric/Main ideas/proba_1'

# Every "SHMetric/<name>/proba_1" column present in the data.
SHMetric = df.columns[df.columns.str.contains('SHMetric') & df.columns.str.contains('proba_1')].tolist()

toplot = df.copy()
# groupby drops rows whose key is NaN, so give missing keys an explicit label.
toplot['metadata/reranking_model'] = toplot['metadata/reranking_model'].fillna('N/A')
toplot['Generation Method'] = toplot['Generation Method'].fillna('N/A')

# numeric_only=True: average the score columns only. Older pandas silently
# dropped non-numeric columns here; pandas >= 2.0 raises a TypeError instead.
toplot = toplot.groupby(["metadata/method", "id", "Generation Method", "metadata/reranking_model"]).mean(numeric_only=True)

# For each (method, id, generation method) keep the reranking model with the highest `metric`.
best_rows = toplot.groupby(["metadata/method", "id", "Generation Method"])[metric].idxmax()
toplot = toplot.loc[best_rows].reset_index()

# Mean and standard deviation of every SEAHORSE score per method. Selecting the
# score columns before aggregating keeps string columns out of mean/std.
avg = toplot.groupby(["metadata/method"])[SHMetric].agg(['mean', 'std'])

display(avg)

# Shorten the column headers BY NAME. The previous positional rename assumed a
# fixed column order (and exactly six metrics), so a different CSV column order
# would silently attach the wrong header to a score.
SHORT_NAMES = {
    'Comprehensible': 'Compr.',
    'Repetition': 'Repet.',
    'Grammar': 'Gram.',
    'Attribution': 'Attr.',
    'Main ideas': 'M. i.',
    'Conciseness': 'Conc.',
}


def _short_header(column_name):
    """Return the abbreviated header for a "SHMetric/<name>/proba_1" column."""
    name = column_name.split("/")[1]
    return SHORT_NAMES.get(name, name)


avg.columns = pd.MultiIndex.from_tuples([(_short_header(col), stat) for col, stat in avg.columns])


def map_ours(x):
    """Group label for a method: "Ours" if its name contains "Agreement", else "Bas."."""
    if "Agreement" in x:
        return "Ours"
    else:
        return "Bas."


# Two-level row index (group, method), sorted so each group's rows are contiguous.
methods = avg.index
avg.index = pd.MultiIndex.from_arrays([methods.map(map_ours), methods], names=['Ours', 'Method'])
avg = avg.sort_index()

style = avg.style
style = style.format("{:.2f}")

idx = pd.IndexSlice

# bold the best (highest) value in each mean column
style = style.highlight_max(axis=0, subset=idx[:, idx[:, 'mean']], props="bfseries: ;")

# grey out the std columns and prefix them with a +/- sign
style = style.set_properties(**{'color':'[HTML]{A0A1A3}'} ,subset=(idx[:], idx[:, 'std']))
style = style.format("±{:.2f}", subset=(idx[:], idx[:, 'std']))

# Hide the mean/std header row. Styler.hide replaces hide_columns, which was
# deprecated in pandas 1.4 and removed in 2.0.
style = style.hide(axis="columns", level=1)

# to latex
latex = style.to_latex(clines="skip-last;data", hrules=True, multirow_align="l", environment="table*", caption="Estimated human judgment using the SEAHORSE metrics for all  baselines and our templated summaries compared against each document independently. M. i. stands for Main ideas, Attr. for Attribution, Gram. for Grammar, Compr. for Comprehensible, Conc. for Conciseness, and Repet. for Repetition. The best value in each column is in bold.")
display(style)

# wrap the tabular in a resizebox so the table fits the text width
latex = latex.replace("\\begin{tabular}", "\\resizebox{\\textwidth}{!}{\\begin{tabular}")
latex = latex.replace("\\end{tabular}", "\\end{tabular}}")

# Write with an explicit encoding: the table contains the non-ASCII "±" sign.
with open(TABLE_PATH / "seahorse.tex", "w", encoding="utf-8") as f:
    f.write(latex)

In [None]:
# Bar chart of the "Main ideas" score per method, one hue per reranking model
# and one panel per generation method; saved as a PDF for the paper.
metric = 'SHMetric/Main ideas/proba_1'
baseline_level = 0.215  # height of the dashed baseline reference line
label_size = 25         # font size shared by legend, axis labels, ticks and titles

# white grid
sns.set(style="whitegrid")

# numeric_only=True: average the score columns only. Older pandas silently
# dropped non-numeric columns here; pandas >= 2.0 raises a TypeError instead.
# Rows with a missing reranking model or generation method are dropped by groupby.
avg = (
    df.groupby(["metadata/method", "id", "metadata/reranking_model", "Generation Method"])
    .mean(numeric_only=True)
    .reset_index()
    .sort_values(metric)
)

# rename columns with human readable names
avg = avg.rename(columns={
    'metadata/method': 'Method',
    'metadata/reranking_model': 'Reranking Model',
    metric: 'Main Ideas'
})

g = sns.catplot(data=avg, y="Main Ideas", x="Method", hue="Reranking Model", col="Generation Method", kind="bar")

# Replace the default legend with a two-column figure-level legend.
handles, labels = g._legend_data.values(), g._legend_data.keys()
g._legend.remove()
g.fig.legend(handles, labels, loc='upper center', ncol=2, fontsize=label_size, title_fontsize=label_size, title="Reranking Model", bbox_to_anchor=(0.4, -0.3))

# Panel titles show only the generation method.
g.set_titles("{col_name}")

for ax in g.axes.flat:
    # dashed reference line for the baseline
    ax.axhline(baseline_level, ls='--', color='black', linewidth=5)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=30)

    # bigger, bold labels; the x axis needs no label because the ticks name the methods
    ax.set_xlabel("")
    ax.set_ylabel(ax.get_ylabel(), fontsize=label_size, fontweight='bold')
    ax.set_xticklabels(ax.get_xticklabels(), fontsize=label_size, fontweight='bold')

    # bigger, bold title
    ax.set_title(ax.get_title(), fontsize=label_size, fontweight='bold')

# TODO: add an annotation for the baseline line on the first axis

plt.xticks(rotation=30)

# save figure
g.savefig(FIGURE / "seahorse_main_ideas.pdf")





In [None]:
# Per-method bar plot of `metric`, keeping for every (method, id, generation
# method) only the reranking model that scores highest on it.
metric = 'SHMetric/Main ideas/proba_1'

toplot = df.copy()
# groupby drops rows whose key is NaN, so give missing keys an explicit label.
toplot['metadata/reranking_model'] = toplot['metadata/reranking_model'].fillna('N/A')
toplot['Generation Method'] = toplot['Generation Method'].fillna('N/A')

# numeric_only=True: average the score columns only. Older pandas silently
# dropped non-numeric columns here; pandas >= 2.0 raises a TypeError instead.
toplot = toplot.groupby(["metadata/method", "id", "Generation Method", "metadata/reranking_model"]).mean(numeric_only=True)
best_rows = toplot.groupby(["metadata/method", "id", "Generation Method"])[metric].idxmax()
toplot = toplot.loc[best_rows].reset_index()
toplot = toplot[~toplot['metadata/method'].str.contains('Lead')]

toplot = toplot.sort_values(metric, ascending=True)

# Mean score per method in ascending order; its index fixes the bar order.
order = toplot.groupby("metadata/method")[metric].mean().sort_values()

display(order)

sns.barplot(data=toplot, y=metric, x="metadata/method", order=order.index)

plt.xticks(rotation=45);

In [None]:
# Per-method bar plot of `metric`, keeping for every (method, id, generation
# method) only the reranking model that scores highest on it.
metric = 'SHMetric/Conciseness/proba_1'

toplot = df.copy()
# groupby drops rows whose key is NaN, so give missing keys an explicit label.
toplot['metadata/reranking_model'] = toplot['metadata/reranking_model'].fillna('N/A')
toplot['Generation Method'] = toplot['Generation Method'].fillna('N/A')

# numeric_only=True: average the score columns only. Older pandas silently
# dropped non-numeric columns here; pandas >= 2.0 raises a TypeError instead.
toplot = toplot.groupby(["metadata/method", "id", "Generation Method", "metadata/reranking_model"]).mean(numeric_only=True)
best_rows = toplot.groupby(["metadata/method", "id", "Generation Method"])[metric].idxmax()
toplot = toplot.loc[best_rows].reset_index()
toplot = toplot[~toplot['metadata/method'].str.contains('Lead')]

toplot = toplot.sort_values(metric, ascending=True)

# Mean score per method in ascending order; its index fixes the bar order.
order = toplot.groupby("metadata/method")[metric].mean().sort_values()

sns.barplot(data=toplot, y=metric, x="metadata/method", order=order.index)

plt.xticks(rotation=45);

In [None]:
# Per-method bar plot of `metric`, keeping for every (method, id, generation
# method) only the reranking model that scores highest on it.
metric = 'SHMetric/Repetition/proba_1'

toplot = df.copy()
# groupby drops rows whose key is NaN, so give missing keys an explicit label.
toplot['metadata/reranking_model'] = toplot['metadata/reranking_model'].fillna('N/A')
toplot['Generation Method'] = toplot['Generation Method'].fillna('N/A')

# numeric_only=True: average the score columns only. Older pandas silently
# dropped non-numeric columns here; pandas >= 2.0 raises a TypeError instead.
toplot = toplot.groupby(["metadata/method", "id", "Generation Method", "metadata/reranking_model"]).mean(numeric_only=True)
best_rows = toplot.groupby(["metadata/method", "id", "Generation Method"])[metric].idxmax()
toplot = toplot.loc[best_rows].reset_index()
toplot = toplot[~toplot['metadata/method'].str.contains('Lead')]

toplot = toplot.sort_values(metric, ascending=True)

# Mean score per method in ascending order; its index fixes the bar order.
order = toplot.groupby("metadata/method")[metric].mean().sort_values()

sns.barplot(data=toplot, y=metric, x="metadata/method", order=order.index)

plt.xticks(rotation=45);

In [None]:
# Per-method bar plot of `metric`, keeping for every (method, id, generation
# method) only the reranking model that scores highest on it.
# NOTE(review): this cell uses the same metric as the preceding cell and so
# draws the same plot -- possibly a different SHMetric column was intended.
metric = 'SHMetric/Repetition/proba_1'

toplot = df.copy()
# groupby drops rows whose key is NaN, so give missing keys an explicit label.
toplot['metadata/reranking_model'] = toplot['metadata/reranking_model'].fillna('N/A')
toplot['Generation Method'] = toplot['Generation Method'].fillna('N/A')

# numeric_only=True: average the score columns only. Older pandas silently
# dropped non-numeric columns here; pandas >= 2.0 raises a TypeError instead.
toplot = toplot.groupby(["metadata/method", "id", "Generation Method", "metadata/reranking_model"]).mean(numeric_only=True)
best_rows = toplot.groupby(["metadata/method", "id", "Generation Method"])[metric].idxmax()
toplot = toplot.loc[best_rows].reset_index()
toplot = toplot[~toplot['metadata/method'].str.contains('Lead')]

toplot = toplot.sort_values(metric, ascending=True)

# Mean score per method in ascending order; its index fixes the bar order.
order = toplot.groupby("metadata/method")[metric].mean().sort_values()

sns.barplot(data=toplot, y=metric, x="metadata/method", order=order.index)

plt.xticks(rotation=45);

In [None]:
# "Main ideas" score per method, split by N (one hue per value of N).
metric = 'SHMetric/Main ideas/proba_1'

# numeric_only=True: average the score columns only. Older pandas silently
# dropped non-numeric columns here; pandas >= 2.0 raises a TypeError instead.
avg = df.groupby(["metadata/method", "id", "N"]).mean(numeric_only=True).reset_index()
avg = avg.sort_values(metric)
sns.barplot(data=avg[~avg['metadata/method'].str.contains('Lead')], y=metric, x="metadata/method", hue='N')
plt.xticks(rotation=45);

In [None]:
# "Main ideas" score per method, averaged per (method, id) over all other settings.
metric = 'SHMetric/Main ideas/proba_1'

# numeric_only=True: average the score columns only. Older pandas silently
# dropped non-numeric columns here; pandas >= 2.0 raises a TypeError instead.
avg = df.groupby(["metadata/method", "id"]).mean(numeric_only=True).reset_index()
avg = avg[~avg['metadata/method'].str.contains('Lead')].sort_values(metric)
sns.barplot(data=avg, y=metric, x="metadata/method")
plt.xticks(rotation=45);

In [None]:
# rougeL per method, averaged per (method, id) over all other settings.
metric = 'rougeL'

# numeric_only=True: average the score columns only. Older pandas silently
# dropped non-numeric columns here; pandas >= 2.0 raises a TypeError instead.
avg = df.groupby(["metadata/method", "id"]).mean(numeric_only=True).reset_index()
avg = avg[~avg['metadata/method'].str.contains('Lead')].sort_values(metric)
sns.barplot(data=avg, y=metric, x="metadata/method")
plt.xticks(rotation=45);

In [None]:
# Inspection only: list every column available in the combined results frame.
df.columns