In [5]:
import os
import pandas as pd


def fm(i, k):
    """Return the entry of ``i`` stored under ``k`` rendered with two decimals."""
    value = i[k]
    return format(value, '.2f')

def table_task_1(team_name, base_dir='/workspace/all-submissions'):
    """Render the LaTeX effectiveness table for subtask 1 (spoiler type prediction).

    Reads ``{base_dir}/{team_name}/evaluation-task-1.jsonl`` (one JSON object per
    line) and emits one table row per entry whose ``Dataset`` equals
    ``task-1-type-classification-20221115-test``.

    Args:
        team_name: Name of the team; used as the sub-directory of ``base_dir``
            and inserted into the table caption.
        base_dir: Root directory containing one folder per team.

    Returns:
        The LaTeX source of a ``table*`` environment, or an empty string when
        the evaluation file does not exist or contains no entry whose
        ``Dataset`` starts with ``task-1``.
    """
    path = f'{base_dir}/{team_name}/evaluation-task-1.jsonl'
    if not os.path.isfile(path):
        return ""

    df = pd.read_json(path, lines=True)

    # `.any()` checks whether at least one row belongs to task 1; taking `len()`
    # of the boolean mask would only count the rows of the whole file.
    if 'Dataset' not in df.columns or not df['Dataset'].str.startswith('task-1', na=False).any():
        return ""

    measures = ['balanced-accuracy',
                'precision-for-phrase-spoilers', 'recall-for-phrase-spoilers', 'f1-for-phrase-spoilers',
                'precision-for-passage-spoilers', 'recall-for-passage-spoilers', 'f1-for-passage-spoilers',
                'precision-for-multi-spoilers', 'recall-for-multi-spoilers', 'f1-for-multi-spoilers']

    test_runs = df[df['Dataset'] == 'task-1-type-classification-20221115-test']
    rows = []
    for _, run in test_runs.iterrows():
        # Drop the task prefixes from the software name.
        # NOTE(review): values are inserted verbatim; LaTeX special characters
        # (e.g. '_' or '&') in names are not escaped -- confirm this is intended.
        approach = run["Software"].replace("TASK2: ", "").replace("TASK1: ", "").replace("Task1: ", "")
        cells = ''.join(f' & {run[k]:.2f} ' for k in measures)
        rows.append(f'{run["Team"]} & {approach} & {run["run_id"]} {cells}\\\\\n')
    rows_test = ''.join(rows)

    return """\\begin{table*}[t]
\\centering
\\small
\\renewcommand{\\tabcolsep}{3.5pt}
\\caption{Overview of the effectiveness in spoiler type prediction (subtask~1 at SemEval~2023 Task~5) measured as balanced accuracy over all three spoiler types and precision (Pr.), recall (Rec.), and F1 score (F1) for phrase, passage, and multi spoilers on the test set. We report all runs by Team """ + team_name + """ and the baseline as well as one synthetic run that reports the best respectively the median observed scores for each measure.}
\\label{table-effectiveness-task-1}
\\begin{tabular}{@{}lllcccccccccc@{}}
\\toprule
  \\multicolumn{3}{c}{\\bfseries Submission} & \\bfseries Accuracy     &  \\multicolumn{3}{c}{\\bfseries Phrase} & \\multicolumn{3}{c}{\\bfseries Passage} & \\multicolumn{3}{c}{\\bfseries Multi}\\\\
\\cmidrule(l@{\\tabcolsep}){1-3}
\\cmidrule(l@{\\tabcolsep}){5-7}
\\cmidrule(l@{\\tabcolsep}){8-10}
\\cmidrule(l@{\\tabcolsep}){11-13}
Team & Approach & Run & & Pr. & Rec. & F1 & Pr. & Rec. & F1 & Pr. & Rec. & F1 \\\\

\\midrule

""" + rows_test + """

\\bottomrule
\\end{tabular}
\\end{table*}"""

def table_task_2(team_name, base_dir='/workspace/all-submissions'):
    """Render the LaTeX effectiveness table for subtask 2 (spoiler generation).

    Reads ``{base_dir}/{team_name}/evaluation-task-2.jsonl`` (one JSON object per
    line) and emits one table row per entry whose ``Dataset`` equals
    ``task-2-spoiler-generation-20221115-test``.

    Args:
        team_name: Name of the team; used as the sub-directory of ``base_dir``
            and inserted into the table caption.
        base_dir: Root directory containing one folder per team.

    Returns:
        The LaTeX source of a ``table*`` environment, or an empty string when
        the evaluation file does not exist or contains no entry whose
        ``Dataset`` starts with ``task-2``.
    """
    path = f'{base_dir}/{team_name}/evaluation-task-2.jsonl'
    if not os.path.isfile(path):
        return ""

    df = pd.read_json(path, lines=True)

    # `.any()` checks whether at least one row belongs to task 2; taking `len()`
    # of the boolean mask would only count the rows of the whole file.
    if 'Dataset' not in df.columns or not df['Dataset'].str.startswith('task-2', na=False).any():
        return ""

    measures = ['bleu-score-all-spoilers', 'bert-score-all-spoilers', 'meteor-score-all-spoilers',
                'bleu-score-phrase-spoilers', 'bert-score-phrase-spoilers', 'meteor-score-phrase-spoilers',
                'bleu-score-passage-spoilers', 'bert-score-passage-spoilers', 'meteor-score-passage-spoilers',
                'bleu-score-multi-spoilers', 'bert-score-multi-spoilers', 'meteor-score-multi-spoilers']

    test_runs = df[df['Dataset'] == 'task-2-spoiler-generation-20221115-test']
    table_rows = []
    for _, run in test_runs.iterrows():
        # Drop the task prefixes from the software name.
        # NOTE(review): values are inserted verbatim; LaTeX special characters
        # (e.g. '_' or '&') in names are not escaped -- confirm this is intended.
        approach = run["Software"].replace("TASK2: ", "").replace("TASK1: ", "")
        cells = ''.join(f' & {run[k]:.2f} ' for k in measures)
        table_rows.append(f'{run["Team"]} & {approach} & {run["run_id"]} {cells}\\\\\n')
    rows = ''.join(table_rows)

    return """\\begin{table*}[t]
\\centering
\\small
\\renewcommand{\\tabcolsep}{3pt}
\\caption{Overview of the effectiveness in spoiler generation (subtask~2 at SemEval~2023 Task~5) measured as BLEU-4 (BL4), BERTScore (BSc.) and METEOR (MET) over all clickbait posts respectively those requiring phrase, passage, or multi spoilers on the test set. We report all runs by Team """ + team_name + """ and the baseline as well as one synthetic run that reports the best respectively the median observed scores for each measure.}
\\label{table-effectiveness-task-2}
\\begin{tabular}{@{}lllcccccccccccccc@{}}
\\toprule
  \\multicolumn{3}{c}{\\bfseries Submission} & \\multicolumn{3}{c}{\\bfseries All}     &  \\multicolumn{3}{c}{\\bfseries Phrase} & \\multicolumn{3}{c}{\\bfseries Passage} & \\multicolumn{3}{c}{\\bfseries Multi}\\\\
\\cmidrule(l@{\\tabcolsep}){1-3}
\\cmidrule(l@{\\tabcolsep}){4-6}
\\cmidrule(l@{\\tabcolsep}){7-9}
\\cmidrule(l@{\\tabcolsep}){10-12}
\\cmidrule(l@{\\tabcolsep}){13-15}
Team & Approach & Run & BL4 & BSc. & MET & BL4 & BSc. & MET & BL4 & BSc. & MET & BL4 & BSc. & MET\\\\

\\midrule

""" + rows + """

\\bottomrule
\\end{tabular}
\\end{table*}"""

def latex_code(team_name):
    """Build the complete LaTeX document with both effectiveness tables of a team.

    The document consists of a fixed preamble, a title containing ``team_name``,
    a sentence pointing to the code that generated the tables, and the output of
    ``table_task_1`` and ``table_task_2`` (either of which may be an empty
    string).

    Args:
        team_name: Name of the team; inserted into the title and passed on to
            the two table generators.

    Returns:
        The LaTeX source of the document as a single string.
    """
    # `\url` is used for the repository link: `\href` takes two arguments (URL and
    # link text), so `\href{...}.` would typeset only the trailing '.' as the link.
    return """\\pdfoutput=1
\\documentclass[11pt]{article}
\\usepackage{ACL2023}
\\usepackage{times}
\\usepackage{latexsym}
\\usepackage{booktabs}
\\usepackage[T1]{fontenc}
\\usepackage[utf8]{inputenc}
\\usepackage{microtype}
\\usepackage{inconsolata}

\\title{Results of """ + team_name + """ at SemEval-2023 Task 5}
\\begin{document}
\\maketitle

The code that produces Table~\\ref{table-effectiveness-task-1} and Table~\\ref{table-effectiveness-task-2} is available at \\url{https://github.com/pan-webis-de/pan-code/tree/master/semeval23/generate-latex-tables.ipynb}.
""" + table_task_1(team_name) + """
 


""" + table_task_2(team_name) + """

\\end{document}
"""

# Team whose tables are rendered. It is empty here, so the paths below resolve
# directly under /workspace/all-submissions/ -- presumably this is set to a real
# team name before the cell is run.
team_name = ''

# Output folder for the generated LaTeX document.
directory = f'/workspace/all-submissions/{team_name}/latex/'
# IPython shell escapes (only valid in a notebook/IPython session): create the
# output folder and copy the style file next to the generated document.
# NOTE(review): the copied file is named `acl2023.sty` while latex_code() emits
# `\usepackage{ACL2023}` -- confirm this resolves on case-sensitive file systems.
!mkdir -p {directory}
!cp /workspace/all-submissions/acl2023.sty {directory}
# `directory` already ends with '/', so the resulting path contains '//'.
with open(f'{directory}/semeval23-clickbait-spoiling-effectiveness-tables.tex', 'w') as f:
    f.write(latex_code(team_name))
