1. Read the directory of experiments to compare
2. Compute the mean ROUGE score for each experiment
3. Keep the best-scoring experiment for each model
4. Print the best experiments in a table

In [1]:
import glob
import pandas as pd


def get_best_params(baseline_dir):
    best_rouge = 0
    best_df = None
    best_name = ""

    baseline_files = glob.glob(baseline_dir)

    for baseline_file in baseline_files:
        baseline_df = pd.read_json(baseline_file, lines=True)
        score = baseline_df['rouge'].mean()
        if score > best_rouge:
            best_rouge = score
            best_df = baseline_df
            best_name = baseline_file

    return best_rouge, best_name, best_df

In [40]:

def get_rouge_table(dataset):
    """Print the best run per LLM as a pipe-separated table and return its rows.

    For every LLM in a fixed list, the prediction files under
    ``<baseline root>/<dataset>/<llm>/*/predictions*.json`` are compared with
    ``get_best_params`` and the winning run is printed as one table line. The
    generation parameters on that line are parsed out of the name of the
    directory that holds the winning file.

    Args:
        dataset: Name of the dataset sub-directory (the notebook calls this
            with ``"NQ"`` and ``"ELI5"``).

    Returns:
        A list with one ``[llm, id, question, text, rouge]`` entry per row of
        each winning run's DataFrame; ``id`` and ``rouge`` are converted to
        strings. LLMs without any usable prediction file contribute nothing.
    """
    import sys  # local import: only needed for the missing-results warning

    llms = ["google-flan-t5-xxl", "bigscience-bloomz", "bigscience-bloom", "eleutherai-gpt-neox-20b"]

    baseline_dir = "/dccstor/srosent2/generative/baseline_llms/" + dataset
    best_examples = []

    print("experiment|rouge|passages|n-shot|top p|top k|temperature|min length|max length")
    for llm in llms:
        baseline_fnames = baseline_dir + "/" + llm + "/*/predictions*.json"

        best_rouge, best_name, best_df = get_best_params(baseline_fnames)

        if best_df is None:
            # Nothing to report for this model; warn on stderr so the table on
            # stdout stays clean, and carry on with the remaining models.
            print("no predictions found for " + llm + " (" + baseline_fnames + ")", file=sys.stderr)
            continue

        # The experiment directory is the path component directly above the
        # predictions file; its name is a "-"-separated list of settings.
        experiment = best_name.rsplit("/", 2)[-2]
        params = experiment.split("-")

        # Field layout is taken from how the pieces are indexed below
        # (presumably <x>-<key>_<passages>-<n>shot-<p>_<k>_<temp>-<key>_<min>_<max>;
        # verify against the code that names the experiment directories).
        pktemp = params[3].split("_")
        minmax = params[4].split("_")
        columns = [
            llm,
            str(best_rouge),
            params[1].split("_")[1],
            # NOTE(review): only the first character is kept, so a two-digit
            # shot count would be truncated — confirm the naming scheme.
            params[2][0],
            pktemp[0],
            pktemp[1],
            pktemp[2],
            minmax[1],
            minmax[2],
        ]
        print("|".join(columns))

        for _, row in best_df.iterrows():
            best_examples.append([llm, str(row['id']), row['question'], row['text'], str(row['rouge'])])
    return best_examples

In [42]:
import csv

# Export the per-example rows of the best run of every model, one CSV per dataset.
for dataset in ("NQ", "ELI5"):
    # Build the rows before opening the file: opening with 'w' truncates it,
    # so a failure while computing the table must not wipe an existing best.csv.
    rows = get_rouge_table(dataset)
    # newline='' is what the csv module asks for, so the writer controls line
    # endings itself and newlines embedded in question/answer text stay intact.
    with open('/dccstor/srosent2/generative/baseline_llms/' + dataset + '/best.csv', 'w',
              newline='', encoding='utf-8') as f:
        write = csv.writer(f)
        write.writerows(rows)

experiment|rouge|passages|n-shot|top p|top k|temperature|min length|max length


experiment|rouge|passages|n-shot|top p|top k|temperature|min length|max length
google-flan-t5-xxl|0.25508641580641983|True|0|0.25|100|0.5|200|1024
bigscience-bloomz|0.2541450173173494|True|0|1.0|100|0.75|100|1024
bigscience-bloom|0.22344075436693506|True|0|0.75|100|0.5|50|1024
eleutherai-gpt-neox-20b|0.16836206596932884|True|0|0.5|100|1.0|0|1024