In [16]:
import numpy as np
import pandas as pd
import os
import json

cwd = os.getcwd()

results_path = cwd[:-28] + "Results/Wiki-Finetune/T5_wiki_ft_lr{}_bs{}_epchs{}/all_scores_range_0_10_trail_{}.npz"
results_path_sbert = cwd[:-28] + "Results/Wiki-Finetune/T5_wiki_ft_lr{}_bs{}_epchs{}/all_sbert_scores_0_10_trail_{}.npz"

results_intent_path = cwd[:-28] + "Results/Wiki-Finetune/T5_wiki_ft_lr{}_bs{}_epchs{}/all_scores_range_by_intent_0_10_trail_{}.txt"

intents = ["What is the state's policy regarding education?",
           "What are the major historical events in this state?",
           "How is the weather of the state?",
           "Which places seem interesting to you for visiting in this state?",
           "What are some of the most interesting things about this state?"]

sample_size = 1
learning_rates = [1e-5, 1e-6, 1e-7, 1e-8]
batch_sizes = [8, 12, 16]
total_epochs = [5, 10, 30]

model = "Wiki Finetuned: lr: {}, bs: {}, epochs: {}"

In [17]:
# sbert_temp = np.load(results_path_sbert.format(1e-5, 8, 5, 0))
# data_sbert = sbert_temp['scores']

In [18]:
# print(data_sbert.mean(axis=1).mean(axis=0))

In [21]:
for lr in learning_rates:
    for bs in batch_sizes:
        for epochs in total_epochs:
            total_data = None
            total_data_sbert = []
            total_data_intent = {}

            for intent in intents:
                total_data_intent[intent] = []

            # get all of the "normal" data
            for i in range(sample_size):
                temp = np.load(results_path.format(lr, bs, epochs, i))
                sbert_temp = np.load(results_path_sbert.format(lr, bs, epochs, i))
                data = temp['scores']
                data_sbert = sbert_temp['scores']
                
                if total_data is None:
                    # we need to take the mean of axis-one first because each "user-sample" contains a set of predicted summaries. So we first avg those sets, before average across all the data/trials
                    total_data = data.mean(axis=1)
                    total_data_sbert = data_sbert.mean(axis=1)
                else:
                    total_data = np.concatenate((total_data, data.mean(axis=1)), axis=0)
                    total_data_sbert = np.concatenate((total_data_sbert, data_sbert.mean(axis=1)), axis=0)

            # # get all of the "by-intent" data
            # for i in range(sample_size):
            #     res = None
            #     with open(results_intent_path.format(lr, bs, epochs, i)) as f:
            #         res = json.load(f)

            #     for intent in intents:
            #         total_data_intent[intent].extend(np.mean(res[intent], axis=1).tolist())

            # now calculate the relavent stats
            total_mean = np.mean(total_data, axis=0)
            total_stddev = np.std(total_data, axis=0)

            total_mean_sbert = np.mean(total_data_sbert, axis=0)
            total_stddev_sbert = np.std(total_data_sbert, axis=0)

            mean_intent = {}
            stddev_intent = {}

            # for intent in intents:
            #     mean = np.mean(total_data_intent[intent], axis=0)
            #     stddev = np.std(total_data_intent[intent], axis=0)
            #     mean_intent[intent] = mean
            #     stddev_intent[intent] = stddev

            metrics = ["Rouge-1", "Rouge-2", "Rouge-L"]

            for i, (row_m, row_s) in enumerate(zip(total_mean, total_stddev)):
                row_str = model.format(lr, bs, epochs) + " & "
                for col_m, col_s in zip(row_m, row_s):
                    row_str += str(np.round(col_m, 4)) + " $\\pm$ " + str(np.round(col_s, 4)) + " & "
                
                row_str = row_str[:-3]
                row_str += " \\\\"
                print(metrics[i])
                print(row_str)
            print(model.format(lr, bs, epochs) + " & " + str(np.round(total_mean_sbert, 4)) + " $\\pm$ " + str(np.round(total_stddev_sbert, 4)) + " \\\\")

            print("\n\n\n")

Rouge-1
Wiki Finetuned: lr: 1e-05, bs: 8, epochs: 5 & 0.0803 $\pm$ 0.0202 & 0.3997 $\pm$ 0.1208 & 0.13 $\pm$ 0.0307 & 0.0812 $\pm$ 0.0192 \\
Rouge-2
Wiki Finetuned: lr: 1e-05, bs: 8, epochs: 5 & 0.0191 $\pm$ 0.0082 & 0.1043 $\pm$ 0.0659 & 0.0312 $\pm$ 0.014 & 0.0195 $\pm$ 0.0087 \\
Rouge-L
Wiki Finetuned: lr: 1e-05, bs: 8, epochs: 5 & 0.0586 $\pm$ 0.0128 & 0.3014 $\pm$ 0.1036 & 0.0953 $\pm$ 0.0206 & 0.0596 $\pm$ 0.0129 \\
Wiki Finetuned: lr: 1e-05, bs: 8, epochs: 5 & 0.3631 $\pm$ 0.0965 \\




Rouge-1
Wiki Finetuned: lr: 1e-05, bs: 8, epochs: 10 & 0.0278 $\pm$ 0.0126 & 0.0898 $\pm$ 0.0778 & 0.039 $\pm$ 0.0174 & 0.0244 $\pm$ 0.0109 \\
Rouge-2
Wiki Finetuned: lr: 1e-05, bs: 8, epochs: 10 & 0.007 $\pm$ 0.0047 & 0.0301 $\pm$ 0.0419 & 0.0098 $\pm$ 0.0061 & 0.0061 $\pm$ 0.0038 \\
Rouge-L
Wiki Finetuned: lr: 1e-05, bs: 8, epochs: 10 & 0.0237 $\pm$ 0.0104 & 0.0756 $\pm$ 0.0626 & 0.0332 $\pm$ 0.0142 & 0.0207 $\pm$ 0.0089 \\
Wiki Finetuned: lr: 1e-05, bs: 8, epochs: 10 & 0.2025 $\pm$ 0.1396 \\

