In [1]:
import numpy as np
import pandas as pd
import os
import json

# ---------------------------------------------------------------------------
# Notebook configuration
# ---------------------------------------------------------------------------

# NOTE(review): `num_trails` is not read by any cell visible in this notebook;
# the loading loops iterate over `sample_size` instead. Confirm which of the
# two is meant to control the number of result files.
num_trails = 10

# Number of result files loaded per score type (file indices 0 .. sample_size-1).
sample_size = 10

# Label placed in the first column of every printed LaTeX row.
model = "T5 Baseline"

# The last 28 characters of the working directory are dropped to obtain the
# prefix under which "Results/" lives.
# NOTE(review): this hard-coded slice only works when the notebook is started
# from one specific directory - presumably the notebook's own folder; confirm.
cwd = os.getcwd()
project_root = cwd[:-28]

# Path templates; `{}` is filled with the file index via str.format().
results_path = project_root + "Results/T5/all_scores_range_0_138_trail_{}.npz"
results_intent_path = project_root + "Results/T5/all_scores_range_by_intent_0_138_trail_{}.txt"

# Intent questions; used verbatim as keys into the by-intent result files.
intents = [
    "What is the state's policy regarding education?",
    "What are the major historical events in this state?",
    "How is the weather of the state?",
    "Which places seem interesting to you for visiting in this state?",
    "What are some of the most interesting things about this state?",
]


In [2]:
# NOTE(review): `full_results` is not used by any cell visible here; kept in
# case a later cell relies on it.
full_results = []

# One flat list of per-sample score arrays for every intent.
total_data_intent = {intent: [] for intent in intents}

# --- Load all of the "normal" (overall) data -------------------------------
# Each "user-sample" contains a set of predicted summaries, so we first average
# over that set (axis 1) and only afterwards aggregate across samples/trials.
per_trial_means = []
for i in range(sample_size):
    # np.load on an .npz returns a lazy NpzFile that holds the archive open;
    # the context manager closes it once the array has been read.
    with np.load(results_path.format(i)) as npz_file:
        data = npz_file['scores']
    per_trial_means.append(data.mean(axis=1))

# Concatenate once instead of re-copying the growing array on every iteration.
total_data = np.concatenate(per_trial_means, axis=0)

# --- Load all of the "by-intent" data --------------------------------------
for i in range(sample_size):
    with open(results_intent_path.format(i)) as f:
        res = json.load(f)

    for intent in intents:
        # Same per-sample averaging over axis 1 as for the overall scores.
        total_data_intent[intent].extend(np.mean(res[intent], axis=1).tolist())

# --- Calculate the relevant stats ------------------------------------------
# np.std uses ddof=0 by default, i.e. the population standard deviation.
total_mean = np.mean(total_data, axis=0)
total_stddev = np.std(total_data, axis=0)

mean_intent = {}
stddev_intent = {}

for intent in intents:
    # Convert the nested list once and reuse it for both statistics.
    intent_scores = np.asarray(total_data_intent[intent])
    mean_intent[intent] = np.mean(intent_scores, axis=0)
    stddev_intent[intent] = np.std(intent_scores, axis=0)

In [3]:
# Row labels, in the order of the first axis of total_mean / total_stddev.
metrics = ["Rouge-1", "Rouge-2", "Rouge-L"]

# Print one LaTeX table row per metric:
#   <model> & <mean> $\pm$ <std> & ... \\
for idx, (means, stds) in enumerate(zip(total_mean, total_stddev)):
    # One "mean $\pm$ std" cell per column, each value rounded to 4 decimals.
    cells = [
        str(np.round(m, 4)) + " $\\pm$ " + str(np.round(s, 4))
        for m, s in zip(means, stds)
    ]

    # Joining with the column separator avoids trimming a trailing " & ".
    latex_row = " & ".join([model] + cells) + " \\\\"

    print(metrics[idx])
    print(latex_row)

Rouge-1
T5 Baseline & 0.1272 $\pm$ 0.0393 & 0.5206 $\pm$ 0.1103 & 0.1965 $\pm$ 0.049 & 0.1228 $\pm$ 0.0306 \\
Rouge-2
T5 Baseline & 0.0331 $\pm$ 0.0216 & 0.1517 $\pm$ 0.1289 & 0.0522 $\pm$ 0.0347 & 0.0326 $\pm$ 0.0217 \\
Rouge-L
T5 Baseline & 0.0875 $\pm$ 0.0257 & 0.3668 $\pm$ 0.1004 & 0.1357 $\pm$ 0.0329 & 0.0848 $\pm$ 0.0206 \\


In [4]:
# Row labels, in the order of the first axis of each per-intent stats array.
metrics = ["Rouge-1", "Rouge-2", "Rouge-L"]

# For every intent, print a header followed by one tab-indented LaTeX row per
# metric:  <TAB><model> & <mean> $\pm$ <std> & ... \\
for intent in intents:
    print("\nResults for intent {}\n".format(intent))

    intent_rows = zip(mean_intent[intent], stddev_intent[intent])
    for idx, (means, stds) in enumerate(intent_rows):
        # One "mean $\pm$ std" cell per column, each value rounded to 4 decimals.
        cells = [
            str(np.round(m, 4)) + " $\\pm$ " + str(np.round(s, 4))
            for m, s in zip(means, stds)
        ]

        # Joining with the column separator avoids trimming a trailing " & ".
        latex_row = " & ".join(["\t" + model] + cells) + " \\\\"

        print("\t" + metrics[idx])
        print(latex_row)


Results for intent What is the state's policy regarding education?

	Rouge-1
	T5 Baseline & 0.1425 $\pm$ 0.0322 & 0.449 $\pm$ 0.0563 & 0.2104 $\pm$ 0.0386 & 0.1315 $\pm$ 0.0241 \\
	Rouge-2
	T5 Baseline & 0.0264 $\pm$ 0.0099 & 0.0859 $\pm$ 0.0297 & 0.0393 $\pm$ 0.014 & 0.0246 $\pm$ 0.0087 \\
	Rouge-L
	T5 Baseline & 0.0983 $\pm$ 0.02 & 0.3141 $\pm$ 0.0426 & 0.1456 $\pm$ 0.0234 & 0.091 $\pm$ 0.0146 \\

Results for intent What are the major historical events in this state?

	Rouge-1
	T5 Baseline & 0.1059 $\pm$ 0.0367 & 0.533 $\pm$ 0.0802 & 0.1701 $\pm$ 0.0461 & 0.1063 $\pm$ 0.0288 \\
	Rouge-2
	T5 Baseline & 0.0268 $\pm$ 0.0126 & 0.1412 $\pm$ 0.0554 & 0.0435 $\pm$ 0.0181 & 0.0272 $\pm$ 0.0113 \\
	Rouge-L
	T5 Baseline & 0.0738 $\pm$ 0.0241 & 0.378 $\pm$ 0.0638 & 0.1187 $\pm$ 0.0296 & 0.0742 $\pm$ 0.0185 \\

Results for intent How is the weather of the state?

	Rouge-1
	T5 Baseline & 0.1244 $\pm$ 0.0307 & 0.5071 $\pm$ 0.0756 & 0.1933 $\pm$ 0.039 & 0.1208 $\pm$ 0.0244 \\
	Rouge-2
	T5 Baseline