In [1]:
import numpy as np
import pandas as pd
import os
import json

# ---- Configuration: result locations, prompts, and run parameters ----
cwd = os.getcwd()

# NOTE(review): the fixed 28-character slice presumably strips the notebook's
# own sub-directory so that what remains is the project root (with a trailing
# separator) -- confirm; this breaks if the notebook is run from elsewhere.
base_dir = cwd[:-28]
results_dir = base_dir + "Results/T5_finetuned_aug_test/"

# Each template keeps a "{}" placeholder that is filled with the trial index.
results_path = results_dir + "all_scores_range_0_137_trail_{}.npz"
results_path_sbert = results_dir + "all_sbert_scores_0_137_trail_{}.npz"
results_intent_path = results_dir + "all_scores_range_by_intent_0_137_trail_{}.txt"

# Intent prompts; used verbatim as keys into the per-intent JSON results.
intents = [
    "What drives the economy in this state?",
    "What are the available modes of transport in this state?",
    "How is the government structured in this state?",
    "What about this state's arts and culture attracts you the most?",
    "The main reasons why you would like living in this state",
]

# Number of trial files to read (trial indices 0 .. sample_size - 1).
sample_size = 2

# Row label used in the generated LaTeX table rows.
model = "T5 Agu. F.T."

In [2]:

# ---- Aggregate scores over all trials and print the overall LaTeX rows ----
# Reads `sample_size` trial files (ROUGE .npz, SBERT .npz, per-intent JSON),
# pools the per-sample scores across trials, and prints mean +/- std rows.


def _latex_row(label, means, stddevs):
    """Build one LaTeX table row: ``label & m1 $\\pm$ s1 & m2 $\\pm$ s2 ... \\\\``.

    Each mean/std pair is rounded to 4 decimals. With no pairs at all the row
    is just the label followed by the row terminator.
    """
    cells = [
        str(np.round(m, 4)) + " $\\pm$ " + str(np.round(s, 4))
        for m, s in zip(means, stddevs)
    ]
    return " & ".join([label] + cells) + " \\\\"


if sample_size < 1:
    # Without at least one trial there is nothing to average; fail with a
    # clear message instead of an obscure error from np.mean further down.
    raise ValueError("sample_size must be at least 1 to aggregate results")

rouge_per_trial = []
sbert_per_trial = []
total_data_intent = {intent: [] for intent in intents}

# get all of the "normal" data
for i in range(sample_size):
    # np.load on an .npz returns a lazily-read archive holding an open file
    # handle; the `with` block closes it once the array has been read.
    with np.load(results_path.format(i)) as rouge_file:
        data = rouge_file['scores']
    with np.load(results_path_sbert.format(i)) as sbert_file:
        data_sbert = sbert_file['scores']

    # Average over axis 1 first: each "user-sample" holds a set of predicted
    # summaries, so those are averaged before pooling across samples/trials.
    rouge_per_trial.append(data.mean(axis=1))
    sbert_per_trial.append(data_sbert.mean(axis=1))

# Stack the trials once, rather than re-concatenating on every iteration.
total_data = np.concatenate(rouge_per_trial, axis=0)
total_data_sbert = np.concatenate(sbert_per_trial, axis=0)

# get all of the "by-intent" data
for i in range(sample_size):
    with open(results_intent_path.format(i)) as f:
        res = json.load(f)

    for intent in intents:
        # Same per-sample averaging over axis 1 as above, kept as plain lists.
        total_data_intent[intent].extend(np.mean(res[intent], axis=1).tolist())

# now calculate the relevant stats
total_mean = np.mean(total_data, axis=0)
total_stddev = np.std(total_data, axis=0)

total_mean_sbert = np.mean(total_data_sbert, axis=0)
total_stddev_sbert = np.std(total_data_sbert, axis=0)

mean_intent = {intent: np.mean(total_data_intent[intent], axis=0) for intent in intents}
stddev_intent = {intent: np.std(total_data_intent[intent], axis=0) for intent in intents}

# Labels for the rows of total_mean / total_stddev, in order.
metrics = ["Rouge-1", "Rouge-2", "Rouge-L"]

for i, (row_m, row_s) in enumerate(zip(total_mean, total_stddev)):
    print(metrics[i])
    print(_latex_row(model, row_m, row_s))
print("SBERT Cosime Sim. Score: ")
print(model + " & " + str(np.round(total_mean_sbert, 4)) + " $\\pm$ " + str(np.round(total_stddev_sbert, 4)) + " \\\\")

print("\n\n\n")

Rouge-1
T5 Agu. F.T. & 0.1315 $\pm$ 0.0331 & 0.5297 $\pm$ 0.0837 & 0.2036 $\pm$ 0.0404 & 0.1273 $\pm$ 0.0253 \\
Rouge-2
T5 Agu. F.T. & 0.0335 $\pm$ 0.0163 & 0.1484 $\pm$ 0.0807 & 0.053 $\pm$ 0.0258 & 0.0331 $\pm$ 0.0161 \\
Rouge-L
T5 Agu. F.T. & 0.093 $\pm$ 0.0219 & 0.3812 $\pm$ 0.0702 & 0.1445 $\pm$ 0.0271 & 0.0903 $\pm$ 0.017 \\
SBERT Cosime Sim. Score: 
T5 Agu. F.T. & 0.4806 $\pm$ 0.0762 \\






In [3]:
# ---- Print one LaTeX row per metric for every intent ----
# Labels for the rows of each intent's mean/std arrays, in order.
metrics = ["Rouge-1", "Rouge-2", "Rouge-L"]

for intent in intents:
    print("\nResults for intent {}\n".format(intent))
    per_metric = zip(mean_intent[intent], stddev_intent[intent])
    for idx, (metric_means, metric_stds) in enumerate(per_metric):
        # One "mean $\pm$ std" cell per column, rounded to 4 decimals.
        cells = [
            str(np.round(avg, 4)) + " $\\pm$ " + str(np.round(dev, 4))
            for avg, dev in zip(metric_means, metric_stds)
        ]
        print("\t" + metrics[idx])
        # Tab-indented row: model label, then the cells, '&'-separated.
        print(" & ".join(["\t" + model] + cells) + " \\\\")


Results for intent What drives the economy in this state?

	Rouge-1
	T5 Agu. F.T. & 0.1344 $\pm$ 0.0328 & 0.5264 $\pm$ 0.0589 & 0.2077 $\pm$ 0.0389 & 0.1298 $\pm$ 0.0243 \\
	Rouge-2
	T5 Agu. F.T. & 0.0352 $\pm$ 0.0153 & 0.1474 $\pm$ 0.0563 & 0.0552 $\pm$ 0.0225 & 0.0345 $\pm$ 0.014 \\
	Rouge-L
	T5 Agu. F.T. & 0.0952 $\pm$ 0.0215 & 0.381 $\pm$ 0.0564 & 0.1478 $\pm$ 0.0259 & 0.0924 $\pm$ 0.0162 \\

Results for intent What are the available modes of transport in this state?

	Rouge-1
	T5 Agu. F.T. & 0.1319 $\pm$ 0.0307 & 0.5082 $\pm$ 0.0803 & 0.2016 $\pm$ 0.0366 & 0.126 $\pm$ 0.0229 \\
	Rouge-2
	T5 Agu. F.T. & 0.0265 $\pm$ 0.008 & 0.1126 $\pm$ 0.0437 & 0.0415 $\pm$ 0.0122 & 0.0259 $\pm$ 0.0076 \\
	Rouge-L
	T5 Agu. F.T. & 0.0908 $\pm$ 0.0195 & 0.3556 $\pm$ 0.0605 & 0.1392 $\pm$ 0.023 & 0.087 $\pm$ 0.0143 \\

Results for intent How is the government structured in this state?

	Rouge-1
	T5 Agu. F.T. & 0.1309 $\pm$ 0.0349 & 0.5057 $\pm$ 0.0708 & 0.2003 $\pm$ 0.0407 & 0.1252 $\pm$ 0.0254 \\
	