In [1]:
import numpy as np
import pandas as pd
import os
import json

# --- Configuration for the RandomN baseline --------------------------------
cwd = os.getcwd()

# The last 28 characters of the working directory are dropped to obtain the
# prefix under which "Results/" lives.
# NOTE(review): presumably this strips the notebook's own sub-directory to reach
# the project root; it silently yields a wrong prefix if the notebook is started
# from any other location -- confirm.
_project_root = cwd[:-28]

# Path templates; "{}" is filled with the trial index via str.format.
results_path = _project_root + "Results/RandomN_test/all_scores_range_0_137_trail_{}.npz"
results_path_sbert = _project_root + "Results/RandomN_test/all_sbert_scores_0_137_trail_{}.npz"
results_intent_path = _project_root + "Results/RandomN_test/all_scores_range_by_intent_0_137_trail_{}.txt"

# Intent prompts; these exact strings are used as keys into the by-intent
# result files.
intents = [
    "What drives the economy in this state?",
    "What are the available modes of transport in this state?",
    "How is the government structured in this state?",
    "What about this state's arts and culture attracts you the most?",
    "The main reasons why you would like living in this state",
]

# Number of trial files to read (trial indices 0 .. sample_size - 1).
sample_size = 2

# Label printed in the first column of every LaTeX row.
model = "RandomN"

In [6]:

# Aggregate the per-trial score files for the current `model` and print its
# LaTeX table rows.
#
# Inputs (set in earlier cells): results_path, results_path_sbert,
# results_intent_path, intents, sample_size, model.
# Results kept for later cells: total_data, total_data_sbert,
# total_data_intent, total_data_sbert_intent, total_mean, total_stddev,
# total_mean_sbert, total_stddev_sbert, mean_intent, stddev_intent,
# mean_intent_sbert, stddev_intent_sbert.
#
# NOTE(review): the same code is repeated later in the notebook for LeadN; a
# shared function taking the three path templates and the model name would
# remove the copy.

total_data_intent = {intent: [] for intent in intents}
total_data_sbert_intent = {intent: [] for intent in intents}

# get all of the "normal" data
trial_scores = []
trial_scores_sbert = []
for i in range(sample_size):
    # np.load on an .npz archive keeps the underlying file open until it is
    # closed, so each archive is used as a context manager. Indexing with
    # 'scores' reads the array into memory before the archive is closed.
    with np.load(results_path.format(i)) as temp:
        data = temp['scores']
    with np.load(results_path_sbert.format(i)) as sbert_temp:
        data_sbert = sbert_temp['scores']

    # we need to take the mean of axis-one first because each "user-sample"
    # contains a set of predicted summaries. So we first avg those sets, before
    # averaging across all the data/trials
    trial_scores.append(data.mean(axis=1))
    trial_scores_sbert.append(data_sbert.mean(axis=1))

# Concatenate once at the end instead of re-copying the growing array on every
# trial. Raises ValueError if sample_size is 0 (nothing to aggregate).
total_data = np.concatenate(trial_scores, axis=0)
total_data_sbert = np.concatenate(trial_scores_sbert, axis=0)

# get all of the "by-intent" data
for i in range(sample_size):
    with open(results_intent_path.format(i)) as f:
        res = json.load(f)

    for intent in intents:
        total_data_intent[intent].extend(np.mean(res[intent], axis=1).tolist())

# loop over intent arrays and all data arrays to extract sbert by intent
# NOTE(review): rows are paired by exact equality of their score matrices, so a
# matrix that occurs more than once in total_data contributes its SBERT score
# once per occurrence, and a matrix with no exact match contributes nothing.
for intent in intents:
    for intent_mat in total_data_intent[intent]:
        target = np.array(intent_mat)  # loop-invariant: build it once per matrix
        for i, mat in enumerate(total_data):
            if np.array_equal(mat, target):
                total_data_sbert_intent[intent].append(total_data_sbert[i])

# now calculate the relevant stats
total_mean = np.mean(total_data, axis=0)
total_stddev = np.std(total_data, axis=0)

total_mean_sbert = np.mean(total_data_sbert, axis=0)
total_stddev_sbert = np.std(total_data_sbert, axis=0)

mean_intent = {}
stddev_intent = {}
mean_intent_sbert = {}
stddev_intent_sbert = {}

for intent in intents:
    mean_intent[intent] = np.mean(total_data_intent[intent], axis=0)
    stddev_intent[intent] = np.std(total_data_intent[intent], axis=0)

    mean_intent_sbert[intent] = np.mean(total_data_sbert_intent[intent], axis=0)
    stddev_intent_sbert[intent] = np.std(total_data_sbert_intent[intent], axis=0)

metrics = ["Rouge-1", "Rouge-2", "Rouge-L"]

# "mean $\pm$ std" cell for the overall SBERT score; the same cell is appended
# to every ROUGE row and printed once more on its own line.
sbert_cell = str(np.round(total_mean_sbert, 4)) + " $\\pm$ " + str(np.round(total_stddev_sbert, 4))

for i, (row_m, row_s) in enumerate(zip(total_mean, total_stddev)):
    cells = [
        str(np.round(col_m, 4)) + " $\\pm$ " + str(np.round(col_s, 4))
        for col_m, col_s in zip(row_m, row_s)
    ]
    # One LaTeX table row: model label, one cell per score column, SBERT cell.
    row_str = " & ".join([model] + cells) + " & " + sbert_cell + " \\\\"
    print(metrics[i])
    print(row_str)
print("SBERT Cosime Sim. Score: ")
print(model + " & " + sbert_cell + " \\\\")

print("\n\n\n")

Rouge-1
RandomN & 0.3395 $\pm$ 0.0624 & 0.3258 $\pm$ 0.0598 & 0.3195 $\pm$ 0.0436 & 0.1997 $\pm$ 0.0272 & 0.4661 $\pm$ 0.0719 \\
Rouge-2
RandomN & 0.0859 $\pm$ 0.0383 & 0.0823 $\pm$ 0.0346 & 0.0807 $\pm$ 0.0332 & 0.0505 $\pm$ 0.0208 & 0.4661 $\pm$ 0.0719 \\
Rouge-L
RandomN & 0.1776 $\pm$ 0.0359 & 0.1685 $\pm$ 0.0313 & 0.1658 $\pm$ 0.0229 & 0.1036 $\pm$ 0.0143 & 0.4661 $\pm$ 0.0719 \\
SBERT Cosime Sim. Score: 
RandomN & 0.4661 $\pm$ 0.0719 \\






In [7]:
# metrics = ["Rouge-1", "Rouge-2", "Rouge-L"]

# for intent in intents:
#     print("\nResults for intent {}\n".format(intent))
#     for i, (row_m, row_s) in enumerate(zip(mean_intent[intent], stddev_intent[intent])):
#         row_str = "\t"+model + " & "
#         for col_m, col_s in zip(row_m, row_s):
#             row_str += str(np.round(col_m, 4)) + " $\\pm$ " + str(np.round(col_s, 4)) + " & "
        
#         row_str = row_str[:-3]
#         row_str += " \\\\"
#         print("\t"+metrics[i])
#         print(row_str)

In [14]:
# Per-intent summary of one entry of the ROUGE statistics: element [2][3] of
# each intent's mean and std-dev matrix, rounded to 4 decimals and listed in
# the order of `intents`.
# NOTE(review): row 2 presumably corresponds to "Rouge-L" (third entry of the
# `metrics` list used for the overall table); what column 3 holds is not
# visible in this notebook -- confirm against the code that wrote the scores.
intent_mean_list = [round(mean_intent[name][2][3], 4) for name in intents]
intent_std_list = [round(stddev_intent[name][2][3], 4) for name in intents]

print(intent_mean_list)
print(intent_std_list)
    

[0.1023, 0.0973, 0.108, 0.1047, 0.1068]
[0.0137, 0.0125, 0.0134, 0.014, 0.0152]


In [16]:
# Per-intent SBERT mean and std-dev, rounded to 4 decimals and listed in the
# order of `intents`.
intent_mean_list = [round(mean_intent_sbert[name], 4) for name in intents]
intent_std_list = [round(stddev_intent_sbert[name], 4) for name in intents]

print(intent_mean_list)
print(intent_std_list)

[0.4777, 0.4522, 0.4268, 0.4608, 0.513]
[0.061, 0.0597, 0.0598, 0.0782, 0.0717]


In [17]:
# --- Configuration for the LeadN baseline -----------------------------------
# Rebinds the path templates and the row label so the cells below read the
# LeadN result files instead.
# NOTE(review): cwd[:-28] presumably strips the notebook's sub-directory to
# reach the project root; it depends on where the notebook is started -- confirm.
_project_root = cwd[:-28]

results_path = _project_root + "Results/LeadN_test/all_scores_range_0_137_trail_{}.npz"
results_path_sbert = _project_root + "Results/LeadN_test/all_sbert_scores_0_137_trail_{}.npz"
results_intent_path = _project_root + "Results/LeadN_test/all_scores_range_by_intent_0_137_trail_{}.txt"

# Label printed in the first column of every LaTeX row.
model = "LeadN"

In [18]:

# Aggregate the per-trial score files for the current `model` and print its
# LaTeX table rows.
#
# Inputs (set in earlier cells): results_path, results_path_sbert,
# results_intent_path, intents, sample_size, model.
# Results kept for later cells: total_data, total_data_sbert,
# total_data_intent, total_data_sbert_intent, total_mean, total_stddev,
# total_mean_sbert, total_stddev_sbert, mean_intent, stddev_intent,
# mean_intent_sbert, stddev_intent_sbert.
#
# NOTE(review): this repeats the aggregation run earlier in the notebook for
# RandomN; a shared function taking the three path templates and the model
# name would remove the copy.

total_data_intent = {intent: [] for intent in intents}
total_data_sbert_intent = {intent: [] for intent in intents}

# get all of the "normal" data
trial_scores = []
trial_scores_sbert = []
for i in range(sample_size):
    # np.load on an .npz archive keeps the underlying file open until it is
    # closed, so each archive is used as a context manager. Indexing with
    # 'scores' reads the array into memory before the archive is closed.
    with np.load(results_path.format(i)) as temp:
        data = temp['scores']
    with np.load(results_path_sbert.format(i)) as sbert_temp:
        data_sbert = sbert_temp['scores']

    # we need to take the mean of axis-one first because each "user-sample"
    # contains a set of predicted summaries. So we first avg those sets, before
    # averaging across all the data/trials
    trial_scores.append(data.mean(axis=1))
    trial_scores_sbert.append(data_sbert.mean(axis=1))

# Concatenate once at the end instead of re-copying the growing array on every
# trial. Raises ValueError if sample_size is 0 (nothing to aggregate).
total_data = np.concatenate(trial_scores, axis=0)
total_data_sbert = np.concatenate(trial_scores_sbert, axis=0)

# get all of the "by-intent" data
for i in range(sample_size):
    with open(results_intent_path.format(i)) as f:
        res = json.load(f)

    for intent in intents:
        total_data_intent[intent].extend(np.mean(res[intent], axis=1).tolist())

# loop over intent arrays and all data arrays to extract sbert by intent
# NOTE(review): rows are paired by exact equality of their score matrices, so a
# matrix that occurs more than once in total_data contributes its SBERT score
# once per occurrence, and a matrix with no exact match contributes nothing.
for intent in intents:
    for intent_mat in total_data_intent[intent]:
        target = np.array(intent_mat)  # loop-invariant: build it once per matrix
        for i, mat in enumerate(total_data):
            if np.array_equal(mat, target):
                total_data_sbert_intent[intent].append(total_data_sbert[i])

# now calculate the relevant stats
total_mean = np.mean(total_data, axis=0)
total_stddev = np.std(total_data, axis=0)

total_mean_sbert = np.mean(total_data_sbert, axis=0)
total_stddev_sbert = np.std(total_data_sbert, axis=0)

mean_intent = {}
stddev_intent = {}
mean_intent_sbert = {}
stddev_intent_sbert = {}

for intent in intents:
    mean_intent[intent] = np.mean(total_data_intent[intent], axis=0)
    stddev_intent[intent] = np.std(total_data_intent[intent], axis=0)

    mean_intent_sbert[intent] = np.mean(total_data_sbert_intent[intent], axis=0)
    stddev_intent_sbert[intent] = np.std(total_data_sbert_intent[intent], axis=0)

metrics = ["Rouge-1", "Rouge-2", "Rouge-L"]

# "mean $\pm$ std" cell for the overall SBERT score; the same cell is appended
# to every ROUGE row and printed once more on its own line.
sbert_cell = str(np.round(total_mean_sbert, 4)) + " $\\pm$ " + str(np.round(total_stddev_sbert, 4))

for i, (row_m, row_s) in enumerate(zip(total_mean, total_stddev)):
    cells = [
        str(np.round(col_m, 4)) + " $\\pm$ " + str(np.round(col_s, 4))
        for col_m, col_s in zip(row_m, row_s)
    ]
    # One LaTeX table row: model label, one cell per score column, SBERT cell.
    row_str = " & ".join([model] + cells) + " & " + sbert_cell + " \\\\"
    print(metrics[i])
    print(row_str)
print("SBERT Cosime Sim. Score: ")
print(model + " & " + sbert_cell + " \\\\")

print("\n\n\n")

Rouge-1
LeadN & 0.3607 $\pm$ 0.0595 & 0.3395 $\pm$ 0.0747 & 0.3362 $\pm$ 0.0523 & 0.2101 $\pm$ 0.0327 & 0.5388 $\pm$ 0.0782 \\
Rouge-2
LeadN & 0.0995 $\pm$ 0.0548 & 0.0942 $\pm$ 0.0524 & 0.0931 $\pm$ 0.0511 & 0.0582 $\pm$ 0.0319 & 0.5388 $\pm$ 0.0782 \\
Rouge-L
LeadN & 0.1987 $\pm$ 0.0477 & 0.1838 $\pm$ 0.0475 & 0.1833 $\pm$ 0.0403 & 0.1145 $\pm$ 0.0252 & 0.5388 $\pm$ 0.0782 \\
SBERT Cosime Sim. Score: 
LeadN & 0.5388 $\pm$ 0.0782 \\






In [19]:
# metrics = ["Rouge-1", "Rouge-2", "Rouge-L"]

# for intent in intents:
#     print("\nResults for intent {}\n".format(intent))
#     for i, (row_m, row_s) in enumerate(zip(mean_intent[intent], stddev_intent[intent])):
#         row_str = "\t"+model + " & "
#         for col_m, col_s in zip(row_m, row_s):
#             row_str += str(np.round(col_m, 4)) + " $\\pm$ " + str(np.round(col_s, 4)) + " & "
        
#         row_str = row_str[:-3]
#         row_str += " \\\\"
#         print("\t"+metrics[i])
#         print(row_str)

In [20]:
# Per-intent summary of one entry of the ROUGE statistics: element [2][3] of
# each intent's mean and std-dev matrix, rounded to 4 decimals and listed in
# the order of `intents`.
# NOTE(review): row 2 presumably corresponds to "Rouge-L" (third entry of the
# `metrics` list used for the overall table); what column 3 holds is not
# visible in this notebook -- confirm against the code that wrote the scores.
intent_mean_list = [round(mean_intent[name][2][3], 4) for name in intents]
intent_std_list = [round(stddev_intent[name][2][3], 4) for name in intents]

print(intent_mean_list)
print(intent_std_list)
    

[0.1142, 0.1039, 0.1119, 0.1143, 0.1295]
[0.0225, 0.009, 0.0081, 0.0279, 0.0383]


In [21]:
# Per-intent SBERT mean and std-dev, rounded to 4 decimals and listed in the
# order of `intents`.
intent_mean_list = [round(mean_intent_sbert[name], 4) for name in intents]
intent_std_list = [round(stddev_intent_sbert[name], 4) for name in intents]

print(intent_mean_list)
print(intent_std_list)

[0.5513, 0.5315, 0.5041, 0.4985, 0.6039]
[0.057, 0.0464, 0.0483, 0.093, 0.0895]
