In [1]:
import matplotlib
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd 

import seaborn as sns
import json

import tiktoken
# Tokenizer shared by every token-count statistic computed below.
encoding__ = tiktoken.get_encoding("cl100k_base")


def num_tokens(x):
    """Return how many ``cl100k_base`` tokens ``x`` is encoded into.

    Args:
        x: Text passed to ``encoding__.encode``.

    Returns:
        The length of the token sequence produced by the encoder.
    """
    return len(encoding__.encode(x))

from collections import defaultdict
from matplotlib.ticker import MultipleLocator, PercentFormatter


import os, sys; sys.path.append("../src")
import utils_io


def read_jsons(parent_dir: str):
    """Load every ``*.json`` file found directly inside ``parent_dir``.

    Files are visited in sorted name order and read with
    ``utils_io.read_json``. For each file the number of entries under its
    ``"assignments"`` key is printed.

    Args:
        parent_dir: Directory whose entries are listed.

    Returns:
        Dict mapping each file name (extension included) to its parsed content.

    Raises:
        AssertionError: If no ``.json`` file was found in ``parent_dir``.
    """
    json_names = [name for name in sorted(os.listdir(parent_dir)) if name.endswith(".json")]

    loaded = {}
    for name in json_names:
        content = utils_io.read_json(f"{parent_dir}/{name}")
        loaded[name] = content
        print(len(content["assignments"]))

    assert len(loaded) > 0, f"Empty {parent_dir}"
    return loaded

def print_values(data_dict, msg):
    """Print pooled and per-file ``mean (± std)`` summaries of ``data_dict``.

    Args:
        data_dict: Mapping from a file name to a sequence of numeric values.
        msg: Title printed above the summary; the dashed rule under it is
            sized from the title's length.

    The values of all files are stacked with ``np.hstack`` for the "Total"
    row; each file then gets its own row. Nothing is returned.
    """
    def _summary(values):
        # One rendering shared by the pooled row and the per-file rows.
        return f"{np.mean(values):.1f} (± {np.std(values):.1f})"

    rule = "-" * len(msg) + "-----------"
    print(f"\t{msg}\n", rule)

    pooled = np.hstack(list(data_dict.values()))
    print(f"Total:\t {_summary(pooled)}")
    print(f"Total count:", len(pooled))

    for name in data_dict:
        print(f"-> {name}:\t {_summary(data_dict[name])}")

    print("\n\n")

def document_length(data):
    """Total token count of the documents attached to each assignment.

    Args:
        data: Dict with an ``"assignments"`` list and a ``"documents"``
            mapping from uuid to a record holding ``"document_text"``.

    Returns:
        One entry per assignment: the ``np.sum`` of ``num_tokens`` over the
        texts of the documents listed in that assignment's ``"docs_uuids"``.
    """
    token_totals = []
    for assignment in data["assignments"]:
        per_doc_tokens = [
            num_tokens(data["documents"][doc_uuid]["document_text"])
            for doc_uuid in assignment["docs_uuids"]
        ]
        token_totals.append(np.sum(per_doc_tokens))
    return token_totals

def all_subtopics(data):
    """Count the subtopics listed for each assignment.

    Args:
        data: Dict with an ``"assignments"`` list; each assignment holds
            ``"all_subtopics_uuids"``.

    Returns:
        List with the number of subtopic uuids per assignment.

    Raises:
        AssertionError: If an assignment lists the same uuid more than once.
    """
    def _count(assignment):
        subtopic_uuids = assignment["all_subtopics_uuids"]
        total = len(subtopic_uuids)
        # Duplicated uuids would inflate the count, so reject them outright.
        assert len(np.unique(subtopic_uuids)) == total
        return total

    return [_count(assignment) for assignment in data["assignments"]]

def shared_subtopics(data):
    """Count, per assignment, the subtopics mapped to more than one document.

    Args:
        data: Dict with an ``"assignments"`` list; each assignment holds a
            ``"subtopics_to_documents"`` mapping from subtopic uuid to a
            collection of documents.

    Returns:
        List with the number of subtopics whose document collection has a
        length greater than one, one entry per assignment.
    """
    counts = []
    for assignment in data["assignments"]:
        shared = []
        for subtopic_uuid, docs in assignment["subtopics_to_documents"].items():
            if len(docs) > 1:
                shared.append(subtopic_uuid)

        # Sanity check kept from the original: the selected uuids are distinct.
        n_shared = len(shared)
        assert len(np.unique(shared)) == n_shared
        counts.append(n_shared)
    return counts

def all_insights(data):
    """Count the insights listed for each assignment.

    Args:
        data: Dict with an ``"assignments"`` list; each assignment holds
            ``"all_insights_uuids"``.

    Returns:
        List with the number of insight uuids per assignment.

    Raises:
        AssertionError: If an assignment lists the same uuid more than once.
    """
    counts = []
    for assignment in data["assignments"]:
        insight_uuids = assignment["all_insights_uuids"]
        n_insights = len(insight_uuids)
        # A repeated uuid would be counted twice; fail loudly instead.
        n_distinct = len(np.unique(insight_uuids))
        assert n_distinct == n_insights
        counts.append(n_insights)
    return counts

def subtopic_insights(data, is_adversarial=False):
    """Count, per assignment, the insights that belong to its own subtopic.

    Args:
        data: Dict with an ``"insights"`` mapping (uuid -> record holding
            ``"subtopic_id"``) and an ``"assignments"`` list; each assignment
            holds ``"all_insights_uuids"`` and ``"subtopic_uuid"``.
        is_adversarial: When truthy, an assignment may have no matching
            insight without tripping the assertion.

    Returns:
        List with the number of insights whose ``"subtopic_id"`` equals the
        assignment's ``"subtopic_uuid"``, one entry per assignment.

    Raises:
        AssertionError: If ``is_adversarial`` is falsy and an assignment has
            no insight for its subtopic.
    """
    insight_by_uuid = data["insights"]

    counts = []
    for assignment in data["assignments"]:
        insight_uuids = assignment["all_insights_uuids"]
        target_subtopic = assignment["subtopic_uuid"]

        n_matching = sum(
            1
            for insight_uuid in insight_uuids
            if insight_by_uuid[insight_uuid]["subtopic_id"] == target_subtopic
        )

        # NOTE(review): with is_adversarial truthy every count passes this
        # check; confirm whether adversarial data was meant to require zero.
        assert n_matching > 0 or (is_adversarial and n_matching == 0)
        counts.append(n_matching)
    return counts


def shared_subtopic_insights(data, is_adversarial=False):
    """Count the ground-truth insights of each assignment.

    Args:
        data: Dict with an ``"assignments"`` list; each assignment holds
            ``"ground_truth_insights_uuids"``.
        is_adversarial: When truthy, the uniqueness check is skipped for
            assignments with no ground-truth insight.

    Returns:
        List with the number of ground-truth insight uuids per assignment.

    Raises:
        AssertionError: If an assignment lists the same uuid more than once.
    """
    counts = []
    for assignment in data["assignments"]:
        gt_uuids = assignment["ground_truth_insights_uuids"]
        n_gt = len(gt_uuids)

        adversarial_and_empty = is_adversarial and n_gt == 0
        if not adversarial_and_empty:
            assert len(np.unique(gt_uuids)) == n_gt

        counts.append(n_gt)
    return counts


def shared_insights(data, is_adversarial=False):
    """Count the distinct shared insights of each assignment.

    Args:
        data: Dict with an ``"assignments"`` list; each assignment holds the
            lists ``"some_shared_insights_uuids"`` and
            ``"all_shared_insights_uuids"``.
        is_adversarial: Accepted for signature parity with the sibling
            statistics; not read by this function.

    Returns:
        List with the number of distinct uuids across both lists, one entry
        per assignment.
    """
    return [
        len(set(assignment["some_shared_insights_uuids"] + assignment["all_shared_insights_uuids"]))
        for assignment in data["assignments"]
    ]

# Statistics of Document Combinations

In [4]:
# Load every preprocessed combination file; keys drop the ".json" extension.
base_dir = "../data/SummHay_conv/preprocessed/some_shared/combinations-2/"
dataset = {
    fname.replace(".json", ""): data
    for fname, data in read_jsons(base_dir).items()
}
is_adversarial = "adversarial" in base_dir

# --- Token length of the documents in each assignment ---
dataset_lens = {
    name: document_length(content)
    for name, content in dataset.items()
}
print_values(dataset_lens, "Average file token length")

# --- Number of subtopics per assignment ---
dataset_all_subtopics = {
    name: all_subtopics(content)
    for name, content in dataset.items()
}
print_values(dataset_all_subtopics, "Average number of subtopics in documents")

# --- Number of subtopics covered by more than one document ---
dataset_shared_subtopics = {
    name: shared_subtopics(content)
    for name, content in dataset.items()
}
print_values(dataset_shared_subtopics, "Average number of shared subtopics in documents")

# --- Number of insights per assignment ---
dataset_all_insights = {
    name: all_insights(content)
    for name, content in dataset.items()
}
print_values(dataset_all_insights, "Average number of insights in documents")

# --- Number of insights that belong to the assignment's own subtopic ---
dataset_all_subtopic_insights = {
    name: subtopic_insights(content, is_adversarial)
    for name, content in dataset.items()
}
print_values(dataset_all_subtopic_insights, "Average number of subtopic insights")

# --- Number of shared insights, regardless of subtopic ---
dataset_shared_insights = {
    name: shared_insights(content, is_adversarial)
    for name, content in dataset.items()
}
print_values(dataset_shared_insights, "Average number of shared insights in documents")

# --- Number of shared insights for the assignment's subtopic ---
dataset_shared_subtopic_insights = {
    name: shared_subtopic_insights(content, is_adversarial)
    for name, content in dataset.items()
}
print_values(dataset_shared_subtopic_insights, "Average number of shared subtopic insights in documents")

68
60
57
71
85
	Average file token length
 ------------------------------------
Total:	 2054.6 (± 235.2)
Total count: 341
-> topic_conv1__300:	 2034.7 (± 209.0)
-> topic_conv2__300:	 1949.9 (± 223.2)
-> topic_conv3__300:	 2101.2 (± 227.1)
-> topic_conv4__300:	 2004.8 (± 219.9)
-> topic_conv5__300:	 2154.9 (± 235.1)



	Average number of subtopics in documents
 ---------------------------------------------------
Total:	 2.9 (± 0.3)
Total count: 341
-> topic_conv1__300:	 2.9 (± 0.2)
-> topic_conv2__300:	 2.9 (± 0.3)
-> topic_conv3__300:	 2.9 (± 0.3)
-> topic_conv4__300:	 2.9 (± 0.3)
-> topic_conv5__300:	 2.9 (± 0.3)



	Average number of shared subtopics in documents
 ----------------------------------------------------------
Total:	 1.1 (± 0.3)
Total count: 341
-> topic_conv1__300:	 1.1 (± 0.2)
-> topic_conv2__300:	 1.1 (± 0.3)
-> topic_conv3__300:	 1.1 (± 0.3)
-> topic_conv4__300:	 1.1 (± 0.3)
-> topic_conv5__300:	 1.1 (± 0.3)



	Average number of insights in documents
 --------------

# Statistics of Generated Summaries

In [51]:
import os, sys; sys.path.append("../src")
from eval_summaries import parse_responses_

def response_length(data):
    """Token length of the response stored in each assignment.

    Args:
        data: Dict with an ``"assignments"`` list; each assignment holds a
            ``"response"`` value that is passed to ``num_tokens``.

    Returns:
        List with one token count per assignment.
    """
    return [num_tokens(assignment["response"]) for assignment in data["assignments"]]

def num_insights_in_response(data):
    """Number of parsed items in each assignment's response.

    Args:
        data: Dict with an ``"assignments"`` list; each assignment holds a
            sized ``"response__parsed"`` value.

    Returns:
        List with ``len(assignment["response__parsed"])`` per assignment.
    """
    return [len(assignment["response__parsed"]) for assignment in data["assignments"]]

ImportError: cannot import name 'parse_responses_' from 'eval_summaries' (/Users/catarinabelem/Desktop/Projects/hallucination-multi-doc-summarization/notebooks/../src/eval_summaries.py)

In [None]:
# Load every result file of this run; keys drop the ".json" extension.
base_dir = "../outputs/results/SummHay__combinations-4/gpt-3.5-turbo-0125"
results = read_jsons(base_dir)
results = {fname.replace(".json", ""): data for fname, data in results.items()}
is_adversarial = "adversarial" in base_dir

# ------------------------
#  Response token length
# ------------------------
results_lens = {fname: response_length(data) for fname, data in results.items()}
print_values(results_lens, "Average response token length")

# -------------------------------
#  Number of predicted insights
# -------------------------------
# parse_responses_ is invoked only for its effect on the assignments (its
# return value is not used), hence an explicit loop rather than a throwaway
# list. NOTE(review): presumably it fills in "response__parsed", which
# num_insights_in_response reads next -- confirm in eval_summaries.
for run_data in results.values():
    parse_responses_(run_data["assignments"])

results_num_insights = {fname: num_insights_in_response(data) for fname, data in results.items()}
print_values(results_num_insights, "Average number of predicted insights")
