In [1]:
import torch
import os
import numpy as np
import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old aliases, so use the new name when it exists
# and fall back to the legacy one on older installations.
plt.style.use(
    'seaborn-v0_8-white' if 'seaborn-v0_8-white' in plt.style.available else 'seaborn-white'
)
from nltk import word_tokenize, ngrams
import argparse
import os
import re
import time
from multiprocessing import Pool

import shutil
from nltk import sent_tokenize
import evaluate


2024-01-04 13:37:09.961917: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-01-04 13:37:21.466730: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2024-01-04 13:37:21.468345: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory


In [None]:
def get_all_filepaths(dir):
    """Map each output variant to the path of its ``test_outfinal.txt`` file.

    ARGS :
        dir : directory that is joined with the ``head_0``, ``head_1`` and
              ``mixed`` sub-directory names (paths are built, not checked)

    RETURNS :
        dict keyed by variant name whose values are
        ``<dir>/<variant>/test_outfinal.txt``
    """
    return {
        variant: os.path.join(dir, variant, "test_outfinal.txt")
        for variant in ("head_0", "head_1", "mixed")
    }
def get_paired_outputs(filepath):
    """Parse an output file laid out in repeating groups of four lines.

    Within each group the first three lines are stored unmodified (line
    endings included) as ``article``, ``reference`` and ``summary``; the
    fourth line of the group is not kept.

    ARGS :
        filepath : path of the text file to read

    RETURNS :
        dict mapping the 0-based group index to a dict with the lines read
        for that group
    """
    field_names = ("article", "reference", "summary")
    paired_outputs = {}
    with open(filepath, "r") as handle:
        for line_number, text in enumerate(handle):
            record_index, position = divmod(line_number, 4)
            if position == 3:
                # fourth line of every group carries nothing we store
                continue
            if position == 0:
                # first line of a group opens a fresh record
                paired_outputs[record_index] = {}
            paired_outputs[record_index][field_names[position]] = text
    return paired_outputs
def get_rouge_scores(references, candidates):
    """Compute ROUGE for candidate summaries against references and print it.

    ARGS :
        references : reference summaries, forwarded as ``references``
        candidates : generated summaries, forwarded as ``predictions``

    RETURNS :
        the value returned by ``rouge.compute`` (the same object that is
        printed), so callers can use the scores instead of only reading
        them from stdout
    """
    rouge = evaluate.load("rouge")
    results = rouge.compute(predictions=candidates, references=references)
    print(results)
    return results




def read_file(input_dict):
    """Split paired outputs into three parallel lists of cleaned text.

    Every field is stripped of surrounding whitespace and then has each
    ``<...>`` span removed.

    ARGS :
        input_dict : dict whose values each hold ``article``, ``reference``
                     and ``summary`` strings

    RETURNS :
        (inputs, references, candidates) : three lists following the
        iteration order of ``input_dict``
    """
    def _clean(text):
        # strip first, then drop tags: whitespace exposed by a removed tag stays
        return re.sub('<.*?>', '', text.strip())

    inputs, references, candidates = [], [], []
    for record in input_dict.values():
        raw_fields = [record[field] for field in ('article', 'reference', 'summary')]
        article, gold, summary = [_clean(text) for text in raw_fields]
        inputs.append(article)
        candidates.append(summary)
        references.append(gold)

    return inputs, references, candidates



def compute_rouge(input_dict):
    """Print the ROUGE scores for the summaries held in ``input_dict``.

    ARGS :
        input_dict : paired outputs in the form accepted by ``read_file``
    """
    # the cleaned articles are not needed here, only the two summary lists
    _, references, candidates = read_file(input_dict)
    get_rouge_scores(references, candidates)

def get_overlap(inp, out, ngram):
    """Fraction of the distinct n-grams of ``out`` that also occur in ``inp``.

    Both texts are lower-cased and tokenised with ``word_tokenize`` before
    their n-gram sets are built.

    ARGS :
        inp : text whose n-grams form the comparison set
        out : text whose n-grams are looked up in that set
        ngram : n-gram size

    RETURNS :
        0 when ``out`` yields no n-grams, otherwise shared / total as a float
    """
    source_grams = set(ngrams(word_tokenize(inp.lower()), ngram))
    output_grams = set(ngrams(word_tokenize(out.lower()), ngram))

    if not output_grams:
        return 0
    shared = source_grams & output_grams
    return len(shared) / len(output_grams)
def get_overlap_file(input_dict, output_filename, ngram=2, graph=False):
    """ computes the ngram overlap of the gold and generated summaries with the input,
        prints the mean overlap and mean length of the generated summaries and
        optionally stores a histogram of both overlap distributions
        ARGS :
            input_dict : input dict where key is index, where each output is of the form article , reference, summary
            output_filename : used to store the resulting plot (only written when graph is True)
            ngram : the size of the ngram overlap to be considered
            graph : whether to draw the graph or not
        RETURNS :
            (overlap_gold, overlap_gen) : two lists holding one overlap score per entry of input_dict
    """
    overlap_gold = []
    overlap_gen = []
    gen_length = []

    for key in input_dict.keys():
        inp = input_dict[key]['article']
        gold = input_dict[key]['reference']
        out = input_dict[key]['summary']

        overlap_gold.append(get_overlap(inp, gold, ngram))
        overlap_gen.append(get_overlap(inp, out, ngram))

        # length is counted in single-space separated pieces of the raw text
        gen_length.append(len(out.split(' ')))

    print('Generated overlap %dgram = %f' % (ngram, np.mean(overlap_gen)))
    print('Generated length = %f' % np.mean(gen_length))

    if graph:
        # the histogram of the data
        kwargs = dict(histtype='stepfilled', alpha=0.5, density=True, bins=80)

        for scores, label in ((overlap_gold, 'gold'), (overlap_gen, 'generated')):
            # Each series is weighted by its OWN size (previously the generated
            # series was divided by the gold count). With density=True matplotlib
            # renormalises the weights, so they only matter if density is turned off.
            weights = np.ones_like(scores) / float(len(scores))
            plt.hist(scores, **kwargs, label=label, weights=weights)

        plt.xlabel(f'{ngram}-gram overlap')
        plt.ylim(0, 8)
        plt.legend()
        plt.grid(True)
        plt.savefig(output_filename)
        # clear the shared pyplot figure so the next plot starts empty
        plt.clf()
    return overlap_gold, overlap_gen
def generate_results(exp_name = "exp_0"):
    """Evaluate the ``head_0``, ``head_1`` and ``mixed`` outputs of one experiment.

    For each variant the ROUGE scores and the 2-gram overlap statistics are
    printed and a per-variant histogram is saved; a combined histogram of the
    generated-summary overlaps is then saved as
    ``./outputs/<exp_name>/combined_overlap.jpg``.

    ARGS :
        exp_name : name of the experiment directory under ``./outputs/``

    RETURNS :
        (head_0_overlap_gold, head_0_overlap_gen, head_1_overlap_gold,
         head_1_overlap_gen, mixed_overlap_gold, mixed_overlap_gen)
    """
    exp_dir = "./outputs/" + str(exp_name)
    # (variant directory, banner printed before its scores); banners are verbatim
    variants = (("head_0", "for head 0 "), ("head_1", "for head 1"), ("mixed", "for mixed"))

    # print the experiment
    print("experiment name : " + str(exp_name))
    # all three files are read before anything is scored
    outputs = {
        variant: get_paired_outputs(exp_dir + "/" + variant + "/test_outfinal.txt")
        for variant, _ in variants
    }
    print("gold statistic")
    print("gold overlap : {0} and gold length : {1}".format(0.44, 0.14))

    overlaps = {}
    for variant, banner in variants:
        print(banner)
        compute_rouge(outputs[variant])
        overlaps[variant] = get_overlap_file(
            outputs[variant], exp_dir + "/" + variant + "_overlap.jpg", ngram=2, graph=True
        )
        print("")

    print("---------------------------------- Done ----------------------------------")
    print("")
    print("")

    # combined histogram: generated-summary overlap of every variant on one figure
    kwargs = dict(histtype='stepfilled', alpha=0.5, density=True, bins=80)
    for variant, _ in variants:
        overlap_gold, overlap_gen = overlaps[variant]
        weights = np.ones_like(overlap_gen) / float(len(overlap_gold))
        plt.hist(overlap_gen, **kwargs, label=variant, weights=weights)

    plt.xlabel('2-gram overlap')
    plt.ylim(0, 8)
    plt.legend()
    plt.grid(True)
    plt.savefig(exp_dir + "/combined_overlap.jpg")
    plt.clf()
    return (*overlaps["head_0"], *overlaps["head_1"], *overlaps["mixed"])
def do_experiments(list_of_experiments):
    """Run ``generate_results`` once for every experiment name, in order.

    The per-experiment return values are not kept; nothing is returned.
    """
    for experiment_name in list_of_experiments:
        generate_results(experiment_name)







In [None]:
# NOTE(review): generate_results is already defined in the previous cell; this
# later definition is the one in effect once both cells have run.
def generate_results(exp_name = "exp_0"):
    """Evaluate the ``head_0``, ``head_1`` and ``mixed`` outputs of one experiment.

    For each variant the ROUGE scores and the 2-gram overlap statistics are
    printed and a per-variant histogram is saved; a combined histogram of the
    generated-summary overlaps is then saved as
    ``./outputs/<exp_name>/combined_overlap.jpg``.

    ARGS :
        exp_name : name of the experiment directory under ``./outputs/``

    RETURNS :
        (head_0_overlap_gold, head_0_overlap_gen, head_1_overlap_gold,
         head_1_overlap_gen, mixed_overlap_gold, mixed_overlap_gen)
    """
    exp_dir = "./outputs/" + str(exp_name)
    # (variant directory, banner printed before its scores); banners are verbatim
    variants = (("head_0", "for head 0 "), ("head_1", "for head 1"), ("mixed", "for mixed"))

    # print the experiment
    print("experiment name : " + str(exp_name))
    # all three files are read before anything is scored
    outputs = {
        variant: get_paired_outputs(exp_dir + "/" + variant + "/test_outfinal.txt")
        for variant, _ in variants
    }
    print("gold statistic")
    print("gold overlap : {0} and gold length : {1}".format(0.44, 0.14))

    overlaps = {}
    for variant, banner in variants:
        print(banner)
        compute_rouge(outputs[variant])
        overlaps[variant] = get_overlap_file(
            outputs[variant], exp_dir + "/" + variant + "_overlap.jpg", ngram=2, graph=True
        )
        print("")

    print("---------------------------------- Done ----------------------------------")
    print("")
    print("")

    # combined histogram: generated-summary overlap of every variant on one figure
    kwargs = dict(histtype='stepfilled', alpha=0.5, density=True, bins=80)
    for variant, _ in variants:
        overlap_gold, overlap_gen = overlaps[variant]
        weights = np.ones_like(overlap_gen) / float(len(overlap_gold))
        plt.hist(overlap_gen, **kwargs, label=variant, weights=weights)

    plt.xlabel('2-gram overlap')
    plt.ylim(0, 8)
    plt.legend()
    plt.grid(True)
    plt.savefig(exp_dir + "/combined_overlap.jpg")
    plt.clf()
    return (*overlaps["head_0"], *overlaps["head_1"], *overlaps["mixed"])
# NOTE(review): do_experiments is already defined in the previous cell; this
# later definition is the one in effect once both cells have run.
def do_experiments(list_of_experiments):
    """Run ``generate_results`` once for every experiment name, in order.

    The per-experiment return values are not kept; nothing is returned.
    """
    for experiment_name in list_of_experiments:
        generate_results(experiment_name)



In [15]:
# Evaluate every baseline run in one go.
# NOTE(review): the suffix is presumably the diversity-loss weight -- confirm.
experiment_list = ["baseline_div_loss_" + suffix for suffix in ("0", "0.1", "0.5", "1")]
do_experiments(experiment_list)

experiment name : baseline_div_loss_0
gold statistic
gold overlap : 0.44 and gold length : 0.14
for head 0 


INFO:absl:Using default tokenizer.


{'rouge1': 0.15126413389825547, 'rouge2': 0.05910292822933239, 'rougeL': 0.11993000124746361, 'rougeLsum': 0.11989552981766322}
Generated overlap 2gram = 0.747915
Generated length = 86.587705

for head 1


INFO:absl:Using default tokenizer.


{'rouge1': 0.26261168309562755, 'rouge2': 0.11216058456960615, 'rougeL': 0.2263330086471663, 'rougeLsum': 0.22605314047660982}
Generated overlap 2gram = 0.759864
Generated length = 23.501639

for mixed


INFO:absl:Using default tokenizer.


{'rouge1': 0.268467777969422, 'rouge2': 0.12464014082455674, 'rougeL': 0.23418868509638588, 'rougeLsum': 0.23388101956004165}
Generated overlap 2gram = 0.815626
Generated length = 26.877869

---------------------------------- Done ----------------------------------


experiment name : baseline_div_loss_0.1
gold statistic
gold overlap : 0.44 and gold length : 0.14
for head 0 


INFO:absl:Using default tokenizer.


{'rouge1': 0.15377611931975008, 'rouge2': 0.05892924815360488, 'rougeL': 0.1209570381795109, 'rougeLsum': 0.12097531936410129}
Generated overlap 2gram = 0.679400
Generated length = 86.245082

for head 1


INFO:absl:Using default tokenizer.


{'rouge1': 0.2597816862424973, 'rouge2': 0.11220495023907201, 'rougeL': 0.22427377791050668, 'rougeLsum': 0.22401510390769352}
Generated overlap 2gram = 0.785831
Generated length = 23.904918

for mixed


INFO:absl:Using default tokenizer.


{'rouge1': 0.2704905706683032, 'rouge2': 0.12708174890356277, 'rougeL': 0.2369099391371935, 'rougeLsum': 0.23654299356307268}
Generated overlap 2gram = 0.818020
Generated length = 26.649180

---------------------------------- Done ----------------------------------


experiment name : baseline_div_loss_0.5
gold statistic
gold overlap : 0.44 and gold length : 0.14
for head 0 


INFO:absl:Using default tokenizer.


{'rouge1': 0.15956644020718347, 'rouge2': 0.06401224384454499, 'rougeL': 0.127696213654833, 'rougeLsum': 0.12785567942654838}
Generated overlap 2gram = 0.700229
Generated length = 85.800820

for head 1


INFO:absl:Using default tokenizer.


{'rouge1': 0.2668610905994591, 'rouge2': 0.11619047103506293, 'rougeL': 0.23022370739768186, 'rougeLsum': 0.23015969025006905}
Generated overlap 2gram = 0.764636
Generated length = 22.190164

for mixed


INFO:absl:Using default tokenizer.


{'rouge1': 0.26777189812869917, 'rouge2': 0.12124664186441964, 'rougeL': 0.23314744780267344, 'rougeLsum': 0.23281989992354046}
Generated overlap 2gram = 0.808447
Generated length = 25.182787

---------------------------------- Done ----------------------------------


experiment name : baseline_div_loss_1
gold statistic
gold overlap : 0.44 and gold length : 0.14
for head 0 


INFO:absl:Using default tokenizer.


{'rouge1': 0.15254441585930323, 'rouge2': 0.057279101658005024, 'rougeL': 0.12148196764243774, 'rougeLsum': 0.12156345269932062}
Generated overlap 2gram = 0.687028
Generated length = 85.757377

for head 1


INFO:absl:Using default tokenizer.


{'rouge1': 0.22660534700530272, 'rouge2': 0.09118497692762864, 'rougeL': 0.18894252085369206, 'rougeLsum': 0.18903445874874755}
Generated overlap 2gram = 0.730249
Generated length = 33.613115

for mixed


INFO:absl:Using default tokenizer.


{'rouge1': 0.2669356094154369, 'rouge2': 0.11997088112545665, 'rougeL': 0.23067081913898924, 'rougeLsum': 0.23022259806919915}
Generated overlap 2gram = 0.808062
Generated length = 27.241803

---------------------------------- Done ----------------------------------




<Figure size 432x288 with 0 Axes>

In [10]:
# Single-experiment run; keeps the six overlap lists for interactive inspection.
exp_results = generate_results("baseline_div_loss_0.1")

gold statistic
gold overlap : 0.44 and gold length : 0.14
for head 0 


INFO:absl:Using default tokenizer.


{'rouge1': 0.15377611931975008, 'rouge2': 0.05892924815360488, 'rougeL': 0.1209570381795109, 'rougeLsum': 0.12097531936410129}
Generated overlap 2gram = 0.679400
Generated length = 86.245082

for head 1


INFO:absl:Using default tokenizer.


{'rouge1': 0.2597816862424973, 'rouge2': 0.11220495023907201, 'rougeL': 0.22427377791050668, 'rougeLsum': 0.22401510390769352}
Generated overlap 2gram = 0.785831
Generated length = 23.904918

for mixed


INFO:absl:Using default tokenizer.


{'rouge1': 0.2704905706683032, 'rouge2': 0.12708174890356277, 'rougeL': 0.2369099391371935, 'rougeLsum': 0.23654299356307268}
Generated overlap 2gram = 0.818020
Generated length = 26.649180



<Figure size 432x288 with 0 Axes>

In [11]:
# Single-experiment run; keeps the six overlap lists for interactive inspection.
exp_results = generate_results("baseline_div_loss_0.5")


gold statistic
gold overlap : 0.44 and gold length : 0.14
for head 0 


INFO:absl:Using default tokenizer.


{'rouge1': 0.15956644020718347, 'rouge2': 0.06401224384454499, 'rougeL': 0.127696213654833, 'rougeLsum': 0.12785567942654838}
Generated overlap 2gram = 0.700229
Generated length = 85.800820

for head 1


INFO:absl:Using default tokenizer.


{'rouge1': 0.2668610905994591, 'rouge2': 0.11619047103506293, 'rougeL': 0.23022370739768186, 'rougeLsum': 0.23015969025006905}
Generated overlap 2gram = 0.764636
Generated length = 22.190164

for mixed


INFO:absl:Using default tokenizer.


{'rouge1': 0.26777189812869917, 'rouge2': 0.12124664186441964, 'rougeL': 0.23314744780267344, 'rougeLsum': 0.23281989992354046}
Generated overlap 2gram = 0.808447
Generated length = 25.182787



<Figure size 432x288 with 0 Axes>

In [12]:
# Single-experiment run; keeps the six overlap lists for interactive inspection.
exp_results = generate_results("baseline_div_loss_1")

gold statistic
gold overlap : 0.44 and gold length : 0.14
for head 0 


INFO:absl:Using default tokenizer.


{'rouge1': 0.15254441585930323, 'rouge2': 0.057279101658005024, 'rougeL': 0.12148196764243774, 'rougeLsum': 0.12156345269932062}
Generated overlap 2gram = 0.687028
Generated length = 85.757377

for head 1


INFO:absl:Using default tokenizer.


{'rouge1': 0.22660534700530272, 'rouge2': 0.09118497692762864, 'rougeL': 0.18894252085369206, 'rougeLsum': 0.18903445874874755}
Generated overlap 2gram = 0.730249
Generated length = 33.613115

for mixed


INFO:absl:Using default tokenizer.


{'rouge1': 0.2669356094154369, 'rouge2': 0.11997088112545665, 'rougeL': 0.23067081913898924, 'rougeLsum': 0.23022259806919915}
Generated overlap 2gram = 0.808062
Generated length = 27.241803



<Figure size 432x288 with 0 Axes>

{'head_0': 'outputs/baseline/head_0/test_outfinal.txt', 'head_1': 'outputs/baseline/head_1/test_outfinal.txt', 'mixed': 'outputs/baseline/mixed/test_outfinal.txt'}


In [11]:
# ROUGE only (no overlap plots) for the three variants under outputs/baseline.
filepaths = get_all_filepaths("outputs/baseline")
head_0_outputs, head_1_outputs, mixed_outputs = (
    get_paired_outputs(filepaths[variant]) for variant in ("head_0", "head_1", "mixed")
)
for _banner, _outputs in (
    ("for head 0 ", head_0_outputs),
    ("for head 1", head_1_outputs),
    ("for mixed", mixed_outputs),
):
    print(_banner)
    compute_rouge(_outputs)

for head 0 


INFO:absl:Using default tokenizer.


{'rouge1': 0.27271380259059985, 'rouge2': 0.11844801885694384, 'rougeL': 0.23112620936625985, 'rougeLsum': 0.23138093685586372}
for head 1


INFO:absl:Using default tokenizer.


{'rouge1': 0.25682834746651545, 'rouge2': 0.11131523426570167, 'rougeL': 0.22266794109804502, 'rougeLsum': 0.22259763270539173}
for mixed


INFO:absl:Using default tokenizer.


{'rouge1': 0.2734564229395584, 'rouge2': 0.12687083097645957, 'rougeL': 0.23668556560625628, 'rougeLsum': 0.23661751499962919}


In [12]:
# ROUGE only (no overlap plots) for the three variants under outputs/exp_2.
filepaths = get_all_filepaths("outputs/exp_2")
head_0_outputs, head_1_outputs, mixed_outputs = (
    get_paired_outputs(filepaths[variant]) for variant in ("head_0", "head_1", "mixed")
)
for _banner, _outputs in (
    ("for head 0 ", head_0_outputs),
    ("for head 1", head_1_outputs),
    ("for mixed", mixed_outputs),
):
    print(_banner)
    compute_rouge(_outputs)

for head 0 


INFO:absl:Using default tokenizer.


{'rouge1': 0.16181272918470943, 'rouge2': 0.06811278007292305, 'rougeL': 0.12751564455650777, 'rougeLsum': 0.12732488762591}
for head 1


INFO:absl:Using default tokenizer.


{'rouge1': 0.16182719023254533, 'rouge2': 0.06813434061676622, 'rougeL': 0.12751429036211634, 'rougeLsum': 0.12732459097764445}
for mixed


INFO:absl:Using default tokenizer.


{'rouge1': 0.1618180191652035, 'rouge2': 0.06834181045959487, 'rougeL': 0.12745064555999921, 'rougeLsum': 0.12731014210696573}


In [13]:
# ROUGE only (no overlap plots) for the three variants under outputs/exp_3.
filepaths = get_all_filepaths("outputs/exp_3")
head_0_outputs, head_1_outputs, mixed_outputs = (
    get_paired_outputs(filepaths[variant]) for variant in ("head_0", "head_1", "mixed")
)
for _banner, _outputs in (
    ("for head 0 ", head_0_outputs),
    ("for head 1", head_1_outputs),
    ("for mixed", mixed_outputs),
):
    print(_banner)
    compute_rouge(_outputs)

for head 0 


INFO:absl:Using default tokenizer.


{'rouge1': 0.1608135700886158, 'rouge2': 0.06756863789155886, 'rougeL': 0.12677801594999827, 'rougeLsum': 0.12662263007161959}
for head 1


INFO:absl:Using default tokenizer.


{'rouge1': 0.16076454316681926, 'rouge2': 0.06756053138555018, 'rougeL': 0.12675721285824504, 'rougeLsum': 0.12660349056751985}
for mixed


INFO:absl:Using default tokenizer.


{'rouge1': 0.16097998909798306, 'rouge2': 0.06784285437698793, 'rougeL': 0.12677887221510348, 'rougeLsum': 0.12663278598260874}


In [14]:
# ROUGE only (no overlap plots) for the three variants under outputs/exp_3.
# NOTE(review): evaluates the same directory as the preceding cell.
filepaths = get_all_filepaths("outputs/exp_3")
head_0_outputs, head_1_outputs, mixed_outputs = (
    get_paired_outputs(filepaths[variant]) for variant in ("head_0", "head_1", "mixed")
)
for _banner, _outputs in (
    ("for head 0 ", head_0_outputs),
    ("for head 1", head_1_outputs),
    ("for mixed", mixed_outputs),
):
    print(_banner)
    compute_rouge(_outputs)

for head 0 


INFO:absl:Using default tokenizer.


{'rouge1': 0.1608135700886158, 'rouge2': 0.06756863789155886, 'rougeL': 0.12677801594999827, 'rougeLsum': 0.12662263007161959}
for head 1


INFO:absl:Using default tokenizer.


{'rouge1': 0.16076454316681926, 'rouge2': 0.06756053138555018, 'rougeL': 0.12675721285824504, 'rougeLsum': 0.12660349056751985}
for mixed


INFO:absl:Using default tokenizer.


{'rouge1': 0.16097998909798306, 'rouge2': 0.06784285437698793, 'rougeL': 0.12677887221510348, 'rougeLsum': 0.12663278598260874}
