In [57]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [58]:
import sys
import os

In [59]:
import re
import inspect

In [60]:
sys.path.append("../scripts/")

In [61]:
import json

In [62]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [63]:
from transformers import Seq2SeqTrainer

In [64]:
from transformers.models.led.modeling_led import LEDModel

In [65]:
from tqdm import tqdm
import torch
from datasets import load_dataset, load_metric
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from dataset import CitationTextGenerationDataset, CitationParagraphGenerationDataset
from torch.utils.data import Dataset, DataLoader

In [66]:
from matplotlib import pyplot as plt
import seaborn as sns
plt.rcParams["figure.figsize"] = (10, 8)
sns.set()



In [67]:
device = "cuda"

In [68]:
max_input_length = 16384
max_output_length = 1024

In [69]:
def process_data_to_model_inputs(batch, special_tokens=['[Dominant]', '[Reference]']):
    # tokenize the inputs and labels
    
    additional_special_tokens_lookup = {token: idx for token, idx in zip(tokenizer.additional_special_tokens, tokenizer.additional_special_tokens_ids)}
    special_token_ids = set([additional_special_tokens_lookup[token] for token in special_tokens])
    special_token_ids.add(tokenizer.mask_token_id)
    
    inputs = tokenizer(
        batch["source"],
        padding="max_length",
        truncation=True,
        max_length=max_input_length,
        add_special_tokens=True 
    )
    outputs = tokenizer(
        batch["target"],
        padding="max_length",
        truncation=True,
        max_length=max_output_length,
        add_special_tokens=True 
    )

    batch["input_ids"] = inputs.input_ids
    batch["attention_mask"] = inputs.attention_mask

    # create 0 global_attention_mask lists
    batch["global_attention_mask"] = len(batch["input_ids"]) * [
        [0 for _ in range(len(batch["input_ids"][0]))]
    ]

    # since above lists are references, the following line changes the 0 index for all samples
    for i_batch in range(len(batch["input_ids"])):
        for i_token in range(len(batch["input_ids"][0])):
            if batch["input_ids"][i_batch][i_token] in special_token_ids:
                batch["global_attention_mask"][i_batch][i_token] = 1
            
    batch["labels"] = outputs.input_ids

    # We have to make sure that the PAD token is ignored
    batch["labels"] = [
        [-100 if token == tokenizer.pad_token_id else token for token in labels]
        for labels in batch["labels"]
    ]
    return batch

In [15]:
path = "/home/bxm200000/models/led_paragraph_generation_v3/checkpoint-29000/"

In [47]:
tokenizer = AutoTokenizer.from_pretrained(path)
special_tokens = ['<doc>','</doc>', '[BOS]', '[Dominant]', '[Reference]', '[B_Dominant]',  '[E_Dominant]', '[B_Reference]', '[E_Reference]', '<context>', '</context>']
additional_special_tokens = {'additional_special_tokens': special_tokens}
tokenizer.add_special_tokens(additional_special_tokens)

0

In [27]:
torch.cuda.empty_cache()

In [48]:
model = AutoModelForSeq2SeqLM.from_pretrained(
    path
)

In [49]:
model = model.to(device).half()
model.eval()

LEDForConditionalGeneration(
  (led): LEDModel(
    (shared): Embedding(50276, 768, padding_idx=1)
    (encoder): LEDEncoder(
      (embed_tokens): Embedding(50276, 768, padding_idx=1)
      (embed_positions): LEDLearnedPositionalEmbedding(16384, 768, padding_idx=1)
      (layers): ModuleList(
        (0): LEDEncoderLayer(
          (self_attn): LEDEncoderAttention(
            (longformer_self_attn): LEDEncoderSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (query_global): Linear(in_features=768, out_features=768, bias=True)
              (key_global): Linear(in_features=768, out_features=768, bias=True)
              (value_global): Linear(in_features=768, out_features=768, bias=True)
            )
            (output): Linear(in_features=768, out_features=768, bias=True)
        

In [None]:
val_set = CitationParagraphGenerationDataset(
    "/home/data/XiangciLi/CORWA/annotated_test/", 
    tokenizer, 
    MAX_SENT_LEN = max_input_length,
    related_work_path='/home/data/XiangciLi/20200705v1/acl/selected_related_work.jsonl',
    cited_metadata_path='/home/data/XiangciLi/20200705v1/acl/selected_cited_metadata.jsonl',
    cited_paper_path="/home/data/XiangciLi/20200705v1/acl/selected_cited_pdf_parses.jsonl",
    citing_paper_path="/home/data/XiangciLi/20200705v1/acl/selected_pdf_parses.jsonl",
    skip_no_citations = True,
    context_sep_flag=True
)

 18%|█▊        | 64/362 [00:03<00:11, 26.74it/s]

In [70]:
def run_model(batch, model):
    processed_batch = process_data_to_model_inputs(batch, special_tokens=['[Dominant]', '[Reference]'])
    processed_batch_cuda = {}
    for key in ["input_ids", "attention_mask", "global_attention_mask", "labels"]:
        processed_batch_cuda[key] = torch.tensor(processed_batch[key]).to(device)
    predicted_abstract_ids = model.generate(
        processed_batch_cuda["input_ids"], 
        attention_mask=processed_batch_cuda["attention_mask"], 
        global_attention_mask=processed_batch_cuda["global_attention_mask"]
    )
    out = tokenizer.batch_decode(predicted_abstract_ids, skip_special_tokens=False)
    target = batch["target"]
    return out, target

In [None]:
# for batch in tqdm(DataLoader(val_set, batch_size = 1, shuffle=False)):
#     if '[Dominant]' in batch["source"][0]:
#         out, target = run_model(batch, model)
#         print("Dominant: ", out)
#         batch["source"][0] = batch["source"][0].replace("[Dominant]", "[Reference]")
#         out, target = run_model(batch, model)
#         print("Reference: ", out)
#         print("Dominant label: ", target)
#         print()
#     elif '[Reference]' in batch["source"][0]:
#         out, target = run_model(batch, model)
#         print("Reference: ", out)
#         batch["source"][0] = batch["source"][0].replace("[Reference]", "[Dominant]")
#         out, target = run_model(batch, model)
#         print("Dominant: ", out)
#         print("Reference label: ", target)
#         print()

In [71]:
def get_citations(src):
    """Get citations given source content"""
    all_citations = []
    for cite_data in src.split("[B_Reference]")[1:]:

        all_citations.append(cite_data.split("</s>")[0].strip())

    for cite_data in src.split("[B_Dominant]")[1:]:

        all_citations.append(cite_data.split("</s>")[0].strip())
    
    return all_citations

In [None]:
accumulated_data = []

#reference_predicted = []
#reference_reference = []
#dominant_predicted = []
#dominant_reference = []
for batch in tqdm(DataLoader(val_set, batch_size = 1, shuffle=False)):
    processed_batch = process_data_to_model_inputs(batch, special_tokens=['[Dominant]', '[Reference]'])
    processed_batch_cuda = {}
    for key in ["input_ids", "attention_mask", "global_attention_mask", "labels"]:
        processed_batch_cuda[key] = torch.tensor(processed_batch[key]).to(device)
    predicted_abstract_ids = model.generate(
        processed_batch_cuda["input_ids"], 
        attention_mask=processed_batch_cuda["attention_mask"], 
        global_attention_mask=processed_batch_cuda["global_attention_mask"]
    )
    out = tokenizer.batch_decode(predicted_abstract_ids, skip_special_tokens=False)
#     print(out)
    target = batch["target"]
    for o, t, part_id, source in zip(out, target, batch["id"], batch["source"]):
        accumulated_data.append(
            {"source": source, "target": t, 
             "generated": o, "part_id": part_id}
        )

In [None]:
# len(accumulated_data)

In [25]:
reference_predicted = []
reference_reference = []
dominant_predicted = []
dominant_reference = []
accumulated_data = []
i = 0
# with open(os.path.join(path, "sample_output.txt"),"w") as f:
for batch in tqdm(DataLoader(val_set, batch_size = 6, shuffle=False)):
    processed_batch = process_data_to_model_inputs(batch, special_tokens=['[Dominant]', '[Reference]'])
    processed_batch_cuda = {}
    for key in ["input_ids", "attention_mask", "global_attention_mask", "labels"]:
        processed_batch_cuda[key] = torch.tensor(processed_batch[key]).to(device)
    predicted_abstract_ids = model.generate(
        processed_batch_cuda["input_ids"], 
        attention_mask=processed_batch_cuda["attention_mask"], 
        global_attention_mask=processed_batch_cuda["global_attention_mask"]
    )
    out = tokenizer.batch_decode(predicted_abstract_ids, skip_special_tokens=True)
    target = batch["target"]
    for o, t, part_id, source in zip(out, target, batch["id"], batch["source"]):
        accumulated_data.append(
            {"source": source, "target": t, 
             "generated": o, "part_id": part_id}
        )    
    cleaned_out = []
    cleaned_target = []
    for pred, label, source in zip(out, target, batch["source"]):
       for c in get_citations(source):
           pred = pred.replace(c,"")
           label = label.replace(c,"")
       if "[Dominant]" in source:
           dominant_predicted.append(pred)
           dominant_reference.append(label)
       elif "[Reference]" in source:
           reference_predicted.append(pred)
           reference_reference.append(label)
        
    i += 1
    if i % 20 == 0:
        print(rouge.compute(
            predictions=dominant_predicted, 
            references=dominant_reference, 
            rouge_types=["rouge1","rouge2","rougeL"]
        ))
        
        print(rouge.compute(
            predictions=reference_predicted, 
            references=reference_reference, 
            rouge_types=["rouge1","rouge2","rougeL"]
        ))

  4%|▍         | 20/498 [05:43<3:04:08, 23.11s/it]

{'rouge1': AggregateScore(low=Score(precision=0.2620824714924607, recall=0.17616972818852342, fmeasure=0.1985584457245057), mid=Score(precision=0.3212742973526913, recall=0.21263331650133646, fmeasure=0.2399728189214046), high=Score(precision=0.38685212733972396, recall=0.24934519988700207, fmeasure=0.28197728111010084)), 'rouge2': AggregateScore(low=Score(precision=0.09040883367522495, recall=0.05185882077957665, fmeasure=0.06319686566070278), mid=Score(precision=0.13303445061211108, recall=0.07718802087419462, fmeasure=0.09225461959085002), high=Score(precision=0.18755884254365476, recall=0.10826859441603635, fmeasure=0.12762758099771973)), 'rougeL': AggregateScore(low=Score(precision=0.2245291910134432, recall=0.14947390516012257, fmeasure=0.1696338451263147), mid=Score(precision=0.27234959590567065, recall=0.1800089990693695, fmeasure=0.20292593659081282), high=Score(precision=0.3309965572756981, recall=0.21385803550530336, fmeasure=0.24264302517790187))}
{'rouge1': AggregateScore(

  8%|▊         | 40/498 [13:09<2:32:10, 19.94s/it]

{'rouge1': AggregateScore(low=Score(precision=0.267369985331449, recall=0.19214676365740183, fmeasure=0.21189592352827513), mid=Score(precision=0.30109561075470914, recall=0.21322391057897044, fmeasure=0.23598497870154928), high=Score(precision=0.3351888613101005, recall=0.23715076068675678, fmeasure=0.260529217057712)), 'rouge2': AggregateScore(low=Score(precision=0.07925163353353526, recall=0.051474993638085144, fmeasure=0.059566012304695555), mid=Score(precision=0.10210793665058815, recall=0.06626541243069929, fmeasure=0.0762775466155323), high=Score(precision=0.13008556064091073, recall=0.08278740102417265, fmeasure=0.09427889258584592)), 'rougeL': AggregateScore(low=Score(precision=0.224644453456219, recall=0.16081477742354958, fmeasure=0.17871463171064172), mid=Score(precision=0.25129364655335995, recall=0.18076419177975223, fmeasure=0.1984281750623746), high=Score(precision=0.28068767052919097, recall=0.20054455811677657, fmeasure=0.2182232696621422))}
{'rouge1': AggregateScore(

 12%|█▏        | 60/498 [21:23<3:09:43, 25.99s/it]

{'rouge1': AggregateScore(low=Score(precision=0.2556206205841671, recall=0.19670865595953066, fmeasure=0.20987778166009252), mid=Score(precision=0.2811102993595077, recall=0.21414015670362113, fmeasure=0.2280157050625966), high=Score(precision=0.30633290284782666, recall=0.23443554483889856, fmeasure=0.2480001369701635)), 'rouge2': AggregateScore(low=Score(precision=0.07121970804888933, recall=0.04877486697492516, fmeasure=0.05497350776173324), mid=Score(precision=0.08877014690916833, recall=0.06085718346624055, fmeasure=0.06829259895330142), high=Score(precision=0.1077853704209946, recall=0.07399589825843945, fmeasure=0.08270770497880221)), 'rougeL': AggregateScore(low=Score(precision=0.21552755509173555, recall=0.16634175517461489, fmeasure=0.17629928691965324), mid=Score(precision=0.23626192490872927, recall=0.18153475158721746, fmeasure=0.19292141781886654), high=Score(precision=0.25770252864769727, recall=0.19759464736118032, fmeasure=0.20778255428292702))}
{'rouge1': AggregateSco

 16%|█▌        | 80/498 [28:26<2:35:34, 22.33s/it]

{'rouge1': AggregateScore(low=Score(precision=0.26699421482993385, recall=0.1981169090364383, fmeasure=0.21350148287843757), mid=Score(precision=0.2899590529719958, recall=0.21413533099804744, fmeasure=0.23092776347447347), high=Score(precision=0.313821569253569, recall=0.23194527101446513, fmeasure=0.2483552282481004)), 'rouge2': AggregateScore(low=Score(precision=0.07733708655809239, recall=0.050952316782600024, fmeasure=0.05797608481967203), mid=Score(precision=0.09395912364963638, recall=0.06266572635927198, fmeasure=0.07031609762446978), high=Score(precision=0.11190555756264915, recall=0.07451590129978294, fmeasure=0.0833145728079423)), 'rougeL': AggregateScore(low=Score(precision=0.224194186335918, recall=0.16596806020130433, fmeasure=0.17968451805827604), mid=Score(precision=0.24428184519015206, recall=0.1810836550615043, fmeasure=0.194558929795681), high=Score(precision=0.2648551511708475, recall=0.19567357474083277, fmeasure=0.20871382174165226))}
{'rouge1': AggregateScore(low

 20%|█▉        | 99/498 [34:57<1:58:44, 17.86s/it]

{'rouge1': AggregateScore(low=Score(precision=0.27359749344556916, recall=0.20124738935908468, fmeasure=0.2178284333969935), mid=Score(precision=0.29353229117807145, recall=0.2171209384032566, fmeasure=0.23402619316456055), high=Score(precision=0.3152509539421709, recall=0.23422970581255706, fmeasure=0.250472127647031)), 'rouge2': AggregateScore(low=Score(precision=0.08100259165837788, recall=0.05637138899939226, fmeasure=0.062146026165118995), mid=Score(precision=0.09623459866452036, recall=0.06765274259480347, fmeasure=0.07432712542977073), high=Score(precision=0.1115651977692859, recall=0.07898255251527515, fmeasure=0.08640366042386258)), 'rougeL': AggregateScore(low=Score(precision=0.22736842482475195, recall=0.16946483402995627, fmeasure=0.18321451525831217), mid=Score(precision=0.2455838438338378, recall=0.18247558053224822, fmeasure=0.1961270352206464), high=Score(precision=0.26392521405830044, recall=0.19762061004994994, fmeasure=0.21037573348411767))}


 20%|██        | 100/498 [35:17<2:01:31, 18.32s/it]

{'rouge1': AggregateScore(low=Score(precision=0.23049963086591735, recall=0.21933784896206754, fmeasure=0.20229182423511732), mid=Score(precision=0.26662760269246233, recall=0.253679909692881, fmeasure=0.23650007296190215), high=Score(precision=0.30249421095435236, recall=0.2907509556269443, fmeasure=0.27075952657984426)), 'rouge2': AggregateScore(low=Score(precision=0.06796581923856591, recall=0.06342521375786786, fmeasure=0.05897989944354685), mid=Score(precision=0.09081932412526009, recall=0.08689184732985444, fmeasure=0.08159836129881903), high=Score(precision=0.11609232746894348, recall=0.11240347403983571, fmeasure=0.10456008027161988)), 'rougeL': AggregateScore(low=Score(precision=0.22131562659506018, recall=0.21085431837457533, fmeasure=0.19520342973885138), mid=Score(precision=0.25826719878174875, recall=0.2469593074551753, fmeasure=0.23072126218890565), high=Score(precision=0.2961130271205235, recall=0.2837260695007361, fmeasure=0.264583325270732))}


 24%|██▍       | 119/498 [42:16<2:10:00, 20.58s/it]

{'rouge1': AggregateScore(low=Score(precision=0.2734446343233108, recall=0.20189564874933646, fmeasure=0.21782295786325045), mid=Score(precision=0.29310519102903543, recall=0.21729726016677187, fmeasure=0.2326756348046548), high=Score(precision=0.31361499641546886, recall=0.23268192372061525, fmeasure=0.24812102494289306)), 'rouge2': AggregateScore(low=Score(precision=0.08553132485568393, recall=0.06009488108038166, fmeasure=0.06585749705770301), mid=Score(precision=0.09897495758054989, recall=0.07034917473853486, fmeasure=0.07638927033596052), high=Score(precision=0.11548203631198334, recall=0.0809757462699537, fmeasure=0.08774106465050235)), 'rougeL': AggregateScore(low=Score(precision=0.2284904712640852, recall=0.1710532018012355, fmeasure=0.18307680292844256), mid=Score(precision=0.2461576132728419, recall=0.18368924793823455, fmeasure=0.19611789747882652), high=Score(precision=0.26397342827968356, recall=0.19840504624741997, fmeasure=0.20986161188743574))}


 24%|██▍       | 120/498 [42:37<2:09:47, 20.60s/it]

{'rouge1': AggregateScore(low=Score(precision=0.23752234470978648, recall=0.22259691461131395, fmeasure=0.20768138906484931), mid=Score(precision=0.26884721132389566, recall=0.2533370020945258, fmeasure=0.23741330481398065), high=Score(precision=0.30112234760797857, recall=0.2856956384288537, fmeasure=0.26632011362579555)), 'rouge2': AggregateScore(low=Score(precision=0.07378315260956202, recall=0.06970990101656788, fmeasure=0.06478613744552315), mid=Score(precision=0.09649562332138173, recall=0.0919873707429508, fmeasure=0.08508812279184844), high=Score(precision=0.12109289691161806, recall=0.1160666694920111, fmeasure=0.10699879245947165)), 'rougeL': AggregateScore(low=Score(precision=0.22850725545675726, recall=0.21578877012921818, fmeasure=0.20270231775427916), mid=Score(precision=0.2603537701220499, recall=0.24732472380193998, fmeasure=0.231052484627518), high=Score(precision=0.2910370933230073, recall=0.27954096867551653, fmeasure=0.25996856640076643))}


 28%|██▊       | 139/498 [49:48<1:53:32, 18.98s/it]

{'rouge1': AggregateScore(low=Score(precision=0.27025352075523706, recall=0.20200411070647023, fmeasure=0.21641305197173874), mid=Score(precision=0.2892233991582924, recall=0.21608623382209252, fmeasure=0.22979447401969538), high=Score(precision=0.3094813837749876, recall=0.2320411200077711, fmeasure=0.2444819847420506)), 'rouge2': AggregateScore(low=Score(precision=0.08332967817047271, recall=0.059874721539991876, fmeasure=0.0646144664712299), mid=Score(precision=0.09649298543864951, recall=0.0699100591088985, fmeasure=0.07477086397416684), high=Score(precision=0.11200318471471483, recall=0.08095624095970481, fmeasure=0.08686057672521254)), 'rougeL': AggregateScore(low=Score(precision=0.22755803081138645, recall=0.170940543128077, fmeasure=0.18243620187764595), mid=Score(precision=0.24345528296530108, recall=0.18356356178673564, fmeasure=0.19453470121956246), high=Score(precision=0.2615430545641356, recall=0.19800240807333833, fmeasure=0.20813081910395428))}


 28%|██▊       | 140/498 [50:06<1:52:36, 18.87s/it]

{'rouge1': AggregateScore(low=Score(precision=0.23674693382497664, recall=0.22904376420260886, fmeasure=0.21213950504167203), mid=Score(precision=0.2647895016094296, recall=0.25863720885168295, fmeasure=0.23738367078317724), high=Score(precision=0.2937690743678565, recall=0.2895736889505559, fmeasure=0.265786283798327)), 'rouge2': AggregateScore(low=Score(precision=0.08026058369668511, recall=0.0796865715792393, fmeasure=0.07251506058403058), mid=Score(precision=0.10211328397010733, recall=0.1008211910834316, fmeasure=0.09187230995466429), high=Score(precision=0.12456821116569866, recall=0.12272148383430415, fmeasure=0.1124085789789561)), 'rougeL': AggregateScore(low=Score(precision=0.22787810877601056, recall=0.22446882797620463, fmeasure=0.20671914546444253), mid=Score(precision=0.2574335412779462, recall=0.2520750735780054, fmeasure=0.23210008845381808), high=Score(precision=0.28479446372357614, recall=0.2812977534774913, fmeasure=0.257573735860109))}


 32%|███▏      | 159/498 [57:22<2:26:55, 26.00s/it]

{'rouge1': AggregateScore(low=Score(precision=0.27043317180092397, recall=0.20260965317181337, fmeasure=0.21782703683371107), mid=Score(precision=0.2889748325409206, recall=0.21641923713285677, fmeasure=0.23039131636443372), high=Score(precision=0.30795839908884615, recall=0.2312500146605569, fmeasure=0.24497127800963417)), 'rouge2': AggregateScore(low=Score(precision=0.08389365613447844, recall=0.06045338540315964, fmeasure=0.06513549870649027), mid=Score(precision=0.09639870144402636, recall=0.06959557213059586, fmeasure=0.07496988300714297), high=Score(precision=0.110954041879183, recall=0.07968486549486792, fmeasure=0.08517273391559911)), 'rougeL': AggregateScore(low=Score(precision=0.22816267734515372, recall=0.17100887426307568, fmeasure=0.1833148166406965), mid=Score(precision=0.24288454104724788, recall=0.1828292232795417, fmeasure=0.19431547241330172), high=Score(precision=0.25894155333484853, recall=0.19621675967504534, fmeasure=0.20754784817625266))}


 32%|███▏      | 160/498 [57:39<2:09:59, 23.08s/it]

{'rouge1': AggregateScore(low=Score(precision=0.2482237674697862, recall=0.2408819165796112, fmeasure=0.22212556270108155), mid=Score(precision=0.2738554008110189, recall=0.265837708416237, fmeasure=0.24454083526896803), high=Score(precision=0.3019897691658554, recall=0.2940191531836425, fmeasure=0.27049568968907284)), 'rouge2': AggregateScore(low=Score(precision=0.08084732476749197, recall=0.08241293658306754, fmeasure=0.07273996835111798), mid=Score(precision=0.1009028484668574, recall=0.10103790138970001, fmeasure=0.09026050977348535), high=Score(precision=0.12393292276298846, recall=0.12318519410330303, fmeasure=0.11092785073366207)), 'rougeL': AggregateScore(low=Score(precision=0.23902712833101006, recall=0.23534656803639337, fmeasure=0.21555549942499203), mid=Score(precision=0.2653238941361997, recall=0.2612929467762848, fmeasure=0.23895477825083306), high=Score(precision=0.2931331865478483, recall=0.28803066750963174, fmeasure=0.2645081137995941))}


 36%|███▌      | 179/498 [1:05:05<2:18:58, 26.14s/it]

{'rouge1': AggregateScore(low=Score(precision=0.27192739208918065, recall=0.20277410326682097, fmeasure=0.21726652841060232), mid=Score(precision=0.2880125195674939, recall=0.21623060602027833, fmeasure=0.22948058728074155), high=Score(precision=0.30469773912042614, recall=0.22918655005807012, fmeasure=0.24259758132159523)), 'rouge2': AggregateScore(low=Score(precision=0.08130909948699848, recall=0.05818567284191436, fmeasure=0.06303077643665947), mid=Score(precision=0.09349892759978581, recall=0.06738587759629092, fmeasure=0.07220764949193897), high=Score(precision=0.10641937470871013, recall=0.0765182850018906, fmeasure=0.08151403490088967)), 'rougeL': AggregateScore(low=Score(precision=0.22486939097623485, recall=0.16996659076979995, fmeasure=0.18109047889013868), mid=Score(precision=0.23971108293946708, recall=0.1809489922703842, fmeasure=0.19150687246721007), high=Score(precision=0.25475392367776, recall=0.19273687667720532, fmeasure=0.20322692870197967))}


 36%|███▌      | 180/498 [1:05:42<2:34:49, 29.21s/it]

{'rouge1': AggregateScore(low=Score(precision=0.25002080394847465, recall=0.24683421839982264, fmeasure=0.2244533541442863), mid=Score(precision=0.2763662277029265, recall=0.26969427130816825, fmeasure=0.24731643046434076), high=Score(precision=0.30402879584355147, recall=0.29779194285864946, fmeasure=0.27278570397461194)), 'rouge2': AggregateScore(low=Score(precision=0.08268517304934958, recall=0.08112846396266914, fmeasure=0.07251950214166779), mid=Score(precision=0.1031279919061075, recall=0.10095717639849155, fmeasure=0.09097130262870404), high=Score(precision=0.12453793644008476, recall=0.12206319862461015, fmeasure=0.11059988358666104)), 'rougeL': AggregateScore(low=Score(precision=0.24401999301307684, recall=0.2392417993876544, fmeasure=0.21909055558526686), mid=Score(precision=0.2688914873784816, recall=0.26440803285180225, fmeasure=0.24159541420424396), high=Score(precision=0.2942983598253257, recall=0.2891659429578294, fmeasure=0.26516018432948224))}


 40%|███▉      | 199/498 [1:12:28<1:49:18, 21.93s/it]

{'rouge1': AggregateScore(low=Score(precision=0.2743845159315814, recall=0.2065768739249585, fmeasure=0.22015314288006432), mid=Score(precision=0.29042275969063147, recall=0.2187506932216807, fmeasure=0.2319082706147636), high=Score(precision=0.30755718143474275, recall=0.23164044418229982, fmeasure=0.24484148555712285)), 'rouge2': AggregateScore(low=Score(precision=0.08499314163735772, recall=0.0615118321443629, fmeasure=0.06637759886543806), mid=Score(precision=0.09742888943938902, recall=0.07032010603003286, fmeasure=0.07564216189048018), high=Score(precision=0.11086140282768575, recall=0.08032479612788035, fmeasure=0.08590550479519533)), 'rougeL': AggregateScore(low=Score(precision=0.22770689068819505, recall=0.17221446887441477, fmeasure=0.18318687191156494), mid=Score(precision=0.24246929482660773, recall=0.18350282557335545, fmeasure=0.1940078634918443), high=Score(precision=0.2579911200293715, recall=0.19550345326139715, fmeasure=0.2059174573859215))}


 40%|████      | 200/498 [1:13:04<2:09:16, 26.03s/it]

{'rouge1': AggregateScore(low=Score(precision=0.25302866635616666, recall=0.2547590859741988, fmeasure=0.231003430749862), mid=Score(precision=0.2794338338557778, recall=0.2800774958644575, fmeasure=0.2539656458507292), high=Score(precision=0.3055505348896384, recall=0.3058491383397928, fmeasure=0.2767612472994492)), 'rouge2': AggregateScore(low=Score(precision=0.09380388103595362, recall=0.09743657163456483, fmeasure=0.08587132585861851), mid=Score(precision=0.11111395020566069, recall=0.11516331231484228, fmeasure=0.1016577513452929), high=Score(precision=0.13230077660093478, recall=0.13625372549414216, fmeasure=0.12027971860913507)), 'rougeL': AggregateScore(low=Score(precision=0.24627672856503816, recall=0.2506220401775832, fmeasure=0.22550468019648093), mid=Score(precision=0.27223517973865763, recall=0.2746256784863914, fmeasure=0.2484169923482058), high=Score(precision=0.29495232528196835, recall=0.29762795301566997, fmeasure=0.26888186077147985))}


 44%|████▍     | 219/498 [1:20:06<1:27:56, 18.91s/it]

{'rouge1': AggregateScore(low=Score(precision=0.2734227915101112, recall=0.20545470052190407, fmeasure=0.21810971473665755), mid=Score(precision=0.2893495643154497, recall=0.2175669671491084, fmeasure=0.23056711881583403), high=Score(precision=0.30505621700340296, recall=0.23003832517970413, fmeasure=0.2420519049750127)), 'rouge2': AggregateScore(low=Score(precision=0.08488687284068837, recall=0.06137906970171557, fmeasure=0.0661756873810927), mid=Score(precision=0.09603253900763065, recall=0.06970196449405543, fmeasure=0.07460889630110276), high=Score(precision=0.10956705676317163, recall=0.07915088028126087, fmeasure=0.08466599437923482)), 'rougeL': AggregateScore(low=Score(precision=0.2276349746370628, recall=0.1722069687044518, fmeasure=0.18267466965733062), mid=Score(precision=0.24091282667700076, recall=0.18275501745385886, fmeasure=0.19276834871103798), high=Score(precision=0.2550194617741483, recall=0.19437655847587804, fmeasure=0.20385827297968495))}


 44%|████▍     | 220/498 [1:20:34<1:39:36, 21.50s/it]

{'rouge1': AggregateScore(low=Score(precision=0.2488195965948396, recall=0.24908323044814074, fmeasure=0.2269588803459902), mid=Score(precision=0.2729121151439259, recall=0.27343731897804585, fmeasure=0.24812340195177335), high=Score(precision=0.2974456716515307, recall=0.2971892111734413, fmeasure=0.2707400778649129)), 'rouge2': AggregateScore(low=Score(precision=0.09254358840713955, recall=0.09464302037616458, fmeasure=0.0836650045415147), mid=Score(precision=0.1106765719256447, recall=0.1128763395166994, fmeasure=0.10033544913238836), high=Score(precision=0.1296941289586995, recall=0.13167071008453646, fmeasure=0.11720785255041653)), 'rougeL': AggregateScore(low=Score(precision=0.24296047720794392, recall=0.24684313952793094, fmeasure=0.2213500830965903), mid=Score(precision=0.26640018719584296, recall=0.26837594194487113, fmeasure=0.24352127817070512), high=Score(precision=0.2896850699403985, recall=0.28945154966709025, fmeasure=0.26357716075716997))}


 48%|████▊     | 239/498 [1:27:49<1:33:06, 21.57s/it]

{'rouge1': AggregateScore(low=Score(precision=0.2780432082429384, recall=0.20687933787877252, fmeasure=0.2206691851377201), mid=Score(precision=0.2931010590326528, recall=0.2182600258099635, fmeasure=0.23233606226862635), high=Score(precision=0.3084753697769739, recall=0.22914450778192108, fmeasure=0.24347930214572042)), 'rouge2': AggregateScore(low=Score(precision=0.08676851649282107, recall=0.06167789468924968, fmeasure=0.06682663086223556), mid=Score(precision=0.09823689589846243, recall=0.0691266588043237, fmeasure=0.07523318755889309), high=Score(precision=0.11003423180279555, recall=0.0778482217029422, fmeasure=0.08453426290568111)), 'rougeL': AggregateScore(low=Score(precision=0.23082180635609126, recall=0.17336619073961748, fmeasure=0.18468382558839064), mid=Score(precision=0.24418619543121586, recall=0.1827594093643227, fmeasure=0.1942355713502042), high=Score(precision=0.2566081198130312, recall=0.19361610723335881, fmeasure=0.20432142222315638))}


 48%|████▊     | 240/498 [1:28:35<2:04:26, 28.94s/it]

{'rouge1': AggregateScore(low=Score(precision=0.25057251601035657, recall=0.251851336077815, fmeasure=0.22977977467624064), mid=Score(precision=0.2730503120119899, recall=0.27513366426928276, fmeasure=0.2497113670597282), high=Score(precision=0.2958887854573853, recall=0.2972111841052782, fmeasure=0.27008963087549975)), 'rouge2': AggregateScore(low=Score(precision=0.09395918395873007, recall=0.09583948569051036, fmeasure=0.08474419362736985), mid=Score(precision=0.11008109727827939, recall=0.1132726253336915, fmeasure=0.1002947410483246), high=Score(precision=0.12905971064462848, recall=0.1329838145478962, fmeasure=0.11787522949900055)), 'rougeL': AggregateScore(low=Score(precision=0.2442528361091353, recall=0.2477893844883114, fmeasure=0.22305478379793317), mid=Score(precision=0.2669731813536952, recall=0.270457155572678, fmeasure=0.24560653232117702), high=Score(precision=0.28827571268156865, recall=0.291496403694926, fmeasure=0.2647767931191354))}


 52%|█████▏    | 259/498 [1:35:03<1:08:03, 17.08s/it]

{'rouge1': AggregateScore(low=Score(precision=0.27317839741897915, recall=0.20619157664827414, fmeasure=0.21845979869537585), mid=Score(precision=0.28722539849581996, recall=0.21690325404884864, fmeasure=0.2289663721790073), high=Score(precision=0.3019137934765493, recall=0.22751093857347068, fmeasure=0.2397629965203607)), 'rouge2': AggregateScore(low=Score(precision=0.08492126501958895, recall=0.06072625494853328, fmeasure=0.06542135819668278), mid=Score(precision=0.0953650585740319, recall=0.06743267306448236, fmeasure=0.07307707688787957), high=Score(precision=0.10724641398254421, recall=0.07525797170680355, fmeasure=0.08165894069894498)), 'rougeL': AggregateScore(low=Score(precision=0.22758490225303998, recall=0.17234175191340637, fmeasure=0.18265602819587698), mid=Score(precision=0.23978231766604424, recall=0.18196353396320167, fmeasure=0.19165501173852714), high=Score(precision=0.25252443221821924, recall=0.19248364803982146, fmeasure=0.20143815963839432))}


 52%|█████▏    | 260/498 [1:35:25<1:14:35, 18.80s/it]

{'rouge1': AggregateScore(low=Score(precision=0.26390108446853505, recall=0.2724137536555867, fmeasure=0.24532643616664535), mid=Score(precision=0.2862941589747829, recall=0.29377954817325025, fmeasure=0.26572782591933963), high=Score(precision=0.3089839902442613, recall=0.3168500979318071, fmeasure=0.2859134976096658)), 'rouge2': AggregateScore(low=Score(precision=0.10044404541195479, recall=0.1091252112814034, fmeasure=0.09368241873967376), mid=Score(precision=0.11794396136585217, recall=0.12665140634982408, fmeasure=0.10980605857296494), high=Score(precision=0.13571529706067656, recall=0.1464774168408698, fmeasure=0.1262964672659076)), 'rougeL': AggregateScore(low=Score(precision=0.25884263625873066, recall=0.2683409302724056, fmeasure=0.2423858912998409), mid=Score(precision=0.28031274841088805, recall=0.28937081464841563, fmeasure=0.26115336905532793), high=Score(precision=0.30222011336895477, recall=0.3108405525228439, fmeasure=0.2815324981279416))}


 56%|█████▌    | 279/498 [1:43:21<1:33:06, 25.51s/it]

{'rouge1': AggregateScore(low=Score(precision=0.2779472510140245, recall=0.20927544337588772, fmeasure=0.2217027976998672), mid=Score(precision=0.29071197990794156, recall=0.21906794009383795, fmeasure=0.23142349694088254), high=Score(precision=0.30500017686509795, recall=0.22980505167949222, fmeasure=0.2424120177877598)), 'rouge2': AggregateScore(low=Score(precision=0.08570498248331535, recall=0.06153837927102687, fmeasure=0.06685334889869593), mid=Score(precision=0.09680009808346735, recall=0.06896606048065539, fmeasure=0.07445159312890264), high=Score(precision=0.10794820095612831, recall=0.07687983874865474, fmeasure=0.08307608996612166)), 'rougeL': AggregateScore(low=Score(precision=0.22870143300839071, recall=0.174538024794529, fmeasure=0.1839390683310854), mid=Score(precision=0.24164784786710958, recall=0.1836280802233537, fmeasure=0.19320315322662998), high=Score(precision=0.2550001092387962, recall=0.19338539963551987, fmeasure=0.20264349632148376))}


 56%|█████▌    | 280/498 [1:43:38<1:23:47, 23.06s/it]

{'rouge1': AggregateScore(low=Score(precision=0.26852996143078495, recall=0.2771216409817299, fmeasure=0.24992743335672177), mid=Score(precision=0.290790004656303, recall=0.29876366468406707, fmeasure=0.2698821014720738), high=Score(precision=0.31084167217188413, recall=0.320665393805704, fmeasure=0.2896698829062898)), 'rouge2': AggregateScore(low=Score(precision=0.10626912030968495, recall=0.11489845352570578, fmeasure=0.09984029995806323), mid=Score(precision=0.12326618969363926, recall=0.13263822196491365, fmeasure=0.11512300390410855), high=Score(precision=0.13990231598417047, recall=0.1509780293163927, fmeasure=0.13101986221354522)), 'rougeL': AggregateScore(low=Score(precision=0.2641949530496415, recall=0.27331245625831724, fmeasure=0.24675336895045896), mid=Score(precision=0.28396612382424397, recall=0.29436508737406886, fmeasure=0.26518638413620094), high=Score(precision=0.30399608564868263, recall=0.31372038639479344, fmeasure=0.2835804537567507))}


 60%|██████    | 299/498 [1:51:32<1:16:46, 23.15s/it]

{'rouge1': AggregateScore(low=Score(precision=0.27976853931578394, recall=0.2106937297720428, fmeasure=0.2236999923551524), mid=Score(precision=0.29197389843191557, recall=0.2203942139260928, fmeasure=0.23273417261123647), high=Score(precision=0.30619854456477014, recall=0.23034881224529663, fmeasure=0.24324113692726573)), 'rouge2': AggregateScore(low=Score(precision=0.08789116728982878, recall=0.062365219556912765, fmeasure=0.06799423724800588), mid=Score(precision=0.0978635736931216, recall=0.0692476601200108, fmeasure=0.07531007831694692), high=Score(precision=0.10872308203978968, recall=0.07676794649288715, fmeasure=0.08324782919713015)), 'rougeL': AggregateScore(low=Score(precision=0.23223409837180456, recall=0.17622398607078354, fmeasure=0.1860224812470455), mid=Score(precision=0.2438476116892695, recall=0.18496827792270804, fmeasure=0.1947816983599931), high=Score(precision=0.25618689012695794, recall=0.1948597642341046, fmeasure=0.20431022997372025))}


 60%|██████    | 300/498 [1:51:49<1:10:00, 21.21s/it]

{'rouge1': AggregateScore(low=Score(precision=0.26874843588081204, recall=0.28330238330491175, fmeasure=0.2516310071917021), mid=Score(precision=0.28973635681401944, recall=0.3028116907961571, fmeasure=0.271026722588908), high=Score(precision=0.3111619533019403, recall=0.3247107646254842, fmeasure=0.2907763361408146)), 'rouge2': AggregateScore(low=Score(precision=0.10857942768992557, recall=0.11780652194504705, fmeasure=0.10133728038654868), mid=Score(precision=0.12551298134221112, recall=0.13566476003210542, fmeasure=0.11749016305587441), high=Score(precision=0.1431024286007678, recall=0.15439371236886834, fmeasure=0.1341510724834744)), 'rougeL': AggregateScore(low=Score(precision=0.26395499417783236, recall=0.278083919146857, fmeasure=0.2483440067498538), mid=Score(precision=0.28361270541217853, recall=0.29848018427665945, fmeasure=0.26606300503738634), high=Score(precision=0.3049043723139166, recall=0.31961391448337995, fmeasure=0.28520897194476186))}


 64%|██████▍   | 319/498 [1:58:09<1:09:02, 23.14s/it]

{'rouge1': AggregateScore(low=Score(precision=0.2825832912421814, recall=0.21402033905479628, fmeasure=0.22597484873069998), mid=Score(precision=0.29474432723772537, recall=0.22404469134122426, fmeasure=0.23575096127195994), high=Score(precision=0.30846111463652603, recall=0.23335889473392274, fmeasure=0.24567214855672448)), 'rouge2': AggregateScore(low=Score(precision=0.089148168900272, recall=0.06380371440787216, fmeasure=0.06915018341900137), mid=Score(precision=0.09948895801354946, recall=0.07117321413230657, fmeasure=0.07732828964714308), high=Score(precision=0.10997499533719796, recall=0.07932831220965686, fmeasure=0.08551623743890714)), 'rougeL': AggregateScore(low=Score(precision=0.23505304623817708, recall=0.17815935624306614, fmeasure=0.18809193698823815), mid=Score(precision=0.2466130305078933, recall=0.18795665396554437, fmeasure=0.19726030349138984), high=Score(precision=0.25804355126703543, recall=0.19787962974128712, fmeasure=0.20633182980560733))}


 64%|██████▍   | 320/498 [1:59:12<1:43:40, 34.95s/it]

{'rouge1': AggregateScore(low=Score(precision=0.2749789007956962, recall=0.2879360735648658, fmeasure=0.2577843519906316), mid=Score(precision=0.29474428972484834, recall=0.30720929439860406, fmeasure=0.2757411518018875), high=Score(precision=0.3144209765619682, recall=0.3274655498425747, fmeasure=0.2942348254247375)), 'rouge2': AggregateScore(low=Score(precision=0.11538088937450826, recall=0.12417082613956484, fmeasure=0.10674697184448723), mid=Score(precision=0.1324593799893384, recall=0.14193621978922466, fmeasure=0.12322766945584421), high=Score(precision=0.149144252730231, recall=0.15811895426311176, fmeasure=0.13920812300245197)), 'rougeL': AggregateScore(low=Score(precision=0.26768383885608416, recall=0.280778861127566, fmeasure=0.25044553741229325), mid=Score(precision=0.28817327669318693, recall=0.30169412756154934, fmeasure=0.2701599417176231), high=Score(precision=0.30765538824066885, recall=0.32329404258850963, fmeasure=0.28947899643365654))}


 68%|██████▊   | 339/498 [2:07:00<1:09:52, 26.37s/it]

{'rouge1': AggregateScore(low=Score(precision=0.2835899104112728, recall=0.21507950351457386, fmeasure=0.22655398291465373), mid=Score(precision=0.29591974855193565, recall=0.22466881095794083, fmeasure=0.235895235138948), high=Score(precision=0.30872064933200194, recall=0.23439579321895154, fmeasure=0.24525334060399256)), 'rouge2': AggregateScore(low=Score(precision=0.0888705220507576, recall=0.06452637838797566, fmeasure=0.0692855303495921), mid=Score(precision=0.09930093321131439, recall=0.07182348048350637, fmeasure=0.07741914192155304), high=Score(precision=0.11025493686850876, recall=0.08003621189891916, fmeasure=0.08531111689215652)), 'rougeL': AggregateScore(low=Score(precision=0.23526012889412803, recall=0.17885739757501729, fmeasure=0.18842580032780026), mid=Score(precision=0.24728666371598085, recall=0.1879747214422045, fmeasure=0.19709035408242462), high=Score(precision=0.2589333065161015, recall=0.19692340606802855, fmeasure=0.20602536882915173))}


 68%|██████▊   | 340/498 [2:07:34<1:15:51, 28.81s/it]

{'rouge1': AggregateScore(low=Score(precision=0.274966087725301, recall=0.2865307324622051, fmeasure=0.25739543565249995), mid=Score(precision=0.2938778762713582, recall=0.30657492517365637, fmeasure=0.2749727232784049), high=Score(precision=0.31422142896223715, recall=0.3268554607248944, fmeasure=0.2937169498004206)), 'rouge2': AggregateScore(low=Score(precision=0.11669125685693771, recall=0.12574768424807484, fmeasure=0.10829626776551053), mid=Score(precision=0.13451744433175786, recall=0.14377954857528777, fmeasure=0.12536699876460675), high=Score(precision=0.15214059822210513, recall=0.16029437097519447, fmeasure=0.14143696811069179)), 'rougeL': AggregateScore(low=Score(precision=0.2687692163644606, recall=0.28044749176131906, fmeasure=0.25182607045212546), mid=Score(precision=0.2876029005422162, recall=0.3007585439142775, fmeasure=0.26983475624652753), high=Score(precision=0.3070282270737728, recall=0.3227571038607283, fmeasure=0.2885811625213981))}


 72%|███████▏  | 359/498 [2:14:29<51:13, 22.11s/it]  

{'rouge1': AggregateScore(low=Score(precision=0.28158796463945146, recall=0.21563283660059224, fmeasure=0.22676923394825607), mid=Score(precision=0.2939242173374139, recall=0.22509977977955434, fmeasure=0.23585717407014833), high=Score(precision=0.3058991542855533, recall=0.2343798975429496, fmeasure=0.2450564871711128)), 'rouge2': AggregateScore(low=Score(precision=0.08670839313460407, recall=0.0637402670700393, fmeasure=0.06856303898392502), mid=Score(precision=0.09722096267814326, recall=0.07074620658475775, fmeasure=0.07608660306960473), high=Score(precision=0.10678384190586454, recall=0.07841825594453113, fmeasure=0.08400662718359543)), 'rougeL': AggregateScore(low=Score(precision=0.2330339861796811, recall=0.17919406095326507, fmeasure=0.18806852167121105), mid=Score(precision=0.2449606631446845, recall=0.18795028126874866, fmeasure=0.1966405476449654), high=Score(precision=0.25716123857260204, recall=0.19646205947479878, fmeasure=0.20523878867894788))}


 72%|███████▏  | 360/498 [2:14:58<55:32, 24.15s/it]

{'rouge1': AggregateScore(low=Score(precision=0.2696316289914175, recall=0.2821666668029417, fmeasure=0.25285454843712996), mid=Score(precision=0.28824676997215604, recall=0.30262921944553195, fmeasure=0.2705581615607281), high=Score(precision=0.30804449057601585, recall=0.3215055324080185, fmeasure=0.2886271243565678)), 'rouge2': AggregateScore(low=Score(precision=0.11364880872157421, recall=0.12310392227212945, fmeasure=0.10649712445962577), mid=Score(precision=0.12994189948034943, recall=0.1402088212335718, fmeasure=0.1215516558158831), high=Score(precision=0.14577539740402773, recall=0.15582620310869327, fmeasure=0.13597970523034236)), 'rougeL': AggregateScore(low=Score(precision=0.2632308790238699, recall=0.27808986976779193, fmeasure=0.24803852939632545), mid=Score(precision=0.28229642090662793, recall=0.29696944999776, fmeasure=0.2653027222384996), high=Score(precision=0.30157769899902753, recall=0.3181509493137394, fmeasure=0.2838929670516054))}


 76%|███████▌  | 379/498 [2:22:24<49:10, 24.79s/it]  

{'rouge1': AggregateScore(low=Score(precision=0.2816256774993404, recall=0.21471826925824097, fmeasure=0.2259307398212442), mid=Score(precision=0.2925035591672226, recall=0.2239869196207047, fmeasure=0.23456614522056218), high=Score(precision=0.30392365322643927, recall=0.2335780200628298, fmeasure=0.2435746798800281)), 'rouge2': AggregateScore(low=Score(precision=0.08601593446307701, recall=0.06251494513729935, fmeasure=0.0670475350468239), mid=Score(precision=0.09505805848739024, recall=0.06936407408407293, fmeasure=0.07449360118168116), high=Score(precision=0.10458381659143821, recall=0.07668116459548711, fmeasure=0.082151721146236)), 'rougeL': AggregateScore(low=Score(precision=0.2326768856473304, recall=0.17818042785698215, fmeasure=0.18669826993459568), mid=Score(precision=0.24327679507556016, recall=0.18653449001714587, fmeasure=0.1951161270474189), high=Score(precision=0.2543128984292489, recall=0.19492473770725685, fmeasure=0.20358546440563882))}


 76%|███████▋  | 380/498 [2:22:45<46:23, 23.59s/it]

{'rouge1': AggregateScore(low=Score(precision=0.2688706971449693, recall=0.2820539830332895, fmeasure=0.2517123625026104), mid=Score(precision=0.28701411541868, recall=0.3016127141499693, fmeasure=0.2691726786327751), high=Score(precision=0.3060429302411054, recall=0.3202607166994928, fmeasure=0.286721532860937)), 'rouge2': AggregateScore(low=Score(precision=0.11288951846081251, recall=0.12253278518318354, fmeasure=0.10563837572690127), mid=Score(precision=0.1295431330119693, recall=0.13920576547338886, fmeasure=0.12108016542928789), high=Score(precision=0.14482150537606522, recall=0.15440840701289302, fmeasure=0.1349809887133059)), 'rougeL': AggregateScore(low=Score(precision=0.2623138201882047, recall=0.27767636736161827, fmeasure=0.2466024033305663), mid=Score(precision=0.281212557269797, recall=0.29624861819511566, fmeasure=0.2642627202024628), high=Score(precision=0.2992649084115817, recall=0.31656130713339875, fmeasure=0.2821349797066839))}


 80%|████████  | 399/498 [2:29:35<36:25, 22.08s/it]

{'rouge1': AggregateScore(low=Score(precision=0.28034436776268534, recall=0.2142831927181077, fmeasure=0.22531072161249593), mid=Score(precision=0.29203066831952806, recall=0.2229702767449781, fmeasure=0.23393976435435276), high=Score(precision=0.303404476362665, recall=0.23111465533640438, fmeasure=0.24175099946714002)), 'rouge2': AggregateScore(low=Score(precision=0.08538383828835361, recall=0.061929904224454, fmeasure=0.0668633367555188), mid=Score(precision=0.09369151391871379, recall=0.06835950280257021, fmeasure=0.07335393055375486), high=Score(precision=0.10335414851224793, recall=0.07505672819966512, fmeasure=0.08058077681673746)), 'rougeL': AggregateScore(low=Score(precision=0.23248695752815424, recall=0.1782884965607555, fmeasure=0.18672577638846788), mid=Score(precision=0.24259738795949481, recall=0.1857888899126661, fmeasure=0.19434714242806156), high=Score(precision=0.2527204863596766, recall=0.19338565125751847, fmeasure=0.20208940412640106))}


 80%|████████  | 400/498 [2:29:49<32:07, 19.67s/it]

{'rouge1': AggregateScore(low=Score(precision=0.2654782786389271, recall=0.2785626873578741, fmeasure=0.2488405953149321), mid=Score(precision=0.28329721526559254, recall=0.2977520177194429, fmeasure=0.26591003041742767), high=Score(precision=0.30201266315599096, recall=0.3149076500470893, fmeasure=0.28344771600639806)), 'rouge2': AggregateScore(low=Score(precision=0.11046850997241994, recall=0.12150762173012714, fmeasure=0.10404974098479154), mid=Score(precision=0.1271382326531863, recall=0.13683953260290593, fmeasure=0.11898803373002576), high=Score(precision=0.1409140342693013, recall=0.15083488144737925, fmeasure=0.1318306130370923)), 'rougeL': AggregateScore(low=Score(precision=0.2596194883457751, recall=0.27320935371101396, fmeasure=0.24378078794413574), mid=Score(precision=0.2772636615976366, recall=0.292287992507966, fmeasure=0.26074415519484284), high=Score(precision=0.2944432741683617, recall=0.3109610558179431, fmeasure=0.27702467858310453))}


 84%|████████▍ | 419/498 [2:37:10<35:44, 27.14s/it]

{'rouge1': AggregateScore(low=Score(precision=0.2811674594240449, recall=0.21488529551232902, fmeasure=0.22552185661308904), mid=Score(precision=0.2920341076142723, recall=0.22328976801504907, fmeasure=0.23364709465358752), high=Score(precision=0.30417211395368615, recall=0.23134161636433762, fmeasure=0.24130242096131077)), 'rouge2': AggregateScore(low=Score(precision=0.08561070006172161, recall=0.061422626732181865, fmeasure=0.06638719460336538), mid=Score(precision=0.09412764689639386, recall=0.06819427799801417, fmeasure=0.07317252994704235), high=Score(precision=0.10317201574164886, recall=0.07499186090030088, fmeasure=0.08031104131377159)), 'rougeL': AggregateScore(low=Score(precision=0.23290088112372764, recall=0.1783686040446851, fmeasure=0.18636961034505117), mid=Score(precision=0.24272726396746314, recall=0.18564421279169696, fmeasure=0.19385034292362804), high=Score(precision=0.25206741998932525, recall=0.19328256584209655, fmeasure=0.20098085948477773))}


 84%|████████▍ | 420/498 [2:37:43<37:28, 28.82s/it]

{'rouge1': AggregateScore(low=Score(precision=0.26825636541274483, recall=0.27898134827412946, fmeasure=0.2509300072436518), mid=Score(precision=0.285898860285001, recall=0.29811929050613317, fmeasure=0.2672303046134088), high=Score(precision=0.30394130487062415, recall=0.3150920141917176, fmeasure=0.28406974112841143)), 'rouge2': AggregateScore(low=Score(precision=0.1132582829989938, recall=0.12199032343930385, fmeasure=0.1053567567293087), mid=Score(precision=0.12803433770646366, recall=0.13594643082681085, fmeasure=0.11893416618492669), high=Score(precision=0.14145360242854668, recall=0.15049842073413294, fmeasure=0.13142738775994525)), 'rougeL': AggregateScore(low=Score(precision=0.26213870806569134, recall=0.27595319950767894, fmeasure=0.24641185582624955), mid=Score(precision=0.2791769927962918, recall=0.29237954000247035, fmeasure=0.26187821769475583), high=Score(precision=0.297167185401649, recall=0.31075123447861475, fmeasure=0.2788774890002893))}


 88%|████████▊ | 439/498 [2:44:45<22:30, 22.90s/it]

{'rouge1': AggregateScore(low=Score(precision=0.28123332548011637, recall=0.2135454412125458, fmeasure=0.22467105563389467), mid=Score(precision=0.29211237693084013, recall=0.2221091385451963, fmeasure=0.23289201809789084), high=Score(precision=0.3035218685800925, recall=0.22986094182437464, fmeasure=0.24062027384820278)), 'rouge2': AggregateScore(low=Score(precision=0.0854010906481651, recall=0.06100891876861171, fmeasure=0.06608040865507588), mid=Score(precision=0.09366699069467607, recall=0.06744646213559555, fmeasure=0.07254055127364606), high=Score(precision=0.10254860263753647, recall=0.0738122647540146, fmeasure=0.0792055008897587)), 'rougeL': AggregateScore(low=Score(precision=0.23171025580900917, recall=0.17659603879550717, fmeasure=0.1856582381765627), mid=Score(precision=0.24196709964010604, recall=0.18432204964054433, fmeasure=0.19299764776008), high=Score(precision=0.2518046617734272, recall=0.19152843722699156, fmeasure=0.20027354437284967))}


 88%|████████▊ | 440/498 [2:45:00<19:54, 20.59s/it]

{'rouge1': AggregateScore(low=Score(precision=0.26458844233126866, recall=0.27825303444711175, fmeasure=0.24747030819295282), mid=Score(precision=0.2815923852857133, recall=0.29640951555289075, fmeasure=0.2636878022423975), high=Score(precision=0.29931783905990544, recall=0.3137228396264439, fmeasure=0.2795695126628711)), 'rouge2': AggregateScore(low=Score(precision=0.11019551694075591, recall=0.12027486924935608, fmeasure=0.10283018020714607), mid=Score(precision=0.12430329655990857, recall=0.13448370274500443, fmeasure=0.1160606643544702), high=Score(precision=0.13884944161949453, recall=0.15028017446009426, fmeasure=0.12950150604242958)), 'rougeL': AggregateScore(low=Score(precision=0.25812286209248975, recall=0.27303921505944834, fmeasure=0.2424619448565991), mid=Score(precision=0.2756396642790342, recall=0.29074710765955447, fmeasure=0.25869394485640895), high=Score(precision=0.29292485286119413, recall=0.30845947972658655, fmeasure=0.27480517740187815))}


 92%|█████████▏| 459/498 [2:52:14<12:54, 19.85s/it]

{'rouge1': AggregateScore(low=Score(precision=0.27994799752616106, recall=0.2152393135270065, fmeasure=0.2249262867402634), mid=Score(precision=0.29101725023042135, recall=0.22302379609075512, fmeasure=0.2324925223435622), high=Score(precision=0.30213823677094875, recall=0.23083327096216677, fmeasure=0.24014153393751686)), 'rouge2': AggregateScore(low=Score(precision=0.08507478519707282, recall=0.06099449967795208, fmeasure=0.06564806577653311), mid=Score(precision=0.09264105771727855, recall=0.06718726507878438, fmeasure=0.07177994696489137), high=Score(precision=0.1017240795146539, recall=0.07390466898984344, fmeasure=0.07908919947192385)), 'rougeL': AggregateScore(low=Score(precision=0.2314848056924142, recall=0.17790609467971116, fmeasure=0.18578797669761454), mid=Score(precision=0.24101689917034394, recall=0.18522562937007053, fmeasure=0.1926404506066317), high=Score(precision=0.24986954625974678, recall=0.19272930470274696, fmeasure=0.19936164104370174))}


 92%|█████████▏| 460/498 [2:52:23<10:28, 16.53s/it]

{'rouge1': AggregateScore(low=Score(precision=0.26753969389395904, recall=0.28122552165678744, fmeasure=0.2511570298732202), mid=Score(precision=0.28456653490174294, recall=0.29902095334327733, fmeasure=0.26681279474802977), high=Score(precision=0.3019916874961712, recall=0.31643613761268097, fmeasure=0.28256891156127695)), 'rouge2': AggregateScore(low=Score(precision=0.10861268138990007, recall=0.11853030357478395, fmeasure=0.10189632387399418), mid=Score(precision=0.1227696692058993, recall=0.1327451801978538, fmeasure=0.11464563194984485), high=Score(precision=0.13644696832101041, recall=0.1470139528075412, fmeasure=0.12757605997221488)), 'rougeL': AggregateScore(low=Score(precision=0.26099023329565424, recall=0.2764079485975303, fmeasure=0.24586767074998414), mid=Score(precision=0.2787450397158239, recall=0.2931769345427626, fmeasure=0.2615626711786167), high=Score(precision=0.2970160524854534, recall=0.31296292553864435, fmeasure=0.2793115599949313))}


 96%|█████████▌| 479/498 [3:00:22<07:31, 23.75s/it]

{'rouge1': AggregateScore(low=Score(precision=0.280402017344057, recall=0.21519740558427974, fmeasure=0.22467516539985657), mid=Score(precision=0.29060182861785716, recall=0.22336323397079555, fmeasure=0.23247707727901992), high=Score(precision=0.30138310123318396, recall=0.23141170006800768, fmeasure=0.24041304050121465)), 'rouge2': AggregateScore(low=Score(precision=0.08415746414542816, recall=0.06085862953931077, fmeasure=0.06525380428641869), mid=Score(precision=0.09163291354535526, recall=0.06682249616492736, fmeasure=0.07141796932898459), high=Score(precision=0.09950773727520354, recall=0.07291238885038649, fmeasure=0.07752185568169236)), 'rougeL': AggregateScore(low=Score(precision=0.23084990052178156, recall=0.17778038566104917, fmeasure=0.18494229591845274), mid=Score(precision=0.2399791098451084, recall=0.18502672173507287, fmeasure=0.19213393264907108), high=Score(precision=0.24932432945883834, recall=0.19229090015803396, fmeasure=0.19927797618048668))}


 96%|█████████▋| 480/498 [3:00:44<06:57, 23.21s/it]

{'rouge1': AggregateScore(low=Score(precision=0.26521573159484446, recall=0.2782457898148648, fmeasure=0.24935265846905438), mid=Score(precision=0.28236250378297256, recall=0.2961324865693957, fmeasure=0.26465851056972867), high=Score(precision=0.29916905946981465, recall=0.31219264913490635, fmeasure=0.28037296514066323)), 'rouge2': AggregateScore(low=Score(precision=0.10795270868081616, recall=0.11799006237633415, fmeasure=0.10104870576903559), mid=Score(precision=0.12114850319992207, recall=0.13112188869885, fmeasure=0.11314175759121886), high=Score(precision=0.13394715766971887, recall=0.14476639579607897, fmeasure=0.12520037339533643)), 'rougeL': AggregateScore(low=Score(precision=0.26006996961703766, recall=0.2735849198376858, fmeasure=0.24425965712378453), mid=Score(precision=0.27646917203986654, recall=0.29022167044877917, fmeasure=0.25961312479640075), high=Score(precision=0.29311674378183, recall=0.3086318207696876, fmeasure=0.2761188854612854))}


100%|██████████| 498/498 [3:07:11<00:00, 22.55s/it]


In [46]:
# model

In [35]:
# with open(os.path.join("/home/bxm200000/models/led_paragraph_generation_v3/", "sample_output_train_paragraph_gen_v3.2.json"), 'w') as f:
#     json.dump(accumulated_data, f)

In [28]:
len(accumulated_data)

2983

In [123]:
with open(os.path.join("/home/bxm200000/models/led_paragraph_generation_v3/", "sample_output_train_paragraph_gen_v3.2.json"), 'r') as f:
    accumulated_data = json.loads(f.read())

In [110]:
# accumulated_data[0]

In [111]:
path = "/home/bxm200000/models/led_length_control/positional_length_control_para_gen_predict_length_v21.1/checkpoint-32000/"

In [112]:
with open(os.path.abspath(os.path.join(path, "../sample_output.json")), 'r') as f:
    accumulated_data = json.loads(f.read())

In [78]:
ddd = accumulated_data[2]

In [80]:
# ddd

In [124]:
rouge = load_metric("rouge")

In [125]:
def strip_context(paragraph):
    """strip context from paragraph and only return span"""
    return paragraph.split("<context>")[1].split("</context>")[0].strip()

In [126]:
# accumulated_data[0]

In [127]:
new_accumulated_data = accumulated_data

In [128]:
predicted, references = [], []
dominant_predicted, dominant_reference = [], []
reference_predicted, reference_reference = [], []
for index, data in enumerate(new_accumulated_data):
    target = data["target"]
    gen = data["generated"]
    
    if "<context>" in gen and "</context>" in gen:
        
        target = strip_context(data["target"])
        gen = strip_context(data["generated"])
        data["generated"] = gen
        data["target"] = target
        
        new_accumulated_data[index] = data
        # remove citations         
        for c in get_citations(data["source"]):
            c = c.replace(",", "").replace(".", "")
            gen = gen.replace(",", "").replace(".", "").replace(c, "")
            target = target.replace(",", "").replace(".", "").replace(c, "")
        
        predicted.append(gen)
        references.append(target)
        
        if "[Dominant]" in data["source"]:
            dominant_predicted.append(gen)
            dominant_reference.append(target)
            
            
        if "[Reference]" in data["source"]:
            reference_predicted.append(gen)
            reference_reference.append(target)


In [129]:
# plt.hist(
#     [tokenizer.tokenize(target).__len__() - tokenizer.tokenize(predicted).__len__() \
#      for target, predicted in zip(references, predicted)],
#     bins=50,
#     range = (-100, 100)
# )

In [116]:
# with open(os.path.join("/home/bxm200000/models/led_paragraph_generation_v3/", "sample_output_train_paragraph_gen_strip_v3.3.json"), 'w') as f:
#     json.dump(new_accumulated_data, f)

In [117]:
# i =4
# i += 1
# accumulated_data[i]

In [118]:
# rouge.compute(predictions=predicted, references=references, rouge_types=["rouge1","rouge2","rougeL"])

In [130]:
target

' annotated the Luna corpus 3 which does not include English annotations'

In [131]:
rouge.compute(predictions=predicted, references=references, rouge_types=["rouge1","rouge2","rougeL"])

{'rouge1': AggregateScore(low=Score(precision=0.21124460273478848, recall=0.2760019409231681, fmeasure=0.2094705587951958), mid=Score(precision=0.2203567834437479, recall=0.28703975940955084, fmeasure=0.21778941370227095), high=Score(precision=0.22944923376345533, recall=0.29761193585704865, fmeasure=0.2259181729667761)),
 'rouge2': AggregateScore(low=Score(precision=0.0703281742355175, recall=0.09465983653498301, fmeasure=0.06905230442419608), mid=Score(precision=0.07649506718235055, recall=0.10305123449691425, fmeasure=0.07532427184587537), high=Score(precision=0.08320664194208792, recall=0.11191122202367075, fmeasure=0.08166549234124715)),
 'rougeL': AggregateScore(low=Score(precision=0.18616921418624321, recall=0.2469718414930492, fmeasure=0.18610663792328194), mid=Score(precision=0.19460780291508112, recall=0.25635423220379416, fmeasure=0.1934282808580508), high=Score(precision=0.20281944984675335, recall=0.26650836014325663, fmeasure=0.2013999128367323))}

In [132]:
rouge.compute(
    predictions=dominant_predicted, 
    references=dominant_reference, rouge_types=["rouge1","rouge2","rougeL"], use_stemmer=True)

{'rouge1': AggregateScore(low=Score(precision=0.2521972424115864, recall=0.2949464151559301, fmeasure=0.2392433991632979), mid=Score(precision=0.2615801686766283, recall=0.3040527915677015, fmeasure=0.24613630235562173), high=Score(precision=0.2710635573034785, recall=0.31328535049245004, fmeasure=0.25319517445395634)),
 'rouge2': AggregateScore(low=Score(precision=0.061756626805981416, recall=0.070434797507288, fmeasure=0.05733334218023684), mid=Score(precision=0.06727584695058104, recall=0.07625996042020035, fmeasure=0.061773983804685254), high=Score(precision=0.07352286044441975, recall=0.08263460060292332, fmeasure=0.06705751732613527)),
 'rougeL': AggregateScore(low=Score(precision=0.1943844663947913, recall=0.22600995627848006, fmeasure=0.1841215350463397), mid=Score(precision=0.20162014032970038, recall=0.23383721333992896, fmeasure=0.18944829544319963), high=Score(precision=0.209892055603762, recall=0.24197344008203958, fmeasure=0.19534121488215386))}

In [133]:
rouge.compute(
    predictions=reference_predicted, 
    references=reference_reference, 
    rouge_types=["rouge1","rouge2","rougeL"], use_stemmer=True)

{'rouge1': AggregateScore(low=Score(precision=0.2100272539726637, recall=0.3024299313658022, fmeasure=0.21704336614103276), mid=Score(precision=0.22366404914570404, recall=0.32002726857057173, fmeasure=0.22985812269979283), high=Score(precision=0.23856682198580942, recall=0.33709592301252367, fmeasure=0.24432709112879286)),
 'rouge2': AggregateScore(low=Score(precision=0.08400731558117439, recall=0.12337946528710475, fmeasure=0.08573042372339902), mid=Score(precision=0.09405605142475731, recall=0.13695622714200884, fmeasure=0.09548413873057918), high=Score(precision=0.10572322683408894, recall=0.1509369275913055, fmeasure=0.10676721742930914)),
 'rougeL': AggregateScore(low=Score(precision=0.20241219222316614, recall=0.2915125817773235, fmeasure=0.20871017939689857), mid=Score(precision=0.21544506179036507, recall=0.30864203721651046, fmeasure=0.221594975842006), high=Score(precision=0.22959993408938173, recall=0.32431389004718497, fmeasure=0.23492011558701797))}