### Config

In [1]:
import os
import sys

# Put the project directory at the front of the import path before importing
# `config` (presumably where the project-local `config` module lives).
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not run on another machine unless it is adjusted.
sys.path.insert(0, '/home/marco/epfl/magma/')
import config

### Load Fine-Tuned Model

In [2]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# The model weights and the tokenizer are both loaded from the same
# fine-tuning checkpoint directory, so the path is built once.
checkpoint_dir = (
    config.MAGMA_DIR
    + 'fine-tuning/ft_pegasus_bull_para_embed_merged_overlaps_bybook_gas64_lr5e-05/checkpoint-539/'
)

model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint_dir)
tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir)

### Evaluation Function Definition

In [3]:
from rouge_score import rouge_scorer, scoring

from sentence_transformers import SentenceTransformer
# Sentence encoder used by calculate_sentence_trans_cosine to embed predictions
# and references. Instantiated once here so the functions below can share it.
sentence_distilroberta = SentenceTransformer('paraphrase-distilroberta-base-v1')

import re
import nltk
import numpy as np

# ROUGE variants passed to RougeScorer in calculate_rouge.
ROUGE_KEYS = ["rouge1", "rouge2", "rougeLsum"]

def add_newline_to_end_of_each_sentence(x: str) -> str:
    """Strip the Pegasus newline marker and put one sentence on each line.

    Args:
        x: Text to reformat (a prediction or a reference).

    Returns:
        The text with every literal ``<n>`` removed, split into sentences with
        ``nltk.sent_tokenize`` and re-joined with ``"\\n"``.
    """
    # Strings are immutable, so the result of re.sub must be assigned; the
    # previous code discarded it and left the "<n>" markers in the text.
    # NOTE(review): the marker is replaced by the empty string, so the text on
    # either side of it is joined without a space.
    x = re.sub("<n>", "", x)  # remove pegasus newline char
    return "\n".join(nltk.sent_tokenize(x))

def calculate_rouge(pred_lns, tgt_lns):
    """Score predictions against references with the metrics in ROUGE_KEYS.

    Args:
        pred_lns: Iterable of predicted texts.
        tgt_lns: Iterable of reference texts, paired positionally with
            ``pred_lns``. Pairing uses ``zip``, so surplus items on the longer
            side are ignored.

    Returns:
        A dict with one entry per key of the aggregated result, holding the
        ``mid`` F-measure of that key multiplied by 100 and rounded to four
        decimals.
    """
    rouge = rouge_scorer.RougeScorer(ROUGE_KEYS, use_stemmer=True)
    bootstrap = scoring.BootstrapAggregator()

    for reference, candidate in zip(tgt_lns, pred_lns):
        # Both texts are reformatted to one sentence per line before scoring.
        candidate_by_sentence = add_newline_to_end_of_each_sentence(candidate)
        reference_by_sentence = add_newline_to_end_of_each_sentence(reference)
        bootstrap.add_scores(rouge.score(reference_by_sentence, candidate_by_sentence))

    percentages = {}
    for metric, interval in bootstrap.aggregate().items():
        percentages[metric] = round(interval.mid.fmeasure * 100, 4)
    return percentages

def calculate_sentence_trans_cosine(pred_lns, tgt_lns):
    """Average cosine similarity between prediction and reference embeddings.

    Each prediction/reference pair is embedded with the module-level
    ``sentence_distilroberta`` encoder, one text per ``encode`` call.

    Args:
        pred_lns: Iterable of predicted texts.
        tgt_lns: Iterable of reference texts, paired positionally with
            ``pred_lns``. Pairing uses ``zip``, so surplus items on the longer
            side are ignored.

    Returns:
        ``{'sentence_distilroberta_cosine': value}`` where ``value`` is the
        mean pairwise cosine similarity multiplied by 100.
    """
    def _cosine(u, v):
        # Dot product normalised by the product of the two vector norms.
        return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))

    similarities = []
    for reference, candidate in zip(tgt_lns, pred_lns):
        candidate_vec = sentence_distilroberta.encode(candidate)
        reference_vec = sentence_distilroberta.encode(reference)
        similarities.append(_cosine(candidate_vec, reference_vec))

    return {'sentence_distilroberta_cosine': np.mean(similarities) * 100}

### Show Random Examples

In [26]:
# Seed passed to np.random.seed before the example indices are drawn.
seed = 11

In [27]:
# Read the test split: one source text and one target text per line.
dataset_dir = (
    config.MAGMA_DIR
    + 'datasets/bullet_paragraph_embeddings/pegasus/st/merged_overlaps/bybook/'
)

with open(dataset_dir + 'test.source', 'r') as src, \
        open(dataset_dir + 'test.target', 'r') as tgt:
    # Trailing whitespace (including the newline) is stripped from every line.
    # NumPy arrays are used so the texts can be selected with an index array.
    s = np.array([line.rstrip() for line in src])
    t = np.array([line.rstrip() for line in tgt])

In [28]:
# Draw five example indices reproducibly. randint samples with replacement,
# so the same index can be drawn more than once.
np.random.seed(seed)
random_examples_idx = np.random.randint(0, len(s), size=5)
random_examples_src, random_examples_tgt = s[random_examples_idx], t[random_examples_idx]

In [29]:
from tqdm import tqdm

# Generate one prediction per sampled source text, one text at a time.
random_examples_pred = []
for src in tqdm(random_examples_src):
    encoded = tokenizer(src, return_tensors='pt', truncation=True, padding='longest')
    generated_ids = model.generate(
        encoded.input_ids,
        min_length=config.ONE_BULLET_MIN_LEN,
        max_length=config.ONE_BULLET_MAX_LEN,
    )
    decoded = tokenizer.batch_decode(
        generated_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    # Only one text is encoded per call, so the first decoded entry is used.
    pred = decoded[0]
    random_examples_pred.append(pred)

100%|██████████| 5/5 [00:59<00:00, 11.94s/it]


In [30]:
# Print each sampled example with its reference, its prediction and the
# per-example metric dicts, followed by a separator line.
separator = '#' * 100

for src, pred, ref in zip(random_examples_src, random_examples_pred, random_examples_tgt):
    for heading, text in (('Source:', src), ('Reference:', ref), ('Prediction:', pred)):
        print(heading)
        print(text + '\n')

    # Both metric functions take lists, so the single pair is wrapped.
    rouge_eval = calculate_rouge([pred], [ref])
    st_eval = calculate_sentence_trans_cosine([pred], [ref])

    print(rouge_eval)
    print(st_eval)

    print(separator)
    print()

Source:
The gluteal and piriformis muscles are shown in Figure 3.1. The primary functions of the gluteal muscles are to support, stabilize and mobilize the hips and lower extremities in relation to the pelvis and the trunk. Possibly the most common manifestation of lumbar and lumbosacral pain referral is to the mid-portion of the gluteal muscles (buttocks) that overlie the piriformis muscle. This is accompanied by a palpable localized region of deep, tender muscle induration. Because of the thickness of the overlying gluteus maximus muscle, the fibers of which are parallel to (and indistinguishable from) the piriformis muscle fibers, it is not possible to determine by palpation if the piriformis muscle per se is the source of the localized muscle tenderness. A 'piriformis syndrome' is therefore somewhat problematic to identify and must be carefully distinguished from the more common, spinal discogenic basis for sciatic radiculopathy. A piriformis syndrome may result from scarring after