In [1]:
# Absolute, machine-specific directories; adjust these before running elsewhere.
# magma_dir is prepended to sys.path and used to build the fine-tuned model path.
magma_dir = '/home/marco/epfl/magma/'
transformers_dir = '/home/marco/epfl/transformers/'

### **Config**

In [2]:
import os
import sys

# Search magma_dir first so that `import config` can be resolved from it.
sys.path.insert(0, magma_dir)
import config

from torch import cuda
# Run on the GPU when torch reports CUDA as available, otherwise on the CPU.
device = 'cuda' if cuda.is_available() else 'cpu'

In [3]:
# Architecture selector read by import_model_tok (matched on 'bart' / 'pegasus').
MODEL = 'bart'
# Cache filled by import_model_tok: model_name_or_path -> (model, tokenizer).
MODELS = {}

### **Init**

In [4]:
import matplotlib.pyplot as plt
import numpy as np
import torch
import re
import pandas as pd
from tqdm import tqdm
from textwrap import fill

### **Function Definition**

##### Import Model and Tokenizer

In [5]:
def import_model_tok(model_name_or_path):
    """Load a (model, tokenizer) pair, reusing the module-level MODELS cache.

    The architecture is chosen from the module-level MODEL string (not from
    `model_name_or_path`): 'bart' selects the BART classes, 'pegasus' the
    PEGASUS classes.

    Args:
        model_name_or_path: identifier or path forwarded to `from_pretrained`;
            also used as the cache key.

    Returns:
        Tuple `(model, tokenizer)`.

    Raises:
        ValueError: if MODEL names neither 'bart' nor 'pegasus'. Previously
            this case fell through and crashed with UnboundLocalError.
    """
    global MODELS

    if model_name_or_path in MODELS:
        print('[+] model already present in cache\n')
        return MODELS[model_name_or_path]
    print('[*] importing the model\n')

    # transformers is imported lazily so that a cache hit never pays for it.
    if 'bart' in MODEL:
        from transformers import BartForConditionalGeneration, BartTokenizer

        model = BartForConditionalGeneration.from_pretrained(model_name_or_path)
        tokenizer = BartTokenizer.from_pretrained(model_name_or_path)
    elif 'pegasus' in MODEL:
        from transformers import PegasusForConditionalGeneration, PegasusTokenizer

        model = PegasusForConditionalGeneration.from_pretrained(model_name_or_path)
        tokenizer = PegasusTokenizer.from_pretrained(model_name_or_path)
    else:
        # Fail loudly instead of reaching the cache write with unbound names.
        raise ValueError(
            "Unsupported MODEL %r: expected a name containing 'bart' or 'pegasus'" % (MODEL,))

    MODELS[model_name_or_path] = model, tokenizer
    print('[+] the model is now present in cache\n')
    return MODELS[model_name_or_path]

##### Nice Print

In [6]:
def print_example(text, summ, bull):
    """Pretty-print a source text, its prediction(s) and the reference.

    Every piece is wrapped at 100 columns and followed by a blank line; a row
    of 100 '#' characters closes the example.

    Args:
        text: source text.
        summ: a single predicted summary, or a list of them.
        bull: reference summary.
    """
    width = 100
    # Treat a single prediction as a one-element list so both cases share a loop.
    predictions = summ if isinstance(summ, list) else [summ]

    print(fill(text, width), end='\n\n')
    for prediction in predictions:
        print('Prediction:')
        print(fill(prediction, width), end='\n\n')
    print('Reference:')
    print(fill(bull, width), end='\n\n')
    print('#' * width, end='\n\n')

## **Summarization experiments**

##### Model writing the opposite of input

In [17]:
def summarize(model_name_or_path, text, bullets, num_samples=2):
    """Generate `num_samples` summaries of `text` and print them with `bullets`.

    Args:
        model_name_or_path: forwarded to import_model_tok.
        text: source text to summarize.
        bullets: reference summary, only used for display.
        num_samples: number of sequences requested from `generate` and decoded.
    """
    model, tokenizer = import_model_tok(model_name_or_path)
    model = model.to(device)

    input_ids = tokenizer.encode(text, return_tensors='pt').to(device)
    generated = model.generate(
        input_ids,
        max_length=config.ONE_BULLET_MAX_LEN,
        num_return_sequences=num_samples)
    predictions = [
        tokenizer.decode(generated[idx], skip_special_tokens=True)
        for idx in range(num_samples)
    ]

    print_example(text, predictions, bullets)

In [18]:
# Source paragraph (text) and its reference bullet (bullets) passed to summarize() in the cells below.
text = 'Long-term risks. As with any form of chronic UV exposure, it is likely that phototherapy carries an increased risk of non-melanoma skin cancers, which increases with cumulative exposure. Broad-band UVB is probably associated with a small increase in the risk of squamous cell carcinomas, particularly in patients receiving high-level exposure (more than 300 treatments). Narrow-band UVB phototherapy has now been in clinical use for long enough to determine the associated risk precisely. On the basis of cancer-risk calculations from mouse models and efficacy data, it seems likely that the risk is at least as great as that associated with broad-band UVB, but probably less than that associated with PUVA. In view of this, phototherapy may be relatively contraindicated for patients with pre-existing risk factors for skin cancer, such as individuals who work outside, those who have already had significant amounts of phototherapy, patients with very fair skin, and those with multiple melanocytic or atypical nevi.'
bullets = 'Long-term risks of PUVA include premature skin aging and skin cancer (non-melanoma and melanoma).'

In [19]:
# Summarize with the 'sshleifer/distilbart-cnn-12-6' checkpoint (default num_samples=2).
summarize('sshleifer/distilbart-cnn-12-6', text, bullets)

[+] model already present in cache

Long-term risks. As with any form of chronic UV exposure, it is likely that phototherapy carries an
increased risk of non-melanoma skin cancers, which increases with cumulative exposure. Broad-band
UVB is probably associated with a small increase in the risk of squamous cell carcinomas,
particularly in patients receiving high-level exposure (more than 300 treatments). Narrow-band UVB
phototherapy has now been in clinical use for long enough to determine the associated risk
precisely. On the basis of cancer-risk calculations from mouse models and efficacy data, it seems
likely that the risk is at least as great as that associated with broad-band UVB, but probably less
than that associated with PUVA. In view of this, phototherapy may be relatively contraindicated for
patients with pre-existing risk factors for skin cancer, such as individuals who work outside, those
who have already had significant amounts of phototherapy, patients with very fair skin,

In [26]:
# Summarize with the checkpoint stored under magma_dir/fine-tuning, requesting 4 samples.
# NOTE(review): the '?' in the directory name presumably stands in for '/' — confirm on disk.
summarize(magma_dir+'fine-tuning/sshleifer?distilbart-cnn-12-6_karger_books_para_train', text, bullets, 4)

[+] model already present in cache

Long-term risks. As with any form of chronic UV exposure, it is likely that phototherapy carries an
increased risk of non-melanoma skin cancers, which increases with cumulative exposure. Broad-band
UVB is probably associated with a small increase in the risk of squamous cell carcinomas,
particularly in patients receiving high-level exposure (more than 300 treatments). Narrow-band UVB
phototherapy has now been in clinical use for long enough to determine the associated risk
precisely. On the basis of cancer-risk calculations from mouse models and efficacy data, it seems
likely that the risk is at least as great as that associated with broad-band UVB, but probably less
than that associated with PUVA. In view of this, phototherapy may be relatively contraindicated for
patients with pre-existing risk factors for skin cancer, such as individuals who work outside, those
who have already had significant amounts of phototherapy, patients with very fair skin,

## **New compute_metrics Function**

In [42]:
from rouge_score import rouge_scorer, scoring
import numpy as np
import re
import nltk
from typing import Callable, Dict, Iterable, List, Tuple, Union

In [43]:
from sentence_transformers import SentenceTransformer

# Sentence encoder used by calculate_sentence_trans_cosine to embed predictions and references.
# might want to try 'msmarco-distilbert-base-v2' too
sentence_distilroberta = SentenceTransformer('paraphrase-distilroberta-base-v1')

In [65]:
# Default ROUGE variants handed to rouge_scorer.RougeScorer by calculate_rouge.
ROUGE_KEYS = ["rouge1", "rouge2", "rougeL", "rougeLsum"]

def add_newline_to_end_of_each_sentence(x: str) -> str:
    """Return `x` with one sentence per line, after removing "<n>" markers.

    This was added to get rougeLsum scores matching published rougeL scores
    for BART and PEGASUS.

    Args:
        x: a summary, possibly containing PEGASUS "<n>" newline markers.

    Returns:
        The sentences found by `nltk.sent_tokenize`, joined with "\\n".
    """
    # re.sub returns a new string; the result must be assigned or the markers stay in.
    x = re.sub("<n>", "", x)  # remove pegasus newline char
    return "\n".join(nltk.sent_tokenize(x))

def extract_rouge_mid_statistics(dct):
    """Flatten aggregated ROUGE scores into `{"<key>_<stat>": percentage}`.

    Args:
        dct: mapping from ROUGE key to an object whose `.mid` attribute exposes
            `precision`, `recall` and `fmeasure`.

    Returns:
        Dict with one entry per (key, stat) pair, expressed as a percentage
        rounded to 2 decimals.
    """
    new_dict = {}
    for k1, v1 in dct.items():
        mid = v1.mid
        for stat in ["precision", "recall", "fmeasure"]:
            # Scale first, then round: rounding before the multiplication left
            # float noise in the result (e.g. 63.46000000000001).
            new_dict[k1+'_'+stat] = round(getattr(mid, stat) * 100, 2)
    return new_dict

def calculate_rouge(
    pred_lns: List[str],
    tgt_lns: List[str],
    use_stemmer=True,
    rouge_keys=ROUGE_KEYS,
    return_precision_and_recall=True,
    bootstrap_aggregation=True,
    newline_sep=True,
) -> Dict:
    """Score predictions against targets with ROUGE.

    Args:
        pred_lns: predicted summaries.
        tgt_lns: reference summaries, paired positionally with `pred_lns`.
        use_stemmer: forwarded to `rouge_scorer.RougeScorer`.
        rouge_keys: ROUGE variants to compute.
        return_precision_and_recall: with bootstrap aggregation, return
            precision/recall/fmeasure per key instead of fmeasure only.
        bootstrap_aggregation: aggregate with `BootstrapAggregator.aggregate`;
            when False the aggregator's raw per-example scores are returned.
        newline_sep: put each sentence on its own line before scoring.

    Returns:
        A dict of aggregated scores, or the aggregator's internal score store
        when `bootstrap_aggregation` is False.
    """
    scorer = rouge_scorer.RougeScorer(rouge_keys, use_stemmer=use_stemmer)
    aggregator = scoring.BootstrapAggregator()

    for reference, prediction in zip(tgt_lns, pred_lns):
        if newline_sep:
            # rougeLsum expects "\n" separated sentences within a summary
            prediction = add_newline_to_end_of_each_sentence(prediction)
            reference = add_newline_to_end_of_each_sentence(reference)
        aggregator.add_scores(scorer.score(reference, prediction))

    if not bootstrap_aggregation:
        return aggregator._scores  # here we return defaultdict(list)

    aggregated = aggregator.aggregate()
    if return_precision_and_recall:
        return extract_rouge_mid_statistics(aggregated)  # here we return dict
    return {key: round(score.mid.fmeasure * 100, 4) for key, score in aggregated.items()}
    
def calculate_sentence_trans_cosine(
    pred_lns: List[str],
    tgt_lns: List[str]
):
    """Mean cosine similarity (times 100) between prediction and target embeddings.

    Each prediction/target pair is encoded with `sentence_distilroberta`; pairs
    are matched positionally.

    Args:
        pred_lns: predicted summaries.
        tgt_lns: reference summaries.

    Returns:
        The mean of the pairwise cosine similarities, multiplied by 100.
    """
    similarities = []
    for reference, prediction in zip(tgt_lns, pred_lns):
        pred_vec = sentence_distilroberta.encode(prediction)
        ref_vec = sentence_distilroberta.encode(reference)
        norm_product = np.linalg.norm(pred_vec)*np.linalg.norm(ref_vec)
        similarities.append(np.dot(pred_vec, ref_vec) / norm_product)

    return np.mean(similarities)*100
    

def summarization_metrics(pred_str, label_str) -> Dict:
    """Compute ROUGE plus the sentence-embedding cosine score for a batch.

    Args:
        pred_str: predicted summaries.
        label_str: reference summaries.

    Returns:
        The dict produced by `calculate_rouge`, extended with the key
        'sentence_distilroberta_cosine'.
    """
    rouge_metrics: Dict = calculate_rouge(pred_str, label_str)
    cosine_score = calculate_sentence_trans_cosine(pred_str, label_str)
    rouge_metrics['sentence_distilroberta_cosine'] = cosine_score
    return rouge_metrics

In [66]:
# Smoke test: two prediction/reference pairs; the first pair has two sentences per side.
pred_str = ['This is the summary that we get. In other words, this is the prediction.', 'This is prediction number two.']
label_str = ['This is the summary that we want. In other words, this is the ground truth.', 'This is reference number two.']

summarization_metrics(pred_str, label_str)

{'rouge1_precision': 82.86,
 'rouge1_recall': 80.0,
 'rouge1_fmeasure': 81.38,
 'rouge2_precision': 63.46000000000001,
 'rouge2_recall': 60.709999999999994,
 'rouge2_fmeasure': 62.03999999999999,
 'rougeL_precision': 82.86,
 'rougeL_recall': 80.0,
 'rougeL_fmeasure': 81.38,
 'rougeLsum_precision': 82.86,
 'rougeLsum_recall': 80.0,
 'rougeLsum_fmeasure': 81.38,
 'sentence-distilroberta-cosine': 69.60583329200745}