In [1]:
# Root directory of the local checkout; dataset, output and checkpoint paths below are built from it.
# NOTE(review): machine-specific absolute path — adjust when running on another machine.
magma_dir = '/home/marco/epfl/magma/'

#### AWS

In [2]:
# AWS variant of the path setup: running this cell replaces the local `magma_dir` set above.
magma_dir = '/home/ubuntu/magma/'
# NOTE(review): presumably the mount point of an S3 bucket — confirm.
bucket_dir = '/home/ubuntu/s3/'
transformers_dir = '/home/ubuntu/transformers/'
# Cache directory kept inside the bucket directory.
cache_dir = bucket_dir+'.cache/'

### **Config**

In [2]:
import os
import sys

# Put `magma_dir` first on the import path so that `config` can be imported from it
# (presumably the project's own configuration module — confirm).
sys.path.insert(0, magma_dir)
import config

from torch import cuda
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if cuda.is_available() else 'cpu'

In [3]:
# Model family name; used below as the last path component of the dataset and output directories.
MODEL = 'bart'
# Cache of loaded (model, tokenizer) pairs keyed by model name/path; filled by import_model_tok.
MODELS = {}

In [4]:
# Dataset path
data_dir = magma_dir + 'datasets/karger_books_para_wordembed/'+MODEL+'/'

# Output path
OUTPUT_PATH = magma_dir+'summarization/assign_bullets_para_wordembed/'+MODEL+'/'
# exist_ok=True creates the directory tree only when it is missing, which removes
# the separate existence check and its check-then-create race.
os.makedirs(OUTPUT_PATH, exist_ok=True)

#### AWS

In [None]:
# AWS variant: read the datasets from the bucket directory instead of `magma_dir`.
data_dir = bucket_dir + 'datasets/karger_books_para_wordembed/'+MODEL+'/'

### **Init**

In [5]:
import matplotlib.pyplot as plt
import numpy as np
import torch
import re
import pandas as pd
from tqdm import tqdm
from textwrap import fill
tqdm.pandas()

In [21]:
from datasets import load_metric
# ROUGE scorer shared by the evaluation functions defined below.
metric = load_metric("rouge")

from sentence_transformers import SentenceTransformer
# Sentence encoder whose embeddings back the "ST" cosine-similarity score.
sentence_distilroberta = SentenceTransformer('paraphrase-distilroberta-base-v1')

# Used by the evaluation functions for tokenisation and per-pair Word2Vec embeddings.
import gensim

### **Function Definition**

##### Import Model and Tokenizer

In [7]:
def import_model_tok(model_name_or_path, verbose=False):
    """Return the ``(model, tokenizer)`` pair for a BART checkpoint, loading it at most once.

    Loaded pairs are memoised in the module-level ``MODELS`` dict under
    ``model_name_or_path``, so repeated calls with the same name reuse the
    objects that are already in memory.

    Args:
        model_name_or_path: Identifier or path handed to ``from_pretrained``.
        verbose: When true, print progress messages and the model config.

    Returns:
        The cached ``(model, tokenizer)`` tuple.
    """
    global MODELS

    def say(message):
        # Progress output is opt-in.
        if verbose:
            print(message)

    if model_name_or_path not in MODELS:
        say('[*] importing the model\n')

        # Imported lazily, only when a checkpoint actually has to be loaded.
        from transformers import BartForConditionalGeneration, BartTokenizer
        model = BartForConditionalGeneration.from_pretrained(model_name_or_path)
        tokenizer = BartTokenizer.from_pretrained(model_name_or_path)

        # Kept as an explicit guard so the config is only read in verbose mode.
        if verbose:
            print(model.config)
        MODELS[model_name_or_path] = model, tokenizer
        say('[+] the model is now present in cache\n')
    else:
        say('[+] model already present in cache\n')

    return MODELS[model_name_or_path]

##### Print Examples

In [37]:
def evaluate_prediction(pred, ref):
    """Score one generated summary against its reference text.

    Args:
        pred: Generated summary.
        ref: Reference text.

    Returns:
        A ``(rouge, sent_trans_res, w2v_res)`` tuple where

        * ``rouge`` maps ``'<rouge type>_<precision|recall|fmeasure>'`` to the
          score times 100 for every type in ``config.ROUGE_TYPES``;
        * ``sent_trans_res`` is 100x the cosine similarity between the
          ``sentence_distilroberta`` embeddings of the two texts;
        * ``w2v_res`` is 100x the cosine similarity between the mean Word2Vec
          vectors of the two texts, from a model trained on just these two texts.
    """
    def cosine(u, v):
        return np.dot(u, v) / (np.linalg.norm(u)*np.linalg.norm(v))

    def mean_vector(model, tokens):
        return np.mean([model.wv[token] for token in tokens], axis=0)

    # NOTE: `use_agregator` (single "g") is the keyword exactly as this notebook
    # passes it; presumably it matches the installed metric's own spelling, so do
    # not "correct" it without checking.
    scores = metric.compute(
        predictions=[pred],
        references=[ref],
        rouge_types=config.ROUGE_TYPES,
        use_agregator=False)

    # The first (only) entry per ROUGE type holds the three statistics in this order.
    rouge = {
        rouge_type+'_'+stat: 100*scores[rouge_type][0][pos]
        for rouge_type in config.ROUGE_TYPES
        for pos, stat in enumerate(['precision', 'recall', 'fmeasure'])}

    pred_vec = sentence_distilroberta.encode(pred)
    ref_vec = sentence_distilroberta.encode(ref)
    sent_trans_res = 100*cosine(pred_vec, ref_vec)

    # Word2Vec is fitted on the two token lists only, then each text is
    # represented by the mean of its word vectors.
    pred_tokens = gensim.utils.simple_preprocess(pred, deacc=True)
    ref_tokens = gensim.utils.simple_preprocess(ref, deacc=True)
    w2v = gensim.models.Word2Vec(
        [pred_tokens, ref_tokens],
        min_count=1,
        sg=1,
        seed=config.SEED)
    w2v_res = 100*cosine(
        mean_vector(w2v, pred_tokens),
        mean_vector(w2v, ref_tokens))

    return rouge, sent_trans_res, w2v_res

In [51]:
def print_examples(model_name_list, df, n_examples=10):
    """Print sampled rows with their reference and every model's candidate summaries.

    ``n_examples`` rows are sampled from ``df`` (seeded with ``config.SEED``).
    For each row the index, the wrapped ``text`` and the wrapped reference
    ``bullets`` are printed; then, for every model, each returned beam is
    printed together with its token count, ROUGE scores, sentence-transformer
    score and Word2Vec score as computed by ``evaluate_prediction``.

    Args:
        model_name_list: Model names/paths accepted by ``import_model_tok``.
        df: DataFrame with ``text`` and ``bullets`` columns.
        n_examples: Number of rows to sample.
    """
    # Order in which the ROUGE values are substituted into the report line.
    rouge_keys = tuple(
        rouge_type+'_'+stat
        for rouge_type in ('rouge1', 'rouge2', 'rougeL')
        for stat in ('precision', 'recall', 'fmeasure'))
    separator = '#'*100

    sampled_rows = df.sample(n_examples, axis='index', random_state=config.SEED)

    for idx, row in sampled_rows.iterrows():
        print(idx)
        print(fill(row.text, 100))
        print()
        print('Reference:')
        print(fill(row.bullets, 100))
        print()
        for model_name in model_name_list:
            model, tokenizer = import_model_tok(model_name)
            model = model.to(device)

            source_ids = tokenizer.encode(row.text, return_tensors='pt').to(device)
            # One returned sequence per beam.
            candidates = model.generate(
                source_ids,
                min_length=config.ONE_BULLET_MIN_LEN,
                max_length=config.ONE_BULLET_MAX_LEN,
                length_penalty=config.LENGTH_PENALTY,
                num_beams=config.NUM_BEAMS,
                no_repeat_ngram_size=config.NO_REPEAT_NGRAM_SIZE,
                num_return_sequences=config.NUM_BEAMS,
                early_stopping=True)

            for candidate in candidates:
                n_tokens = len(tokenizer.convert_ids_to_tokens(candidate, skip_special_tokens=True))
                candidate_text = tokenizer.decode(candidate, skip_special_tokens=True)

                rouge, sent_trans, w2v = evaluate_prediction(candidate_text, row.bullets)

                print('%s (%d tok):'%(model_name, n_tokens))
                print('R1\tp: %.2f \tr: %.2f \tf: %.2f\nR2\tp: %.2f \tr: %.2f \tf: %.2f\nRL\tp: %.2f \tr: %.2f \tf: %.2f'%
                      tuple(rouge[key] for key in rouge_keys))
                print('ST\t%.2f\t\tW2V\t%.2f'%(sent_trans, w2v))
                print()
                print(fill(candidate_text, 100))
                print()
            print()

        print(separator)
        print()

##### Plot Evaluation

In [15]:
def plot_evaluation_bullet_by_bullet(model_name_or_path):
    """Box-plot the bullet-by-bullet evaluation scores saved for a model.

    Reads ``<OUTPUT_PATH><model name with '/' replaced by '?'>_bullet_by_bullet.csv``
    and draws, for every ROUGE type in ``config.ROUGE_TYPES``, one box each for
    precision, recall and F-measure, followed by one box for the
    ``st_cosine_sim`` column and one for the ``w2v_cosine_sim`` column.

    Args:
        model_name_or_path: Model name/path that was used when the CSV was written.
    """
    df = pd.read_csv(OUTPUT_PATH+model_name_or_path.replace('/', '?')+\
        '_bullet_by_bullet.csv').set_index(['book', 'chapter'])

    prf = ['precision', 'recall', 'fmeasure']
    rouge_types = list(config.ROUGE_TYPES)
    num_rouge = len(rouge_types)
    num_stats = len(prf)

    # Colours and offsets are per statistic (precision/recall/fmeasure), so they
    # are sized by len(prf) rather than by the number of ROUGE types.
    # pyplot's get_cmap is used because matplotlib.cm.get_cmap no longer exists
    # in Matplotlib >= 3.9.
    color = plt.get_cmap('tab10')(range(num_stats))
    # Same offsets as before for the three statistics: -0.45, 0, +0.45.
    offsets = np.linspace(-0.45, 0.45, num_stats)

    def set_box_color(b, c):
        for k in b.keys():
            plt.setp(b[k], color=c)

    # One tick per ROUGE type plus two trailing ticks for the cosine similarities.
    xticks = 2*np.array(np.arange(1, num_rouge+3))
    box_style = dict(
        sym='+',
        widths=0.4,
        patch_artist=False,
        meanline=True,
        showmeans=True)

    box_plt_list = []

    fig, ax = plt.subplots(figsize=(10, 6))
    for stat, offset in zip(prf, offsets):
        box_plt_list.append(
            ax.boxplot(
                [df[rouge+'_'+stat].tolist() for rouge in rouge_types],
                positions=xticks[:-2]+offset,
                **box_style))

    for column, position in (('st_cosine_sim', xticks[-2]),
                             ('w2v_cosine_sim', xticks[-1])):
        box_plt_list.append(
            ax.boxplot(
                df[column].tolist(),
                positions=[position],
                **box_style))

    # Only the per-statistic ROUGE boxes are coloured and listed in the legend.
    for i, bp in enumerate(box_plt_list[:num_stats]):
        set_box_color(bp, color[i])
        ax.plot([], c=color[i], label=prf[i])
    ax.legend()

    ax.grid(True, axis='y', alpha=0.7, linestyle='--')
    ax.set_title('Evaluation Results', fontsize='xx-large')
    ax.set_ylabel('Rouge', fontsize='x-large')
    plt.xticks(xticks, rouge_types+['ST Cosine', 'W2V Cosine'], fontsize='x-large')
    plt.show()

In [16]:
def plot_evaluation_grouping_bullets(model_name_or_path):
    """Box-plot the grouped-bullets evaluation scores saved for a model.

    Reads ``<OUTPUT_PATH><model name with '/' replaced by '?'>_grouped.csv``
    and draws, for every ROUGE type in ``config.ROUGE_TYPES``, one box each for
    precision, recall and F-measure, followed by one box for the
    ``st_cosine_sim`` column and one for the ``w2v_cosine_sim`` column.

    Args:
        model_name_or_path: Model name/path that was used when the CSV was written.
    """
    df = pd.read_csv(OUTPUT_PATH+model_name_or_path.replace('/', '?')+\
        '_grouped.csv').set_index(['book', 'chapter'])

    prf = ['precision', 'recall', 'fmeasure']
    rouge_types = list(config.ROUGE_TYPES)
    num_rouge = len(rouge_types)
    num_stats = len(prf)

    # Colours and offsets are per statistic (precision/recall/fmeasure), so they
    # are sized by len(prf) rather than by the number of ROUGE types.
    # pyplot's get_cmap is used because matplotlib.cm.get_cmap no longer exists
    # in Matplotlib >= 3.9.
    color = plt.get_cmap('tab10')(range(num_stats))
    # Same offsets as before for the three statistics: -0.45, 0, +0.45.
    offsets = np.linspace(-0.45, 0.45, num_stats)

    def set_box_color(b, c):
        for k in b.keys():
            plt.setp(b[k], color=c)

    # One tick per ROUGE type plus two trailing ticks for the cosine similarities.
    xticks = 2*np.array(np.arange(1, num_rouge+3))
    box_style = dict(
        sym='+',
        widths=0.4,
        patch_artist=False,
        meanline=True,
        showmeans=True)

    box_plt_list = []

    fig, ax = plt.subplots(figsize=(10, 6))
    for stat, offset in zip(prf, offsets):
        box_plt_list.append(
            ax.boxplot(
                [df[rouge+'_'+stat].tolist() for rouge in rouge_types],
                positions=xticks[:-2]+offset,
                **box_style))

    for column, position in (('st_cosine_sim', xticks[-2]),
                             ('w2v_cosine_sim', xticks[-1])):
        box_plt_list.append(
            ax.boxplot(
                df[column].tolist(),
                positions=[position],
                **box_style))

    # Only the per-statistic ROUGE boxes are coloured and listed in the legend.
    for i, bp in enumerate(box_plt_list[:num_stats]):
        set_box_color(bp, color[i])
        ax.plot([], c=color[i], label=prf[i])
    ax.legend()

    ax.grid(True, axis='y', alpha=0.7, linestyle='--')
    ax.set_title('Evaluation Results', fontsize='xx-large')
    ax.set_ylabel('Rouge', fontsize='x-large')
    plt.xticks(xticks, rouge_types+['ST Cosine', 'W2V Cosine'], fontsize='x-large')
    plt.show()

## **Sentence-Transformers**

In [39]:
# Sub-directory of the dataset directory that holds the train/val/test CSV files loaded below.
data_dir_st = data_dir + 'st/base/'

In [40]:
# Load the three splits; every frame is indexed by (book, chapter), so several rows
# may share the same index value.
df_train = pd.read_csv(data_dir_st + 'train.csv').set_index(['book', 'chapter'])
df_val = pd.read_csv(data_dir_st + 'val.csv').set_index(['book', 'chapter'])
df_test = pd.read_csv(data_dir_st + 'test.csv').set_index(['book', 'chapter'])

### **Print and Summarization**

#### Print Train Examples

In [52]:
# Print sampled training rows with the candidate summaries of two models: the
# 'sshleifer/distilbart-cnn-12-6' checkpoint and the checkpoint stored under
# `magma_dir` (presumably the fine-tuned model — confirm).
print_examples([
    'sshleifer/distilbart-cnn-12-6',
    magma_dir+'fine-tuning/ft_bart_para_wordembed/checkpoint-196'],
    df_train)

(9781912776696, 'hh-5')
Kaplan-Meier curves provide a way of plotting the distribution of a time-to-event endpoint. In
Figure 2.1 the dashed line shows that at 24 months' follow-up the estimated survival probability is
54%. Kaplan-Meier survival curves are often used to compare the data between two groups of subjects.
Figure 2.2 shows Kaplan-Meier curves for OS in a randomized study of patients with human epidermal
growth factor receptor 2 (HER2)-positive metastatic breast cancer treated either with or without
trastuzumab. The Kaplan-Meier curve steps down at time points at which deaths occur, while censored
observations are denoted by notches on the curve. In this study, the follow-up period ranged from 3
months to 74 months.

Reference:
Kaplan-Meier curves plot the probability of being event free over time. The curves from different
treatment groups can be plotted against each other to show the differences in outcome.

sshleifer/distilbart-cnn-12-6 (63 tok):
R1	p: 18.75 	r: 30.00 	f:

sshleifer/distilbart-cnn-12-6 (40 tok):
R1	p: 17.86 	r: 20.83 	f: 19.23
R2	p: 0.00 	r: 0.00 	f: 0.00
RL	p: 14.29 	r: 16.67 	f: 15.38
ST	68.58		W2V	29.40

 Postamputation persistent pain is a special case of PPSP because large nerves are deliberately cut
in all patients. Only 30-50% develop PPSP, whereas 100% have nerve injury.

sshleifer/distilbart-cnn-12-6 (37 tok):
R1	p: 20.00 	r: 20.83 	f: 20.41
R2	p: 0.00 	r: 0.00 	f: 0.00
RL	p: 16.00 	r: 16.67 	f: 16.33
ST	65.58		W2V	30.29

 Postamputation persistent pain is a special case of PPSP because large nerves are deliberately cut.
Only 30-50% develop PPSP, whereas 100% have nerve injury.

sshleifer/distilbart-cnn-12-6 (39 tok):
R1	p: 18.52 	r: 20.83 	f: 19.61
R2	p: 0.00 	r: 0.00 	f: 0.00
RL	p: 14.81 	r: 16.67 	f: 15.69
ST	68.35		W2V	29.66

 Postamputation persistent pain is a special case of PPSP because large nerves are deliberately cut
in all patients. 30-50% develop PPSP, whereas 100% have nerve injury.

sshleifer/distilbart-cnn-12-6 (

sshleifer/distilbart-cnn-12-6 (44 tok):
R1	p: 28.57 	r: 66.67 	f: 40.00
R2	p: 14.71 	r: 35.71 	f: 20.83
RL	p: 28.57 	r: 66.67 	f: 40.00
ST	74.74		W2V	54.85

 Neuropathic pain is initiated or caused by a primary lesion of the PNS or CNS. Patients often
complain not only of spontaneous pain, but also of pain from stimuli that are not normally painful
(allodynia)

sshleifer/distilbart-cnn-12-6 (48 tok):
R1	p: 26.32 	r: 66.67 	f: 37.74
R2	p: 13.51 	r: 35.71 	f: 19.61
RL	p: 26.32 	r: 66.67 	f: 37.74
ST	75.09		W2V	55.65

 Neuropathic pain is initiated or caused by a primary lesion of the PNS or CNS. Pain is constructed
by complex brain processes that are strongly affected by a person's attitudes, beliefs, personality
and interpretation of nociceptive stimuli.

sshleifer/distilbart-cnn-12-6 (51 tok):
R1	p: 24.39 	r: 66.67 	f: 35.71
R2	p: 12.50 	r: 35.71 	f: 18.52
RL	p: 24.39 	r: 66.67 	f: 35.71
ST	74.61		W2V	57.66

 Neuropathic pain is initiated or caused by a primary lesion of the PNS or CNS

/home/marco/epfl/magma/fine-tuning/ft_bart_para_wordembed/checkpoint-196 (40 tok):
R1	p: 27.78 	r: 12.82 	f: 17.54
R2	p: 5.88 	r: 2.63 	f: 3.64
RL	p: 27.78 	r: 12.82 	f: 17.54
ST	55.34		W2V	27.00

NSCLC-NOS can be classified as 'NSCLC, not otherwise specified (NOS) or 'adenocarcinoma' based on
morphology and immunohistochemistry.

/home/marco/epfl/magma/fine-tuning/ft_bart_para_wordembed/checkpoint-196 (40 tok):
R1	p: 27.78 	r: 12.82 	f: 17.54
R2	p: 5.88 	r: 2.63 	f: 3.64
RL	p: 27.78 	r: 12.82 	f: 17.54
ST	55.34		W2V	27.00

NSCLC-NOS can be classified as 'NSCLC, not otherwise specified (NOS) or 'adenocarcinoma' based on
morphology and immunohistochemistry.

/home/marco/epfl/magma/fine-tuning/ft_bart_para_wordembed/checkpoint-196 (29 tok):
R1	p: 15.38 	r: 5.13 	f: 7.69
R2	p: 0.00 	r: 0.00 	f: 0.00
RL	p: 15.38 	r: 5.13 	f: 7.69
ST	49.15		W2V	16.50

NSCLC-NOS can be classified as NSCLC, not otherwise specified (NOS) or adenocarcinoma.

/home/marco/epfl/magma/fine-tuning/ft_bart_para_worde

sshleifer/distilbart-cnn-12-6 (60 tok):
R1	p: 20.45 	r: 25.71 	f: 22.78
R2	p: 2.33 	r: 2.94 	f: 2.60
RL	p: 15.91 	r: 20.00 	f: 17.72
ST	60.92		W2V	45.55

 LINGO antagonists have been shown to promote oligodendrocyte differentiation and myelination in
vitro. Mastinib has completed a positive Phase IIb study in patients with primary and secondary
progressive MS, the results of which are reported to include improvement in the Multiple Sclerosis
Functional Composite.

sshleifer/distilbart-cnn-12-6 (65 tok):
R1	p: 18.37 	r: 25.71 	f: 21.43
R2	p: 0.00 	r: 0.00 	f: 0.00
RL	p: 10.20 	r: 14.29 	f: 11.90
ST	59.86		W2V	33.06

 LINGO antagonists promote oligodendrocyte differentiation and myelination in vitro. Mastinib has
completed a positive Phase IIb study in patients with primary and secondary progressive MS. A Phase
III trial in this population is now in progress, and it may prove to be an oral treatment option for
this population.

sshleifer/distilbart-cnn-12-6 (43 tok):
R1	p: 20.69 	r: 17.1

#### Print Val Examples

In [53]:
# Same comparison of the two models as for the training split, on sampled validation rows.
print_examples([
    'sshleifer/distilbart-cnn-12-6',
    magma_dir+'fine-tuning/ft_bart_para_wordembed/checkpoint-196'],
    df_val)

(9781908541178, 'ch_6')
Many anatomic factors can play a primary or a secondary role in the development and progression of
low back pain syndromes. The most common causes of low back pain are mechanical in origin.
Mechanical disorders of the lumbar spine are related to injury, overuse or deformity of a spinal
structure. The most important traumatic factors in low back pain relate to soft tissue structures.
Precise identification of the injured tissue, and the role of that injury in the consequent pain and
dysfunction, can be frustratingly difficult. Problems occur in muscles, intervertebral discs, facet
joints, ligaments or spinal nerves. Aging causes modification of these structures over time, and
different parts of the spine tend to be at greater risk for change or injury during different
decades of life. Early in life, muscle injuries are more frequent, while joint problems occur in the
sixth decade of life (Table 3.1).

Reference:
Mechanical disorders - muscle strain, spondylolisth

sshleifer/distilbart-cnn-12-6 (66 tok):
R1	p: 32.69 	r: 94.44 	f: 48.57
R2	p: 27.45 	r: 82.35 	f: 41.18
RL	p: 30.77 	r: 88.89 	f: 45.71
ST	71.46		W2V	63.27

 An open-label and a triple-blind RCT have demonstrated that fluid resuscitation using lactated
Ringer's solution (a balanced salt solution) is associated with a decreased inflammatory response
when compared with fluid resuscitation with normal saline. According to in vitro experiments, it
seems that this anti-inflammatory effect depends on lactate.

sshleifer/distilbart-cnn-12-6 (57 tok):
R1	p: 27.27 	r: 66.67 	f: 38.71
R2	p: 23.26 	r: 58.82 	f: 33.33
RL	p: 27.27 	r: 66.67 	f: 38.71
ST	66.63		W2V	45.11

 An open-label and a triple-blind RCT have demonstrated that fluid resuscitation using lactated
Ringer's solution (a balanced salt solution) is associated with a decreased inflammatory response.
According to in vitro experiments, it seems that this anti-inflammatory effect depends on lactate.

sshleifer/distilbart-cnn-12-6 (69 tok)

sshleifer/distilbart-cnn-12-6 (77 tok):
R1	p: 46.67 	r: 56.00 	f: 50.91
R2	p: 22.03 	r: 26.53 	f: 24.07
RL	p: 33.33 	r: 40.00 	f: 36.36
ST	86.24		W2V	74.10

 The potential sensitivity of a given cancer to immuno-oncology therapies will depend on the ability
of the tumor to trigger an immune response (immunogenicity) Cancer is characterized by an
accumulation of genetic mutations, many of which result in the expression of cancer-specific
antigens. Somatic mutation rates differ markedly, both between tumor types and within an individual
tumor type.

sshleifer/distilbart-cnn-12-6 (59 tok):
R1	p: 44.44 	r: 40.00 	f: 42.11
R2	p: 22.73 	r: 20.41 	f: 21.51
RL	p: 35.56 	r: 32.00 	f: 33.68
ST	78.46		W2V	68.54

 The potential sensitivity of a given cancer to immuno-oncology therapies will depend on the ability
of the tumor to trigger an immune response (immunogenicity) Cancer is characterized by an
accumulation of genetic mutations, many of which result in the expression of cancer-specific
antig

/home/marco/epfl/magma/fine-tuning/ft_bart_para_wordembed/checkpoint-196 (14 tok):
R1	p: 18.18 	r: 18.18 	f: 18.18
R2	p: 0.00 	r: 0.00 	f: 0.00
RL	p: 18.18 	r: 18.18 	f: 18.18
ST	13.66		W2V	22.45

There is no diagnostic criteria for the diagnosis of MPN-BP.

/home/marco/epfl/magma/fine-tuning/ft_bart_para_wordembed/checkpoint-196 (11 tok):
R1	p: 0.00 	r: 0.00 	f: 0.00
R2	p: 0.00 	r: 0.00 	f: 0.00
RL	p: 0.00 	r: 0.00 	f: 0.00
ST	7.84		W2V	-3.50

There is no diagnostic criteria for MPN-BP.

/home/marco/epfl/magma/fine-tuning/ft_bart_para_wordembed/checkpoint-196 (12 tok):
R1	p: 0.00 	r: 0.00 	f: 0.00
R2	p: 0.00 	r: 0.00 	f: 0.00
RL	p: 0.00 	r: 0.00 	f: 0.00
ST	7.89		W2V	-1.24

There are no specific diagnostic criteria for MPN-BP.

/home/marco/epfl/magma/fine-tuning/ft_bart_para_wordembed/checkpoint-196 (11 tok):
R1	p: 0.00 	r: 0.00 	f: 0.00
R2	p: 0.00 	r: 0.00 	f: 0.00
RL	p: 0.00 	r: 0.00 	f: 0.00
ST	8.00		W2V	-2.31

There are no diagnostic criteria for MPN-BP.


########################

/home/marco/epfl/magma/fine-tuning/ft_bart_para_wordembed/checkpoint-196 (25 tok):
R1	p: 28.57 	r: 42.86 	f: 34.29
R2	p: 20.00 	r: 30.77 	f: 24.24
RL	p: 23.81 	r: 35.71 	f: 28.57
ST	71.71		W2V	35.96

Treatment to reduce cardiovascular risk factors and promotion of a generally healthy lifestyle is
sufficient intervention for very-low-risk patients.

/home/marco/epfl/magma/fine-tuning/ft_bart_para_wordembed/checkpoint-196 (26 tok):
R1	p: 27.27 	r: 42.86 	f: 33.33
R2	p: 19.05 	r: 30.77 	f: 23.53
RL	p: 22.73 	r: 35.71 	f: 27.78
ST	71.81		W2V	35.96

Treatment to reduce cardiovascular risk factors and promotion of a generally healthy lifestyle is a
sufficient intervention for very-low-risk patients.


####################################################################################################

(9781908541277, 'ch_11')
There are no clear data indicating differences in safety among PHT, CBZ, PB and PRM. Current
evidence suggests that the risk of major congenital malformations is two to

#### Print Test Examples

In [54]:
# Same comparison of the two models as for the training split, on sampled test rows.
print_examples([
    'sshleifer/distilbart-cnn-12-6',
    magma_dir+'fine-tuning/ft_bart_para_wordembed/checkpoint-196'],
    df_test)

(9781908541277, 'ch_8')
Indications. OXC has a similar spectrum of efficacy to CBZ against partial and tonic-clonic
seizures. It tends to be better tolerated than CBZ with fewer neurotoxic side effects. Dosage. The
recommended starting dose for OXC in adults is 150-600 mg daily in two doses. The dose can be
titrated upwards as clinically indicated to 3000-4000 mg daily. A starting dose of 5 mg/kg daily in
children over 3 years of age can be prescribed, increasing gradually to a maintenance dose of about
30 mg/kg daily. Patients already on CBZ may be switched immediately to OXC using a dosage ratio of
1.5 OXC to 1 CBZ. Particular care in immediate switching needs to be taken when the daily CBZ dose
exceeds 1200 mg. Plasma concentrations of the clinically active metabolite of OXC increase linearly
with dose. No studies, however, have attempted to relate elevated plasma levels to efficacy or
toxicity.

Reference:
This wider choice of AEDs permits pharmacological treatment to be better mat

/home/marco/epfl/magma/fine-tuning/ft_bart_para_wordembed/checkpoint-196 (80 tok):
R1	p: 45.45 	r: 48.78 	f: 47.06
R2	p: 18.60 	r: 20.00 	f: 19.28
RL	p: 34.09 	r: 36.59 	f: 35.29
ST	78.32		W2V	67.85

Transabdominal ultrasonography is the most useful test for acute cholecystitis as it establishes the
presence of gallstones. A thickened gallbladder wall (> 4 mm) and pericholecystic fluid are highly
suggestive of acute choleicystitis. A Tc-HIDA scan is a technetium-labeled analog of iminodiacetic
acid.

/home/marco/epfl/magma/fine-tuning/ft_bart_para_wordembed/checkpoint-196 (57 tok):
R1	p: 40.62 	r: 31.71 	f: 35.62
R2	p: 12.90 	r: 10.00 	f: 11.27
RL	p: 34.38 	r: 26.83 	f: 30.14
ST	71.41		W2V	50.35

Transabdominal ultrasonography is the most useful test for acute cholecystitis as it establishes the
presence of gallstones. A thickened gallbladder wall (> 4 mm) and pericholecystic fluid are highly
suggestive of acute choleicystitis.


########################################################

sshleifer/distilbart-cnn-12-6 (48 tok):
R1	p: 6.45 	r: 28.57 	f: 10.53
R2	p: 0.00 	r: 0.00 	f: 0.00
RL	p: 6.45 	r: 28.57 	f: 10.53
ST	52.55		W2V	15.24

 Obesity is associated with hypertension, tachycardia, left ventricular hypertrophy, increased
collagen deposition, reduced cardiac contractility and increased end-diastolic pressure. The major
circulatory complications are increased total and pulmonary blood volume.

sshleifer/distilbart-cnn-12-6 (35 tok):
R1	p: 10.00 	r: 28.57 	f: 14.81
R2	p: 0.00 	r: 0.00 	f: 0.00
RL	p: 10.00 	r: 28.57 	f: 14.81
ST	55.92		W2V	4.68

 Obesity is associated with hypertension, tachycardia, left ventricular hypertrophy, increased
collagen deposition, reduced cardiac contractility and increased end-diastolic pressure.

sshleifer/distilbart-cnn-12-6 (39 tok):
R1	p: 8.00 	r: 28.57 	f: 12.50
R2	p: 0.00 	r: 0.00 	f: 0.00
RL	p: 8.00 	r: 28.57 	f: 12.50
ST	54.88		W2V	12.87

 Obesity is associated with hypertension, tachycardia, left ventricular hypertrophy and i

sshleifer/distilbart-cnn-12-6 (72 tok):
R1	p: 51.72 	r: 50.00 	f: 50.85
R2	p: 19.30 	r: 18.64 	f: 18.97
RL	p: 31.03 	r: 30.00 	f: 30.51
ST	71.90		W2V	49.29

 PD Med study is a multicenter real-life pragmatic study, reporting on quality of life-related
outcomes in both young and old patients with Parkinson's disease. In younger patients, the issues of
neuroprotection and dyskinesias should be considered. Initial treatment with a MAOB inhibitor
appeared to be at least as effective as treatment with a dopamine agonist.

sshleifer/distilbart-cnn-12-6 (72 tok):
R1	p: 51.72 	r: 50.00 	f: 50.85
R2	p: 19.30 	r: 18.64 	f: 18.97
RL	p: 29.31 	r: 28.33 	f: 28.81
ST	72.29		W2V	49.29

 PD Med study is a multicenter real-life pragmatic study, reporting on quality of life-related
outcomes in both young and old patients with Parkinson's disease. Initial treatment with a MAOB
inhibitor appeared to be at least as effective as treatment with a dopamine agonist. In younger
patients, the issues of neuroprot

sshleifer/distilbart-cnn-12-6 (81 tok):
R1	p: 41.18 	r: 75.00 	f: 53.16
R2	p: 22.00 	r: 40.74 	f: 28.57
RL	p: 37.25 	r: 67.86 	f: 48.10
ST	78.04		W2V	68.87

 Gallbladder carcinoma is the fifth most common gastrointestinal (GI) cancer in the USA. Incidence
and mortality are very high in certain Latin American countries, especially Chile. Inflammation may
occur as a result of gallstones, gallbladder polyps, chronic Salmonella infection, congenital
biliary cysts, abnormal pancreaticobiliary duct junction, carcinogen exposure and certain drugs.

sshleifer/distilbart-cnn-12-6 (81 tok):
R1	p: 41.18 	r: 75.00 	f: 53.16
R2	p: 22.00 	r: 40.74 	f: 28.57
RL	p: 37.25 	r: 67.86 	f: 48.10
ST	77.84		W2V	68.84

 Gallbladder carcinoma is the fifth most common gastrointestinal (GI) cancer in the USA.
Inflammation may occur as a result of gallstones, gallbladder polyps, chronic Salmonella infection,
congenital biliary cysts, abnormal pancreaticobiliary duct junction, carcinogen exposure and certain
drugs

#### Summarize Test

In [19]:
def summarize(model_name_or_path, df, batch_size):
    """Generate one summary per row of ``df`` and save the result as CSV.

    The texts are tokenised and summarised batch by batch. Each batch is padded
    only to its own longest text, the attention mask produced by the tokenizer
    is passed to ``generate`` so padding is ignored explicitly, and
    ``truncation=True`` keeps over-long texts within the tokenizer's maximum
    length instead of feeding them to the model unbounded.

    Args:
        model_name_or_path: Model name/path accepted by ``import_model_tok``.
        df: DataFrame with a ``text`` column. It is modified in place: a
            ``summary`` column is added (or overwritten).
        batch_size: Number of texts summarised per ``generate`` call.

    Returns:
        The same ``df`` object, now holding the ``summary`` column. The frame is
        also written to ``<OUTPUT_PATH><model name with '/' replaced by '?'>.csv``.
    """
    model, tokenizer = import_model_tok(model_name_or_path)
    model = model.to(device)

    texts = df.text.tolist()
    batches = [texts[start:start+batch_size]
               for start in range(0, len(texts), batch_size)]

    summs = []

    # The context manager closes the progress bar even when generation fails.
    with tqdm(total=len(batches),
              position=0,
              leave=True,
              file=sys.stdout) as pbar:
        for batch_texts in batches:
            encoded = tokenizer(
                batch_texts,
                return_tensors='pt',
                padding=True,
                truncation=True)

            summ_enc = model.generate(
                encoded.input_ids.to(device),
                attention_mask=encoded.attention_mask.to(device),
                min_length=config.ONE_BULLET_MIN_LEN,
                max_length=config.ONE_BULLET_MAX_LEN,
                length_penalty=config.LENGTH_PENALTY,
                num_beams=config.NUM_BEAMS,
                no_repeat_ngram_size=config.NO_REPEAT_NGRAM_SIZE,
                early_stopping=True)
            summs += tokenizer.batch_decode(summ_enc, skip_special_tokens=True)

            pbar.update(1)

    # NOTE: writes into the caller's frame, as the original implementation did.
    df['summary'] = summs

    df.to_csv(OUTPUT_PATH+model_name_or_path.replace('/', '?')+'.csv')

    return df

In [20]:
# Summarise the test split with 'sshleifer/distilbart-cnn-12-6' in batches of 10;
# summarize() also adds a `summary` column to df_test and writes a CSV under OUTPUT_PATH.
summarize('sshleifer/distilbart-cnn-12-6', df_test, 10)

100%|██████████| 25/25 [17:07<00:00, 41.10s/it]


Unnamed: 0_level_0,Unnamed: 1_level_0,bullets,text,num_bulls,summary
book,chapter,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
9781910797631,chp6,The treatment rather than the prevention of de...,Prevention and management. By far the most res...,2319,Preventive strategies may be directed toward ...
9781910797631,chp6,"For milder forms of depression, self-help, rel...",The forms of psychological therapy best suppor...,2319,The forms of psychological therapy best suppo...
9781910797631,chp6,There are a range of treatments that may be ef...,The effectiveness of individual treatments is ...,2319,Current evidence suggests that better outcome...
9781910797631,chp6,Selective serotonin-reuptake inhibitor antidep...,Monitoring use of antidepressants. For people ...,2319,Many people discontinue their antidepressant ...
9781910797631,chp6,"Depression is complex, and understanding and h...",Suicidal ideas and acts may be increased in th...,2319,Suicidal ideas and acts may be increased in t...
...,...,...,...,...,...
9781908541680,ch_4,Obesity significantly affects cardiometabolic ...,"Overweight/obesity as a whole predisposes to, ...",2556,"Obesity is associated with hypertension, tach..."
9781908541680,ch_4,Lipid disorders and hypertension are exacerbat...,"Overweight/obesity as a whole predisposes to, ...",2556,"Obesity is associated with hypertension, tach..."
9781908541680,ch_4,Weight loss rapidly and effectively reduces ca...,"Overweight/obesity as a whole predisposes to, ...",2556,"Obesity is associated with hypertension, tach..."
9781908541680,ch_4,The metabolic syndrome defines the clustering ...,'Metabolic syndrome' describes the clustering ...,2556,The International Diabetes Federation updated...


In [21]:
# Summarise the test split with the checkpoint stored under `magma_dir`, in batches of 10.
# This overwrites the `summary` column that the previous call left in df_test.
summarize(
    magma_dir+'fine-tuning/ft_bart_para_wordembed/checkpoint-196',
    df_test, 10)

100%|██████████| 25/25 [13:17<00:00, 31.89s/it]


Unnamed: 0_level_0,Unnamed: 1_level_0,bullets,text,num_bulls,summary
book,chapter,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
9781910797631,chp6,The treatment rather than the prevention of de...,Prevention and management. By far the most res...,2319,Prevention and management of depression is a k...
9781910797631,chp6,"For milder forms of depression, self-help, rel...",The forms of psychological therapy best suppor...,2319,Psychological therapy is the best supported by...
9781910797631,chp6,There are a range of treatments that may be ef...,The effectiveness of individual treatments is ...,2319,The development and evaluation of ways of deli...
9781910797631,chp6,Selective serotonin-reuptake inhibitor antidep...,Monitoring use of antidepressants. For people ...,2319,Many people discontinue their antidepressant t...
9781910797631,chp6,"Depression is complex, and understanding and h...",Suicidal ideas and acts may be increased in th...,2319,Suicidal ideas and acts may be increased in th...
...,...,...,...,...,...
9781908541680,ch_4,Obesity significantly affects cardiometabolic ...,"Overweight/obesity as a whole predisposes to, ...",2556,"Overweight/obesity predisposes to, or is assoc..."
9781908541680,ch_4,Lipid disorders and hypertension are exacerbat...,"Overweight/obesity as a whole predisposes to, ...",2556,"Overweight/obesity predisposes to, or is assoc..."
9781908541680,ch_4,Weight loss rapidly and effectively reduces ca...,"Overweight/obesity as a whole predisposes to, ...",2556,"Overweight/obesity predisposes to, or is assoc..."
9781908541680,ch_4,The metabolic syndrome defines the clustering ...,'Metabolic syndrome' describes the clustering ...,2556,'Metabolic syndrome' describes the clustering ...


### **Evaluation**

#### Evaluate summaries bullet by bullet

In [23]:
def evaluate_model_bullet_by_bullet(model_name_or_path):
    """Score every saved summary against its own reference bullet.

    Reads ``<OUTPUT_PATH><model name with '/' replaced by '?'>.csv`` and, for
    each row, compares the ``summary`` column (prediction) with the ``bullets``
    column (reference). The following columns are added:

    * ``<rouge type>_<precision|recall|fmeasure>`` for every type in
      ``config.ROUGE_TYPES`` (score times 100);
    * ``st_cosine_sim``: 100x the cosine similarity of the
      ``sentence_distilroberta`` embeddings;
    * ``w2v_cosine_sim``: 100x the cosine similarity of mean Word2Vec vectors
      from a model trained on just the two texts of the row.

    The columns are read by label. The previous ``row[0]`` / ``row[1]`` access
    treated integer keys on a string-labelled Series as positions, which pandas
    has deprecated.

    Args:
        model_name_or_path: Model name/path that was used when the CSV was written.

    Returns:
        The scored DataFrame, indexed by ``(book, chapter)``. It is also written
        to ``<OUTPUT_PATH><model name>_bullet_by_bullet.csv``.
    """
    csv_stem = OUTPUT_PATH+model_name_or_path.replace('/', '?')
    df_eval = pd.read_csv(csv_stem+'.csv').set_index(['book', 'chapter'])

    def cosine_sim(a, b):
        return np.dot(a, b) / (np.linalg.norm(a)*np.linalg.norm(b))

    def cosine_sim_w2v(s, b):
        # Word2Vec is fitted on the two token lists only; each text is then
        # represented by the mean of its word vectors.
        s = gensim.utils.simple_preprocess(s, deacc=True)
        b = gensim.utils.simple_preprocess(b, deacc=True)
        w2v = gensim.models.Word2Vec(
            [s, b],
            min_count=1,
            sg=1,
            seed=config.SEED)
        s_embed = np.mean([w2v.wv[word] for word in s], axis=0)
        b_embed = np.mean([w2v.wv[word] for word in b], axis=0)
        return cosine_sim(s_embed, b_embed)

    # (prediction, reference) pairs, selected by column label.
    pairs = list(zip(df_eval['summary'], df_eval['bullets']))

    # NOTE: `use_agregator` (single "g") is the keyword exactly as this notebook
    # has always passed it; presumably it matches the installed metric's spelling.
    rouge_res = [
        metric.compute(
            predictions=[summary],
            references=[bullets],
            rouge_types=config.ROUGE_TYPES,
            use_agregator=False)
        for summary, bullets in pairs]
    for r in config.ROUGE_TYPES:
        for i, prf in enumerate(['precision', 'recall', 'fmeasure']):
            df_eval[r+'_'+prf] = [100*score[r][0][i] for score in rouge_res]

    # float(...) keeps the columns float64 whatever dtype the embeddings have.
    df_eval['st_cosine_sim'] = [
        float(100*cosine_sim(
            sentence_distilroberta.encode(summary),
            sentence_distilroberta.encode(bullets)))
        for summary, bullets in pairs]

    df_eval['w2v_cosine_sim'] = [
        float(100*cosine_sim_w2v(summary, bullets))
        for summary, bullets in pairs]

    df_eval.to_csv(csv_stem+'_bullet_by_bullet.csv')

    return df_eval

In [25]:
# Score both models bullet by bullet; each call writes its own *_bullet_by_bullet.csv.
# Only the value of the last expression (the second call) is shown as the cell output.
evaluate_model_bullet_by_bullet('sshleifer/distilbart-cnn-12-6')

evaluate_model_bullet_by_bullet(
    magma_dir+'fine-tuning/ft_bart_para_wordembed/checkpoint-196')

Unnamed: 0_level_0,Unnamed: 1_level_0,bullets,text,num_bulls,summary,rouge1_precision,rouge1_recall,rouge1_fmeasure,rouge2_precision,rouge2_recall,rouge2_fmeasure,rougeL_precision,rougeL_recall,rougeL_fmeasure,st_cosine_sim,w2v_cosine_sim
book,chapter,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
9781910797631,chp6,The treatment rather than the prevention of de...,Prevention and management. By far the most res...,2319,Prevention and management of depression is a k...,56.250000,21.428571,31.034483,13.333333,4.878049,7.142857,37.500000,14.285714,20.689655,59.528840,57.677156
9781910797631,chp6,"For milder forms of depression, self-help, rel...",The forms of psychological therapy best suppor...,2319,Psychological therapy is the best supported by...,31.428571,22.916667,26.506024,8.823529,6.382979,7.407407,22.857143,16.666667,19.277108,60.618716,59.286785
9781910797631,chp6,There are a range of treatments that may be ef...,The effectiveness of individual treatments is ...,2319,The development and evaluation of ways of deli...,50.000000,25.000000,33.333333,13.333333,6.451613,8.695652,25.000000,12.500000,16.666667,48.931929,51.530373
9781910797631,chp6,Selective serotonin-reuptake inhibitor antidep...,Monitoring use of antidepressants. For people ...,2319,Many people discontinue their antidepressant t...,16.666667,12.000000,13.953488,0.000000,0.000000,0.000000,11.111111,8.000000,9.302326,51.113927,31.571856
9781910797631,chp6,"Depression is complex, and understanding and h...",Suicidal ideas and acts may be increased in th...,2319,Suicidal ideas and acts may be increased in th...,21.428571,12.000000,15.384615,0.000000,0.000000,0.000000,14.285714,8.000000,10.256410,28.403780,34.359947
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9781908541680,ch_4,Obesity significantly affects cardiometabolic ...,"Overweight/obesity as a whole predisposes to, ...",2556,"Overweight/obesity predisposes to, or is assoc...",8.333333,28.571429,12.903226,0.000000,0.000000,0.000000,8.333333,28.571429,12.903226,53.125530,15.103170
9781908541680,ch_4,Lipid disorders and hypertension are exacerbat...,"Overweight/obesity as a whole predisposes to, ...",2556,"Overweight/obesity predisposes to, or is assoc...",12.500000,23.076923,16.216216,0.000000,0.000000,0.000000,8.333333,15.384615,10.810811,60.557866,29.030201
9781908541680,ch_4,Weight loss rapidly and effectively reduces ca...,"Overweight/obesity as a whole predisposes to, ...",2556,"Overweight/obesity predisposes to, or is assoc...",16.666667,22.222222,19.047619,4.347826,5.882353,5.000000,16.666667,22.222222,19.047619,58.118296,37.115124
9781908541680,ch_4,The metabolic syndrome defines the clustering ...,'Metabolic syndrome' describes the clustering ...,2556,'Metabolic syndrome' describes the clustering ...,40.909091,60.000000,48.648649,19.047619,28.571429,22.857143,36.363636,53.333333,43.243243,81.833273,47.363037


#### Evaluate summaries grouping bullets

In [26]:
def evaluate_model_grouping_bullets(model_name_or_path):
    """Evaluate a model's summaries per chapter, after grouping its bullets.

    Reads ``OUTPUT_PATH + <name> + '.csv'`` (where ``<name>`` is
    ``model_name_or_path`` with every '/' replaced by '?'), joins with a
    single space all ``text``, ``bullets`` and ``summary`` entries that share
    the same (book, chapter), and scores each grouped summary against the
    grouped bullets.

    Columns added to the grouped frame (all values are multiplied by 100):
      * ``<rouge_type>_{precision,recall,fmeasure}`` for every entry of
        ``config.ROUGE_TYPES``;
      * ``st_cosine_sim``: cosine similarity between the
        ``sentence_distilroberta`` encodings of summary and bullets;
      * ``w2v_cosine_sim``: cosine similarity between the mean Word2Vec
        vectors of summary and bullets, using a model trained on that pair
        only. NaN when either text has no tokens after preprocessing.

    The grouped frame is written to ``OUTPUT_PATH + <name> + '_grouped.csv'``.

    Args:
        model_name_or_path: identifier used to locate the input CSV and to
            name the output CSV.

    Returns:
        pandas.DataFrame indexed by (book, chapter) holding the grouped
        texts and the metric columns listed above.
    """
    # '/' cannot appear in a file name, so the CSVs are stored with '?' instead.
    csv_stem = OUTPUT_PATH + model_name_or_path.replace('/', '?')
    df_eval = pd.read_csv(csv_stem + '.csv').set_index(['book', 'chapter'])

    def join_with_space(values):
        return ' '.join(list(values))

    # sort=False keeps the chapters in their order of first appearance.
    df_eval = df_eval.groupby(['book', 'chapter'], sort=False).agg({
        'text': join_with_space,
        'bullets': join_with_space,
        'summary': join_with_space})

    # Rows are accessed by column label rather than by position: positional
    # integer access on a label-indexed Series is deprecated in recent pandas
    # and would silently depend on the column order.
    rouge_res =\
        df_eval[['bullets', 'summary']]\
        .apply(lambda row:
        metric.compute(
            predictions = [row['summary']],
            references = [row['bullets']],
            rouge_types = config.ROUGE_TYPES,
            # keyword spelling kept exactly as in the original call
            use_agregator = False), axis=1)
    for r in config.ROUGE_TYPES:
        for i, prf in enumerate(['precision', 'recall', 'fmeasure']):
            # score[r] holds one entry per prediction (a single one here);
            # position i of that entry is read as the `prf` value.
            df_eval[r+'_'+prf] =\
                rouge_res.map(lambda score: 100*score[r][0][i])

    def cosine_sim(a, b):
        return np.dot(a, b) / (np.linalg.norm(a)*np.linalg.norm(b))

    df_eval['st_cosine_sim'] =\
        df_eval[['bullets', 'summary']]\
        .apply(lambda row:
        100*cosine_sim(
            sentence_distilroberta.encode(row['summary']),
            sentence_distilroberta.encode(row['bullets'])), axis=1)

    def cosine_sim_w2v(s, b):
        s = gensim.utils.simple_preprocess(s, deacc=True)
        b = gensim.utils.simple_preprocess(b, deacc=True)
        if not s or not b:
            # No tokens to average: return a scalar NaN instead of a NaN
            # array (one side empty) or a training error (both sides empty).
            return np.nan
        corpus = [s, b]
        w2v = gensim.models.Word2Vec(
            corpus,
            min_count=1,
            sg=1,
            seed = config.SEED,
            # The seed alone does not make training reproducible: gensim
            # also requires a single worker thread (and, on Python 3, a
            # fixed PYTHONHASHSEED) for deterministic results.
            workers=1)
        s_embed = np.mean([w2v.wv[word] for word in s], axis=0)
        b_embed = np.mean([w2v.wv[word] for word in b], axis=0)
        return cosine_sim(s_embed, b_embed)

    df_eval['w2v_cosine_sim'] =\
        df_eval[['bullets', 'summary']]\
        .apply(lambda row:
        100*cosine_sim_w2v(row['summary'], row['bullets']), axis=1)

    df_eval.to_csv(csv_stem + '_grouped.csv')

    return df_eval

In [27]:
# Grouped evaluation for 'sshleifer/distilbart-cnn-12-6'. This call is not the
# cell's last expression, so its returned DataFrame is not displayed; the
# function still writes its '_grouped.csv' file.
evaluate_model_grouping_bullets('sshleifer/distilbart-cnn-12-6')

# Grouped evaluation for the checkpoint stored under magma_dir. Being the last
# expression of the cell, this DataFrame is the one rendered as cell output.
evaluate_model_grouping_bullets(
    magma_dir+'fine-tuning/ft_bart_para_wordembed/checkpoint-196')

Unnamed: 0_level_0,Unnamed: 1_level_0,text,bullets,summary,rouge1_precision,rouge1_recall,rouge1_fmeasure,rouge2_precision,rouge2_recall,rouge2_fmeasure,rougeL_precision,rougeL_recall,rougeL_fmeasure,st_cosine_sim,w2v_cosine_sim
book,chapter,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
9781910797631,chp6,Prevention and management. By far the most res...,The treatment rather than the prevention of de...,Prevention and management of depression is a k...,47.474747,27.325581,34.686347,12.244898,7.017544,8.921933,26.262626,15.116279,19.188192,80.113024,88.008338
9781910797426,ch03,Epidemiology and risk factors. Gallstones are ...,The prevalence of gallstones is greater in peo...,Gallstones are a significant cause of morbidit...,50.37037,53.125,51.711027,18.656716,19.685039,19.157088,31.111111,32.8125,31.939163,77.22038,89.696306
9781908541277,ch_12,Nearly 1 in 3 patients reports significant con...,Depression and anxiety are common in patients ...,Depression is the most prevalent psychiatric c...,59.42029,55.033557,57.142857,33.576642,31.081081,32.280702,47.826087,44.295302,45.993031,83.549666,94.057035
9781910797907,chp6,When to transfuse. There is no set standard wi...,The decision for transfusion therapy relates t...,There is no set standard with regard to transf...,50.0,48.091603,49.027237,18.4,17.692308,18.039216,28.571429,27.480916,28.015564,83.413237,87.753701
9781908541277,ch_8,Modern antiepileptic drugs. After a hiatus of ...,Sixteen new antiepileptic drugs (AEDs) have be...,A number of new antiepileptic drugs have been ...,35.9375,44.230769,39.655172,11.111111,13.72549,12.280702,28.125,34.615385,31.034483,70.776069,55.478108
9781910797006,ch08,Acute exacerbations of chronic obstructive pul...,Acute exacerbations of COPD are common and pla...,The healthcare burden of chronic obstructive p...,31.851852,48.314607,38.392857,13.432836,20.454545,16.216216,19.259259,29.213483,23.214286,83.612615,71.803659
9781910797105,ch06,Morphological findings. MRI studies confirmed ...,People with schizophrenia exhibit gray matter ...,People with schizophrenia exhibit decreased vo...,50.420168,58.252427,54.054054,29.661017,34.313725,31.818182,41.176471,47.572816,44.144144,82.696056,90.841591
9783318068207,hh-5,The recent improvements in terms of available ...,Comprehensive genomic profiles offer a complet...,The use of broad NGS gene panels may support t...,39.175258,31.147541,34.703196,4.166667,3.305785,3.686636,21.649485,17.213115,19.178082,72.29085,76.270086
9781910797105,ch04,Studies in twins involve probands with schizop...,Identical twins show a 45% concordance rate. H...,The concordance rates in schizophrenia are 45%...,40.298507,38.028169,39.130435,9.090909,8.571429,8.823529,22.38806,21.126761,21.73913,82.743853,55.53695
9781910797006,ch03,The characteristic symptom of COPD is breathle...,Usually (in 80% of patients) there is a signif...,The characteristic symptom of COPD is breathle...,48.648649,41.618497,44.859813,20.408163,17.44186,18.808777,26.351351,22.543353,24.299065,77.38958,89.411503


#### Plot evaluation bullet by bullet

In [None]:
# Bullet-by-bullet plots for 'sshleifer/distilbart-cnn-12-6'
# (plot_evaluation_bullet_by_bullet is defined outside this section).
plot_evaluation_bullet_by_bullet('sshleifer/distilbart-cnn-12-6')

In [None]:
# Bullet-by-bullet plots for the '..._train/' fine-tuning directory; the '?'
# in the directory name is a literal character of the path.
# NOTE(review): the evaluation cell in this notebook uses
# 'fine-tuning/ft_bart_para_wordembed/checkpoint-196' instead - confirm that
# results exist for this path before running.
plot_evaluation_bullet_by_bullet(
    magma_dir+'fine-tuning/sshleifer?distilbart-cnn-12-6_karger_books_para_wordembed_train/')

In [None]:
# Bullet-by-bullet plots for the '..._no_freeze_train/' fine-tuning directory
# (presumably the run trained without frozen layers - TODO confirm).
plot_evaluation_bullet_by_bullet(
    magma_dir+'fine-tuning/sshleifer?distilbart-cnn-12-6_karger_books_para_wordembed_no_freeze_train/')

#### Plot evaluation grouping bullets

In [None]:
# Grouped-bullets plots for 'sshleifer/distilbart-cnn-12-6'
# (plot_evaluation_grouping_bullets is defined outside this section).
plot_evaluation_grouping_bullets('sshleifer/distilbart-cnn-12-6')

In [None]:
# Grouped-bullets plots for the '..._train/' fine-tuning directory; the '?'
# in the directory name is a literal character of the path.
# NOTE(review): the evaluation cell in this notebook uses
# 'fine-tuning/ft_bart_para_wordembed/checkpoint-196' instead - confirm that
# results exist for this path before running.
plot_evaluation_grouping_bullets(
    magma_dir+'fine-tuning/sshleifer?distilbart-cnn-12-6_karger_books_para_wordembed_train/')

In [None]:
# Grouped-bullets plots for the '..._no_freeze_train/' fine-tuning directory
# (presumably the run trained without frozen layers - TODO confirm).
plot_evaluation_grouping_bullets(
    magma_dir+'fine-tuning/sshleifer?distilbart-cnn-12-6_karger_books_para_wordembed_no_freeze_train/')