In [None]:
! pip install transformers
! pip install datasets
! pip install rouge_score
! pip install evaluate

In [3]:
import re
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from torch.utils.data import DataLoader
import torch

from torch.utils.data import Dataset
import pandas as pd

from torch.optim import Adam
from tqdm.auto import tqdm

import evaluate
import rouge_score

rouge_score = evaluate.load("rouge")

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

In [None]:
device = torch.device("cuda:0")

In [None]:
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2_abstractive_extractive").to(device)

tokenizer.add_special_tokens({"pad_token": "<pad>",
                              "bos_token": "<startofstring>",
                              "eos_token": "<endofstring>"
                             })
# separate token for summary
tokenizer.add_tokens(["<abstractive>"])
tokenizer.add_tokens(["<extractive>"])

model.resize_token_embeddings(len(tokenizer))

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Embedding(50262, 768)

# Rouge evaluation

In [None]:
import pandas as pd

In [None]:
test = pd.read_csv('ami_test_1024.csv')
test_texts = test['text'].values
test_summaries_abstractive = test['abstract'].values # set golden column
test_summaries_extractive = test['extractive'].values # set golden column


In [None]:
len(test_summaries_abstractive)

30

In [None]:
def evaluate_rouge(predictions, golden):
  metrics = rouge_score.compute(predictions=predictions, references=golden)
  rouge_names = ["rouge1", "rouge2", "rougeL", "rougeLsum"]
  rouge_dict = dict((rn, round(metrics[rn] * 100, 2)) for rn in rouge_names)
  return(rouge_dict)

In [None]:
all_predictions_abstractive = {}
all_predictions_extractive = {}


In [None]:
all_rouge_abstractive = {}
all_rouge_extractive = {}


In [None]:
def extract_strings(input_string):
    start_tag1 = "<abstractive>"
    end_tag1 = "<extractive>"
    start_tag2 = "<extractive>"
    end_tag2 = " <"

    # Find the start and end positions of the tags
    start_pos1 = input_string.find(start_tag1) + len(start_tag1)
    end_pos1 = input_string.find(end_tag1)
    start_pos2 = input_string.find(start_tag2, end_pos1) + len(start_tag2)
    end_pos2 = input_string.find(end_tag2, start_pos2)

    # Extract the strings between the tags
    text1 = input_string[start_pos1:end_pos1].strip()
    text2 = input_string[start_pos2:end_pos2].strip()

    return [text1, text2]

In [4]:
def add_results(predictions, key):
  abstractive = []
  extractive = []

  for p in predictions:
    abstractive.append(p[0])
    extractive.append(p[1])

  rouge_a = evaluate_rouge(abstractive, test_summaries_abstractive)
  rouge_e = evaluate_rouge(extractive, test_summaries_extractive)

  all_predictions_abstractive[key] = abstractive
  all_predictions_extractive[key] = extractive

  all_rouge_abstractive[key] = rouge_a
  all_rouge_extractive[key] = rouge_e

  print(f'abstr: {rouge_a}')
  print(f'extr: {rouge_e}')

# Generation methods

# Greedy

In [None]:
def greedy_search(text, model, max_length, no_repeat_ngram_size=3):
   text = "<startofstring> " + text + " <abstractive> "
   text = tokenizer(text, return_tensors="pt")
   X = text["input_ids"].to(device)
   a = text["attention_mask"].to(device)
   # beam search generation
   output = model.generate(X, attention_mask=a, max_length=1024, no_repeat_ngram_size=no_repeat_ngram_size, early_stopping=True)
   output = tokenizer.decode(output[0])
   output = extract_strings(output)
   return output

## Beam

In [None]:
def beam_search(text, model, max_length, num_beams, no_repeat_ngram_size=3):
   text = "<startofstring> " + text + " <abstractive> "
   text = tokenizer(text, return_tensors="pt")
   X = text["input_ids"].to(device)
   a = text["attention_mask"].to(device)
   # beam search generation
   output = model.generate(X, attention_mask=a, max_length=1024, num_beams = num_beams, no_repeat_ngram_size=no_repeat_ngram_size, early_stopping=True)
   output = tokenizer.decode(output[0])
   output = extract_strings(output)
   return output

## Top-k

In [None]:
def top_k(text, model, max_length, top_k=0, temperature=None, no_repeat_ngram_size=3):
   text = "<startofstring> " + text + " <abstractive> "
   text = tokenizer(text, return_tensors="pt")
   X = text["input_ids"].to(device)
   a = text["attention_mask"].to(device)
   # beam search generation
   output = model.generate(X, attention_mask=a, max_length=1024, do_sample=True, top_k=top_k, temperature=temperature, no_repeat_ngram_size=no_repeat_ngram_size, early_stopping=True)
   output = tokenizer.decode(output[0])
   output = extract_strings(output)
   return output

## Contrasive

In [None]:
def contrasive_search(text, model, penalty_alpha=0.6, top_k=2, no_repeat_ngram_size=3):
   text = "<startofstring> " + text + " <abstractive> "
   text = tokenizer(text, return_tensors="pt")
   X = text["input_ids"].to(device)
   a = text["attention_mask"].to(device)
   # beam search generation
   output = model.generate(X, attention_mask=a, max_length=1024,  penalty_alpha=penalty_alpha, top_k=top_k, no_repeat_ngram_size=no_repeat_ngram_size, early_stopping=True)
   output = tokenizer.decode(output[0])
   output = extract_strings(output)
   return output

## Getting predictions

In [None]:
test_summaries_abstractive = test['abstract'].values # set golden column
test_summaries_extractive = test['extractive'].values # set golden column

In [None]:
greedy_predictions = [greedy_search(t, model, 1024, 3) for t in tqdm(test_texts)]

In [None]:
add_results(greedy_predictions, 'greedy')

abstr: {'rouge1': 28.38, 'rouge2': 11.95, 'rougeL': 25.13, 'rougeLsum': 25.38}
extr: {'rouge1': 15.92, 'rouge2': 4.05, 'rougeL': 11.79, 'rougeLsum': 15.19}


In [None]:
beam_2_predictions = [beam_search(t, model, 1024, 2, 3) for t in tqdm(test_texts)]

In [None]:
add_results(beam_2_predictions, 'beam_2')

abstr: {'rouge1': 29.5, 'rouge2': 10.55, 'rougeL': 25.54, 'rougeLsum': 25.52}
extr: {'rouge1': 27.92, 'rouge2': 8.7, 'rougeL': 18.18, 'rougeLsum': 27.41}


In [None]:
beam_3_predictions = [beam_search(t, model, 1024, 3, 3) for t in tqdm(test_texts)]

In [None]:
add_results(beam_3_predictions, 'beam_3')

In [None]:
beam_4_predictions = [beam_search(t, model, 1024, 4, 3) for t in tqdm(test_texts)]

In [None]:
add_results(beam_4_predictions, 'beam_4')

In [None]:
beam_5_predictions = [beam_search(t, model, 1024, 5, 3) for t in tqdm(test_texts)]

In [None]:
add_results(beam_5_predictions, 'beam_5')

abstr: {'rouge1': 30.17, 'rouge2': 12.05, 'rougeL': 24.64, 'rougeLsum': 24.71}
extr: {'rouge1': 23.6, 'rouge2': 6.09, 'rougeL': 14.45, 'rougeLsum': 22.59}


In [None]:
topk_2_07_predictions = [top_k(t, model, 1024, 2, 0.7, 3) for t in tqdm(test_texts)]

In [None]:
add_results(topk_2_07_predictions, 'topk_2_07')

abstr: {'rouge1': 28.14, 'rouge2': 11.2, 'rougeL': 25.15, 'rougeLsum': 25.77}
extr: {'rouge1': 29.45, 'rouge2': 12.47, 'rougeL': 21.5, 'rougeLsum': 28.05}


In [None]:
topk_2_05_predictions = [top_k(t, model, 1024, 2, 0.5, 3) for t in tqdm(test_texts)]

In [None]:
add_results(topk_2_05_predictions, 'topk_2_05')

abstr: {'rouge1': 30.72, 'rouge2': 11.79, 'rougeL': 27.93, 'rougeLsum': 27.78}
extr: {'rouge1': 31.87, 'rouge2': 13.69, 'rougeL': 22.1, 'rougeLsum': 30.8}


In [None]:
topk_2_02_predictions = [top_k(t, model, 1024, 2, 0.2, 3) for t in tqdm(test_texts)]

In [None]:
add_results(topk_2_02_predictions, 'topk_2_02')

abstr: {'rouge1': 27.38, 'rouge2': 9.37, 'rougeL': 23.42, 'rougeLsum': 23.64}
extr: {'rouge1': 26.48, 'rouge2': 10.88, 'rougeL': 19.93, 'rougeLsum': 25.62}


In [None]:
contrasive_06_02_predictions = [contrasive_search(t, model, 0.6, 2, 3) for t in tqdm(test_texts)]

In [None]:
add_results(contrasive_06_02_predictions, 'contr_06_02')

abstr: {'rouge1': 29.86, 'rouge2': 11.09, 'rougeL': 26.43, 'rougeLsum': 26.08}
extr: {'rouge1': 27.94, 'rouge2': 10.79, 'rougeL': 20.06, 'rougeLsum': 26.76}


## Save results to pickle files

Saving abstractive

In [None]:
import pickle


with open('ae1024_preds_a.pickle', 'wb') as handle:
    pickle.dump(all_predictions_abstractive, handle)

with open('ae1024_preds_a.pickle', 'rb') as handle:
    b = pickle.load(handle)


True


In [None]:
with open('ae1024_rouge_a.pickle', 'wb') as handle:
    pickle.dump(all_rouge_abstractive, handle)

with open('ae1024_rouge_a.pickle', 'rb') as handle:
    b = pickle.load(handle)

print(all_rouge_abstractive == b)

True


Saving extractive

In [None]:
with open('ae1024_preds_e.pickle', 'wb') as handle:
    pickle.dump(all_predictions_extractive, handle)

with open('ae1024_preds_e.pickle', 'rb') as handle:
    b = pickle.load(handle)

print(all_predictions_extractive == b)

True


In [None]:
with open('ae1024_rouge_e.pickle', 'wb') as handle:
    pickle.dump(all_rouge_extractive, handle)

with open('ae1024_rouge_e.pickle', 'rb') as handle:
    b = pickle.load(handle)

print(all_rouge_extractive == b)

True
