## Baselines for dialogue summarization

In [None]:
! pip install rouge_score
! pip install evaluate
! pip install nltk

import random

import nltk
nltk.download("punkt")
from nltk.tokenize import sent_tokenize

import pandas as pd

import evaluate
import rouge_score

# Load the ROUGE metric through `evaluate`.
# NOTE: this rebinds the name `rouge_score`, shadowing the `rouge_score` module
# imported just above; from here on `rouge_score` refers to the loaded metric
# object (it is used via `rouge_score.compute(...)` in `evaluate_rouge`).
rouge_score = evaluate.load("rouge")

In [None]:
from tqdm.auto import tqdm

Test dataset loading

In [None]:
# Read the test split from CSV and pull out the input texts together with the
# two reference-summary columns ("extractive" and "abstract") as arrays (`.values`).
test_data = pd.read_csv('ami_test_1024.csv')
test_texts = test_data['text'].values
test_summaries_extractive = test_data['extractive'].values
test_summaries_abstractive = test_data['abstract'].values

Rouge scoring

In [None]:
def evaluate_rouge(predictions, golden):
  """Score predicted summaries against reference summaries with ROUGE.

  Uses the notebook-level `rouge_score` metric object.

  Args:
    predictions: predicted summaries, forwarded as `predictions`.
    golden: reference summaries, forwarded as `references`.

  Returns:
    dict with the keys "rouge1", "rouge2", "rougeL" and "rougeLsum"; each
    value is the corresponding metric multiplied by 100 and rounded to two
    decimal places.
  """
  scores = rouge_score.compute(predictions=predictions, references=golden)
  report = {}
  for key in ("rouge1", "rouge2", "rougeL", "rougeLsum"):
    # Express the score as a percentage with two decimals.
    report[key] = round(scores[key] * 100, 2)
  return report

# Abstractive summarization baselines

#### TL;DR

In [None]:
! pip install transformers
! pip install datasets

import re
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from torch.utils.data import DataLoader
import torch

In [None]:
# Prefer the first CUDA GPU, but fall back to the CPU so the notebook still
# runs (slowly) on machines without CUDA instead of failing when the model
# and inputs are moved to the device.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
# Load the pretrained "gpt2" tokenizer and language-model head, and move the
# model to `device`. Both names are used as notebook-level globals below.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)

To use the GPT-2 model for the summarization task, we can append a `TL;DR:` tag to the end of the input sequence:

In [None]:
# Append the ' TL;DR: ' marker to every test text; the model is then asked to
# continue the text after the marker.
test_texts_tldr = [t + ' TL;DR: ' for t in test_texts]

In [None]:
def extract_tldr(text, taget):
    """Return the part of `text` that follows the first occurrence of `taget`.

    Args:
        text: string to search (here: the decoded model output).
        taget: marker string (sic: the misspelt parameter name is kept so
            existing callers keep working).

    Returns:
        Everything after the first occurrence of the marker, with leading and
        trailing whitespace stripped. An empty string is returned when the
        marker does not occur in `text`.
    """
    index = text.find(taget)
    if index == -1:
        # `str.find` reports "not found" as -1. Slicing from
        # `-1 + len(taget)` would silently return an arbitrary tail of the
        # text, so report "nothing after the marker" explicitly instead.
        return ""

    return text[index + len(taget):].strip()

Generate results with top-k sampling

In [None]:
import re
def top_k(text, model, max_length, top_k=0, temperature=None, target='TL;DR:'):
    """Sample a continuation of `text` from `model` and return its first sentence.

    Relies on the notebook-level `tokenizer` and `device`.

    Args:
        text: prompt string to tokenize and feed to the model.
        model: model whose `generate` method is called.
        max_length: forwarded to `generate` as `max_length` (for Hugging Face
            models this bounds prompt plus generated tokens).
        top_k: forwarded to `generate` as `top_k`.
        temperature: forwarded to `generate` as `temperature`.
        target: marker string; only the decoded text after its first
            occurrence is kept (see `extract_tldr`).

    Returns:
        The first sentence (as split by `sent_tokenize`) of the text that
        follows `target`, or an empty string if there is no such sentence.
    """
    encoded = tokenizer(text, return_tensors="pt")
    input_ids = encoded["input_ids"].to(device)
    attention_mask = encoded["attention_mask"].to(device)
    output = model.generate(
        input_ids,
        attention_mask=attention_mask,
        # Honour the caller's limit; it used to be ignored in favour of a
        # hard-coded 1024.
        max_length=max_length,
        do_sample=True,
        top_k=top_k,
        no_repeat_ngram_size=3,
        temperature=temperature,
        # Make the padding token explicit: this is what `generate` falls back
        # to anyway, but stating it avoids the per-call warning.
        pad_token_id=tokenizer.eos_token_id,
    )
    # Drop special tokens (e.g. the end-of-text token) so they cannot leak
    # into the summary text.
    decoded = tokenizer.decode(output[0], skip_special_tokens=True)
    summary = extract_tldr(decoded, target)
    # Take only the first sentence, since abstractive summaries from AMI
    # present a one-sentence headline.
    return ''.join(sent_tokenize(summary)[:1])

In [None]:
print(top_k(test_texts_tldr[0], model, 1024, 2, 0.5, 'TL;DR:'))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


The more buttons you can make a trendier design.


In [None]:
# Generation samples tokens (do_sample=True), so seed torch right before the
# run to make this cell reproducible regardless of what was executed earlier.
torch.manual_seed(101)
tldr_summaries = [top_k(t,model, 1024, 2, 0.5, 'TL;DR:') for t in tqdm(test_texts_tldr)]

In [None]:
rouges_tldr = evaluate_rouge(tldr_summaries, test_summaries_abstractive)

In [None]:
rouges_tldr

{'rouge1': 11.63, 'rouge2': 1.44, 'rougeL': 9.18, 'rougeLsum': 9.38}

#### TL;DR no hint

In [None]:
# Generation samples tokens (do_sample=True), so seed torch right before the
# run to make this cell reproducible regardless of what was executed earlier.
torch.manual_seed(101)
# "No hint": feed the raw text and use the text itself as the marker, so that
# only the model's continuation is kept.
no_hint_summaries = [top_k(t,model, 1024, 2, 0.5, t) for t in tqdm(test_texts)]

In [None]:
no_hint_rouges = evaluate_rouge(no_hint_summaries, test_summaries_abstractive)

In [None]:
no_hint_rouges

{'rouge1': 6.69, 'rouge2': 0.35, 'rougeL': 5.47, 'rougeLsum': 5.49}

# Extractive dialogue summarization baselines

#### LEAD-3 baseline

In [None]:
def three_sentence_summary(text):
    """LEAD-3 baseline: return the first three sentences of `text`.

    Sentences are produced by `sent_tokenize` and joined with newlines; texts
    with fewer than three sentences yield all of their sentences.
    """
    leading_sentences = sent_tokenize(text)[:3]
    return "\n".join(leading_sentences)

In [None]:
lead_3_summaries = [three_sentence_summary(t) for t in test_texts]

In [None]:
rouges_lead3 = evaluate_rouge(lead_3_summaries, test_summaries_extractive)

In [None]:
rouges_lead3

{'rouge1': 27.8, 'rouge2': 21.24, 'rougeL': 24.57, 'rougeLsum': 27.32}

#### Random-3 baseline

In [None]:
random.seed(101)

In [None]:
def random_three_sentence_summary(text):
    """Random-3 baseline: return up to three randomly chosen sentences of `text`.

    Sentences are produced by `sent_tokenize`, drawn without replacement with
    `random.sample` (so their order is the sampling order, not the document
    order) and joined with newlines.

    Args:
        text: document to summarize.

    Returns:
        Newline-joined sample of three sentences, or of all sentences when the
        text contains fewer than three.
    """
    sentences = sent_tokenize(text)
    # random.sample raises ValueError when asked for more items than the
    # population holds, so cap the sample size for very short texts.
    sample_size = min(3, len(sentences))
    return "\n".join(random.sample(sentences, sample_size))

In [None]:
three_random_summaries = [random_three_sentence_summary(t) for t in test_texts]

In [None]:
rouges_random3 = evaluate_rouge(three_random_summaries, test_summaries_extractive)

In [None]:
rouges_random3

{'rouge1': 24.69, 'rouge2': 14.35, 'rougeL': 19.09, 'rougeLsum': 23.22}

#### TL-DR-3

In [None]:
def top_k_extractive(text, model, max_length, top_k=0, temperature=None, target='TL;DR:'):
    """Sample a continuation of `text` from `model` and return its first three sentences.

    Relies on the notebook-level `tokenizer` and `device`.

    Args:
        text: prompt string to tokenize and feed to the model.
        model: model whose `generate` method is called.
        max_length: forwarded to `generate` as `max_length` (for Hugging Face
            models this bounds prompt plus generated tokens).
        top_k: forwarded to `generate` as `top_k`.
        temperature: forwarded to `generate` as `temperature`.
        target: marker string; only the decoded text after its first
            occurrence is kept (see `extract_tldr`).

    Returns:
        Up to three leading sentences (as split by `sent_tokenize`) of the
        text that follows `target`, joined with newlines.
    """
    encoded = tokenizer(text, return_tensors="pt")
    input_ids = encoded["input_ids"].to(device)
    attention_mask = encoded["attention_mask"].to(device)
    output = model.generate(
        input_ids,
        attention_mask=attention_mask,
        # Honour the caller's limit; it used to be ignored in favour of a
        # hard-coded 1024.
        max_length=max_length,
        do_sample=True,
        top_k=top_k,
        no_repeat_ngram_size=3,
        temperature=temperature,
        # Make the padding token explicit: this is what `generate` falls back
        # to anyway, but stating it avoids the per-call warning.
        pad_token_id=tokenizer.eos_token_id,
    )
    # Drop special tokens (e.g. the end-of-text token) so they cannot leak
    # into the summary text.
    decoded = tokenizer.decode(output[0], skip_special_tokens=True)
    summary = extract_tldr(decoded, target)
    # For extractive summaries take the first 3 sentences.
    return '\n'.join(sent_tokenize(summary)[:3])

In [None]:
# Generation samples tokens (do_sample=True), so seed torch right before the
# run to make this cell reproducible regardless of what was executed earlier.
torch.manual_seed(101)
tldr_summaries_extractive = [top_k_extractive(t,model, 1024, 2, 0.5, 'TL;DR:') for t in tqdm(test_texts_tldr)]

In [None]:
tldr_extractive_rouges = evaluate_rouge(tldr_summaries_extractive, test_summaries_extractive)

In [None]:
tldr_extractive_rouges

{'rouge1': 22.8, 'rouge2': 4.72, 'rougeL': 14.96, 'rougeLsum': 21.65}

#### TL-DR-3 no hint Extractive

In [None]:
# Generation samples tokens (do_sample=True), so seed torch right before the
# run to make this cell reproducible regardless of what was executed earlier.
torch.manual_seed(101)
# "No hint" run: iterate over the raw texts (without the ' TL;DR: ' suffix),
# exactly as the abstractive no-hint baseline does; iterating over
# `test_texts_tldr` would feed the hint to the model after all.
nohint_summaries_extractive = [top_k_extractive(t,model, 1024, 2, 0.5, t) for t in tqdm(test_texts)]

In [None]:
nohint_extractive_rouges = evaluate_rouge(nohint_summaries_extractive, test_summaries_extractive)

In [None]:
nohint_extractive_rouges

{'rouge1': 18.97, 'rouge2': 3.51, 'rougeL': 12.0, 'rougeLsum': 17.69}