In [29]:
!pip install transformers datasets rouge-score -q

In [30]:
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
from datasets import load_dataset
from rouge_score import rouge_scorer

In [31]:
def load_model(model_name):
  tokenizer = T5Tokenizer.from_pretrained(model_name)
  model = T5ForConditionalGeneration.from_pretrained(model_name)
  return model, tokenizer

In [33]:
dataset = load_dataset("cnn_dailymail", "3.0.0", split="test")
texts = dataset['article'][6:11]
references = dataset['highlights'][6:11]

In [34]:
def summarize(text, model, tokenizer):
    input_ids = tokenizer.encode(text, return_tensors="pt", max_length=512, truncation=True)
    with torch.no_grad():
        output = model.generate(input_ids, max_length=150, num_beams=4, early_stopping=True)
    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    return summary


In [35]:
def calculate_rouge(summaries, references,model_name):
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    scores = { 'rouge1': 0, 'rouge2': 0, 'rougeL': 0 }
    i=0
    for summary, reference in zip(summaries, references):
        i+=1

        score = scorer.score(reference, summary)
        print(f'rouge1 score of summary{i}:{score["rouge1"].fmeasure}')
        print(f'rouge2 score of summary{i}:{score["rouge2"].fmeasure}')
        print(f'rougeL score of summary{i}:{score["rougeL"].fmeasure}')
        scores['rouge1'] += score['rouge1'].fmeasure
        scores['rouge2'] += score['rouge2'].fmeasure
        scores['rougeL'] += score['rougeL'].fmeasure
        ws.cell(row=i+1, column=4, value=scores['rouge1'])
        ws.cell(row=i+1, column=5, value=scores['rouge2'])
        ws.cell(row=i+1, column=6, value=scores['rougeL'])

    num_samples = len(summaries)
    scores = { k: v / num_samples for k, v in scores.items() }
    for j, val in enumerate(scores.values()):
        ws.cell(row=i+2, column=j+4, value=val)
    return scores


In [36]:
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

def calculate_bleu(summaries, references, model_name):

    bleu_scores = []
    smoothie = SmoothingFunction().method4
    i=0
    for summary, reference in zip(summaries, references):
        i+=1
        reference_tokens = [reference.split()]
        summary_tokens = summary.split()

        score = sentence_bleu(reference_tokens, summary_tokens,weights=(0.5,0.5,0,0) ,smoothing_function=smoothie)
        bleu_scores.append(score)
        print(f"BLEU score of summary{i}: {score}")
        ws.cell(row=i+1, column=7, value=score)

    average_bleu = sum(bleu_scores) / len(bleu_scores)
    ws.cell(row=i+2, column=7, value=average_bleu)
    return average_bleu

In [37]:
def run_summarization(model_name):
  model, tokenizer = load_model(model_name)

  summaries = [summarize(text, model, tokenizer) for text in texts]

  for i, summary in enumerate(summaries):
    print(f"\nGenerated Summary {i+1} ({model_name}):\n", summary)
  column=['Text',"reference Summary","Summary","rouge 1","rouge 2","rouge L","Bleu score"]
  for i,val in enumerate(column):
      ws.cell(row=1, column=i+1, value=val)
  for i, text in enumerate(texts):
      ws.cell(row=i+2, column=1, value=text)
  for i, summary in enumerate(summaries):
      ws.cell(row=i+2, column=3, value=summary)
  for i, ref in enumerate(references):
      ws.cell(row=i+2, column=2, value=ref)

  rouge_scorers = calculate_rouge(summaries, references,model_name)
  print(f"\nROUGE Scores for {model_name}:", rouge_scorers)
  bleu_score = calculate_bleu(summaries, references,model_name)
  print(f"Average BLEU score: {bleu_score}")

In [38]:
!pip install openpyxl
import openpyxl
from openpyxl import Workbook , load_workbook



In [39]:
wb = wb = load_workbook('/content/drive/My Drive/summary_results.xlsx')

In [40]:
ws = wb.create_sheet(title="t5-small")

In [41]:
run_summarization("t5-small")


Generated Summary 1 (t5-small):
 the number of executions worldwide has gone down by almost 22% on the previous year. at least 607 people were executed around the world in 2014, compared to 778 in 2013. the number of executions worldwide has gone down by almost 22% on the previous year.

Generated Summary 2 (t5-small):
 : "A rise in my blood pressure places me in grave risk of substantial and irreparable injury or death" a coroner's preliminary assessment is there was no foul play involved in the death of Andrew Getty. the grandson of oil tycoon J. Paul Getty appears to have died of natural causes.

Generated Summary 3 (t5-small):
 5 p.m. (5 a.m. ET) and heading west at a 12.5 mph clip. it's expected to make landfall on the southeastern coast of Isabela province and be out of the Philippines by Monday.

Generated Summary 4 (t5-small):
 (CNN)For the first time in eight years, a TV legend returned to doing what he does best. contestants told to "come on down!" on the April 1 edition of 

In [42]:
ws = wb.create_sheet(title="t5-base")

In [43]:
run_summarization("t5-base")


Generated Summary 1 (t5-base):
 the threat of terrorism to advance executions, Amnesty International alleges in its annual report on the death penalty. on one hand, the number of executions worldwide has gone down by almost 22% on the previous year. at least 2,466 people were confirmed to have been sentenced to death in 2014, an increase of 28%. "We are seeing a decrease in the number of executions (worldwide)," Audrey Gaughran, Amnesty's Director of Global Issues, told CNN

Generated Summary 2 (t5-base):
 the coroner's preliminary assessment is there was no foul play involved in the death of Andrew Getty, a police spokesman said. Getty is one of three living sons of oil tycoon J. Paul Getty, who died in 1976. he spearheaded the controversial sale of Getty to Texaco for $10 billion in 1984.

Generated Summary 3 (t5-base):
 tourists will be advised to return to their respective places... and will be advised to return to their respective places," an official told PNA. Maysak is expected

In [44]:
ws = wb.create_sheet(title="t5-large")

In [45]:
run_summarization("t5-large")


Generated Summary 1 (t5-large):
 in the number of people sentenced to death last year. At least 2,466 people are confirmed to have been handed the sentence last year, an increase of 28% in the number of people sentenced to death. At least 2,466 people are confirmed to have been handed the sentence last year, an increase of 28% in the number of people sentenced to death. The report, released Wednesday, also notes that executions are increasing globally despite the decline.

Generated Summary 2 (t5-large):
 Getty, 47, had "several health issues," a police spokesman says. the coroner's preliminary assessment is there was no foul play involved, a police spokesman says. Andrew Getty had "several health issues," a police spokesman says.

Generated Summary 3 (t5-large):
 and be out of the country by Monday.. "It will be like a typhoon.". "We do not know what the impact will be once it will make landfall," Dry said.. (CNN) for flash floods and landslides. to be on guard for flash floods and l

In [46]:
wb.save('/content/drive/My Drive/summary_results.xlsx')