In [9]:
from transformers import (
        AutoTokenizer,
        AutoModelForSeq2SeqLM,
)


In [10]:
# Directory holding the fine-tuned checkpoint; the tokenizer and the model
# weights are both read from this same location.
model_path = './models/flan-t5-small-xsum'

# The two loads are independent of each other; each one only needs model_path.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSeq2SeqLM.from_pretrained(model_path)

In [11]:
def clean_text(example):
    """Return single-line, whitespace-trimmed copies of an example's text fields.

    Args:
        example: Mapping with string values under "document" and "summary".

    Returns:
        A new dict holding only the "document" and "summary" keys, where every
        newline has been replaced by one space and leading/trailing whitespace
        has been stripped. The input mapping is not modified.
    """
    def _flatten(text):
        # Each '\n' becomes exactly one space (runs are not collapsed),
        # then the ends are trimmed.
        return text.replace('\n', ' ').strip()

    return {field: _flatten(example[field]) for field in ("document", "summary")}


In [12]:
from datasets import load_from_disk
# Read the dataset previously saved under ./data and keep only its "test" split;
# the later cells filter, clean and index into this object.
dataset = load_from_disk("./data/EdinburghNLP-xsum")["test"]

In [13]:
# Length budgets for this notebook.
# NOTE(review): max_target_length is not referenced in the visible cells.
max_input_length, max_target_length = 512, 64

# Keep only short articles, then normalise their text.
# NOTE(review): len() here counts *characters* of the raw (uncleaned) document,
# not tokens, so this is a much stricter cut than a 512-token limit would be.
dataset = (
    dataset
    .filter(lambda example: len(example['document']) < max_input_length)
    .map(clean_text)
)


In [15]:
# Display the first record of the filtered, cleaned split as a sanity check.
dataset[0]

{'document': 'Officers searched properties in the Waterfront Park and Colonsay View areas of the city on Wednesday. Detectives said three firearms, ammunition and a five-figure sum of money were recovered. A 26-year-old man who was arrested and charged appeared at Edinburgh Sheriff Court on Thursday.',
 'summary': 'A man has appeared in court after firearms, ammunition and cash were seized by police in Edinburgh.',
 'id': '34227252'}

In [16]:
def print_summary(idx, max_length=128, num_beams=4, max_input_tokens=512):
    """Print an article, the model's generated summary and the reference summary.

    Relies on the notebook-level ``dataset``, ``tokenizer`` and ``model``
    objects created in earlier cells.

    Args:
        idx: Index of the example to look up in ``dataset``.
        max_length: ``max_length`` forwarded to ``model.generate``
            (default 128, the value this notebook has always used).
        num_beams: Beam width forwarded to ``model.generate`` (default 4).
        max_input_tokens: Token cap applied when encoding the article; longer
            inputs are truncated instead of being passed through unbounded.
            The default mirrors the value of ``max_input_length`` in this
            notebook.

    Returns:
        None. The three texts are written to stdout.
    """
    example = dataset[idx]
    article = example['document']
    summary = example['summary']

    # Truncation is a safety net: the articles kept by the notebook's filter
    # are short, so for them the encoding is unchanged.
    inputs = tokenizer(
        article,
        return_tensors="pt",
        truncation=True,
        max_length=max_input_tokens,
    )
    outputs = model.generate(
        **inputs,
        max_length=max_length,
        num_beams=num_beams,
        decoder_start_token_id=model.config.decoder_start_token_id,
    )
    # generate() returns one sequence per input; only one article was encoded.
    prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)

    print(f"'>>> Article: {article}'")
    print(f"\n'>>> Prediction: {prediction}'")
    print(f"\n'>>> Summary: {summary}'")


In [19]:
# Show the article, the model's prediction and the reference summary for the
# example at index 117 of the filtered split.
print_summary(117)

'>>> Article: The device was discovered on Foreglen Road on Wednesday morning following a telephone call to a local hospital. The road has reopened after a security alert in the area. The device was taken away for forensic examination. Det Insp Bob Blemmings said: "We appreciate that the alert caused traffic disruption and are grateful for the patience and support shown by the community and commuters."'

'>>> Prediction: A lorry has been reopened after a device was found on a road in Aberdeenshire.'

'>>> Summary: The PSNI have found a "viable" pipe bomb in Dungiven, County Londonderry.'
