### Exploration of the dataset and early-stage modeling

In [1]:
## import libraries
import os
import sys
import time
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datasets import Dataset, DatasetDict, load_dataset
import evaluate
from transformers import BartTokenizer, BartForConditionalGeneration, pipeline, DataCollatorForSeq2Seq
from transformers import Seq2SeqTrainingArguments, Seq2SeqTrainer

import warnings
# Blanket filter: hides every warning category for the rest of the kernel session,
# including deprecation notices from the libraries imported above.
# NOTE(review): this is installed after the library imports, so warnings raised
# while importing them are presumably still shown — confirm, and consider
# narrowing the filter to specific categories/modules.
warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm
  torch.utils._pytree._register_pytree_node(


In [2]:
## Fetch the CNN/DailyMail dataset (config "3.0.0") from the Hugging Face Hub
dataset = load_dataset("cnn_dailymail", name="3.0.0")
# Printing the DatasetDict lists each split with its features and row count.
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['article', 'highlights', 'id'],
        num_rows: 287113
    })
    validation: Dataset({
        features: ['article', 'highlights', 'id'],
        num_rows: 13368
    })
    test: Dataset({
        features: ['article', 'highlights', 'id'],
        num_rows: 11490
    })
})


In [3]:
## Inspect one record from the test split (article plus its reference highlights)
test_split = dataset["test"]
sample = test_split[5]
for heading, field in (("ARTICLE:\n", "article"), ("\nHIGHLIGHTS:\n", "highlights")):
    print(heading, sample[field])


ARTICLE:
 (CNN)He's a blue chip college basketball recruit. She's a high school freshman with Down syndrome. At first glance Trey Moses and Ellie Meredith couldn't be more different. But all that changed Thursday when Trey asked Ellie to be his prom date. Trey -- a star on Eastern High School's basketball team in Louisville, Kentucky, who's headed to play college ball next year at Ball State -- was originally going to take his girlfriend to Eastern's prom. So why is he taking Ellie instead? "She's great... she listens and she's easy to talk to" he said. Trey made the prom-posal (yes, that's what they are calling invites to prom these days) in the gym during Ellie's P.E. class. Trina Helson, a teacher at Eastern, alerted the school's newspaper staff to the prom-posal and posted photos of Trey and Ellie on Twitter that have gone viral. She wasn't surpristed by Trey's actions. "That's the kind of person Trey is," she said. To help make sure she said yes, Trey entered the gym armed with fl

### Load Pre-trained BART Model and Tokenizer


In [4]:
# The tokenizer and the model weights are both loaded from the same checkpoint id.
model_name = "facebook/bart-large-cnn"
model = BartForConditionalGeneration.from_pretrained(pretrained_model_name_or_path=model_name)
tokenizer = BartTokenizer.from_pretrained(pretrained_model_name_or_path=model_name)

In [5]:
## Place the model on the GPU when CUDA is available, otherwise keep it on the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

### Summarize a News Article

In [6]:
def summarize_text(text, max_input=1024, max_output=150, min_output=40,
                   num_beams=4, length_penalty=2.0):
    """
    Summarize the input text with the notebook-level BART `model` and `tokenizer`.

    Args:
        text (str): The input text to summarize.
        max_input (int): Maximum number of input tokens; longer inputs are truncated.
        max_output (int): Maximum length (in tokens) of the generated summary.
        min_output (int): Minimum length (in tokens) of the generated summary.
            Clamped to `max_output` so the two constraints can never contradict
            each other (e.g. when a caller asks for `max_output=20`).
        num_beams (int): Number of beams used for beam search.
        length_penalty (float): Length penalty passed to beam search.
    Returns:
        str: The generated summary, decoded with special tokens removed.
    """
    # Tokenize via __call__ (rather than .encode) so the attention mask is
    # returned alongside the input IDs and can be handed to generate().
    encoded = tokenizer(text, return_tensors="pt", max_length=max_input, truncation=True)

    # Inputs must live on the same device as the model.
    input_ids = encoded["input_ids"].to(model.device)
    attention_mask = encoded["attention_mask"].to(model.device)

    # A minimum above the maximum is infeasible; cap it instead of passing it through.
    min_length = min(min_output, max_output)

    summary_ids = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=max_output,
        min_length=min_length,
        length_penalty=length_penalty,
        num_beams=num_beams,
        early_stopping=True,
    )

    # Only one sequence was submitted, so the first row is the summary.
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


In [7]:
## Run the summarizer on the sample article and show it next to the reference
article = sample["article"]
reference = sample["highlights"]
summary = summarize_text(article)

print("MODEL SUMMARY:\n", summary)
print("\nREFERENCE SUMMARY:\n", reference)


MODEL SUMMARY:
 Trey Moses asked Ellie Meredith, a freshman with Down syndrome, to be his prom date. Trey made the prom-posal in the gym during Ellie's P.E. class. "She's great... she listens and she's easy to talk to," he said.

REFERENCE SUMMARY:
 College-bound basketball star asks girl with Down syndrome to high school prom .
Pictures of the two during the "prom-posal" have gone viral .


In [8]:
## Evaluate ROUGE Score
# Compare the generated summary with the reference highlights of the same sample.
# Stemming is enabled, so inflected forms of a word are counted as matches.
rouge = evaluate.load("rouge")
rouge_score = rouge.compute(
    predictions=[summary],
    references=[sample['highlights']],
    use_stemmer=True,
)

# Backward-compatible aliases for the earlier misspelled names ("rogue"),
# in case other cells still reference them.
rogue = rouge
rogue_score = rouge_score

for label, key in (("ROUGE-1", "rouge1"), ("ROUGE-2", "rouge2"), ("ROUGE-L", "rougeL")):
    print(f"{label} Score:", rouge_score[key])


ROUGE-1 Score: 0.3283582089552239
ROUGE-2 Score: 0.15384615384615383
ROUGE-L Score: 0.26865671641791045
