<a href="https://colab.research.google.com/github/parisa-kavian/Xsum-FlanT5/blob/main/xsum_flanT5_large.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install langchain-community langchain transformers datasets rouge bert-score

# 1. Import Libraries and Load Model

In [None]:
import pandas as pd
import torch
from datasets import load_dataset
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer

# One checkpoint identifier is shared by the tokenizer and the seq2seq model
model_name = 'google/flan-t5-large'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# 2. Load Dataset

In [None]:
# Load XSum, then keep its 'test' split both as a Dataset and as a DataFrame.
# NOTE(review): trust_remote_code=True lets the dataset's own loading script run —
# confirm it is still required/accepted by the installed `datasets` version.
dataset = load_dataset('xsum', trust_remote_code=True)
test_dataset = dataset['test']
test_df = test_dataset.to_pandas()

# Quick sanity check: preview the top of the table
print("Dataset loaded successfully. Here are the first 5 rows:")
test_df.head(5)

# 3. Generate Summaries

In [None]:
# Create a text generation pipeline around the already-loaded model and tokenizer.
# Use the first CUDA GPU (device=0) when one is available; otherwise fall back to
# the CPU (device=-1) so the notebook does not fail on CPU-only runtimes.
summarizer = pipeline(
    'text2text-generation',
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1,
)

# Define a function to generate summary for a single text
def generate_summary(text, max_length=150, min_length=30):
    """Generate a summary for a single text with the notebook's ``summarizer``.

    Args:
        text: The document to summarize; passed to the pipeline unchanged.
        max_length: Forwarded to the pipeline as ``max_length`` (default 150).
        min_length: Forwarded to the pipeline as ``min_length`` (default 30).

    Returns:
        The ``'generated_text'`` value of the first result the pipeline returns.
    """
    # do_sample=False disables sampling so repeated runs decode the same way.
    # NOTE(review): the text is sent without any instruction prefix — confirm
    # that this is the intended prompt format for Flan-T5.
    summary_output = summarizer(
        text,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
    )
    # The pipeline returns a list of dictionaries; only the first one is used
    return summary_output[0]['generated_text']

# Restrict the run to the leading rows of the test set to keep it quick
num_samples = 25
# Work on an explicit copy so adding a column does not trigger SettingWithCopyWarning
sample_df = test_df.iloc[:num_samples].copy()

# Summarize each 'document' entry one at a time (slow step)
print(f"Generating summaries for the first {num_samples} articles...")
sample_df['model_generated'] = sample_df['document'].map(generate_summary)

print("Summaries generated successfully!")
# Show the reference summaries next to the generated ones
sample_df[['summary', 'model_generated']].head()

# 4. Evaluation Metrics (ROUGE, BLEU, BERTScore)

In [None]:
from rouge import Rouge
from nltk.translate.bleu_score import corpus_bleu
from bert_score import score

# Collect the reference and model-generated summaries as plain Python lists
reference_summaries = list(sample_df['summary'])
generated_summaries = list(sample_df['model_generated'])

# --- ROUGE Score ---
# Generated summaries go first, references second.
# NOTE(review): avg=True presumably returns one averaged result instead of one per pair — confirm.
rouge = Rouge()
rouge_scores = rouge.get_scores(
    generated_summaries,
    reference_summaries,
    avg=True,
)
print("--- ROUGE Scores ---", rouge_scores, sep="\n")

# --- BLEU Score ---
# Whitespace-split every summary into tokens; each reference token list is
# additionally wrapped in its own single-element list before scoring
generated_bleu = [hypothesis.split() for hypothesis in generated_summaries]
reference_bleu = [[reference.split()] for reference in reference_summaries]
bleu_score = corpus_bleu(reference_bleu, generated_bleu)
print("\n--- BLEU Score ---")
print(f"BLEU Score: {bleu_score}")

# --- BERTScore ---
# Unpack the three results, then reduce each one to its mean as a Python float
P, R, F1 = score(generated_summaries, reference_summaries, lang="en", verbose=True)
mean_precision, mean_recall, mean_f1 = (metric.mean().item() for metric in (P, R, F1))
print("\n--- BERTScore ---")
print(f"BERT Precision: {mean_precision:.4f}")
print(f"BERT Recall: {mean_recall:.4f}")
print(f"BERT F1 Score: {mean_f1:.4f}")