<a href="https://colab.research.google.com/github/nowshinJahan17/Text-Summarization/blob/Nowshin_Jahan/accuracy_metrics_with_webpage.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install necessary packages and resolve dependency conflicts
!pip install --upgrade datasets transformers
!pip install evaluate sacrebleu rouge_score huggingface_hub

# Import required libraries
import pandas as pd
import numpy as np
from transformers import pipeline
import nltk
from nltk.tokenize import word_tokenize
nltk.download('punkt')

# Import datasets and evaluation metrics
from datasets import load_dataset
from evaluate import load as load_metric

# Load the dataset
dataset = load_dataset("cnn_dailymail", "3.0.0")

# Define models and summarization pipelines
models = {
    "bart": "facebook/bart-large-cnn",
    "pegasus": "google/pegasus-cnn_dailymail",
    "t5": "t5-small"
}
summarizers = {
    name: pipeline("summarization", model=checkpoint, tokenizer=checkpoint, truncation=True)
    for name, checkpoint in models.items()
}

# Sample a subset of test data
test_sampled = dataset['test'].shuffle(seed=42).select(range(10))

# Generate summaries with dynamic length handling
summaries = {model_name: [] for model_name in models.keys()}
reference_summaries = []

for sample in test_sampled:
    article = sample['article']
    reference_summary = sample['highlights']
    reference_summaries.append(reference_summary)

    # Tokenize the article to check its length
    tokenized_article = word_tokenize(article)
    if len(tokenized_article) > 1024:
        # Truncate the article to the first 1024 tokens
        article = ' '.join(tokenized_article[:1024])

    for model_name, summarizer in summarizers.items():
        try:
            # Set truncation and adjust max_length and min_length
            summary = summarizer(
                article,
                max_length=150,  # Adjust as needed
                min_length=40,   # Ensure min_length < max_length
                truncation=True
            )[0]['summary_text']
            summaries[model_name].append(summary)
        except Exception as e:
            print(f"Error encountered for model {model_name}: {e}")
            summaries[model_name].append("")

# Define function to compute classification-style metrics
def classification_metrics(generated, reference):
    gen_words = set(word_tokenize(generated.lower()))
    ref_words = set(word_tokenize(reference.lower()))
    true_positive = len(gen_words & ref_words)
    false_positive = len(gen_words - ref_words)
    false_negative = len(ref_words - gen_words)
    precision = true_positive / (true_positive + false_positive) if (true_positive + false_positive) else 0
    recall = true_positive / (true_positive + false_negative) if (true_positive + false_negative) else 0
    f1 = (2 * precision * recall) / (precision + recall) if (precision + recall) else 0
    return precision, recall, f1

# Initialize metrics dictionary
metrics = {model_name: {'precision': [], 'recall': [], 'f1': []} for model_name in models.keys()}

# Calculate metrics for each model
for model_name, model_summaries in summaries.items():
    for gen_sum, ref_sum in zip(model_summaries, reference_summaries):
        precision, recall, f1 = classification_metrics(gen_sum, ref_sum)
        metrics[model_name]['precision'].append(precision)
        metrics[model_name]['recall'].append(recall)
        metrics[model_name]['f1'].append(f1)

# Compute average metrics
average_metrics = {
    model: {metric: np.mean(scores) for metric, scores in model_metrics.items()}
    for model, model_metrics in metrics.items()
}
df = pd.DataFrame.from_dict(average_metrics, orient='index')
print("Classification Metrics:\n", df)

# Calculate ROUGE and BLEU scores
rouge_metric = load_metric("rouge")
bleu_metric = load_metric("sacrebleu")
rouge_scores = {}
bleu_scores = {}

for model_name, model_summaries in summaries.items():
    rouge_metric.add_batch(predictions=model_summaries, references=reference_summaries)
    rouge_score = rouge_metric.compute()
    rouge_scores[model_name] = rouge_score

    bleu_metric.add_batch(predictions=model_summaries, references=[[ref] for ref in reference_summaries])
    bleu_score = bleu_metric.compute()
    bleu_scores[model_name] = bleu_score['score']

# Display ROUGE and BLEU scores
rouge_df = pd.DataFrame(rouge_scores).T
print("\nROUGE Scores:\n", rouge_df)

bleu_df = pd.DataFrame(bleu_scores, index=["BLEU Score"]).T
print("\nBLEU Scores:\n", bleu_df)


Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting transformers
  Downloading transformers-4.46.2-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.1/44.1 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Collecting tokenizers<0.21,>=0.20 (from transformers)
  Downloading tokenizers-0.20.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading datasets-3.1.0

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/15.6k [00:00<?, ?B/s]

train-00000-of-00003.parquet:   0%|          | 0.00/257M [00:00<?, ?B/s]

train-00001-of-00003.parquet:   0%|          | 0.00/257M [00:00<?, ?B/s]

train-00002-of-00003.parquet:   0%|          | 0.00/259M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/34.7M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/30.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/287113 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/13368 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/11490 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


config.json:   0%|          | 0.00/1.12k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-cnn_dailymail and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


generation_config.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/88.0 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Error encountered for model bart: index out of range in self
Error encountered for model bart: index out of range in self
Error encountered for model bart: index out of range in self
Error encountered for model bart: index out of range in self
Classification Metrics:
          precision    recall        f1
bart      0.250568  0.319942  0.279287
pegasus   0.366080  0.494426  0.412568
t5        0.360520  0.414289  0.379004


Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/8.15k [00:00<?, ?B/s]


ROUGE Scores:
            rouge1    rouge2    rougeL  rougeLsum
bart     0.265753  0.098383  0.165675   0.207899
pegasus  0.396335  0.181157  0.286899   0.343450
t5       0.353363  0.124031  0.227454   0.297865

BLEU Scores:
          BLEU Score
bart       9.116494
pegasus   10.359004
t5         7.771044


In [1]:
!pip install --upgrade transformers
!pip install gradio


Collecting transformers
  Downloading transformers-4.46.3-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.1/44.1 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
Downloading transformers-4.46.3-py3-none-any.whl (10.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.0/10.0 MB[0m [31m35.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.46.2
    Uninstalling transformers-4.46.2:
      Successfully uninstalled transformers-4.46.2
Successfully installed transformers-4.46.3
Collecting gradio
  Downloading gradio-5.6.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.5-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Down

In [2]:
from transformers import pipeline
import gradio as gr


In [3]:
# Load the Pegasus model
model_name = "google/pegasus-cnn_dailymail"
summarizer = pipeline("summarization", model=model_name, tokenizer=model_name, truncation=True)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.12k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-cnn_dailymail and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


generation_config.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/88.0 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [4]:
def summarize_text(article):
    # Generate the summary
    summary = summarizer(
        article,
        max_length=150,  # Adjust as needed
        min_length=50,   # Ensure min_length < max_length
        truncation=True
    )[0]['summary_text']
    return summary


In [6]:
interface = gr.Interface(
    fn=summarize_text,
    inputs=gr.Textbox(lines=15, placeholder="Enter the article text here..."),
    outputs="text",
    title="Pegasus Summarization",
    description="Enter text to summarize using the Pegasus model.",
    examples=[
        ["""In a shocking turn of events, the stock market experienced unprecedented volatility today as major indices plunged sharply. Economic analysts attribute the sudden drop to a combination of global trade tensions and uncertainty over upcoming fiscal policies. Investors are advised to exercise caution as the market adjusts to these developments."""]  # Example article
    ]
)

In [7]:
# Launch the app
interface.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://5ebf039376d269c9ce.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


