In [1]:
# -*- coding: utf-8 -*-
"""
Text Summarization Tool

Deliverable:
A Python script (this file) showcasing input text and concise summaries
using Natural Language Processing techniques.
"""

# Import necessary libraries
import functools
from typing import Optional, List

import torch
from transformers import pipeline

@functools.lru_cache(maxsize=2)
def _load_summarizer(model_name: str, device: str):
    """Build (or reuse) the summarization pipeline for a model/device pair.

    Loading a checkpoint is by far the most expensive step of a summarization
    call, so the most recently used pipelines are kept alive and handed back
    to later calls instead of being rebuilt. The cache is bounded so that
    trying many different models does not keep all of them in memory.
    """
    return pipeline("summarization", model=model_name, device=device)


def summarize_text(text: str,
                   max_length: int = 200,
                   min_length: int = 50,
                   model_name: str = "facebook/bart-large-cnn",
                   include_title: bool = True,
                   add_disclaimer: bool = True) -> str:
    """
    Summarizes the input text using the Hugging Face Transformers library.

    Args:
        text (str): The text to be summarized.
        max_length (int, optional): The maximum length of the summary. Defaults to 200.
        min_length (int, optional): The minimum length of the summary. Defaults to 50.
        model_name (str, optional): The name of the pre-trained model to use.
            Defaults to "facebook/bart-large-cnn".
        include_title (bool, optional): Whether to prefix the summary with
            "Summary: ". Defaults to True.
        add_disclaimer (bool, optional): Whether to append the AI-generated
            disclaimer paragraph. Defaults to True.

    Returns:
        str: The summarized text. On failure no exception is raised; instead
        one of two error strings is returned: "Error: Input text is empty."
        for empty/blank input, or "Error: Unable to generate summary." when
        loading the model or generating the summary fails (the underlying
        exception is printed to the console).
    """
    # Reject empty, None, and whitespace-only input before touching the model:
    # blank text has nothing to summarize and would only trigger a costly load.
    if not text or (isinstance(text, str) and not text.strip()):
        return "Error: Input text is empty."

    try:
        # Determine the device to use (GPU if available, otherwise CPU)
        device = "cuda" if torch.cuda.is_available() else "cpu"

        # Reuse an already-loaded pipeline when the same model is requested again.
        summarizer = _load_summarizer(model_name, device)

        # truncation=True clips inputs that exceed the model's context window
        # instead of letting the call fail on long documents.
        summary_result = summarizer(
            text,
            max_length=max_length,
            min_length=min_length,
            truncation=True,
        )
        summary_text = summary_result[0]['summary_text']

        # Add a title
        if include_title:
            summary_text = "Summary: " + summary_text

        # Add a disclaimer
        if add_disclaimer:
            summary_text += "\n\nDisclaimer: This summary was generated by an AI.  For critical information, verification is recommended."

        return summary_text

    except Exception as e:
        # Deliberate catch-all: this function's contract is to return an error
        # string rather than raise, whatever goes wrong in download/inference.
        error_message = f"An error occurred during summarization: {e}"
        print(error_message)  # Print to console for debugging in Colab
        return "Error: Unable to generate summary."



def main():
    """
    Demo driver: run the summarizer over a handful of sample inputs and
    print each result under its own heading.

    The cases shown are, in order: the raw article, a default summary, empty
    input, a very short input, a summary without title/disclaimer, and a
    summary produced by an alternative model.
    """
    def _emit(heading, body):
        # Every section uses the same layout: heading, content, blank spacer.
        print(heading)
        print(body)
        print("\n")

    # Sample article used by most of the demonstrations below
    article_text = """
    The Tower of London is a historic castle located in central London, England. It lies on the north bank of the River Thames.
    It was founded towards the end of 1066 as part of the Norman Conquest of England. The White Tower, which gives the
    whole castle its name, was built by William the Conqueror in 1078, and was a resented symbol of Norman domination.
    The castle was used as a royal residence, an armoury, a menagerie, a prison, and an execution site.

    The Tower has played a prominent role in English history. It was besieged several times, and controlling it was
    important to controlling the country. The Tower has served variously as an armoury, a treasury, a menagerie,
    the home of the Royal Mint, a public record office, and the home of the Crown Jewels of England.

    The Tower of London is a complex of several buildings set within two concentric rings of defensive walls and a moat.
    There were several phases of expansion, mainly under Kings Richard I, Henry III, and Edward I in the 12th and 13th centuries.
    The general layout established by the end of the 13th century has remained despite later changes.

    The Tower of London is a popular tourist attraction. It is a UNESCO World Heritage Site.
    """

    # Show the source material first
    _emit("Original Article Text:", article_text)

    # Default settings: title and disclaimer included
    _emit("Summary:", summarize_text(article_text))

    # Empty input should come back as an error message, not a crash
    _emit("Summary of empty text:", summarize_text(""))

    # Very short input, with length bounds lowered to suit it
    short_text = "This is a short sentence."
    _emit(
        "Summary of short text:",
        summarize_text(short_text, max_length=50, min_length=5, add_disclaimer=False),
    )

    # Bare summary: neither title nor disclaimer
    _emit(
        "Summary without title/disclaimer:",
        summarize_text(article_text, include_title=False, add_disclaimer=False),
    )

    # Same article through a different pre-trained model
    _emit(
        "Summary with a different model (google/pegasus-large):",
        summarize_text(article_text, model_name="google/pegasus-large"),
    )

# Run the demonstration only when this file is executed directly (as a script
# or notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()


Original Article Text:

    The Tower of London is a historic castle located in central London, England. It lies on the north bank of the River Thames.
    It was founded towards the end of 1066 as part of the Norman Conquest of England. The White Tower, which gives the
    whole castle its name, was built by William the Conqueror in 1078, and was a resented symbol of Norman domination.
    The castle was used as a royal residence, an armoury, a menagerie, a prison, and an execution site.

    The Tower has played a prominent role in English history. It was besieged several times, and controlling it was
    important to controlling the country. The Tower has served variously as an armoury, a treasury, a menagerie,
    the home of the Royal Mint, a public record office, and the home of the Crown Jewels of England.

    The Tower of London is a complex of several buildings set within two concentric rings of defensive walls and a moat.
    There were several phases of expansion, mainly un

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cpu


Summary:
Summary: The Tower of London is a historic castle located in central London, England. It was founded towards the end of 1066 as part of the Norman Conquest of England. The Tower has served variously as an armoury, a treasury, a menagerie, a prison, and an execution site.

Disclaimer: This summary was generated by an AI.  For critical information, verification is recommended.


Summary of empty text:
Error: Input text is empty.




Device set to use cpu
Your max_length is set to 50, but your input_length is only 8. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=4)


Summary of short text:
Summary: This is a short sentence. This is not a short story. It's just a sentence.




Device set to use cpu


Summary without title/disclaimer:
The Tower of London is a historic castle located in central London, England. It was founded towards the end of 1066 as part of the Norman Conquest of England. The Tower has served variously as an armoury, a treasury, a menagerie, a prison, and an execution site.




config.json:   0%|          | 0.00/3.09k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-large and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


generation_config.json:   0%|          | 0.00/260 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/88.0 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

Device set to use cpu


Summary with a different model (google/pegasus-large):
Summary: The castle was used as a royal residence, an armoury, a menagerie, a prison, and an execution site. The Tower has served variously as an armoury, a treasury, a menagerie, the home of the Royal Mint, a public record office, and the home of the Crown Jewels of England.

Disclaimer: This summary was generated by an AI.  For critical information, verification is recommended.


