In [2]:
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer
import re
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
# Download the 'punkt' tokenizer data package used by the NLTK tokenizers imported above.
# NOTE(review): newer NLTK releases appear to load sentence-tokenizer data from the
# 'punkt_tab' resource instead — confirm against the installed NLTK version.
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.


True

In [3]:
def install_packages(packages=("torch", "transformers")):
    """Ensure the given packages are importable, pip-installing any that are missing.

    Args:
        packages: Iterable of top-level package names to check. Each name is
            used both as the import name and as the pip distribution name, so
            this only suits packages where the two coincide (true for the
            defaults).

    Returns:
        None.

    Raises:
        subprocess.CalledProcessError: If the pip invocation exits non-zero.
        ImportError: If a package still cannot be located after installation.
    """
    import importlib
    import importlib.util
    import subprocess
    import sys

    # find_spec locates a top-level module without executing it, so probing
    # for heavy packages such as torch does not pay their import cost.
    missing = [name for name in packages if importlib.util.find_spec(name) is None]
    if not missing:
        return

    # Invoke pip through the running interpreter so the install lands in the
    # same environment as this kernel; only the missing packages are requested.
    subprocess.check_call([sys.executable, "-m", "pip", "install", *missing])

    # Newly written site-packages entries may be hidden by cached directory
    # listings until the import system's caches are invalidated.
    importlib.invalidate_caches()
    still_missing = [name for name in missing if importlib.util.find_spec(name) is None]
    if still_missing:
        raise ImportError(
            "Packages not importable after installation: " + ", ".join(still_missing)
        )

# Run the dependency check when this cell executes.
# NOTE(review): the first cell already imports torch and transformers, so on a
# fresh kernel a missing package fails there before this check can run —
# consider moving this cell above the imports.
install_packages()

In [4]:
def load_model(model_name="t5-small"):
    """Load the T5 tokenizer and model for ``model_name``.

    Loading is best-effort: any exception raised while loading is printed
    and swallowed rather than propagated.

    Args:
        model_name: Identifier passed to ``from_pretrained`` for both the
            tokenizer and the model.

    Returns:
        A ``(tokenizer, model)`` tuple on success, or ``(None, None)`` if
        anything went wrong while loading.
    """
    try:
        # Tuple elements evaluate left to right: tokenizer first, then model.
        loaded = (
            T5Tokenizer.from_pretrained(model_name),
            T5ForConditionalGeneration.from_pretrained(model_name),
        )
    except Exception as err:
        print(f"Failed to load model: {err}")
        loaded = (None, None)
    return loaded

In [13]:
def correct_text_format(text):
    """Normalize whitespace and capitalize the first character of each sentence.

    The text is split into sentences with ``sent_tokenize``. Within each
    sentence, runs of whitespace are collapsed to single spaces and the first
    character is upper-cased; every other character is left as written, so
    punctuation spacing, quotes, contractions and acronyms survive intact.

    Args:
        text: The string to clean up.

    Returns:
        The cleaned sentences joined by single spaces ("" when ``text``
        contains no sentences).
    """
    cleaned_sentences = []
    for sentence in sent_tokenize(text.strip()):
        # Collapse internal whitespace (including newlines) without touching
        # punctuation; re-joining word tokens with spaces would yield
        # "Hello , world ." and rewrite quote characters.
        sentence = " ".join(sentence.split())
        if not sentence:
            continue
        # Upper-case only the first character: str.capitalize() would also
        # lower-case the rest of the word ("NASA" -> "Nasa").
        cleaned_sentences.append(sentence[0].upper() + sentence[1:])
    return " ".join(cleaned_sentences)

In [14]:
def summarize_text(text, tokenizer, model, max_length=200, min_length=50):
    """Generate a summary of ``text`` with a seq2seq model.

    The text is cleaned with ``correct_text_format``, prefixed with
    ``"summarize: "``, encoded (truncated to 512 tokens), and decoded from a
    5-beam search. The decoded summary is cleaned the same way before it is
    returned.

    Args:
        text: The text to summarize.
        tokenizer: Tokenizer providing ``encode`` and ``decode``.
        model: Model providing ``generate``.
        max_length: Upper bound on the generated summary length, in tokens.
        min_length: Lower bound on the generated summary length, in tokens.
            Clamped to ``max_length`` when it exceeds it.

    Returns:
        The formatted summary, or the string ``"Error: Input text is empty."``
        when ``text`` is ``None``, empty, or only whitespace.
    """
    # Treat None like blank text; calling .strip() on None would raise.
    if not text or not text.strip():
        return "Error: Input text is empty."

    # A minimum longer than the maximum cannot be satisfied by generation.
    min_length = min(min_length, max_length)

    text = correct_text_format(text)
    input_ids = tokenizer.encode(
        "summarize: " + text,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )

    # Keep the inputs on the same device as the model so generation also
    # works when the model has been moved off the CPU.
    device = getattr(model, "device", None)
    if device is not None:
        input_ids = input_ids.to(device)

    summary_ids = model.generate(
        input_ids,
        max_length=max_length,
        min_length=min_length,
        length_penalty=2.0,
        num_beams=5,
        early_stopping=True,
    )
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return correct_text_format(summary)

In [15]:
def summarize_batch(texts, tokenizer, model):
    """Summarize every text in ``texts`` independently, preserving order.

    Args:
        texts: Iterable of strings to summarize.
        tokenizer: Tokenizer forwarded to ``summarize_text``.
        model: Model forwarded to ``summarize_text``.

    Returns:
        A list with one ``summarize_text`` result per input text.
    """
    # summarize_text already runs correct_text_format on its input, so the
    # texts are handed over as-is instead of being formatted twice.
    return [summarize_text(text, tokenizer, model) for text in texts]

In [16]:
# Load the default "t5-small" checkpoint; both names are None if loading failed.
tokenizer, model = load_model()

In [17]:
# load_model() signals failure by returning (None, None), so test for None
# explicitly instead of relying on the truthiness of the loaded objects.
if tokenizer is not None and model is not None:
    user_input = input("Enter text to summarize: ")
    formatted_input = correct_text_format(user_input)
    summary_result = summarize_text(formatted_input, tokenizer, model)
    print("\nGenerated Summary:", summary_result)


Enter text to summarize: The Aztec people are originally from the central part of Mexico. They were most known for their political power and the use of the war drums in battle they posed the Mesoamerica in the 15th and 16th centuries (Skidmore & Smith pp 28). Today they are found in the island of Lake Tex coco and are renowned by their Aztec triple alliance or the Aztec empire.  This was conquered in 1521 by the Spanish hence the fall of the empire (Skidmore & Smith pp 29). Their musical power was due to their strong and complex religious (with more than 100 gods) stands, as well as the great architecture and art work.  Songs and poetry were highly regarded to a point of competing on which were the best in this field. Their music also had some incorporation of the African music from the African slaves as well as the Salvadoran music from Mexico.  Their songs included religious songs for their gods in praises for them, an encouragement to fellow Latin friends to be committed to the reli