Importing Libraries

In [None]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

import glob
import pprint

# Shared pretty-printer instance; not used by any of the cells visible here.
pp = pprint.PrettyPrinter()

Connecting Google Drive

In [None]:
# Attach Google Drive to the Colab runtime's filesystem.
from google.colab import drive

drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
# Directory on the mounted Drive that the model-loading cell passes as `cache_dir`.
import os

cache_dir = '/content/drive/MyDrive/transformers_cache'
os.makedirs(name=cache_dir, exist_ok=True)  # no error if the directory already exists

Loading the T5 Model and Tokenizer and caching them in Google Drive

In [None]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Single checkpoint id shared by the tokenizer and the model so the two
# cannot accidentally be loaded from different checkpoints.
MODEL_NAME = 't5-base'

# `cache_dir` tells from_pretrained where to store / look up the downloaded files.
tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME, cache_dir=cache_dir)
model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME, cache_dir=cache_dir)

Defining a summarize_text function

In [None]:
def summarize_text(text, model, tokenizer, min_length=50, max_length=150, num_beams=5):
    """Summarize ``text`` with a seq2seq model using beam search.

    Args:
        text: Source text to summarize. ``None``, empty, or whitespace-only
            input short-circuits to an empty summary without calling the model.
        model: Object exposing ``generate(...)`` (e.g. ``T5ForConditionalGeneration``).
            If it has a ``device`` attribute, the encoded input is moved there.
        tokenizer: Object exposing ``encode(...)`` and ``decode(...)``
            (e.g. ``T5Tokenizer``).
        min_length: Lower bound on the generated sequence length. This is
            measured in tokens, not words, so the returned word count can be
            smaller than this value.
        max_length: Upper bound on the generated sequence length, in tokens.
        num_beams: Number of beams used by beam search.

    Returns:
        A ``(summary, word_count)`` tuple: the decoded summary string and the
        number of whitespace-separated words in it. ``("", 0)`` for blank input.
    """
    # Nothing to summarize: avoid asking the model to invent text from the
    # bare "summarize: " prefix.
    if not text or not text.strip():
        return "", 0

    # Preprocess the text. The "summarize: " prefix selects T5's summarization task.
    inputs = tokenizer.encode(
        "summarize: " + text,
        return_tensors='pt',
        max_length=512,  # Inputs longer than 512 tokens are truncated to this length
        truncation=True
    )

    # Keep the input on the same device as the model; otherwise generate()
    # fails when the model has been moved to a GPU.
    device = getattr(model, "device", None)
    if device is not None:
        inputs = inputs.to(device)

    # Generate the summary
    summary_ids = model.generate(
        inputs,
        min_length=min_length,            # Minimum generated length, in tokens
        max_length=max_length,            # Maximum generated length, in tokens
        num_beams=num_beams,
        length_penalty=2.0,               # Values > 0 favour longer sequences in beam scoring
        no_repeat_ngram_size=3,           # Never emit the same 3-gram twice
        early_stopping=True               # Stop beam search once num_beams finished candidates exist
    )

    # Decode the best beam back to text
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    # Word count of the summary (whitespace-separated words)
    word_count = len(summary.split())

    return summary, word_count

Calling the function and generating a summary

In [None]:
# Sample article used to demonstrate the summarizer.
text = """
The global smartphone market experienced a significant shift in 2023,
with total shipments reaching 1.21 billion units, a decline of 7.1% compared to the previous year.
This was primarily due to supply chain disruptions, inflationary pressures, and geopolitical tensions.
In contrast, Apple's market share grew from 23.4% in 2022 to 26.5% in 2023, driven by strong demand for the iPhone 14 series,
particularly in North America and Europe. Meanwhile, Samsung's market share slightly decreased from 19.1% to 18.7%,
despite launching the Galaxy S23 and Galaxy Z Fold 5. Xiaomi retained its third position globally,
with an 11.8% market share, but also saw a 10% drop in shipments, largely due to a sluggish market in India.
Additionally, 5G-enabled devices accounted for 70% of total smartphone shipments, up from 57% in 2022,
reflecting increased consumer demand for faster mobile internet speeds. Analysts predict a rebound in 2024,
with smartphone shipments expected to grow by 4.3%, as supply chain constraints ease and
new innovations such as foldable screens and AI-powered features become more mainstream.
"""

# summarize_text returns a (summary, word_count) tuple; unpack it so the
# summary text itself is printed rather than the tuple's repr.
summary, word_count = summarize_text(text, model, tokenizer)

# Print the summary and its length
print("Summary:")
print(summary)
print(f"Word count: {word_count}")

Summary:
('the global smartphone market experienced a significant shift in 2023. total shipments reached 1.21 billion units, a decline of 7.1% compared to the previous year. analysts predict a rebound in 2024, with smartphone shipments expected to grow by 4.3%.', 39)
