<a href="https://colab.research.google.com/github/kayodeleakinwale/GenAI_Works/blob/main/AY_Transformer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
'''
Choose an LLM from transformers and select a tokenizer
'''

from transformers import AutoTokenizer

# Model identifier passed to from_pretrained(); the same value is reused
# later in the notebook when the model itself is loaded.
checkpoint = "google-t5/t5-small"
# Load the tokenizer that belongs to the chosen checkpoint.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

In [7]:
'''
Define a function that applies the tokenizer and the summarization model to input text
'''

from transformers import T5ForConditionalGeneration

# Load the T5 model for the same checkpoint the tokenizer was created from.
model = T5ForConditionalGeneration.from_pretrained(checkpoint)

def summarize_text(text):
  """Summarize ``text`` with the module-level T5 ``model`` and ``tokenizer``.

  The input is tokenized and truncated to at most 512 tokens, a summary is
  generated with 2-beam search (generation ``max_length`` of 200), and the
  first decoded sequence is returned with special tokens removed.

  Args:
    text: The input text to summarize.

  Returns:
    The summarized text, or a short notice string when ``text`` is empty,
    ``None`` or whitespace-only.
  """
  # Guard against blank input so the model is never asked to generate from
  # nothing. The notice is the same one the file-upload cell returns.
  if not text or not text.strip():
    return "No text found in the document to summarize."

  inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
  summary_ids = model.generate(
      inputs["input_ids"],
      # Pass the mask explicitly rather than letting generate() infer it.
      attention_mask=inputs["attention_mask"],
      num_beams=2,
      min_length=0,
      max_length=200,
  )
  # A single input yields a single sequence; take it out of the batch list.
  return tokenizer.batch_decode(summary_ids, skip_special_tokens=True)[0]

# Example usage:
# input_text = "Your long text goes here..."
# summary = summarize_text(input_text)
# print(summary)

In [8]:
'''
Test the summarization tool with an example text input
'''


#input_text = "I am a winner in the name of Jesus. It is a pleasure to have Jesus in your life when everything fails, Jesus never fail."
input_text = "US intelligence agencies had been warning since February that Israel was likely to attempt to strike facilities key to Iran’s nuclear program this year. Israeli Prime Minister Benjamin Netanyahu has repeatedly pushed for a military option to stop Iran’s nuclear program. Recent US intelligence reports say that Israel is seeking to capitalize on the destruction inflicted after it bombed Iran’s missile production facilities and air defenses in October. And overall, Israel is also still pursuing the broader goal of causing regime change in Iran, one such intelligence report said. Israel sees Iran as an existential threat: For years, Israel has said it has the most to lose if Iran, which refuses to recognize Israel’s right to exist, develops a nuclear bomb. Israel and Iran have been fighting a shadow war through proxies and covert actions for decades. Tehran has supported regional armed groups that have engaged in direct conflict with Israel, such as Hezbollah in Lebanon, Hamas in Gaza, and the Houthis in Yemen. Since Israel’s war in Gaza began, attacks by Iran-backed proxy groups have escalated in solidarity with the Palestinians. Netanyahu is in hot water domestically: Opposition to a new military conscription bill has sparked political deadlock, and Israel’s parliament, the Knesset, had met for talks about dissolving itself – which could have ultimately led to early elections that polls show Netanyahu would lose."
# Run the summarizer on the sample passage above and show the result.
summary = summarize_text(input_text)
print(summary)

Israeli prime minister Benjamin Netanyahu has repeatedly pushed for a military option to stop Iran’s nuclear program. recent US intelligence reports say that Israel is likely to attempt to strike facilities key to Iran’s nuclear program this year.


In [9]:

'''
Apply summarization tool to website
Use BeautifulSoup to extract text from the website
'''

!pip install beautifulsoup4 requests

import requests
from bs4 import BeautifulSoup

def get_article_text_from_url(url, timeout=10):
  """Fetch a web page and return the text of its paragraphs.

  The response body is parsed with BeautifulSoup's ``html.parser``.
  Paragraphs are taken from the first of ``<article>``, ``<main>`` or
  ``<div class="article-body">`` that exists. If none of them exists, or the
  one found holds no paragraph text, every ``<p>`` on the page is used
  instead and a warning is printed. The selectors are generic and may need
  adjusting for a particular site.

  Args:
    url: The URL of the article.
    timeout: Seconds to wait for the server before giving up. Passed
      straight to ``requests.get``.

  Returns:
    The paragraph texts joined by single spaces, or None if the request
    fails, no non-blank text can be extracted, or an unexpected error
    occurs. Errors are reported with ``print`` rather than raised.
  """
  try:
    # Without a timeout, requests can block indefinitely on a stalled server.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # Raise an exception for bad status codes

    soup = BeautifulSoup(response.content, 'html.parser')

    article_element = (
        soup.find('article')
        or soup.find('main')
        or soup.find('div', class_='article-body')
    )

    if article_element:
      article_text = ' '.join(p.get_text() for p in article_element.find_all('p'))
      if article_text.strip():
        return article_text

    # Either no container matched or it held no paragraph text: fall back to
    # every paragraph on the page.
    page_text = ' '.join(p.get_text() for p in soup.find_all('p'))
    if page_text.strip():
      if article_element:
        print("Warning: Article element contained no paragraph text. Extracting all paragraph text.")
      else:
        print("Warning: Specific article element not found. Extracting all paragraph text.")
      return page_text

    print(f"Could not extract text from the URL: {url}")
    return None

  except requests.exceptions.RequestException as e:
    print(f"Error fetching URL: {e}")
    return None
  except Exception as e:
    # Best-effort boundary: callers only check for None, so report and return.
    print(f"An unexpected error occurred: {e}")
    return None

# Example usage with a URL:
article_url = "https://www.cnn.com/world/live-news/israel-iran-strikes-news-06-12-25-hnk-intl"  # Replace with an actual article URL
article_text = get_article_text_from_url(article_url)

# Handle the failure case first; otherwise show a preview and the summary.
if not article_text:
  print("Could not retrieve article text to summarize.")
else:
  print("--- Original Article Text ---")
  # Only the first 500 characters are shown, followed by an ellipsis.
  print(article_text[:500] + "...")
  print("\n--- Summarized Text ---")
  summary = summarize_text(article_text)
  print(summary)


--- Original Article Text ---

• Iran and Israel exchange strikes: There have been more explosions tonight in Tehran and Tel Aviv as the conflict between the Middle East foes escalates following Israel’s unprecedented attack early Friday on Iranian nuclear and military targets.
     
• Casualties on both sides: Two people have been killed in Israel and dozens injured by Iranian strikes, Israeli authorities said. Iran reported at least 78 people killed in Israeli strikes, including senior military officials, and 320 wounded, ...

--- Summarized Text ---
Iran and Israel exchange strikes: There have been more explosions tonight in Tehran and Tel Aviv as the conflict between the Middle East foes escalates following Israel’s unprecedented attack early Friday on Iranian nuclear and military targets. Iran reported at least 78 people killed in Israel and dozens injured by Iranian strikes, including senior military officials, and 320 wounded, mostly civilians.


In [10]:
'''
Apply the summarization tool to an uploaded file.
Prompt the user to upload a file (use a simple .txt file).
'''

import ipywidgets as widgets
from IPython.display import display
from io import StringIO
from google.colab import files
from transformers import AutoTokenizer
from transformers import T5ForConditionalGeneration

# Assuming checkpoint and model are already defined and loaded from the preceding code
# checkpoint = "google-t5/t5-small"
# tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# model = T5ForConditionalGeneration.from_pretrained(checkpoint)

def summarize_text(text):
  """Produce a summary of ``text`` with the already-loaded T5 model.

  Args:
    text: The document text to condense.

  Returns:
    The generated summary, or a notice string if ``text`` is empty or
    contains only whitespace.
  """
  # Nothing to summarize: return the notice instead of calling the model.
  if not (text and text.strip()):
    return "No text found in the document to summarize."

  # NOTE(review): the 5000-token cap here differs from the 512 used by the
  # earlier definition of this function in the notebook; confirm it is intended.
  encoded = tokenizer(text, truncation=True, max_length=5000, return_tensors="pt")
  generated_ids = model.generate(
      encoded["input_ids"],
      max_length=200,
      min_length=0,
      num_beams=2,
  )
  decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
  return decoded[0]

# Function to handle the file upload and summarization
def upload_and_summarize(change):
  """Prompt for a file upload, then print a preview and a summary per file.

  Each uploaded file is decoded as UTF-8, its first 500 characters are
  printed, and the result of ``summarize_text`` is printed after it. A
  failure while processing one file is reported and does not stop the loop.

  Args:
    change: Argument supplied by the button's click callback; not used.
  """
  for filename, raw_bytes in files.upload().items():
    print(f'Uploaded file: {filename}')
    try:
      # The upload is assumed to be UTF-8 encoded text.
      document_text = raw_bytes.decode('utf-8')
      print("\n--- Original Document Content (first 500 chars) ---")
      preview = document_text[:500] + "..."
      print(preview)
      print("\n--- Summarized Document Content ---")
      print(summarize_text(document_text))
    except Exception as exc:
      print(f"Error processing file {filename}: {exc}")

# Create the button; clicking it runs upload_and_summarize, which opens the
# upload prompt and prints the preview and summary.
upload_button = widgets.Button(description="Upload Document for Summarization")
upload_button.on_click(upload_and_summarize)

# Render the button in the cell output.
display(upload_button)



Button(description='Upload Document for Summarization', style=ButtonStyle())