In [3]:
!pip install torch transformers



In [4]:
!pip install sentencepiece



In [5]:
!pip install requests



In [6]:
!pip install --upgrade ipywidgets



In [7]:
import ipywidgets as widgets 
widgets.IntSlider()

IntSlider(value=0)

In [16]:
import torch
import requests
import logging
from transformers import BartTokenizer, BartForConditionalGeneration

# Suppress unnecessary Hugging Face warnings
logging.getLogger("transformers").setLevel(logging.ERROR)

# Step 1: Fetch News Articles from API
API_KEY = "2f4d7b2a3e2a4601bda2e6c690b9b3d4"  # Replace with your actual key
url = f"https://newsapi.org/v2/top-headlines?country=us&apiKey={API_KEY}"

response = requests.get(url)
data = response.json()

if data["status"] == "ok":
    articles = data["articles"]
    article_text = articles[0]['content'] if articles else "No news articles found."
else:
    article_text = "No news articles found."

# Ensure text length is within model limits
article_text = " ".join(article_text.split()[:150])  # Keep first 150 words

# Optimize for CPU
torch.set_num_threads(2)  # Prevent excessive CPU usage

device = "cpu"  # Force CPU execution

# Use BART for better abstractive summarization
tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn").to(device)
model.eval()  # Ensure model runs in inference mode

def summarize_text(text, max_length=60, min_length=20):
    """Summarizes input text using a BART model."""
    input_text = "summarize: " + text
    inputs = tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True).to(device)
    
    print("Tokenized Input Length:", len(inputs[0]))  # Debugging input length
    
    with torch.no_grad():  # Disable gradients for faster inference
        summary_ids = model.generate(
            inputs,
            max_length=max_length,
            min_length=min_length,
            num_beams=5,  # Use beam search for better quality
            length_penalty=2.0,  # Encourage more abstraction
            early_stopping=True,
            do_sample=True,  # Enable sampling for diversity
            top_k=50,  # Sample from top 50 possible words
            top_p=0.95  # Nucleus sampling
        )
    
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary

# Step 3: Summarize and Display Results
print("Original Article:")
print(article_text)

print("\nGenerated Summary:")
print(summarize_text(article_text))


model.safetensors:  85%|########4 | 1.37G/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

Original Article:
Users attempting to add their Signal.me URL to posts, DMs, and bio descriptions are being met with error messages. Users attempting to add their Signal.me URL to posts, DMs, and bio descriptions are… [+2131 chars]

Generated Summary:
Tokenized Input Length: 56
Users attempting to add their Signal.me URL to posts, DMs, and bio descriptions are being met with error messages.
