In [None]:
# Install (or upgrade to the latest release of) langchain-community.
# NOTE(review): none of the cells visible in this notebook import langchain —
# confirm this install is still needed.
!pip install -U langchain-community



In [None]:
!pip install lxml[html_clean]
!pip install transformers duckduckgo-search newspaper3k
!pip install -U duckduckgo-search requests

In [None]:
import os
from google.colab import userdata
from duckduckgo_search import DDGS
from newspaper import Article
from transformers import pipeline

# Set up the Hugging Face API token from Colab secrets.
# Add your token to Colab's Secrets Manager under the name HF_API_TOKEN and grant
# this notebook access to it. Authentication is optional for public models.
try:
    hf_token = userdata.get("HF_API_TOKEN")
    if hf_token:
        # Kept for backward compatibility with anything that reads this name directly.
        os.environ["HF_API_TOKEN"] = hf_token
        # huggingface_hub reads the token from HF_TOKEN, not HF_API_TOKEN, so export it
        # under that name as well; setdefault leaves an already-configured token untouched.
        os.environ.setdefault("HF_TOKEN", hf_token)
        print("Hugging Face API token successfully loaded from Colab secrets.")
    else:
        # Covers both a missing value and an empty-string secret.
        print("Hugging Face API token not found in Colab secrets. Proceeding without authentication (for public models).")
except Exception as e:
    # Best effort: a secrets lookup failure must not stop the notebook, because the
    # model used below is public.
    print(f"Error accessing Hugging Face API token from Colab secrets: {e}")
    print("Proceeding without authentication.")


# Load the summarization model from Hugging Face Transformers.
# 'facebook/bart-large-cnn' is a pre-trained summarization checkpoint; the pipeline
# object is created once here and reused by summarize_news for every article.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Function to search DuckDuckGo for news articles
def get_news_links(query, max_results=5):
    """Return result URLs from a DuckDuckGo text search.

    Args:
        query: Search string, passed to ``DDGS.text`` unchanged.
        max_results: Upper bound on the number of results requested.

    Returns:
        A list of the unique, non-empty ``"href"`` values of the results, in the
        order the search returned them. It may be shorter than ``max_results``
        (or empty) when results lack a link or repeat a URL.

    Raises:
        Whatever ``DDGS`` raises (e.g. on network failure); errors are not swallowed.
    """
    with DDGS() as ddgs:
        # Tolerate a search that yields nothing at all.
        results = ddgs.text(query, max_results=max_results) or []
        # Consume the results while the session is still open.
        hrefs = [result.get("href") for result in results]
    # dict.fromkeys drops repeated URLs while keeping first-seen order, so the same
    # article is not downloaded and summarized twice; falsy entries are results
    # that carried no link.
    return [href for href in dict.fromkeys(hrefs) if href]

# Function to extract the full text content from a news article URL
def extract_text(url):
    """Download an article and return its body text.

    Args:
        url: Address of the article to fetch.

    Returns:
        The text extracted by ``Article`` after download and parse, or an empty
        string when the download/parse fails or no text was extracted.
    """
    try:
        article = Article(url)
        article.download()
        # parse() populates article.text from the downloaded page.
        article.parse()
        return article.text or ""
    except Exception:
        # Best effort: one unreachable or unparsable page must not abort the whole
        # run. Only Exception is caught so that KeyboardInterrupt / SystemExit
        # still stop a long-running scrape.
        return ""

# Function to summarize news based on a given topic
def summarize_news(topic):
    """Search for articles about a topic and return a summary of each.

    Args:
        topic: Free-text topic to search for.

    Returns:
        A single string with one "link + summary" entry per article that produced
        more than 100 words of text, entries separated by blank lines.
        "No good articles found." when no article qualifies, and a prompt to
        enter a topic when ``topic`` is empty or whitespace.
    """
    # Don't spend a web search on an empty query (e.g. a blank form submission).
    if not topic or not topic.strip():
        return "Please enter a topic to search for."

    # NOTE(review): "site:news" is handed to the search engine verbatim; the sample
    # output of this notebook includes .com/.org domains, so confirm it filters as intended.
    links = get_news_links(f"{topic.strip()} site:news")
    summaries = []

    for link in links:
        text = extract_text(link)
        # Skip pages with no text or too little text (100 words or fewer) to be
        # worth summarizing.
        if text and len(text.split()) > 100:
            # Pass the whole article and let the pipeline's tokenizer truncate it to
            # the model's input limit. Slicing the string (text[:1024]) cut by
            # characters, not tokens, and fed the model only a small fraction of
            # the context it can use.
            summary = summarizer(
                text,
                max_length=200,
                min_length=50,
                do_sample=False,  # deterministic decoding
                truncation=True,
            )[0]['summary_text']
            summaries.append(f"🔗 {link}\n📝 {summary}\n")

    return "\n\n".join(summaries) if summaries else "No good articles found."

# Example usage (commented out):
# print(summarize_news("AI in Healthcare"))

Hugging Face API token successfully loaded from Colab secrets.


Device set to use cpu


In [None]:
# End-to-end check with a sample topic: summarize_news searches for links, extracts
# each article's text, summarizes it with the loaded model, and the combined
# string is printed.
print(summarize_news("International students"))

  with DDGS() as ddgs:
Your max_length is set to 200, but your input_length is only 150. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=75)


🔗 https://www.internationalstudent.com/
📝 Study Centers offer advice for students that want to study in the USA, the UK, and Australia, and for US students studying abroad. Each center offers information about the country, the culture, the climate, etc., but will also provide you with detailed information on the education system.


🔗 https://www.npr.org/2025/06/07/nx-s1-5423535/international-students-college-data-breakdown
📝 The White House says it's taking these actions due to national security, crime and civil rights concerns. It's just the latest in a string of immigration actions that have placed international students in the government's crosshairs. The president's action also calls on the secretary of state to review visas issued to other foreign nationals.


🔗 https://www.usnews.com/education/best-colleges/articles/what-international-students-in-the-u-s-need-to-know-risks-rights-and-resources
📝 The speed and reach of the Trump administration's efforts to deport international stu

In [None]:
import gradio as gr

# Create the Gradio interface around summarize_news (defined in an earlier cell).
# It takes one text input (the news topic) and shows one text output (the joined summaries).
# The page title is "Search and Summarize".
# share=True requests a public share URL in addition to the local server, so anyone
# with that link can trigger searches and summarization on this runtime.
gr.Interface(fn=summarize_news, inputs="text", outputs="text", title="Search and Summarize").launch(share=True)

  from websockets.server import WebSocketServerProtocol


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://d9c013d5bb3a579bfd.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


