In [None]:
import ast
import difflib
import json
import os
import time
import urllib.parse
from datetime import datetime

import feedparser
from cerebras.cloud.sdk import Cerebras
from dotenv import load_dotenv
from newspaper import Article

# Default cutoff for feed filtering: midnight on 25 September 2025 (naive datetime).
start = datetime(2025, 9, 25)

def get_news_feed(query: str, limit: int = 10, start_date: datetime = start):
    """Search Google News RSS for ``query`` and collect the matching entries.

    Args:
        query: Free-text search phrase; it is URL-encoded before being placed
            in the feed URL.
        limit: Maximum number of distinct titles to collect. A value <= 0
            yields an empty result.
        start_date: Naive datetime; entries published before it are skipped.
            It is compared against the entry's ``published_parsed`` fields,
            which feedparser documents as UTC. A falsy value disables the
            date filter.

    Returns:
        ``(output_dict, output)`` where ``output_dict`` maps each kept title to
        its link (a repeated title keeps the last link seen) and ``output`` is
        one "title - published" line per kept entry, separated by blank lines
        ("" when nothing was kept).
    """
    # Encode the query into a URL
    encoded_query = urllib.parse.quote(query)
    feed_url = f"https://news.google.com/rss/search?q={encoded_query}"

    feed = feedparser.parse(feed_url)

    lines = []
    output_dict = {}

    for entry in feed.entries:
        # Checked before adding so that limit <= 0 really returns nothing.
        if len(output_dict) >= limit:
            break

        published = getattr(entry, "published", None)
        published_parsed = getattr(entry, "published_parsed", None)

        # Skip if no timestamp
        if not published_parsed:
            continue

        # Build the datetime straight from the struct_time fields. Going
        # through time.mktime()/fromtimestamp() would reinterpret the UTC
        # tuple as local time and can shift it by an hour under DST.
        try:
            entry_date = datetime(*published_parsed[:6])
        except (TypeError, ValueError):
            # Malformed timestamp (e.g. out-of-range field): treat as undated.
            continue

        # Filter: skip if before start_date
        if start_date and entry_date < start_date:
            continue

        title = entry.title
        link = entry.link

        lines.append(f"{title} - {published if published else 'No timestamp'}")
        output_dict[title] = link

    return output_dict, "\n\n".join(lines).strip()

In [None]:
# Run python-dotenv's loader first, then read the key from the environment.
load_dotenv()

api_key = os.environ.get("API_KEY")

# Shared Cerebras client; chat() sends every completion request through it.
client = Cerebras(api_key=api_key)

# Base conversation reused for every model call in this notebook:
#   [0] system prompt describing the hook -> mark workflow and the search rules
#   [1] the user's notification request
# Later cells append one assistant message to a copy of this list per call.
start_messages = [
  {"role": "system", "content": """
  You are a helpful AI. You will be connected to an RSS feed based on the user's request. 
  Instead of being reactive, you will be proactive to the RSS feed and contact the user 
  when any item matches. If something might match, call the 'Mark' tool.
   
  There will not always be relevant items, so do not call the 'Mark' tool because you feel obligated.

  Workflow:
  1. User sends request.
  2. Use the 'Hook' tool to create EXACTLY 5 distinct searches. 
  3. You will receive the top 5 results per search. Use 'Mark' to flag relevant ones.
   
  Rules for searches:
  - No superficial variations. Do not create searches that only differ by one vague word 
    (e.g., "OpenAI research" vs. "OpenAI innovation").
  - Each search must represent a distinct angle of the request (e.g., policy, technical 
    breakthroughs, collaborations, controversies, societal impacts).
  - All searches must remain clearly relevant to the user request. Do not drift into 
    unrelated areas just to make them different.
  - Keep searches concise: 2-5 words each.
  - Prioritize recall. Err on the side of including items that might be relevant. 
    Avoid narrowing too much.

  Example user request: "I want to be notified if there's any news about governments 
  creating new regulations specifically for AI safety research."
  Three good searches:
  - "government AI safety regulation"
  - "policy frameworks for AI risk research"
  - "AI governance oversight research initiatives"

  These are all relevant, but capture different aspects of the request. 

  Aim to reduce false negatives at all costs. If an item has ANY possibility of being relevant, you must include it. ONLY remove the titles that are OBVIOUSLY irrelevant to the user's request.
  """},
  # The request the whole run is evaluated against.
  {"role": "user", "content": "Hello! I want to be notified if there's any news about climate change affecting global food security. Thanks!"},
]

# Plain-text copy of the user's request, reused for printing and in the
# article-evaluation prompt.
user_query = start_messages[1]["content"]

# Tool (function-calling) schemas passed to chat() for the search/mark phase:
#   - "mark": takes "titles", an array of strings (titles judged relevant)
#   - "hook": takes "searches", an array of strings (queries to run)
# NOTE(review): the system prompt spells the tools 'Mark'/'Hook' while the
# schema names are lowercase - confirm the model matches them reliably.
start_tools = [
    {
        "type": "function",
        "function": {
            "name": "mark",
            "strict": True,
            "description": "Mark an RSS item as relevant to the user's request.",
            "parameters": {
                "type": "object",
                "properties": {
                    "titles": {
                        "type": "array",
                        "items": {
                          "type": "string"
                        },
                        "description": "The titles of the relevant articles."
                    }
                },
                "required": ["titles"]
            }
        }
    },
    {
      "type": "function",
      "function": {
          "name": "hook",
          "strict": True,
          "description": "Create an RSS feed with Google News.",
          "parameters": {
              "type": "object",
              "properties": {
                  "searches": {
                      "type": "array",
                      "items": {
                        "type": "string"
                      },
                      "description": "The RSS searches you want to make. You can use spaces"
                  }
              },
              "required": ["searches"]
          }
      }
    }
]

In [None]:
def chat(messages, tools=None, need_tool=False,
         model="llama-4-scout-17b-16e-instruct", max_attempts=3):
    """Send ``messages`` to the model and extract any tool call from the reply.

    Args:
        messages: Chat history (list of role/content dicts) to send.
        tools: Optional tool schemas forwarded to the completion request.
        need_tool: When True, re-ask (up to ``max_attempts`` requests in
            total) until the reply contains a tool call.
        model: Model identifier forwarded to the completion request.
        max_attempts: Maximum number of requests to make.

    Returns:
        ``(message_resp, tool_name, tool_contents)``:
        * structured tool call: the reply's content, the first call's function
          name, and its ``arguments`` exactly as the SDK returned them;
        * tool call written as raw JSON in the content (an object with
          "name" and "arguments"): ``None``, that name, and the decoded
          "arguments" value;
        * no tool call: the content, ``None``, ``None`` (this is also what
          comes back when ``need_tool`` is set and every attempt failed, so
          callers must check ``tool_name``).
    """
    message_resp, tool_name, tool_contents = None, None, None

    for _ in range(max_attempts):
        chat_completion = client.chat.completions.create(
            messages=messages,
            tools=tools,
            model=model,
        )

        msg = chat_completion.choices[0].message
        message_resp = msg.content
        tool_name, tool_contents = None, None

        if msg.tool_calls:
            # Case 1: structured tool call (only the first call is used)
            call = msg.tool_calls[0].function
            tool_name = call.name
            tool_contents = call.arguments
        else:
            # Case 2: the model sometimes writes the tool call as a raw JSON
            # object in the message content instead of a structured call.
            try:
                parsed = json.loads(message_resp)
            except (TypeError, ValueError):
                # Content is None or not JSON: an ordinary text reply.
                parsed = None

            if isinstance(parsed, dict) and "name" in parsed and "arguments" in parsed:
                tool_name = parsed["name"]
                tool_contents = parsed["arguments"]
                # Drop the text only once it has been consumed as a tool
                # call; other JSON-looking replies keep their content.
                message_resp = None

        if tool_name or not need_tool:
            break  # otherwise retry, a tool call is required

    return message_resp, tool_name, tool_contents

In [100]:
# Fuzzy matching since the AI sometimes does not include parts of the title
def find_best_match(model_title, news_dict):
    """Return the link stored under the key of ``news_dict`` closest to ``model_title``.

    The comparison is fuzzy (``difflib.get_close_matches`` with a 0.5 cutoff,
    best single candidate only). Returns "" when no key is similar enough.
    """
    candidates = difflib.get_close_matches(
        model_title, list(news_dict), n=1, cutoff=0.5
    )
    if not candidates:
        return ""
    (closest_title,) = candidates
    return news_dict[closest_title]

# Stop after this many searches have returned at least one item.
MAX_VALID_SEARCHES = 3
# Items fetched per search; matches the "top 5 results" promised in the
# system prompt and in the per-search assistant message below.
RESULTS_PER_SEARCH = 5


def _tool_args(tool_contents):
    """Return tool arguments as a dict, whether chat() gave JSON text or a dict.

    Anything else (None, malformed JSON, a non-object JSON value) yields {}.
    """
    if isinstance(tool_contents, dict):
        return tool_contents
    if isinstance(tool_contents, str):
        try:
            decoded = json.loads(tool_contents)
        except json.JSONDecodeError:
            return {}
        return decoded if isinstance(decoded, dict) else {}
    return {}


# Initial chat, get RSS setup
message, tool_name, tool_contents = chat(start_messages, start_tools, True)
searches = _tool_args(tool_contents).get("searches")
if not isinstance(searches, list) or not searches:
    # chat() returns no tool call when every retry failed; fail with a clear
    # message instead of a TypeError/KeyError from json.loads()[...].
    raise RuntimeError(
        f"Model did not provide searches via the 'hook' tool (tool={tool_name!r})."
    )

all_rss_items = [] # everything pulled from RSS
chosen_titles = [] # everything chosen by model
all_news_dicts = [] # raw rss dicts

print("=== USER PROMPT ===")
print(user_query)

print("\n=== ALL SEARCHES ===")

valid_count = 0

for search in searches:
    if valid_count >= MAX_VALID_SEARCHES:
        print("! STOPPED SEARCHING !")
        break

    output_dict, output_str = get_news_feed(search, limit=RESULTS_PER_SEARCH)

    if output_str == '':
        print(f"{search} - EMPTY")
        continue

    valid_count += 1
    print(search)

    loop_messages = start_messages + [
        {"role": "assistant", "content": f"{output_str} This is a list of the {RESULTS_PER_SEARCH} most recent RSS items for the search '{search}'. I will now use tool 'mark' if any of the items apply to the user's request. I will NOT use 'hook' because I already did that."},
    ]
    message, tool_name, tool_contents = chat(loop_messages, start_tools, True)

    # Handle tool calling issues: always rebind `titles` for this search so a
    # failed call contributes nothing instead of reusing the previous
    # search's titles (or raising NameError on the first search).
    titles = _tool_args(tool_contents).get("titles", [])
    if isinstance(titles, str):
        titles = [titles]  # a bare string would otherwise be extended char by char
    elif not isinstance(titles, list):
        titles = []

    all_rss_items.extend(output_dict.keys())
    chosen_titles.extend(titles)
    all_news_dicts.append(output_dict)

# Deduplicate RSS titles (dict.fromkeys keeps first-seen order)
all_rss_items = list(dict.fromkeys(all_rss_items))
chosen_titles = list(dict.fromkeys(chosen_titles))

# Merge all dicts
combined_news_dict = {}
for nd in all_news_dicts:
    combined_news_dict.update(nd)

# Map chosen titles to links ("" when no RSS title is close enough)
chosen_dict = {t: find_best_match(t, combined_news_dict) for t in chosen_titles}

print("\n=== ALL RSS ITEMS LOOKED AT ===")
for item in all_rss_items:
    print(item)

print("\n=== ALL CHOSEN ITEMS ===")
for t, link in chosen_dict.items():
    print(f"{t} -> {link}")

=== USER PROMPT ===
Hello! I want to be notified if there's any news about climate change affecting global food security. Thanks!

=== ALL SEARCHES ===
climate change food security
global food production impact - EMPTY
sustainable agriculture solutions
climate resilience in farming
! STOPPED SEARCHING !

=== ALL RSS ITEMS LOOKED AT ===
Agriculture Ministry launches system to assess food import risks amid boycotts, climate change - The Jerusalem Post
Agriculture Ministry launches system to assess food import risks amid boycotts, climate change - Yahoo
Food Is Medicine Requires Systemic Changes: “It’s No One Discipline’s Job to Solve Food Insecurity - Food Tank
Reducing food loss, waste ensures food food-secure world - Tehran Times
Feeding The Future: Nature-Based Solutions For Resilient Food Systems – Analysis - Eurasia Review
The Green Revolution: Climate-Smart Food Initiatives Reshape Agricultural Commodity Markets - FinancialContent
Agricultural Vision 2050: Morocco’s Plan to Strengt

In [101]:
def get_main_content(url: str) -> str:
    """Return the ``text`` of a newspaper ``Article`` built from ``url``.

    The article is downloaded and parsed first; any error raised by those
    calls propagates to the caller.
    """
    page = Article(url)
    page.download()
    page.parse()
    body_text = page.text
    return body_text

# NOTE(review): this looks like a leftover smoke test - it performs a fetch
# every time the cell runs, and `content` is reassigned inside the
# evaluation loop below. Consider removing it.
content = get_main_content("https://example.com")

# Tool schema for the second pass, where the model judges one fetched page.
# The single "mark" tool takes:
#   - "titles": boolean relevance flag. NOTE(review): the key name is carried
#     over from the first-pass schema although it now holds a bool.
#   - "reason": free-text explanation. NOTE(review): not listed in "required",
#     so the model may omit it.
eval_tools = [
    {
        "type": "function",
        "function": {
            "name": "mark",
            "strict": True,
            "description": "Mark the article/page as relevant or not.",
            "parameters": {
                "type": "object",
                "properties": {
                    "titles": {
                        "type": "boolean",
                        "description": "Use True if it is relevant, False if it is not.."
                    },
                    "reason": {
                        "type": "string",
                        "description": "A detailed explanation (200 words or so). Specifically, spend the first 185 words on the important details and info the page contains, and the last 15 on how it is relevant to the user query."
                    }
                },
                "required": ["titles"]
            }
        }
    }
]

# How many leading characters of each page are shown to the model.
CONTENT_PREVIEW_CHARS = 1000

# Second pass: fetch each chosen article and ask the model to confirm relevance.
for item, link in chosen_dict.items():
    print("=== ITEM ===")
    print(item)

    # find_best_match() returns "" when no RSS title was close enough to the
    # model's title; there is nothing to download in that case.
    if not link:
        print("No matching link found - skipped")
        continue

    content = get_main_content(link)[:CONTENT_PREVIEW_CHARS]

    eval_messages = start_messages + [
        {"role": "assistant", "content": f"{content} This is the first {CONTENT_PREVIEW_CHARS} chars of the webpage {item}, which I previously marked as possibly relevant to the user's query based on the title alone. I will strictly make sure that the content is VERY relevant to the user's query '{user_query}'!"},
    ]
    message, tool_name, tool_contents = chat(eval_messages, eval_tools, True)

    # Handle tool calling issues: arguments may arrive as a dict, as JSON
    # text, or as Python-literal-looking text.
    parsed = None
    if tool_contents:
        if isinstance(tool_contents, str):
            try:
                parsed = json.loads(tool_contents)
            except json.JSONDecodeError:
                # Translate JSON literals, then read the text as a Python
                # literal. ast.literal_eval replaces the previous eval():
                # model output is untrusted and literal_eval only accepts
                # literals, never calls or attribute access.
                fixed = (
                    tool_contents
                    .replace("true", "True")
                    .replace("false", "False")
                    .replace("null", "None")
                )
                try:
                    parsed = ast.literal_eval(fixed)
                except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                    parsed = tool_contents  # give up and show the raw text
        else:
            # Already decoded (dict from the raw-JSON path of chat(), or any
            # other non-text value): use as-is.
            parsed = tool_contents

    print(message, tool_name, parsed)

=== ITEM ===
Climate Change Threatening Belize’s Food Security
None mark {'titles': True, 'reason': "The article discusses how climate change is impacting Belize's food security, which is a specific example of the broader issue of climate change affecting global food security. The article highlights the challenges faced by Belize in ensuring food security due to climate change, including droughts, floods, and rising temperatures, which can lead to crop failures and reduced yields. This is relevant to the user's query as it provides a concrete example of how climate change is affecting food security in a specific region, which can be extrapolated to understand the global implications."}
=== ITEM ===
Korea Partnership Targets Food Security Through Climate-Smart Agriculture
None mark {'titles': True, 'reason': "The Korea Partnership Targets Food Security Through Climate-Smart Agriculture article discusses the impact of climate change on global food security and explores strategies to addr