In [None]:
# Brave API key redacted - supply it through the BRAVE_API_KEY environment variable instead of storing it in the notebook.

In [None]:
"""
Brave F1 News Search

This script fetches the latest Formula 1 news using the Brave Search API.
It retrieves relevant sources in English and displays them in a structured format.

For Google Colab:
    1. Save this code in a cell and run it
    2. Call the main() function with your API key in the next cell:
       main(api_key="your-brave-api-key")

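Command-line Usage:
    python brave_f1_news.py [--count COUNT] [--freshness FRESHNESS]
                            [--offset OFFSET] [--api-key API_KEY] [--query QUERY]

Arguments:
    --count     Number of results to retrieve (default: 10, max: 50)
    --freshness Filter for news freshness (default: 'pw' - past week)
                Options: 'pd' (past day), 'pw' (past week),
                         'pm' (past month), 'py' (past year)
    --offset    Page offset for pagination (default: 0, max: 9)
    --api-key   Brave Search API key (optional; falls back to the
                BRAVE_API_KEY environment variable)
    --query     Search query (default: 'Formula 1 news')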
"""

# Import necessary libraries at the top
import requests
import json
import argparse
import os
from datetime import datetime
from typing import Dict, List, Optional, Any, Union
import sys

# Try to import IPython-specific modules (will work in Colab)
try:
    from IPython.display import display, HTML, Markdown
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

class BraveNewsAPI:
    """Small client for the Brave News Search API.

    The client sends a single GET request to ``BASE_URL`` and can render the
    returned articles as HTML (when IPython is importable) or as plain text.
    """

    # API endpoint for Brave News Search
    BASE_URL = "https://api.search.brave.com/res/v1/news/search"

    # Client-side bounds applied to the paging parameters before sending
    MAX_COUNT = 50
    MAX_OFFSET = 9

    # Seconds to wait for the API before giving up, so a stalled connection
    # cannot hang the notebook cell forever
    REQUEST_TIMEOUT = 15

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize the Brave News API client.

        Args:
            api_key: Brave Search API key. If None, will look for BRAVE_API_KEY environment variable.

        Raises:
            SystemExit: If no key is available and ``google.colab`` cannot be
                imported. When ``google.colab`` is importable the constructor
                prints instructions and returns with ``self.api_key`` unset.
        """
        # Get API key from parameter or environment variable
        self.api_key = api_key or os.environ.get("BRAVE_API_KEY")
        if self.api_key:
            return

        try:
            # Only used as a probe for an interactive Colab session
            import google.colab  # noqa: F401
        except ImportError:
            # Not in Colab, use standard exit
            print("Error: Brave API key not provided.")
            print("Please either set the BRAVE_API_KEY environment variable or provide it as a parameter.")
            sys.exit(1)

        # In Colab, exiting would kill the cell with a traceback; explain instead
        print("Error: Brave API key not provided.")
        print("Please provide your API key when calling the main function:")
        print("main(api_key='your-brave-api-key')")

    def search_news(self,
                   query: str,
                   count: int = 10,
                   offset: int = 0,
                   country: str = "US",
                   search_lang: str = "en",
                   ui_lang: str = "en-US",
                   freshness: str = "pw",
                   spellcheck: bool = True,
                   safesearch: str = "moderate",
                   extra_snippets: bool = True) -> Dict[str, Any]:
        """
        Search for news articles using the Brave News Search API.

        Args:
            query: Search query term
            count: Number of results to return (default: 10, clamped to 1..50)
            offset: Zero-based page offset for pagination (default: 0, clamped to 0..9)
            country: Country source for the news results (default: "US")
            search_lang: Language code for results (default: "en")
            ui_lang: User interface language preference (default: "en-US")
            freshness: Filter by news discovery date (default: "pw" - past week)
            spellcheck: Whether to apply spellchecking on the search query (default: True)
            safesearch: Filter for adult content (default: "moderate")
            extra_snippets: Whether to return additional alternative excerpts (default: True)

        Returns:
            Dict containing the decoded API response, or ``{"error": message}``
            when the key is missing, the request fails, or the body is not JSON.
        """
        # Without a key the request can only fail; report it without a round trip
        if not self.api_key:
            return {"error": "Brave API key not provided."}

        headers = {
            "Accept": "application/json",
            "Accept-Encoding": "gzip",
            "X-Subscription-Token": self.api_key
        }

        params = {
            "q": query,
            "count": max(1, min(count, self.MAX_COUNT)),
            "offset": max(0, min(offset, self.MAX_OFFSET)),
            "country": country,
            "search_lang": search_lang,
            "ui_lang": ui_lang,
            "freshness": freshness,
            "spellcheck": 1 if spellcheck else 0,
            "safesearch": safesearch,
            "extra_snippets": 1 if extra_snippets else 0
        }

        try:
            response = requests.get(
                self.BASE_URL,
                headers=headers,
                params=params,
                timeout=self.REQUEST_TIMEOUT
            )

            # Check for HTTP errors
            response.raise_for_status()

            # Parse and return the JSON response
            return response.json()

        except requests.exceptions.RequestException as e:
            print(f"Error making API request: {e}")
            return {"error": str(e)}
        except ValueError as e:
            # Older requests releases raise a plain ValueError for a non-JSON body
            print(f"Error decoding API response: {e}")
            return {"error": str(e)}

    def format_news_results(self, response: Dict[str, Any]) -> None:
        """
        Format and print the news search results.

        Renders each article through ``IPython.display`` when IPython can be
        imported and falls back to plain ``print`` output otherwise.

        Args:
            response: Brave News Search API response
        """
        # Local import keeps this class self-contained; only needed for HTML output
        import html

        if "error" in response:
            print(f"Error: {response['error']}")
            return

        # The API returns articles under the "results" key, not "news"
        articles = response.get("results")
        if not articles:
            print("No results found.")
            return

        # Print search metadata
        print("\n===== F1 News Search Results =====")
        print(f"Query: {(response.get('query') or {}).get('original', 'Unknown')}")
        print("=" * 35 + "\n")

        # Resolve the renderer once instead of re-importing for every article
        try:
            from IPython.display import display, HTML
        except ImportError:
            display = HTML = None

        for i, article in enumerate(articles, 1):
            title = article.get("title", "No title")
            description = article.get("description", "No description available")
            url = article.get("url", "No URL available")
            age = article.get("age", "Unknown")
            # "meta_url" may be absent or null in the payload
            source = (article.get("meta_url") or {}).get("netloc", "Unknown source")

            if HTML is None:
                # Plain text when not in IPython/Colab
                print(f"{i}. {title}")
                print(f"   Source: {source}")
                print(f"   Age: {age}")
                print(f"   {description}")
                print(f"   URL: {url}")
                print("-" * 50)
                continue

            # Article fields come from the network; escape them so markup in a
            # title or URL cannot inject HTML/JS into the notebook output
            safe_url = html.escape(str(url), quote=True)
            if str(url).startswith(("http://", "https://")):
                link_html = f"<p><a href='{safe_url}' target='_blank'>{safe_url}</a></p>"
            else:
                # Do not turn placeholders or non-web schemes into clickable links
                link_html = f"<p>{safe_url}</p>"

            display(HTML(f"<h3>{i}. {html.escape(str(title))}</h3>"))
            display(HTML(
                f"<p><b>Source:</b> {html.escape(str(source))} | "
                f"<b>Age:</b> {html.escape(str(age))}</p>"
            ))
            display(HTML(f"<p>{html.escape(str(description))}</p>"))
            display(HTML(link_html))
            display(HTML("<hr>"))


def main(api_key=None, count=10, freshness="pw", offset=0, query="Formula 1 news"):
    """
    Run a news search with the given parameters and display the results.
    This function can be called directly in Google Colab.

    Args:
        api_key: Brave Search API key (the client falls back to the
            BRAVE_API_KEY environment variable when this is None)
        count: Number of results to retrieve (default: 10)
        freshness: Filter for news freshness (default: "pw" - past week)
        offset: Page offset for pagination (default: 0)
        query: Search query (default: "Formula 1 news")
    """
    # Initialize the Brave News API client
    brave_api = BraveNewsAPI(api_key=api_key)

    # In Colab the constructor reports a missing key and returns instead of
    # exiting; stop here rather than sending a request that can only fail.
    if not getattr(brave_api, "api_key", None):
        return

    print("Searching for the latest Formula 1 news...")
    results = brave_api.search_news(
        query=query,
        count=count,
        offset=offset,
        freshness=freshness
    )

    # Format and display the results
    brave_api.format_news_results(results)


if __name__ == "__main__":
    # Command-line entry point: map CLI flags onto main()'s keyword arguments.
    parser = argparse.ArgumentParser(description="Search for the latest Formula 1 news using Brave Search API.")
    parser.add_argument("--count", type=int, default=10, help="Number of results to retrieve (default: 10, max: 50)")
    parser.add_argument("--freshness", type=str, default="pw",
                        choices=["pd", "pw", "pm", "py"],
                        help="Filter for news freshness: pd (past day), pw (past week), pm (past month), py (past year)")
    parser.add_argument("--offset", type=int, default=0, help="Page offset for pagination (default: 0, max: 9)")
    parser.add_argument("--api-key", type=str, help="Brave Search API key (optional, can use BRAVE_API_KEY env var)")
    parser.add_argument("--query", type=str, default="Formula 1 news", help="Search query (default: 'Formula 1 news')")

    # Notebook kernels put their own entries in sys.argv (e.g. "-f <kernel>.json").
    # parse_args() rejects those with "unrecognized arguments" and SystemExit: 2;
    # parse_known_args() ignores anything this parser does not define.
    args, _unrecognized = parser.parse_known_args()

    main(
        api_key=args.api_key,
        count=args.count,
        freshness=args.freshness,
        offset=args.offset,
        query=args.query
    )

usage: colab_kernel_launcher.py [-h] [--count COUNT] [--freshness {pd,pw,pm,py}] [--offset OFFSET]
                                [--api-key API_KEY] [--query QUERY]
colab_kernel_launcher.py: error: unrecognized arguments: -f /root/.local/share/jupyter/runtime/kernel-0da9fad7-cecb-42f4-b1b2-c547bacdb5fd.json


SystemExit: 2

In [None]:
main(
    api_key=os.environ.get("BRAVE_API_KEY"),  # Read the key from the environment; never hard-code it
    query="Formula 1",                        # Simplified query
    freshness="pw",                           # Past week
    count=20                                  # Twice the default of 10
)

Searching for the latest Formula 1 news...

===== F1 News Search Results =====
Query: Formula 1



In [None]:
!pip install openai requests



In [None]:
import os
import sys
from getpass import getpass

# Secrets must not be stored in the notebook. Any key that was ever committed
# here should be treated as compromised and rotated with its provider.
REQUIRED_API_KEYS = ("BRAVE_API_KEY", "TAVILY_API_KEY", "OPENAI_API_KEY")


def ensure_api_keys(names=REQUIRED_API_KEYS, prompt=getpass):
    """
    Make sure each named API key is present in ``os.environ``.

    Keys that are already set are left untouched. For the others, ``prompt``
    is called with a label and a non-empty answer is stored in the environment.

    Args:
        names (iterable of str): Environment variable names to check
        prompt (callable or None): Function taking a prompt string and
            returning the secret; pass None to skip prompting entirely

    Returns:
        list: Names that are still unset after this call
    """
    still_missing = []
    for name in names:
        if os.environ.get(name):
            continue
        entered = prompt(f"Enter {name}: ") if prompt is not None else ""
        if entered:
            os.environ[name] = entered
        else:
            still_missing.append(name)
    return still_missing


# Only ask when someone can answer: inside a notebook kernel or on a real
# terminal. Batch runs just get a report of what is missing.
_can_prompt = "ipykernel" in sys.modules or bool(sys.stdin and sys.stdin.isatty())
_missing_keys = ensure_api_keys(prompt=getpass if _can_prompt else None)
if _missing_keys:
    print("Missing API keys: " + ", ".join(_missing_keys))

In [None]:
import requests
import json
import os
from datetime import datetime
import openai

# Function to get URLs from Brave API
def get_brave_sources(query, count=10, freshness="2024-01-01to2024-12-31", timeout=15):
    """
    Retrieves source URLs from the Brave web search endpoint.

    Args:
        query (str): Search query string
        count (int): Number of results to retrieve
        freshness (str or None): Value sent as the ``freshness`` parameter.
            The default keeps the date window this function previously
            hard-coded; pass None to send no freshness filter.
        timeout (float): Seconds to wait for the HTTP response

    Returns:
        list: URLs from the response's ``web.results`` entries; an empty list
        when the key is missing, the request fails, or the status is not 200
    """
    brave_api_key = os.environ.get('BRAVE_API_KEY')
    if not brave_api_key:
        # Without a token the request can only be rejected
        print("Error: BRAVE_API_KEY environment variable is not set.")
        return []

    brave_url = "https://api.search.brave.com/res/v1/web/search"

    headers = {
        "Accept": "application/json",
        "Accept-Encoding": "gzip",
        "X-Subscription-Token": brave_api_key
    }

    params = {
        "q": query,
        "count": count,
        "search_lang": "en",
    }
    if freshness:
        params["freshness"] = freshness

    try:
        response = requests.get(brave_url, headers=headers, params=params, timeout=timeout)
    except requests.exceptions.RequestException as exc:
        # Connection errors and timeouts become "no sources" instead of a traceback
        print(f"Error contacting Brave API: {exc}")
        return []

    if response.status_code != 200:
        print(f"Error from Brave API: {response.status_code} - {response.text}")
        return []

    try:
        results = response.json()
    except ValueError as exc:
        print(f"Error decoding Brave API response: {exc}")
        return []

    # "web" may be missing or null when the search returns nothing
    web_results = (results.get('web') or {}).get('results') or []
    return [item['url'] for item in web_results if 'url' in item]

# Function to extract content using Tavily Extract API
def extract_content_with_tavily(urls, timeout=60):
    """
    Extracts content from URLs using the Tavily Extract API.

    Args:
        urls (list): List of URLs to extract content from
        timeout (float): Seconds to wait for the HTTP response

    Returns:
        list: Dictionaries with ``url`` and ``content`` (the response's
        ``raw_content`` field) for each successful extraction; an empty list
        when there is nothing to extract, the key is missing, or the request fails
    """
    # Nothing to do; avoid a pointless API call
    if not urls:
        return []

    tavily_api_key = os.environ.get('TAVILY_API_KEY')
    if not tavily_api_key:
        print("Error: TAVILY_API_KEY environment variable is not set.")
        return []

    tavily_url = "https://api.tavily.com/extract"

    headers = {
        "Authorization": f"Bearer {tavily_api_key}",
        "Content-Type": "application/json"
    }

    payload = {
        "urls": list(urls),
        "include_images": False,
        "extract_depth": "basic"
    }

    try:
        response = requests.post(tavily_url, json=payload, headers=headers, timeout=timeout)
    except requests.exceptions.RequestException as exc:
        # Connection errors and timeouts become "no content" instead of a traceback
        print(f"Error contacting Tavily API: {exc}")
        return []

    if response.status_code != 200:
        print(f"Error from Tavily API: {response.status_code} - {response.text}")
        return []

    try:
        result = response.json()
    except ValueError as exc:
        print(f"Error decoding Tavily API response: {exc}")
        return []

    # Log any failed extractions
    for failed in result.get('failed_results') or []:
        print(f"Failed to extract from {failed.get('url')}: {failed.get('error')}")

    return [
        {'url': item.get('url'), 'content': item.get('raw_content')}
        for item in result.get('results') or []
    ]

# Function to generate structured news events using OpenAI
def generate_structured_news_events(extracted_content_list, num_events=3,
                                    max_chars_per_source=3000, max_total_chars=15000):
    """
    Generates structured news events with multiple sources using OpenAI.

    Builds one prompt from the extracted content, sends it through
    ``parse_openai_response`` and decodes the JSON object in the reply.

    Args:
        extracted_content_list (list): List of dictionaries with URL and extracted content
        num_events (int): Number of top news events to identify
        max_chars_per_source (int): Characters kept from each source's content
        max_total_chars (int): Characters of combined content placed in the prompt

    Returns:
        list: The ``events`` list from the model's JSON reply, or an empty list
        when there is no usable content or the reply cannot be decoded
    """
    # Sources whose extraction returned no text would add only a bare URL
    usable_items = [item for item in extracted_content_list or [] if item.get('content')]
    if not usable_items:
        return []

    # Per-source and overall caps keep the prompt within the model's context window
    all_content = "".join(
        f"\n\nURL: {item.get('url')}\n{item['content'][:max_chars_per_source]}"
        for item in usable_items
    )
    prompt_content = all_content[:max_total_chars]

    # Create a structured output request
    user_prompt = f"""
    Analyze the following web content and identify the top {num_events} most important news events.
    Group related news articles under the same event. For each event, provide:
    1. A clear event title describing what happened
    2. The approximate time when the event occurred or was reported
    3. A list of sources covering this event, including headline, link, and a brief description

    IMPORTANTLY, respond ONLY with a valid JSON object that follows this exact structure:
    {{
      "events": [
        {{
          "event": "Name of the event",
          "event_time": "YYYY-MM-DD" or "YYYY-MM-DD HH:MM:SS" if known,
          "sources": [
            {{
              "headline": "Headline from source 1",
              "link": "URL of source 1",
              "description": "Brief description from source 1 (50 words max)"
            }},
            {{
              "headline": "Headline from source 2",
              "link": "URL of source 2",
              "description": "Brief description from source 2 (50 words max)"
            }}
          ]
        }}
      ]
    }}

    Do not include any text before or after the JSON object. Ensure the response is properly formatted JSON.
    Focus on factual information and completely rephrase all content to avoid verbatim copying.
    Include at least 2 sources per event when available.
    Current date: {datetime.now().strftime('%Y-%m-%d')}

    Web content to analyze:
    {prompt_content}
    """

    response_text = parse_openai_response(user_prompt)
    if not response_text:
        print("Error with OpenAI API: empty response")
        return []

    # Models sometimes wrap the object in a Markdown fence or add a preamble;
    # decode only the outermost {...} span when one is present.
    start = response_text.find("{")
    end = response_text.rfind("}")
    candidate = response_text[start:end + 1] if 0 <= start < end else response_text

    try:
        events_data = json.loads(candidate)
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON response: {str(e)}")
        print(f"Response received: {response_text[:500]}...")  # Print first 500 chars for debugging
        return []

    if not isinstance(events_data, dict):
        print("Error parsing JSON response: expected a JSON object with an 'events' key")
        return []

    return events_data.get('events', [])

# New function for parsing OpenAI responses
def parse_openai_response(query, model="o3-mini", reasoning_effort="medium", max_tokens=10000):
    """
    Sends a single user message to OpenAI's chat completions API and returns the reply text.

    Args:
        query (str): The prompt sent as the user message
        model (str): The model to use (default: "o3-mini")
        reasoning_effort (str or None): Level of reasoning effort (default:
            "medium"); pass None to omit the parameter from the request
        max_tokens (int): Maximum number of tokens for the completion

    Returns:
        str: The reply content, or an empty string when the call fails or the
        reply carries no text
    """
    try:
        # Initialize the OpenAI client
        openai_api_key = os.environ.get('OPENAI_API_KEY')
        client = openai.OpenAI(api_key=openai_api_key)

        request_kwargs = {
            "model": model,
            "messages": [{"role": "user", "content": query}],
            "max_completion_tokens": max_tokens,
        }
        if reasoning_effort is not None:
            request_kwargs["reasoning_effort"] = reasoning_effort

        # No response schema is supplied, so the plain create() call is
        # sufficient; the beta parse() helper adds nothing here.
        response = client.chat.completions.create(**request_kwargs)

        # content can be None; keep the documented str contract
        return response.choices[0].message.content or ""

    except Exception as e:
        # Boundary for every SDK/network failure: callers only check for ""
        print(f"Error with OpenAI API: {str(e)}")
        return ""

# Main function to run the entire workflow
def generate_news_analysis(search_query, num_sources=10, num_events=3):
    """
    Run the search -> extract -> summarize pipeline for one query.

    Each stage prints a progress line; the pipeline stops early with an empty
    list as soon as a stage yields nothing.

    Args:
        search_query (str): Query to search for news
        num_sources (int): Number of sources to retrieve
        num_events (int): Number of news events to identify

    Returns:
        list: List of structured news events
    """
    # Stage 1: collect candidate source URLs
    print(f"Retrieving sources for query: '{search_query}'...")
    source_urls = get_brave_sources(search_query, num_sources)
    if not source_urls:
        print("No sources found.")
        return []

    # Stage 2: pull the page content for those URLs
    print(f"Found {len(source_urls)} sources. Extracting content...")
    page_contents = extract_content_with_tavily(source_urls)
    if not page_contents:
        print("No content extracted.")
        return []

    # Stage 3: group the content into structured events
    print(f"Content extracted from {len(page_contents)} sources. Generating news events...")
    news_events = generate_structured_news_events(page_contents, num_events)
    print(f"Generated {len(news_events)} news events.")

    return news_events



In [None]:
# Example usage
if __name__ == "__main__":
    # Demo run: fetch events for one query and show them two ways.
    search_query = "latest Max Verstappen news"
    events = generate_news_analysis(search_query)

    # Raw view: the event list as indented JSON
    print("\nTop News Events:")
    print(json.dumps(events, indent=2))

    # Readable view: one numbered entry per event, with its sources nested
    for event_no, event in enumerate(events, start=1):
        event_title = event.get('event', 'Unnamed Event')
        event_time = event.get('event_time', 'Unknown')
        print(f"\n{event_no}. {event_title}")
        print(f"   Time: {event_time}")
        print("   Sources:")

        for source_no, source in enumerate(event.get('sources', []), start=1):
            headline = source.get('headline', 'No headline')
            link = source.get('link', 'No link')
            description = source.get('description', 'No description')
            print(f"     {source_no}. {headline}")
            print(f"        Link: {link}")
            print(f"        Description: {description}")

Retrieving sources for query: 'latest Max Verstappen news'...
Found 10 sources. Extracting content...
Content extracted from 10 sources. Generating news events...
Generated 3 news events.

Top News Events:
[
  {
    "event": "Verstappen Pulls Off Spectacular Comeback at S\u00e3o Paulo Grand Prix",
    "event_time": "2024-11-03",
    "sources": [
      {
        "headline": "Max Verstappen produces wonder drive to claim F1 S\u00e3o Paulo Grand Prix",
        "link": "https://www.theguardian.com/sport/2024/nov/03/max-verstappen-produces-wonder-drive-to-claim-f1-sao-paulo-grand-prix",
        "description": "Reports detail Verstappen's masterful performance in treacherous wet conditions, recovering from a low grid start to secure victory and widen his championship lead over Lando Norris."
      },
      {
        "headline": "Verstappen reflects on \u2018absolutely crazy\u2019 Sao Paulo GP after sublime climb from P17 to victory as he extends title lead",
        "link": "https://www.form

In [None]:
if __name__ == "__main__":
    # Second run of the demo: same query, printed as JSON and as a listing.
    search_query = "latest Max Verstappen news"
    events = generate_news_analysis(search_query)

    # Raw view: the event list as indented JSON
    print("\nTop News Events:")
    print(json.dumps(events, indent=2))

    # Readable view: one numbered entry per event, with its sources nested
    for event_no, event in enumerate(events, start=1):
        event_title = event.get('event', 'Unnamed Event')
        event_time = event.get('event_time', 'Unknown')
        print(f"\n{event_no}. {event_title}")
        print(f"   Time: {event_time}")
        print("   Sources:")

        for source_no, source in enumerate(event.get('sources', []), start=1):
            headline = source.get('headline', 'No headline')
            link = source.get('link', 'No link')
            description = source.get('description', 'No description')
            print(f"     {source_no}. {headline}")
            print(f"        Link: {link}")
            print(f"        Description: {description}")

Retrieving sources for query: 'latest Max Verstappen news'...
Found 10 sources. Extracting content...
Content extracted from 10 sources. Generating news events...
Generated 3 news events.

Top News Events:
[
  {
    "event": "Verstappen Dominates Japanese Grand Prix with Fourth Consecutive Win",
    "event_time": "2025-04-06",
    "sources": [
      {
        "headline": "Verstappen\u2019s Brilliance at Suzuka Signals Tough Road Ahead",
        "link": "https://www.nytimes.com/athletic/6258602/2025/04/06/max-verstappen-f1-win-japanese-grand-prix/",
        "description": "Verstappen secured his fourth straight win at Suzuka with a commanding performance, outpacing McLaren rivals and setting an impressive pole lap, hinting at a challenging season for his title challengers."
      },
      {
        "headline": "\u2018It means a lot\u2019 \u2013 Verstappen Thrilled with Fourth Suzuka Win",
        "link": "https://www.formula1.com/en/latest/article/it-means-a-lot-verstappen-thrilled-with