# Import Required Libraries
Import all necessary libraries for the web search agent including os, json, pathlib, requests, serpapi, and LangGraph components.

In [None]:
# Import necessary libraries
import json  # For handling JSON data
import os  # For environment variable access
import pathlib  # For file and directory operations
from datetime import datetime  # For timestamping search results
from typing import List, Optional  # For type annotations on the search agent

import requests  # For making HTTP requests
from serpapi import GoogleSearch  # For interacting with SerpAPI

# Import LangGraph components
from langgraph.graph import StateGraph, END  # For building and managing LangGraph pipelines
from langchain_core.runnables import RunnableLambda  # For creating runnable nodes in LangGraph

# Set Up Environment Variables
Set up the SerpAPI key as an environment variable and verify it's available. Create necessary directories for storing search results.

In [None]:
# Set up the SerpAPI key as an environment variable
PLACEHOLDER_SERP_API_KEY = "your_serpapi_key_here"
serp_api_key = "your_serpapi_key_here"  # Replace with your actual SerpAPI key

# If the literal above was left untouched, prefer a key that is already
# exported in the environment so that running this cell does not overwrite
# real credentials with the placeholder.
if serp_api_key == PLACEHOLDER_SERP_API_KEY:
    serp_api_key = os.getenv("SERP_API_KEY") or serp_api_key
os.environ["SERP_API_KEY"] = serp_api_key

# Verify that the SerpAPI key is available
if not os.getenv("SERP_API_KEY"):
    raise ValueError("SERP_API_KEY not found in environment variables")

# Create necessary directories for storing search results
content_dir = pathlib.Path("WebAgent/content")
content_dir.mkdir(parents=True, exist_ok=True)  # Create the directory if it doesn't exist

# Confirm directory creation with an explicit check: a bare `assert` is
# stripped when Python runs with -O, so it cannot be relied on at runtime.
if not content_dir.is_dir():
    raise RuntimeError("Failed to create the content directory")

# Implement NvidiaWebSearchAgent
Create the NvidiaWebSearchAgent class that will handle different types of web searches related to NVIDIA.

In [None]:
# Define the NvidiaWebSearchAgent class
class NvidiaWebSearchAgent:
    """
    Agent for retrieving real-time web information about NVIDIA using SerpAPI.
    It can search for general topics, latest news, financial info, and quarterly reports.

    Every search method returns a plain dictionary and, as a side effect,
    stores the links of its results in ``self.nvidia_links`` under the
    matching category ("general", "news", "financial" or "quarterly").
    """

    def __init__(self):
        """
        Read the SerpAPI key from the environment and prepare local storage.

        Raises:
            ValueError: If the SERP_API_KEY environment variable is unset or empty.
        """
        self.api_key = os.getenv("SERP_API_KEY")
        if not self.api_key:
            raise ValueError("SERP_API_KEY not found in environment variables")

        # Directory for storing markdown content
        self.content_dir = pathlib.Path("WebAgent/content")
        self.content_dir.mkdir(parents=True, exist_ok=True)

        # Dictionary to store links from different searches
        self.nvidia_links = {
            "general": [],
            "news": [],
            "financial": [],
            "quarterly": [],
        }

    def search(self, query: str, num_results: int = 5, location: str = "United States") -> dict:
        """
        Perform a general search using SerpAPI.

        Args:
            query: Search text sent as the ``q`` parameter.
            num_results: Sent as the ``num`` parameter and used as the maximum
                number of results kept from the response.
            location: Sent as the ``location`` parameter.

        Returns:
            A dictionary with "query", "search_timestamp" (local, naive ISO
            timestamp) and "organic_results"; "news_results" is added only
            when the raw response contains that key.
        """
        raw_results = self._run_query({
            "q": query,
            "location": location,
            "hl": "en",
            "num": num_results,
            "gl": "us",
        })
        processed_results = {
            "query": query,
            "search_timestamp": datetime.now().isoformat(),
            "organic_results": self._extract_organic_results(raw_results, num_results),
        }
        if "news_results" in raw_results:
            processed_results["news_results"] = self._extract_news_results(raw_results, num_results)
        self._record_links("general", processed_results["organic_results"])
        return processed_results

    def search_news(self, query: str = "nvidia", num_results: int = 5) -> dict:
        """
        Perform a news-specific search using SerpAPI.

        Args:
            query: Topic to search; " news" is appended before sending.
            num_results: Sent as the ``num`` parameter and used as the maximum
                number of results kept from the response.

        Returns:
            A dictionary with "query", "search_timestamp" and "news_results".
        """
        news_query = f"{query} news"
        raw_results = self._run_query({
            "q": news_query,
            "tbm": "nws",
            "hl": "en",
            "num": num_results,
        })
        results = {
            "query": news_query,
            "search_timestamp": datetime.now().isoformat(),
            "news_results": self._extract_news_results(raw_results, num_results),
        }
        self._record_links("news", results["news_results"])
        return results

    def search_financial_info(self, specific_topic: Optional[str] = None, num_results: int = 5) -> dict:
        """
        Perform a financial information search using SerpAPI.

        Args:
            specific_topic: Optional topic inserted into the query
                ("nvidia <topic> financial"); when falsy the query is
                "nvidia financial".
            num_results: Maximum number of organic results kept from the
                response. It only trims the response; it is not sent to
                SerpAPI.

        Returns:
            A dictionary with "query", "search_timestamp" and "financial_results".
        """
        query = f"nvidia {specific_topic} financial" if specific_topic else "nvidia financial"
        raw_results = self._run_query({"q": query, "hl": "en", "gl": "us"})
        results = {
            "query": query,
            "search_timestamp": datetime.now().isoformat(),
            "financial_results": self._extract_organic_results(raw_results, num_results),
        }
        self._record_links("financial", results["financial_results"])
        return results

    def search_quarterly_report_info(
        self,
        year: Optional[int] = None,
        quarter: Optional[int] = None,
        num_results: int = 5,
    ) -> dict:
        """
        Searches for NVIDIA quarterly report information.

        Args:
            year: Appended to the query when truthy.
            quarter: Appended to the query as "Q<n>" only when it lies in 1..4;
                any other value is left out of the query but still echoed
                back in the returned dictionary.
            num_results: Maximum number of organic results kept from the
                response. It only trims the response; it is not sent to
                SerpAPI.

        Returns:
            A dictionary with "query", "search_timestamp", "year", "quarter"
            and "results".
        """
        query = "nvidia quarterly report"
        if year:
            query += f" {year}"
        if quarter and 1 <= quarter <= 4:
            query += f" Q{quarter}"
        raw_results = self._run_query({"q": query, "hl": "en", "gl": "us"})
        results = {
            "query": query,
            "search_timestamp": datetime.now().isoformat(),
            "year": year,
            "quarter": quarter,
            "results": self._extract_organic_results(raw_results, num_results),
        }
        self._record_links("quarterly", results["results"])
        return results

    def _run_query(self, params: dict) -> dict:
        """
        Send one request to SerpAPI and return the raw response dictionary.

        The API key is added here so the public methods only describe the
        search itself.
        """
        search = GoogleSearch({**params, "api_key": self.api_key})
        return search.get_dict()

    def _record_links(self, category: str, results: List[dict]) -> None:
        """
        Replace the stored links for *category* with the links of *results*.

        Results without a link are skipped so the stored list never contains
        empty strings.
        """
        self.nvidia_links[category] = [item["link"] for item in results if item.get("link")]

    def _extract_organic_results(self, raw_results: dict, limit: int) -> List[dict]:
        """
        Extract organic search results from raw SerpAPI response.

        Args:
            raw_results: Raw response dictionary.
            limit: Maximum number of entries to keep.

        Returns:
            A list of dictionaries with "title", "link" and "snippet" (empty
            string when missing) plus "source" when the item carries a
            "displayed_link". Empty when the response has no organic results.
        """
        results = []
        # `or []` also covers a response where the key is present but None.
        for item in (raw_results.get("organic_results") or [])[:limit]:
            result = {
                "title": item.get("title", ""),
                "link": item.get("link", ""),
                "snippet": item.get("snippet", ""),
            }
            if "displayed_link" in item:
                result["source"] = item["displayed_link"]
            results.append(result)
        return results

    def _extract_news_results(self, raw_results: dict, limit: int) -> List[dict]:
        """
        Extract news search results from raw SerpAPI response.

        Falls back to the organic results when the response has no news
        entries but does contain an "organic_results" key.

        Args:
            raw_results: Raw response dictionary.
            limit: Maximum number of entries to keep.

        Returns:
            A list of dictionaries with "title", "link" and "snippet" (empty
            string when missing) plus "source" and "date" when present.
        """
        # `or []` keeps a None payload from reaching the slice below.
        news_results = raw_results.get("news_results") or []
        if not news_results and "organic_results" in raw_results:
            return self._extract_organic_results(raw_results, limit)

        results = []
        for item in news_results[:limit]:
            result = {
                "title": item.get("title", ""),
                "link": item.get("link", ""),
                "snippet": item.get("snippet", ""),
            }
            if "source" in item:
                result["source"] = item["source"]
            if "date" in item:
                result["date"] = item["date"]
            results.append(result)
        return results

# Define Search Functions
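The functions below repeat, as standalone module-level definitions, the search methods of `NvidiaWebSearchAgent`: general search, news search, financial information search, and quarterly report search. Each one still takes the agent instance as its first argument (`self`).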

In [None]:
# Define search functions for the NvidiaWebSearchAgent class

def search(self, query: str, num_results: int = 5, location: str = "United States") -> dict:
    """
    Run a general SerpAPI search and return the trimmed results.

    The returned dictionary holds "query", "search_timestamp" and
    "organic_results"; "news_results" is added only when the raw response
    contains that key. The organic links are stored in
    ``self.nvidia_links["general"]``.
    """
    request = {
        "q": query,
        "location": location,
        "api_key": self.api_key,
        "hl": "en",
        "num": num_results,
        "gl": "us",
    }
    response = GoogleSearch(request).get_dict()

    stamp = datetime.now().isoformat()
    organic = self._extract_organic_results(response, num_results)
    outcome = {"query": query, "search_timestamp": stamp, "organic_results": organic}

    if "news_results" in response:
        outcome["news_results"] = self._extract_news_results(response, num_results)

    # "organic_results" was just set above, so the links can be recorded directly.
    self.nvidia_links["general"] = [entry["link"] for entry in organic]
    return outcome

def search_news(self, query: str = "nvidia", num_results: int = 5) -> dict:
    """
    Run a news-specific SerpAPI search for ``"<query> news"``.

    Returns a dictionary with "query", "search_timestamp" and "news_results",
    and stores the result links in ``self.nvidia_links["news"]``.
    """
    news_query = f"{query} news"
    request = {
        "q": news_query,
        "tbm": "nws",
        "api_key": self.api_key,
        "hl": "en",
        "num": num_results,
    }
    response = GoogleSearch(request).get_dict()

    stamp = datetime.now().isoformat()
    articles = self._extract_news_results(response, num_results)

    # The key is always present in the returned dictionary, so the links are
    # recorded unconditionally.
    self.nvidia_links["news"] = [article["link"] for article in articles]
    return {
        "query": news_query,
        "search_timestamp": stamp,
        "news_results": articles,
    }

def search_financial_info(self, specific_topic: Optional[str] = None) -> dict:
    """
    Run a SerpAPI search for NVIDIA financial information.

    The query is "nvidia financial", or "nvidia <topic> financial" when a
    topic is given. Returns a dictionary with "query", "search_timestamp" and
    "financial_results" (at most five entries), and stores the result links
    in ``self.nvidia_links["financial"]``.
    """
    if specific_topic:
        query = f"nvidia {specific_topic} financial"
    else:
        query = "nvidia financial"

    request = {"q": query, "api_key": self.api_key, "hl": "en", "gl": "us"}
    response = GoogleSearch(request).get_dict()

    stamp = datetime.now().isoformat()
    hits = self._extract_organic_results(response, 5)

    self.nvidia_links["financial"] = [hit["link"] for hit in hits]
    return {
        "query": query,
        "search_timestamp": stamp,
        "financial_results": hits,
    }

def search_quarterly_report_info(self, year: Optional[int] = None, quarter: Optional[int] = None) -> dict:
    """
    Run a SerpAPI search for NVIDIA quarterly report information.

    The year is appended to the query when truthy, and the quarter as "Q<n>"
    only when it lies in 1..4. Returns a dictionary with "query",
    "search_timestamp", "year", "quarter" and "results" (at most five
    entries), and stores the result links in
    ``self.nvidia_links["quarterly"]``.
    """
    terms = ["nvidia quarterly report"]
    if year:
        terms.append(f"{year}")
    if quarter and 1 <= quarter <= 4:
        terms.append(f"Q{quarter}")
    query = " ".join(terms)

    request = {"q": query, "api_key": self.api_key, "hl": "en", "gl": "us"}
    response = GoogleSearch(request).get_dict()

    stamp = datetime.now().isoformat()
    hits = self._extract_organic_results(response, 5)

    self.nvidia_links["quarterly"] = [hit["link"] for hit in hits]
    return {
        "query": query,
        "search_timestamp": stamp,
        "year": year,
        "quarter": quarter,
        "results": hits,
    }

# Create LangGraph State and Agent
Define the WebSearchState TypedDict and implement the nvidia_web_search_agent function that will be used in the LangGraph pipeline.

In [None]:
from typing import TypedDict, Optional, Dict, Any

# Define the WebSearchState TypedDict
class WebSearchState(TypedDict, total=False):
    """
    State dictionary used by the LangGraph pipeline in this file.

    Declared with ``total=False``, so every key is optional.
    """
    year: int  # Year for the quarterly report
    quarter: int  # Quarter for the quarterly report
    # NOTE(review): nvidia_web_search_agent stores an "Error: ..." string here
    # when the search raises, which does not match the Dict annotation —
    # confirm whether the annotation should be widened.
    organized_result: Dict[str, Any]  # Structured result of the search

# Define the nvidia_web_search_agent function
def nvidia_web_search_agent(state: WebSearchState) -> Dict[str, Any]:
    """
    LangGraph node: look up NVIDIA quarterly report info for the given state.

    Reads the optional "year" and "quarter" keys from ``state``, runs
    ``NvidiaWebSearchAgent.search_quarterly_report_info`` and returns
    ``{"organized_result": ...}``. On success the value is a dictionary with
    "query", "search_timestamp", "year", "quarter" and "results"; if anything
    raises (including agent construction) the value is an "Error: ..." string.
    """
    requested_year = state.get("year")
    requested_quarter = state.get("quarter")

    try:
        # The agent is built inside the try so that a missing API key is
        # reported through the same error path as a failed search.
        report = NvidiaWebSearchAgent().search_quarterly_report_info(
            year=requested_year,
            quarter=requested_quarter,
        )

        # Defaults used for any field missing from the search result.
        fallbacks = {
            "query": "",
            "search_timestamp": "",
            "year": requested_year,
            "quarter": requested_quarter,
            "results": [],
        }
        organized = {field: report.get(field, default) for field, default in fallbacks.items()}
    except Exception as exc:
        return {"organized_result": f"Error: {exc!s}"}

    return {"organized_result": organized}

# Build the LangGraph Pipeline
Create and compile the LangGraph pipeline with appropriate nodes and edges.

In [None]:
# Build the LangGraph pipeline
def build_graph():
    """
    Assemble and compile a one-node LangGraph pipeline.

    The single node wraps ``nvidia_web_search_agent``; it is both the entry
    point of the graph and directly connected to END.
    """
    node_name = "NvidiaWebSearchAgent"

    graph_builder = StateGraph(WebSearchState)
    graph_builder.add_node(node_name, RunnableLambda(nvidia_web_search_agent))

    # Wire the only node as the start of the graph and route it straight to END.
    graph_builder.set_entry_point(node_name)
    graph_builder.add_edge(node_name, END)

    compiled_graph = graph_builder.compile()
    return compiled_graph