In [69]:
import os
import requests
import json
from typing import Dict

search_url = "https://www.searchapi.io/api/v1/search"

def web_search(query: str) -> Dict:
    """Search the web for current information through the SearchAPI Google engine.

    The query is sent to the endpoint stored in the module-level ``search_url``
    and every entry of the response's ``organic_results`` list is condensed
    into a small dict.

    Args:
        query: Free-text search query.

    Returns:
        ``{"status": "success", "results": [...]}`` on success, where each
        result carries ``title``, ``url`` and ``description`` (placeholder
        strings are substituted for fields the API omits).
        ``{"status": "error", "error_message": "<reason>"}`` on any failure,
        including a missing API key. Exceptions are reported through this
        dict rather than raised.

    Environment:
        SEARCHAPI_API_KEY: API key sent with the request.
    """
    print(f"--- Tool: web_search called for query: {query} ---")

    # Credentials come from the environment so they never end up in the
    # notebook source or its saved outputs.
    api_key = os.environ.get("SEARCHAPI_API_KEY")
    if not api_key:
        return {
            "status": "error",
            "error_message": "SEARCHAPI_API_KEY environment variable is not set",
        }

    request_timeout_s = 30  # without a timeout a stalled connection blocks the kernel forever

    try:
        response = requests.get(
            search_url,
            params={
                "engine": "google",
                "q": query,
                "device": "desktop",
                "lr": "lang_en",
                "num": 5,
                "api_key": api_key,
            },
            timeout=request_timeout_s,
        )
        response.raise_for_status()
        data = response.json()

        # `or []` also covers an explicit null in the payload.
        formatted_results = [
            {
                "title": item.get("title", "No title"),
                "url": item.get("link", "No URL"),
                "description": item.get("snippet", "No description"),
            }
            for item in (data.get("organic_results") or [])
        ]

        return {
            "status": "success",
            "results": formatted_results,
        }

    except Exception as e:
        # Deliberately broad: failures are handed back to the caller as data.
        return {
            "status": "error",
            "error_message": str(e),
        }

In [70]:
# Try the search tool on a sample claim; the returned dict is kept in `res`.
res = web_search("Nasa did not land on the moon ever.")

--- Tool: web_search called for query: Nasa did not land on the moon ever. ---


In [71]:
# Display the dict returned by web_search in the previous cell.
res

{'status': 'success',
 'results': [{'title': 'Moon landing conspiracy theories, debunked',
   'url': 'https://www.rmg.co.uk/stories/space-astronomy/moon-landing-conspiracy-theories-debunked',
   'description': "The Moon landings were faked. Apollo 11 didn't happen. Humans never set foot on the Moon. Heard all this before?"},
  {'title': 'Moon landing conspiracy theories',
   'url': 'https://en.wikipedia.org/wiki/Moon_landing_conspiracy_theories',
   'description': 'The most notable claim of these conspiracy theories is that the six crewed landings (1969–1972) were faked and that twelve Apollo astronauts did not actually ...'},
  {'title': 'How do we know that we went to the Moon?',
   'url': 'https://www.iop.org/explore-physics/moon/how-do-we-know-we-went-to-the-moon',
   'description': 'Every single argument claiming that NASA faked the Moon landings has been discredited. We explore the conspiracy theories and examine the evidence.'}]}

In [89]:
from firecrawl import FirecrawlApp

def web_scrap(search_results: dict) -> dict:
    """
    Scrape the page behind every URL listed in a web-search result using Firecrawl.

    Args:
        search_results: A dict whose ``"results"`` entry is a list of dicts,
            each carrying a ``"url"`` key (the shape produced by the web
            search tool).

    Returns:
        ``{"status": "success", "data": {url: scrape_result}}`` when every URL
        was scraped (``data`` is empty when there was nothing to scrape).
        ``{"status": "error", "error_message": ..., "error": ...}`` otherwise;
        ``error`` repeats ``error_message`` for callers that look for that key.
        A failure on any single URL aborts the run and is reported this way.

    Environment:
        FIRECRAWL_API_KEY: API key handed to ``FirecrawlApp``.
    """
    print("--- Tool: web_scrap called ---")

    def _failure(detail: str) -> dict:
        message = f"Error during web scraping: {detail}"
        return {"status": "error", "error_message": message, "error": message}

    # Malformed input (for example an error dict from the search step) is
    # reported like any other failure instead of escaping as KeyError.
    try:
        urls = [item["url"] for item in search_results["results"]]
    except (KeyError, TypeError) as e:
        return _failure(f"malformed search_results ({e!r})")

    if not urls:
        return {"status": "success", "data": {}}

    # Credentials come from the environment, never from the notebook source.
    api_key = os.environ.get("FIRECRAWL_API_KEY")
    if not api_key:
        return _failure("FIRECRAWL_API_KEY environment variable is not set")

    try:
        app = FirecrawlApp(api_key=api_key)
        web_content = {}

        for url in urls:
            web_content[url] = app.scrape(url)

        return {
            "status": "success",
            "data": web_content
        }

    except Exception as e:
        return _failure(str(e))

In [90]:
# Hand-copied web_search result, used as fixed input for web_scrap in the next cell.
q = {'status': 'success',
 'results': [{'title': 'Moon landing conspiracy theories, debunked',
   'url': 'https://www.rmg.co.uk/stories/space-astronomy/moon-landing-conspiracy-theories-debunked',
   'description': "The Moon landings were faked. Apollo 11 didn't happen. Humans never set foot on the Moon. Heard all this before?"},
  {'title': 'Moon landing conspiracy theories',
   'url': 'https://en.wikipedia.org/wiki/Moon_landing_conspiracy_theories',
   'description': 'The most notable claim of these conspiracy theories is that the six crewed landings (1969–1972) were faked and that twelve Apollo astronauts did not actually ...'},
  {'title': 'How do we know that we went to the Moon?',
   'url': 'https://www.iop.org/explore-physics/moon/how-do-we-know-we-went-to-the-moon',
   'description': 'Every single argument claiming that NASA faked the Moon landings has been discredited. We explore the conspiracy theories and examine the evidence.'}]}


In [91]:
# Scrape every URL listed in `q`; the returned dict is kept in `ws`.
ws = web_scrap(q)

--- Tool: web_scrap called ---


In [105]:
import os
from typing import Dict, Optional, List
from urllib.parse import urlparse
from firecrawl import FirecrawlApp


def _collect_urls(search_results) -> List[str]:
    """Return the unique URL strings found in ``search_results``, in first-seen order."""
    if isinstance(search_results, str):
        candidates = [search_results]
    elif isinstance(search_results, dict):
        results = search_results.get("results")
        if isinstance(results, list):
            # Shape {"status": ..., "results": [{"url": ...}, ...]}.
            candidates = [
                item.get("url") if isinstance(item, dict) else item
                for item in results
            ]
        else:
            # Flat {title: url} mapping.
            candidates = list(search_results.values())
    else:
        candidates = list(search_results or [])

    # Non-string entries can neither be scraped nor used as result keys;
    # dict.fromkeys drops duplicates while keeping order.
    return list(dict.fromkeys(c for c in candidates if isinstance(c, str) and c))


def web_scrap(search_results: dict, extract_format: Optional[str] = "markdown") -> Dict:
    """
    Scrapes the web content from URLs found in the web search results using firecrawl.

    Args:
        search_results: Where to find the URLs. Accepted shapes are a dict with
            a ``"results"`` list of ``{"url": ...}`` items, a flat
            ``{title: url}`` mapping, or a plain iterable of URL strings.
        extract_format (str, optional): Format to extract ('markdown', 'html',
            or 'links'). Defaults to "markdown"; ``None`` is treated as the default.

    Returns:
        Dict: A dictionary with a 'status' key ('success' or 'error').
        If 'success', 'data' maps each URL to a dict with 'domain', 'format',
        'content', 'metadata' and 'title', or to ``{"error": ...}`` when that
        single URL could not be scraped (the overall status stays 'success').
        'data' is empty when no URL was supplied.
        If 'error', includes an 'error_message' key; 'message' carries the same
        text for callers that read that key.

    Environment:
        FIRECRAWL_API_KEY: API key handed to ``FirecrawlApp``.
    """
    print("--- Tool: web_scrap called ---")

    def _failure(detail: str) -> Dict:
        text = f"Error during web scraping: {detail}"
        return {"status": "error", "error_message": text, "message": text}

    extract_format = extract_format or "markdown"

    try:
        urls = _collect_urls(search_results)
        if not urls:
            return {"status": "success", "data": {}}

        # Credentials come from the environment, never from the notebook source.
        api_key = os.environ.get("FIRECRAWL_API_KEY")
        if not api_key:
            return _failure("FIRECRAWL_API_KEY environment variable is not set")

        app = FirecrawlApp(api_key=api_key)
        web_content = {}
        formats = [extract_format]  # Define formats to extract

        for url in urls:
            try:
                result = app.scrape(url, formats=formats)

                # Pick the attribute named after the requested format when
                # the result exposes it; otherwise store a placeholder string.
                if extract_format in ("markdown", "html", "links") and hasattr(result, extract_format):
                    content = getattr(result, extract_format)
                else:
                    content = f"Content not available in '{extract_format}' format"

                web_content[url] = {
                    "domain": urlparse(url).netloc,
                    "format": extract_format,
                    "content": content,
                    "metadata": getattr(result, "metadata", {}),
                    "title": getattr(result, "title", "No title")
                }

                print(f"✓ Successfully scraped: {url}")
            except Exception as e:
                # One bad page must not discard the pages already scraped.
                print(f"✗ Failed to scrape {url}: {str(e)}")
                web_content[url] = {"error": str(e)}

        return {
            "status": "success",
            "data": web_content
        }

    except Exception as e:
        return _failure(str(e))

In [106]:
# Flat title -> URL mapping used as input for web_scrap in the next cell.
# NOTE(review): the "Moon Landing Conspiracy Theories Debunked" entry points at the
# "why-did-we-stop-going-moon" page, not the "debunked" article returned by the
# earlier search — confirm which URL is intended.
search_results = {
    "NASA Apollo 11 Mission Overview": "https://www.nasa.gov/history/apollo-11-mission-overview/",
    "Moon Landing Conspiracy Theories Debunked": "https://www.rmg.co.uk/stories/topics/why-did-we-stop-going-moon",
    "How Do We Know We Went to the Moon": "https://www.iop.org/explore-physics/moon/how-do-we-know-we-went-to-the-moon"
    }

In [107]:
# Scrape the URLs in `search_results`. As the last expression of the cell, the
# whole returned dict — including the full page content — is rendered as output.
web_scrap(search_results)

--- Tool: web_scrap called ---
✓ Successfully scraped: https://www.nasa.gov/history/apollo-11-mission-overview/
✓ Successfully scraped: https://www.rmg.co.uk/stories/topics/why-did-we-stop-going-moon
✓ Successfully scraped: https://www.iop.org/explore-physics/moon/how-do-we-know-we-went-to-the-moon


{'status': 'success',
 'data': {'https://www.nasa.gov/history/apollo-11-mission-overview/': {'domain': 'www.nasa.gov',
   'format': 'markdown',
   'content': 'Search\n\n## Suggested Searches\n\n- [Climate Change](https://www.nasa.gov/?search=Climate%20Change)\n- [Artemis](https://www.nasa.gov/?search=Artemis)\n- [Expedition 64](https://www.nasa.gov/?search=Expedition%2064)\n- [Mars perseverance](https://www.nasa.gov/?search=Mars%20perseverance)\n- [SpaceX Crew-2](https://www.nasa.gov/?search=SpaceX%20Crew-2)\n- [International Space Station](https://www.nasa.gov/?search=International%20Space%20Station)\n- [View All Topics A-Z](https://www.nasa.gov/a-to-z-topics-listing/)\n\n8 min read\n\n# Apollo 11 Mission Overview\n\n![The headshot image of Sarah A. Loff](https://secure.gravatar.com/avatar/87f0be0c0ccd6750d582bf37a7e73286?s=300&d=blank&r=g)\n\n### Sarah A. Loff\n\nApr 17, 2015\n\nArticle\n\n- [Share on X.](https://x.com/intent/tweet?via=NASA&text=Apollo%2011%20Mission%20Overview&url=h