In [7]:
!pip install langchain-brightdata langchain-google-genai langgraph langchain-core google-generativeai

Collecting langchain-google-genai
  Downloading langchain_google_genai-2.1.5-py3-none-any.whl.metadata (5.2 kB)
Collecting langgraph
  Downloading langgraph-0.4.8-py3-none-any.whl.metadata (6.8 kB)
Collecting filetype<2.0.0,>=1.2.0 (from langchain-google-genai)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting google-ai-generativelanguage<0.7.0,>=0.6.18 (from langchain-google-genai)
  Downloading google_ai_generativelanguage-0.6.18-py3-none-any.whl.metadata (9.8 kB)
Collecting langgraph-checkpoint>=2.0.26 (from langgraph)
  Downloading langgraph_checkpoint-2.0.26-py3-none-any.whl.metadata (4.6 kB)
Collecting langgraph-prebuilt>=0.2.0 (from langgraph)
  Downloading langgraph_prebuilt-0.2.2-py3-none-any.whl.metadata (4.5 kB)
Collecting langgraph-sdk>=0.1.42 (from langgraph)
  Downloading langgraph_sdk-0.1.70-py3-none-any.whl.metadata (1.5 kB)
INFO: pip is looking at multiple versions of google-generativeai to determine which version is compatible with other r

In [None]:
import os
import json
from typing import Dict, Any, Optional
from langchain_brightdata import BrightDataWebScraperAPI
from langchain_google_genai import ChatGoogleGenerativeAI
from langgraph.prebuilt import create_react_agent

In [None]:
class BrightDataScraper:
    """Convenience wrapper around the Bright Data web-scraper tool.

    Exposes a few fixed scraping shortcuts (Amazon product, Amazon
    bestsellers page, LinkedIn profile) that all return a uniform result
    dict, plus an optional Gemini-backed ReAct agent that can drive the
    same scraper tool from a natural-language query.

    Result dict shape returned by every ``scrape_*`` method:
        ``{"success": True, "data": <tool output>}`` on success, or
        ``{"success": False, "error": <message>}`` on failure.
    """

    # The scraper tool can hand back a failure as an ordinary string instead of
    # raising (the notebook's sample run shows an "Error extracting data from
    # ...: Error 202 ..." string printed through the success path). Strings
    # with this prefix are reported as failures.
    _TOOL_ERROR_PREFIX = "Error extracting data"

    def __init__(self, api_key: str, google_api_key: Optional[str] = None):
        """Create the scraper tool and, if a Google key is given, the agent.

        Args:
            api_key: Bright Data API key, forwarded to the scraper tool.
            google_api_key: Optional Google API key. When provided, a
                ``gemini-2.0-flash`` chat model and a ReAct agent that has the
                scraper as its only tool are created.
        """
        self.api_key = api_key
        self.scraper = BrightDataWebScraperAPI(bright_data_api_key=api_key)

        # Always define both attributes so callers can test for ``None``
        # instead of probing with ``hasattr``.
        self.llm = None
        self.agent = None

        if google_api_key:
            self.llm = ChatGoogleGenerativeAI(
                model="gemini-2.0-flash",
                google_api_key=google_api_key
            )
            self.agent = create_react_agent(self.llm, [self.scraper])

    def _invoke(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        """Call the scraper tool with ``payload`` and normalise the outcome."""
        try:
            results = self.scraper.invoke(payload)
        except Exception as e:
            return {"success": False, "error": str(e)}

        # A returned error string is a failure, not data (see class comment).
        if isinstance(results, str) and results.startswith(self._TOOL_ERROR_PREFIX):
            return {"success": False, "error": results}
        return {"success": True, "data": results}

    def scrape_amazon_product(self, url: str, zipcode: str = "10001") -> Dict[str, Any]:
        """Scrape one Amazon product page.

        Args:
            url: Product page URL.
            zipcode: Zip code sent along with the request.

        Returns:
            Result dict (see class docstring).
        """
        return self._invoke({
            "url": url,
            "dataset_type": "amazon_product",
            "zipcode": zipcode
        })

    def scrape_amazon_bestsellers(self, region: str = "in") -> Dict[str, Any]:
        """Scrape the bestsellers page of ``https://www.amazon.<region>``.

        Args:
            region: Domain suffix placed after ``amazon.`` in the URL.

        Returns:
            Result dict (see class docstring).
        """
        # NOTE(review): this sends a listing URL with the "amazon_product"
        # dataset type; the sample run only got a 202 "still in progress"
        # reply for it - confirm this dataset type suits a bestsellers page.
        return self._invoke({
            "url": f"https://www.amazon.{region}/gp/bestsellers/",
            "dataset_type": "amazon_product"
        })

    def scrape_linkedin_profile(self, url: str) -> Dict[str, Any]:
        """Scrape one LinkedIn person profile.

        Args:
            url: Profile URL.

        Returns:
            Result dict (see class docstring).
        """
        return self._invoke({
            "url": url,
            "dataset_type": "linkedin_person_profile"
        })

    def run_agent_query(self, query: str) -> None:
        """Stream the agent's handling of ``query`` and print each step.

        Prints an error and returns when no agent was configured (no Google
        API key). Any exception raised while streaming is caught and printed.
        """
        # getattr keeps this safe even on instances built without __init__.
        agent = getattr(self, "agent", None)
        if agent is None:
            print("Error: Google API key required for agent functionality")
            return

        try:
            for step in agent.stream(
                {"messages": query},
                stream_mode="values"
            ):
                # Each streamed value carries the message list so far; show
                # only the newest message.
                step["messages"][-1].pretty_print()
        except Exception as e:
            print(f"Agent error: {e}")

    def print_results(self, results: Dict[str, Any], title: str = "Results") -> None:
        """Print a banner with ``title`` followed by the data or the error.

        Args:
            results: Result dict as returned by the ``scrape_*`` methods.
            title: Heading printed between the separator lines.
        """
        print(f"\n{'='*50}")
        print(f"{title}")
        print(f"{'='*50}")

        if results.get("success"):
            # default=str: fall back to str() for values json cannot encode
            # instead of raising TypeError halfway through the report.
            print(json.dumps(results.get("data"), indent=2, ensure_ascii=False, default=str))
        else:
            print(f"Error: {results.get('error', 'Unknown error')}")
        print()

In [None]:
def main():
    """Run the demo: three direct scrapes followed by one agent query.

    API keys are read from the ``BRIGHT_DATA_API_KEY`` and ``GOOGLE_API_KEY``
    environment variables so real keys never have to be written into the
    notebook. When a variable is unset, the original placeholder string is
    used instead.
    """
    placeholder = "Use Your Own API Key"
    bright_data_api_key = os.environ.get("BRIGHT_DATA_API_KEY", placeholder)
    google_api_key = os.environ.get("GOOGLE_API_KEY", placeholder)

    scraper = BrightDataScraper(bright_data_api_key, google_api_key)

    print("🛍️ Scraping Amazon India Bestsellers...")
    bestsellers = scraper.scrape_amazon_bestsellers("in")
    scraper.print_results(bestsellers, "Amazon India Bestsellers")

    print("📦 Scraping Amazon Product...")
    product_url = "https://www.amazon.com/dp/B08L5TNJHG"
    product_data = scraper.scrape_amazon_product(product_url, "10001")
    scraper.print_results(product_data, "Amazon Product Data")

    print("👤 Scraping LinkedIn Profile...")
    linkedin_url = "https://www.linkedin.com/in/satyanadella/"
    linkedin_data = scraper.scrape_linkedin_profile(linkedin_url)
    scraper.print_results(linkedin_data, "LinkedIn Profile Data")

    print("🤖 Running AI Agent Query...")
    agent_query = """
    Scrape Amazon product data for https://www.amazon.com/dp/B0D2Q9397Y?th=1
    in New York (zipcode 10001) and summarize the key product details.
    """
    scraper.run_agent_query(agent_query)

In [8]:
if __name__ == "__main__":
    # Local imports: only this entry-point block needs them.
    import subprocess
    import sys

    print("Installing required packages...")
    # Install through the running interpreter so the packages land in this
    # kernel's environment. check=False keeps the step best-effort, as the
    # previous os.system call was.
    subprocess.run(
        [
            sys.executable, "-m", "pip", "install", "-q",
            "langchain-brightdata", "langchain-google-genai", "langgraph",
        ],
        check=False,
    )

    # setdefault: only fall back to the placeholder when no key is exported,
    # instead of overwriting a real key already present in the environment.
    os.environ.setdefault("BRIGHT_DATA_API_KEY", "Use Your Own API Key")

    main()

Installing required packages...
🛍️ Scraping Amazon India Bestsellers...

Amazon India Bestsellers
"Error extracting data from https://www.amazon.in/gp/bestsellers/: Error 202: {\"snapshot_id\":\"s_mbtsi1fd1mlrbvayq9\",\"message\":\"Your request is still in progress and cannot be retrieved in this call. Use the provided Snapshot ID to track progress via the Monitor Snapshot endpoint and download it once ready via the Download Snapshot endpoint\"}"

📦 Scraping Amazon Product...

Amazon Product Data
{
  "input": {
    "url": "https://www.amazon.com/dp/B08L5TNJHG",
    "zipcode": "10001",
    "asin": "",
    "language": ""
  },
}

👤 Scraping LinkedIn Profile...

LinkedIn Profile Data
{
  "id": "satyanadella",
  "name": "Satya Nadella",
  "city": "Redmond, Washington, United States",
  "country_code": "US",
  "position": "Chairman and CEO at Microsoft",
  "about": "As chairman and CEO of Microsoft, I define my mission and that of my company as empowering every person and every organization 