In [None]:
!pip install -qU crewai langchain_openai 'crewai[tools]' google-search-results

In [None]:
import os
from getpass import getpass

def _ensure_api_key(env_name, prompt):
    """Make sure ``os.environ[env_name]`` holds a non-empty API key.

    If the variable is already set to a non-empty value it is left untouched,
    so re-running this cell does not ask again. Otherwise the user is prompted
    with ``prompt`` via ``getpass`` (the typed value is not echoed).

    Raises:
        ValueError: if the user submits an empty / whitespace-only key.
    """
    if os.environ.get(env_name):
        return
    # Kept in a local so the secret does not linger as a notebook-level variable.
    secret = getpass(prompt).strip()
    if not secret:
        raise ValueError(f"{env_name} must not be empty")
    os.environ[env_name] = secret


# Ask the user to enter any missing API keys (they won't show up on screen)
_ensure_api_key("GEMINI_API_KEY", "Enter your Gemini API key: ")
_ensure_api_key("SERPER_API_KEY", "Enter your Serper API key: ")

### Initialize Models

In [None]:
from langchain_openai import ChatOpenAI
from crewai import LLM

# Model handle that the agents further down receive through their `llm=` argument.
# NOTE(review): presumably authenticates with the GEMINI_API_KEY environment
# variable set earlier in this notebook - confirm against the crewai LLM docs.
Gemini = LLM(model="gemini/gemini-2.0-flash")

# Disabled alternative backend (would use the ChatOpenAI import above):
# gpt4o = ChatOpenAI(model="gpt-4o")

### AI Web Scraping Agent 🔍

In [None]:
from crewai import Agent, Task, Crew, Process
from crewai_tools import SerperDevTool

# Initialize tools
# Single SerperDevTool instance, created once at notebook level and handed to
# both agents in create_web_scraping_agent via `tools=[search_tool]`.
# NOTE(review): presumably picks up the SERPER_API_KEY environment variable set
# earlier in this notebook - confirm against the crewai_tools documentation.
search_tool = SerperDevTool()

def create_web_scraping_agent(url: str, target_data: str):
    """Build and run a two-agent crew that gathers ``target_data`` about ``url``.

    A "Data Collector" agent and a "Data Processor" agent are created, each
    given the notebook-level ``search_tool`` and the ``Gemini`` LLM. One task is
    assigned to each agent and both are run in a crew configured with
    ``Process.sequential``. Note that the only tool attached is the search
    tool; nothing in this function fetches ``url`` directly.

    Args:
        url: Website the data should relate to. Leading/trailing whitespace is
            stripped.
        target_data: Free-text description of what to collect (for example
            "product prices"). Leading/trailing whitespace is stripped.

    Returns:
        Whatever ``crew.kickoff(...)`` returns for the configured crew.

    Raises:
        ValueError: if ``url`` or ``target_data`` is not a string or is blank.
            Checked before any agent is built so no LLM or search call is made
            with an empty prompt.
    """
    # Validate and normalize both arguments up front; a blank value would
    # otherwise be interpolated into every prompt below.
    cleaned = {}
    for label, value in (("url", url), ("target_data", target_data)):
        if not isinstance(value, str) or not value.strip():
            raise ValueError(f"{label} must be a non-empty string, got {value!r}")
        cleaned[label] = value.strip()
    url = cleaned["url"]
    target_data = cleaned["target_data"]

    # Define the Web Scraping Agent
    scraping_agent = Agent(
        role="Data Collector",
        goal=f"Collect and analyze {target_data} from {url}",
        backstory="An expert data analyst trained to gather and process web information efficiently",
        verbose=True,
        tools=[search_tool],
        llm=Gemini
    )

    # Define the Data Processing Agent
    processing_agent = Agent(
        role="Data Processor",
        goal=f"Process and structure the collected {target_data}",
        backstory="A data processing specialist who organizes and formats collected information into useful insights",
        verbose=True,
        tools=[search_tool],
        llm=Gemini
    )

    # Define the Data Collection Task
    scraping_task = Task(
        description=f"Search and collect {target_data} related to {url}",
        expected_output=f"Raw collected data about {target_data} from {url}",
        agent=scraping_agent
    )

    # Define the Data Processing Task
    processing_task = Task(
        description=f"Process and structure the collected {target_data} into a clear format",
        expected_output=f"A structured and cleaned summary of {target_data}",
        agent=processing_agent
    )

    # Create and Run the Crew (collection task listed first, processing second)
    crew = Crew(
        agents=[scraping_agent, processing_agent],
        tasks=[scraping_task, processing_task],
        verbose=True,
        process=Process.sequential
    )

    # The prompts above are already fully rendered by the f-strings; the same
    # values are still passed as kickoff inputs, as before.
    # NOTE(review): if a value itself contains "{name}"-style text, kickoff may
    # treat it as a template placeholder - confirm against the crewai docs.
    result = crew.kickoff(inputs={"url": url, "target_data": target_data})
    return result

### Execute the agent


In [None]:
# Collect the two free-text parameters interactively, then run the crew.
url = input("Enter the website URL to scrape: ")
target_data = input(
    "Enter the type of data to extract (e.g., product prices, news headlines): "
)
scraping_result = create_web_scraping_agent(url=url, target_data=target_data)

# Heading and result end up on consecutive lines, each followed by a newline.
print("Web Scraping Results:", scraping_result, sep="\n")