In [1]:
from crewai import Agent, Crew, Process,Task
from crewai.project import CrewBase, agent, crew
from crewai.tools import tool
import requests
import os
import yaml
from langtrace_python_sdk import langtrace
from dotenv import load_dotenv

load_dotenv()

True

In [11]:
# define tools
import pandas as pd
import numpy as np
import sys
import io
import builtins

@tool("filecheck")
def filecheck(file_path: str) -> str:
    """Check if a file exists at the given path."""
    # NOTE: the docstring above is intentionally left verbatim; the `@tool`
    # decorator applied to this function presumably surfaces it to the agent
    # as the tool description, so its wording is part of runtime behaviour.
    #
    # The result is one of two fixed status strings. `os.path.exists` is also
    # true for directories, so "File exists" does not guarantee a regular file.
    found = os.path.exists(file_path)
    return "File exists" if found else "File does not exist"
        
@tool("download_data")
def download_data(data_url: str) -> str:
    """Download data from a given URL and save it to a local file.

    The file is stored in the current working directory under the last
    component of the URL path (query string and fragment are ignored). If a
    file with that name already exists it is reused and no network request is
    made.

    Args:
        data_url: HTTP(S) URL of the file to download.

    Returns:
        The absolute path of the local file on success, or a string starting
        with "Error downloading file:" if the request or the write fails.
    """
    from urllib.parse import unquote, urlparse

    # Build the name from the URL *path* only, so "?token=..." never ends up in
    # the file name, and fall back to a fixed name when the path has no usable
    # last component (e.g. the URL ends with "/").
    filename = os.path.basename(unquote(urlparse(data_url).path))
    if filename in ("", ".", ".."):
        filename = "downloaded_data"
    local_file = os.path.join(os.getcwd(), filename)

    # Reuse an earlier download before touching the network at all.
    if os.path.exists(local_file):
        return local_file

    # Stream into a sidecar file and rename at the end, so an interrupted
    # download can never be mistaken for a complete cached file later on.
    partial_file = local_file + ".part"
    try:
        # `timeout` prevents hanging forever on an unresponsive server; the
        # `with` block releases the streamed connection in every case.
        with requests.get(data_url, stream=True, timeout=30) as response:
            response.raise_for_status()  # Raise an error for bad responses
            with open(partial_file, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    file.write(chunk)
        os.replace(partial_file, local_file)
        return local_file  # Return the path of the downloaded file
    except (requests.exceptions.RequestException, OSError) as e:
        # Best-effort cleanup of the incomplete sidecar file; a failure here
        # must not mask the original error reported to the caller.
        try:
            os.remove(partial_file)
        except OSError:
            pass
        return f"Error downloading file: {e}"

@tool("json_to_dataframe")
def json_to_dataframe(json_data: str):
    """Convert a JSON string into a Pandas DataFrame.

    Args:
        json_data: JSON text (an object or an array). A string that does not
            start with "{" or "[" is handed to pandas unchanged, so a file
            path or URL pointing at a JSON document keeps working.

    Returns:
        The parsed DataFrame, or a string starting with "Error parsing JSON:"
        if the input cannot be read or parsed.
    """
    try:
        source = json_data
        # Passing literal JSON text straight to `pd.read_json` is deprecated in
        # recent pandas releases; wrapping it in a file-like object is the
        # supported form and removes any "is this a path?" ambiguity.
        if isinstance(json_data, str) and json_data.lstrip().startswith(("{", "[")):
            source = io.StringIO(json_data)
        return pd.read_json(source)
    except (ValueError, OSError) as e:
        # OSError covers a path/URL input that cannot be opened, so the caller
        # always gets the error string instead of an exception.
        return f"Error parsing JSON: {e}"


@tool("execute_python_code")
def execute_python_code(code: str) -> str:
    """
    Execute custom Python code (Pandas/Numpy compatible) and return the printed output.

    The code runs with a fresh globals dict in which `pd` (pandas) and `np`
    (numpy) are already bound. Only text written to standard output is
    returned, so results must be printed with `print(...)`.

    Args:
        code: Python source code to execute.

    Returns:
        Everything the code printed, or a string of the form
        "Error executing code: <ExceptionType>: <message>" if it raised.
    """
    from contextlib import redirect_stdout

    # SECURITY: exec() runs arbitrary, agent-generated code with the full
    # privileges of this process (file system, network, environment). Only use
    # this tool inside a sandboxed or otherwise disposable environment.
    captured = io.StringIO()
    try:
        # redirect_stdout restores the previous sys.stdout on exit, including
        # when the executed code raises.
        with redirect_stdout(captured):
            exec(code, {"pd": pd, "np": np})  # Execute with Pandas & Numpy in the global scope
    except (Exception, SystemExit) as e:
        # SystemExit is caught as well so that `sys.exit()` inside the snippet
        # is reported as an error instead of escaping from the tool. The
        # exception type is included because many messages are meaningless
        # alone (e.g. KeyError shows only the key, a bare assert is empty).
        return f"Error executing code: {type(e).__name__}: {e}"

    return captured.getvalue()  # Get printed output


In [18]:
# define agents, tasks and crew

# Load the per-agent configuration from the YAML file that sits one directory
# above the notebook. The encoding is given explicitly so the file is decoded
# the same way on every platform instead of using the locale default.
with open("../agents.yaml", "r", encoding="utf-8") as file:
    agents_data = yaml.safe_load(file)

# define agents
# Coordinator: owns the task, can check whether a file exists locally, and is
# the only agent allowed to delegate work to the others.
coordinator_agent = Agent(
    config=agents_data['coordinator'],
    verbose=True,
    tools=[filecheck],
    allow_delegation=True,
    llm="gpt-4o"
)

# Data downloader: fetches the data file; runs on the smaller model and may
# not delegate.
data_downloader_agent = Agent(
    config=agents_data['data_downloader'],
    verbose=True,
    tools=[download_data],
    allow_delegation=False,
    llm="gpt-4o-mini"
)

# Data analyst: can turn JSON into a DataFrame and execute Python code; may
# not delegate.
data_analyst_agent = Agent(
    config=agents_data['data_analyst'],
    verbose=True,
    tools=[json_to_dataframe, execute_python_code],
    allow_delegation=False,
    llm="gpt-4o"
)

# define tasks
# The single task is assigned to the coordinator. `{data_source_url}` and
# `{user_query}` are placeholders filled from the `inputs` passed to
# `crew.kickoff(...)`.
analysis_task = Task(
    description="""
        Analyze the data present at {data_source_url} and perform tasks neccessary to answer the user query: {user_query}
    """,
    expected_output="""
        A data analysis report in text as per the user query
    """,
    agent=coordinator_agent
)

# NOTE(review): this assignment rebinds the name `crew`, which the first cell
# also imports from `crewai.project`; the imported object is no longer
# reachable under that name afterwards. No `process` is passed, so the library
# default is used.
crew = Crew(
    agents=[coordinator_agent, data_downloader_agent, data_analyst_agent],
    tasks=[analysis_task],
    verbose=True
)


In [20]:
# Run the crew against the Scooby-Doo CSV and ask for the title with the
# highest value in the `imdb` column.
# (An earlier experiment ran the same crew on a local titanic.json file to
# count survivors; that call used a machine-specific absolute path.)
response = crew.kickoff(
    inputs={
        'user_query': 'Use this data and find the title having maxmium imdb rating. Use column: imdb',
        'data_source_url': 'https://raw.githubusercontent.com/kernelshreyak/ai-ml-learning/refs/heads/master/datascience/datasets/scoobydoo.csv',
    }
)
print("response: ", response)

[1m[94m 
[2025-03-10 20:45:51][🚀 CREW 'CREW' STARTED, 70143AF7-BF89-467C-BFA1-54FFDB675E2E]: 2025-03-10 20:45:51.929298[00m
[1m[94m 
[2025-03-10 20:45:51][📋 TASK STARTED: 
        ANALYZE THE DATA PRESENT AT HTTPS://RAW.GITHUBUSERCONTENT.COM/KERNELSHREYAK/AI-ML-LEARNING/REFS/HEADS/MASTER/DATASCIENCE/DATASETS/SCOOBYDOO.CSV AND PERFORM TASKS NECCESSARY TO ANSWER THE USER QUERY: USE THIS DATA AND FIND THE TITLE HAVING MAXMIUM IMDB RATING. USE COLUMN: IMDB
    ]: 2025-03-10 20:45:51.967655[00m
[1m[94m 
[2025-03-10 20:45:51][🤖 AGENT 'COORDINATOR
' STARTED TASK]: 2025-03-10 20:45:51.973788[00m
[1m[95m# Agent:[00m [1m[92mCoordinator[00m
[95m## Task:[00m [92m
        Analyze the data present at https://raw.githubusercontent.com/kernelshreyak/ai-ml-learning/refs/heads/master/datascience/datasets/scoobydoo.csv and perform tasks neccessary to answer the user query: Use this data and find the title having maxmium imdb rating. Use column: imdb
    [00m
[1m[94m 
[2025-03-10 20:4

[1m[94m 
[2025-03-10 20:45:54][✅ LLM CALL COMPLETED]: 2025-03-10 20:45:54.917144[00m
[1m[94m 
[2025-03-10 20:45:54][🤖 TOOL USAGE STARTED: 'DELEGATE WORK TO COWORKER']: 2025-03-10 20:45:54.918091[00m
[1m[94m 
[2025-03-10 20:45:54][🤖 AGENT 'DATA DOWNLOADER
' STARTED TASK]: 2025-03-10 20:45:54.930678[00m
[1m[95m# Agent:[00m [1m[92mData Downloader[00m
[95m## Task:[00m [92mDownload the data file from the provided URL and provide the local file path.[00m
[1m[94m 
[2025-03-10 20:45:54][🤖 LLM CALL STARTED]: 2025-03-10 20:45:54.931189[00m
[1m[94m 
[2025-03-10 20:45:57][✅ LLM CALL COMPLETED]: 2025-03-10 20:45:57.137762[00m
[1m[94m 
[2025-03-10 20:45:57][🤖 TOOL USAGE STARTED: 'DOWNLOAD_DATA']: 2025-03-10 20:45:57.138793[00m
[1m[94m 
[2025-03-10 20:45:57][✅ TOOL USAGE FINISHED: 'DOWNLOAD_DATA']: 2025-03-10 20:45:57.150956[00m


[1m[95m# Agent:[00m [1m[92mData Downloader[00m
[95m## Using tool:[00m [92mdownload_data[00m
[95m## Tool Input:[00m [92m
"{\"data