# Project Progress

## Initial Imports

In [1]:
import sys
from pathlib import Path

root_dir = Path().absolute().parent.parent
sys.path.append(str(root_dir))

In [2]:
# Warning control
import json
import os
import warnings

import yaml
from crewai import Agent, Crew, Task
from dotenv import load_dotenv

warnings.filterwarnings('ignore')

load_dotenv()

True

## Load OpenAI Model

In [3]:
os.environ['OPENAI_MODEL_NAME'] = 'gpt-4o-mini'

## Loading Tasks and Agents YAML files

In [4]:
# Define file paths for YAML configurations
files = {
    'agents': 'config/agents.yaml',
    'tasks': 'config/tasks.yaml'
}

# Load configurations from YAML files
configs = {}
for config_type, file_path in files.items():
    with open(file_path, 'r') as file:
        configs[config_type] = yaml.safe_load(file)

# Assign loaded configurations to specific variables
agents_config = configs['agents']
tasks_config = configs['tasks']

## Trying out the ScrapeWebsiteTool

In [5]:
# from crewai_tools import ScrapeWebsiteTool

# # To enable scrapping any website it finds during it's execution
# tool = ScrapeWebsiteTool()

# # Initialize the tool with the website URL, 
# # so the agent can only scrap the content of the specified website
# tool = ScrapeWebsiteTool(website_url='https://www.siegessaeule.de/en/events/?date=2025-01-21')

# # Extract the text from the site
# text = tool.run()
# print(text)

## Try out simple data collectionagent

In [6]:

from crewai_tools import SpiderTool

# Agents
# event_scraper_agent = Agent(
#     config=agents_config['event_data_collection_agent'],
#     verbose=True,
#     tools=[
#         ScrapeWebsiteTool(website_url='https://www.siegessaeule.de/en/events/?date=2025-01-21')
#     ]
# )
spider_tool = SpiderTool(
  # api_key=os.environ['SPIDER_API_KEY'],
  url='https://www.siegessaeule.de/en/events/?date=2025-01-21',
  mode="crawl",
  params = {
    'limit': 8,  # Maximum number of pages to crawl
    'depth': 1,  # Crawl depth
    'cache': True  # Whether to cache the results
  },
)

event_spider_agent = Agent(
    config=agents_config['event_spider_agent'],
    verbose=True,
    tools=[spider_tool]
)


# Tasks
# data_collection = Task(
#   config=tasks_config['data_collection'],
#   agent=event_scraper_agent
# )
crawl_event_list_page = Task(
  config=tasks_config['crawl_event_list_page'],
  agent=event_spider_agent
)

crew = Crew(
  agents=[event_spider_agent],
  tasks=[crawl_event_list_page],
  verbose=True
)

# Set inputs and run the crew
# inputs = {
#   'spider_url': 'https://www.siegessaeule.de/en/events/?date=2025-01-21'
# }

# Run the crew
result = crew.kickoff()
print(result)


[1m[95m# Agent:[00m [1m[92mCrawl provided websites to find Event detail pages[00m
[95m## Task:[00m [92mUse the SpiderTool to crawl the url to collect basic information on events with their event detail page urls.
[00m
[91m 

Action 'None' don't exist, these are the only available Actions:
Tool Name: Spider scrape & crawl tool
Tool Arguments: {'url': {'description': 'Website URL', 'type': 'str'}, 'params': {'description': 'Set additional params. Options include:\n- `limit`: Optional[int] - The maximum number of pages allowed to crawl per website. Remove the value or set it to `0` to crawl all pages.\n- `depth`: Optional[int] - The crawl limit for maximum depth. If `0`, no limit will be applied.\n- `metadata`: Optional[bool] - Boolean to include metadata or not. Defaults to `False` unless set to `True`. If the user wants metadata, include params.metadata = True.\n- `query_selector`: Optional[str] - The CSS query selector to use when extracting content from the markup.\n', 'typ

In [7]:
import pandas as pd

costs = (crew.usage_metrics.prompt_tokens + crew.usage_metrics.completion_tokens) \
    * 0.150 / 1_000_000
print(f"Total costs: ${costs:.4f}")

# Convert UsageMetrics instance to a DataFrame
df_usage_metrics = pd.DataFrame([crew.usage_metrics.dict()])
df_usage_metrics

Total costs: $0.0008


Unnamed: 0,total_tokens,prompt_tokens,cached_prompt_tokens,completion_tokens,successful_requests
0,5229,4953,2432,276,4


## Creating Custom Tools

## Create Crew, Agents and Tasks

## Kickoff Crew

## Usage Metrics and Costs

Let’s see how much it would cost each time if this crew runs at scale.

## Report

In [8]:
from IPython.display import Markdown

markdown  = result.raw
Markdown(markdown)

{"events":[{"title":"Sample Event 1","date":"2023-10-01","description":"Description for Sample Event 1","detail_url":"https://example.com/events/sample-event-1"},{"title":"Sample Event 2","date":"2023-10-05","description":"Description for Sample Event 2","detail_url":"https://example.com/events/sample-event-2"},{"title":"Sample Event 3","date":"2023-10-10","description":"Description for Sample Event 3","detail_url":"https://example.com/events/sample-event-3"}]}