### from https://github.com/tylerprogramming/ai/blob/main/crewai_series/day_02.ipynb

In [None]:
%pip install -U 'crewai[tools]'
%pip install -U crewai

### Trying to build a good agent

In [17]:
import json
from typing import Optional, Literal
from pydantic import BaseModel, ValidationError, model_validator
from datetime import datetime
from crewai import Agent, Task, Crew, Process, LLM
import os
from dotenv import load_dotenv

# Load environment variables (put your keys and config in ../.env)
load_dotenv(dotenv_path="../.env")

# NOTE: the base-URL variable is spelled OPEN_AI_BASE_URL (extra underscore),
# unlike OPENAI_API_KEY / OPENAI_MODEL. Keep the .env file in sync with that.
OPEN_AI_BASE_URL = os.getenv("OPEN_AI_BASE_URL")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_MODEL = os.getenv("OPENAI_MODEL")

# os.getenv returns None for unset variables. Fail here with a clear message
# instead of handing model=None to the LLM wrapper and failing later.
if not OPENAI_MODEL:
    raise RuntimeError(
        "OPENAI_MODEL is not set; define it in ../.env or in the environment"
    )

# Despite the variable name, the model is whatever OPENAI_MODEL selects.
# temperature=0.0 keeps the extraction as deterministic as the backend allows.
ollama_1b = LLM(
    model=OPENAI_MODEL,
    base_url=OPEN_AI_BASE_URL,
    api_key=OPENAI_API_KEY,
    temperature=0.0,
)

# Define allowed profile choices
ProfileChoice = Literal["driving-car", "cycling-regular", "foot-walking"]

class Payload(BaseModel):
    """Structured request extracted from a free-text travel sentence.

    The two date fields are kept as ISO 8601 strings; ``check_date_order``
    parses them only to verify that the start is not after the end.
    """

    origin_address: str
    destination_address: str
    buffer_distance: float  # the agent prompt asks for this value in km
    startinputdate: str  # ISO datetime string
    endinputdate: str    # ISO datetime string
    query_text: Optional[str] = "events"
    numevents: int
    profile_choice: Optional[ProfileChoice] = "driving-car"

    @model_validator(mode="after")  # Runs after parsing/validation
    def check_date_order(self) -> "Payload":
        """Reject payloads whose start date is later than their end date.

        Written as an instance method, which is the form Pydantic v2
        documents for ``mode="after"`` validators.

        Raises:
            ValueError: if either date is not a valid ISO string (raised by
                ``datetime.fromisoformat``), if one date is timezone-aware
                and the other is naive, or if start is later than end.
                Pydantic reports a ValueError as a validation failure.
        """
        start = datetime.fromisoformat(self.startinputdate)
        end = datetime.fromisoformat(self.endinputdate)
        try:
            out_of_order = start > end
        except TypeError as exc:
            # Naive vs. aware datetimes cannot be ordered. Pydantic does not
            # convert TypeError into ValidationError, so translate it here.
            raise ValueError(
                "start and end dates must both be naive or both be timezone-aware"
            ) from exc
        if out_of_order:
            raise ValueError("start date can't be later than end date")
        return self

# The only agent in the crew: turns one free-text sentence into the JSON
# fields of Payload. It has no tools and never delegates.
agent = Agent(
    role="Payload Extractor",
    backstory="Expert at precise structured extraction from unstructured text sentences.",
    # The fragments are joined with single spaces into one prompt string.
    goal=" ".join(
        [
            "Given an input sentence, extract ONLY the following fields as JSON:",
            "origin_address, destination_address, buffer_distance (in km),",
            "startinputdate (ISO 8601 date-time string for departure),",
            "endinputdate (ISO 8601 date-time string for arrival),",
            "query_text (search keywords found after phrases like 'about', 'on', or 'for', else default 'events'),",
            "numevents (integer),",
            "profile_choice (one of 'driving-car', 'cycling-regular', 'foot-walking'; default 'driving-car').",
            "You must parse these fields dynamically from the input sentence provided via 'input' variable.",
            "Do not return default or example values unless they appear explicitly in the input sentence.",
            "Output ONLY the JSON object, no additional commentary.",
        ]
    ),
    llm=ollama_1b,
    tools=[],
    allow_delegation=False,
    verbose=True,
)

# Single extraction task. "{input}" is the placeholder filled from the
# inputs passed to crew.kickoff(); the other braces belong to the JSON example.
# The example is kept as strictly valid JSON (no inline comments) so the model
# is not nudged into echoing a comment that would break JSON parsing.
task = Task(
    description=(
        "Extract the payload data from this input sentence dynamically:\n"
        "{input}\n\n"
        "Return ONLY a JSON object matching the following format (with profile_choice restricted to specific values):\n"
        '{\n'
        '  "origin_address": "Padova",\n'
        '  "destination_address": "Venice",\n'
        '  "buffer_distance": 6.0,\n'
        '  "startinputdate": "2025-09-03T06:00:00",\n'
        '  "endinputdate": "2025-09-07T15:00:00",\n'
        '  "query_text": "events",\n'
        '  "numevents": 13,\n'
        '  "profile_choice": "driving-car"\n'
        '}\n'
        'profile_choice must be exactly one of "driving-car", "cycling-regular" or "foot-walking".\n'
        "Use the values from the input sentence above, not the example values here. Extract query_text from phrases like 'about music', 'on theater', 'for workshop', etc."
    ),
    expected_output="A JSON object matching the Payload pydantic model with profile_choice and dynamic query_text.",
    agent=agent,
    # Ask CrewAI to produce JSON shaped like the Payload model.
    output_json=Payload,
)

# One agent, one task, run sequentially; built once and reused for every
# extract_payload() call.
crew = Crew(
    process=Process.sequential,
    tasks=[task],
    agents=[agent],
    verbose=True,
)

def extract_payload(sentence: str):
    """Run the crew on ``sentence`` and validate its output as a ``Payload``.

    Returns the validated ``Payload``. If the crew output does not satisfy
    the model, the validation error is printed and ``None`` is returned.
    """
    crew_output = crew.kickoff(inputs={"input": sentence})
    try:
        validated = Payload.model_validate(crew_output.to_dict())
    except ValidationError as err:
        print("Validation failed:", err)
        validated = None
    return validated

if __name__ == "__main__":
    # Demo request: every field is stated explicitly in the sentence.
    sentence = " ".join(
        [
            "I want to go from Vicenza to Trento and I will leave 4 October 2025 at 2 a.m.",
            "and I will arrive on 11 October at 5:00.",
            "Give me 11 events about workshop in a range of 6 km.",
            "Use cycling-regular transport.",
        ]
    )
    output = extract_payload(sentence)
    if not output:
        print("Failed to extract valid payload.")
    else:
        print(json.dumps(output.model_dump(), indent=2))


Output()

{
  "origin_address": "Vicenza",
  "destination_address": "Trento",
  "buffer_distance": 6.0,
  "startinputdate": "2025-10-04T02:00:00",
  "endinputdate": "2025-10-11T05:00:00",
  "query_text": "workshop",
  "numevents": 11,
  "profile_choice": "cycling-regular"
}


In [18]:
if __name__ == "__main__":
    # Second demo: the transport mode is given in plain words ("on foot").
    # Adjacent string literals are concatenated with nothing in between, so
    # each fragment must end with its own separator; otherwise the sentences
    # run together ("on footI will leave", "5:00.Give me").
    sentence = (
        "I want to go from trento to Bologna on foot. "
        "I will leave at 1 September 2025 at 16 and I will arrive on 11 October around at 5:00. "
        "Give me 31 events about sport in a range of 5 km. "
        )
    output = extract_payload(sentence)
    if output:
        # Use model_dump instead of dict for Pydantic v2
        print(json.dumps(output.model_dump(), indent=2))
    else:
        print("Failed to extract valid payload.")

Output()

{
  "origin_address": "trento",
  "destination_address": "Bologna",
  "buffer_distance": 5.0,
  "startinputdate": "2025-09-01T16:00:00",
  "endinputdate": "2025-10-11T17:00:00",
  "query_text": "sport",
  "numevents": 31,
  "profile_choice": "foot-walking"
}
