# Variables

In [1]:
%%capture

# Install the packages this notebook uses (LangChain OpenAI bindings, LangGraph,
# the swarm and supervisor add-ons, and ipykernel) into the kernel environment.
# The %%capture cell magic above hides pip's output.
# NOTE(review): versions are not pinned — consider pinning them so a fresh
# "Restart & Run All" reproduces the same environment.
%pip install langchain_openai langgraph langgraph-swarm langgraph-supervisor ipykernel

In [17]:
# %%capture

# # MCP LangChain
# %pip install langchain-mcp-adapters

In [2]:
# Imports

import os
from langchain_openai import ChatOpenAI

In [19]:
# APIs
#
# Credentials must never be written into the notebook: anything saved here ends
# up in version control and in every shared copy of the file. Each key is taken
# from the environment when already set, otherwise it is requested interactively
# (the typed value is not echoed and is not stored in the notebook).
# Any key that was ever committed in clear text should be revoked and rotated.
import getpass

for _secret_name in ("OPENAI_API_KEY", "LANGSMITH_API_KEY"):
    if not os.environ.get(_secret_name):
        os.environ[_secret_name] = getpass.getpass(f"{_secret_name}: ")

# Langsmith tracing switch (not a secret, so it can stay in the notebook).
os.environ["LANGSMITH_TRACING_V2"] = "true"

In [20]:
# Model
#
# Single chat model instance referenced by the agent and supervisor cells below.
# To try the larger variant, switch the name to "gpt-4.1-mini".
model_name = "gpt-4.1-nano"
model = ChatOpenAI(model=model_name)

In [21]:
# Avoid asyncio errors on Windows
#
# WindowsProactorEventLoopPolicy is only defined by asyncio on Windows, so
# referencing it unconditionally raises AttributeError on Linux/macOS kernels.
# The policy is therefore installed only when running on Windows and when the
# attribute is actually available in this Python version.
import asyncio
import sys

if sys.platform == "win32" and hasattr(asyncio, "WindowsProactorEventLoopPolicy"):
    asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())

In [22]:
# Config
#
# Run configuration: a freshly generated UUID4 string as the thread id and a
# fixed user id, both nested under the "configurable" key.
import uuid

config = {
    "configurable": dict(
        thread_id=str(uuid.uuid4()),
        user_id="1",
    ),
}

In [23]:
# Streaming for LangGraph

def print_stream(stream):
    """Print every update yielded by ``stream`` in a readable form.

    Args:
        stream: Iterable of ``(namespace, update)`` pairs, where ``update`` is a
            mapping from node name to that node's update. A node's update may be
            ``None`` (skipped), a dict, a tuple, or a list of dicts/tuples.

    Raises:
        ValueError: If a node's update is of any other type.

    For dict updates, the first key containing the text ``"messages"`` is looked
    up and the last message under it is shown via its ``pretty_print()`` method.
    When there is no such key, or the message list is empty, the raw dict is
    printed instead. Tuple updates are printed as-is.
    """
    for ns, update in stream:
        print(f"Namespace '{ns}'")
        for node, node_updates in update.items():
            if node_updates is None:
                continue

            # Normalise to a list so single and batched updates share one path.
            if isinstance(node_updates, (dict, tuple)):
                pending = [node_updates]
            elif isinstance(node_updates, list):
                pending = node_updates
            else:
                raise ValueError(node_updates)

            # A distinct loop name avoids shadowing the outer `node_updates`.
            for item in pending:
                print(f"Update from node '{node}'")
                if isinstance(item, tuple):
                    print(item)
                    continue

                messages_key = next((k for k in item if "messages" in k), None)
                messages = item[messages_key] if messages_key is not None else None
                if messages:
                    messages[-1].pretty_print()
                else:
                    # No messages key, or an empty message list: indexing [-1]
                    # would raise IndexError, so fall back to the raw update.
                    print(item)

        print("\n\n")

    print("\n===\n")

# Supervisor

In [34]:
from langgraph_supervisor import create_supervisor
from pydantic import BaseModel, Field
from langchain_core.runnables import RunnableConfig
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import create_react_agent
from langgraph_swarm import create_handoff_tool, create_swarm
from langchain_core.tools import tool
import json
from langchain_openai import ChatOpenAI
from pathlib import Path


def make_prompt(base_prompt: str):
    """Build a prompt callable that puts ``base_prompt`` in front of the chat.

    Args:
        base_prompt: Instruction text used as the system message content.

    Returns:
        A function ``(state, config) -> list`` returning a new list whose first
        element is a system message containing ``base_prompt`` followed by a
        newline, and whose remaining elements are the entries of
        ``state["messages"]`` in their original order.
    """
    def _prompt(state: dict, config: RunnableConfig) -> list:
        # `config` stays in the signature for callers but is not read: the
        # previous unused `user_id` lookup only added a KeyError risk when
        # "configurable" was absent.
        system_message = {"role": "system", "content": f"{base_prompt}\n"}
        return [system_message, *state["messages"]]

    return _prompt

import os
import json
import uuid

# Tool handed to the extraction agents below.
# The docstring is kept exactly as written (in English: "Returns the full
# Markdown of the invoice") because it is presumably what the @tool decorator
# exposes to the model as the tool description — TODO confirm before editing it.
# NOTE(review): the function returns its `document` argument unchanged, so the
# caller has to supply the invoice text itself. The recorded run at the end of
# this notebook shows the agent calling it with a placeholder string instead.
# Confirm whether the document was meant to come from the run configuration.
@tool
def get_document(document: str) -> str:
    """Devuelve el Markdown completo de la factura"""
    return document



# Agent "date_bill": extracts the invoice issue date.
# The prompt is assembled line by line; an empty entry yields a blank line.
_date_bill_prompt = "\n".join(
    (
        "### Role",
        "You are an expert in extracting invoice issue dates.",
        "",
        "### Objective",
        "Identify and extract the issue date from the provided invoice document.",
        "",
        "### Instructions",
        "- Focus solely on the invoice issue date.",
        "- Format the date as 'DD-MM-YYYY'.",
        "- Ignore any other dates present in the document.",
        "",
        "### Output Format",
        "{",
        '  "Date": "DD-MM-YYYY"',
        "}",
    )
)

s_date_bill = create_react_agent(
    model,
    tools=[get_document],
    prompt=make_prompt(_date_bill_prompt),
    name="date_bill",
)


# Agent "number_bill": extracts the invoice number.
# The prompt is assembled line by line; an empty entry yields a blank line.
_number_bill_prompt = "\n".join(
    (
        "### Role",
        "You are an expert in identifying invoice numbers.",
        "",
        "### Objective",
        "Locate and extract the invoice number from the provided document.",
        "",
        "### Instructions",
        "- Look for identifiers labeled as 'Invoice No.', 'Invoice Number', or similar.",
        "- Exclude any numbers related to clients or customers.",
        "",
        "### Output Format",
        "{",
        '  "InvoiceNumber": "<InvoiceNumber>"',
        "}",
    )
)

s_number_bill = create_react_agent(
    model,
    tools=[get_document],
    prompt=make_prompt(_number_bill_prompt),
    name="number_bill",
)


# Agent "vehicle_plate_bill": extracts vehicle license plates, if any.
# The prompt is assembled line by line; an empty entry yields a blank line.
_vehicle_plate_bill_prompt = "\n".join(
    (
        "### Role",
        "You are an expert in recognizing European vehicle license plates.",
        "",
        "### Objective",
        "Identify and extract any vehicle license plates present in the document.",
        "",
        "### Instructions",
        "- Recognize the following formats:",
        "  - 1234 ABC",
        "  - 1234-ABC",
        "  - 1234ABC",
        "  - AB 1234 C",
        "  - AB1234C",
        "  - AB 1234 CU",
        "  - AB1234CU",
        "- If a license plate is found, return it; otherwise, return null.",
        "",
        "### Output Format",
        "{",
        '  "VehiclePlate": "<VehiclePlate>" // or null',
        "}",
    )
)

s_vehicle_plate_bill = create_react_agent(
    model,
    tools=[get_document],
    prompt=make_prompt(_vehicle_plate_bill_prompt),
    name="vehicle_plate_bill",
)


# Agent "total_amount_bill": extracts the total amount payable.
# The prompt is assembled line by line; an empty entry yields a blank line.
_total_amount_bill_prompt = "\n".join(
    (
        "### Role",
        "You are an expert in extracting total amounts from invoices.",
        "",
        "### Objective",
        "Identify and extract the total amount due from the invoice.",
        "",
        "### Instructions",
        "- Focus on the final total amount payable.",
        "- Format the amount with a comma as the decimal separator and include the '€' symbol.",
        "",
        "### Output Format",
        "{",
        '  "TotalAmount": "<Amount> €"',
        "}",
    )
)

s_total_amount_bill = create_react_agent(
    model,
    tools=[get_document],
    prompt=make_prompt(_total_amount_bill_prompt),
    name="total_amount_bill",
)


# Agent "company_bill": extracts the issuing company's name and NIF.
# The prompt is assembled line by line; an empty entry yields a blank line.
_company_bill_prompt = "\n".join(
    (
        "### Role",
        "You are an expert in identifying issuing companies on invoices under Spanish law.",
        "",
        "### Objective",
        "Extract the name and NIF of the company that issued the invoice.",
        "",
        "### Instructions",
        "- Identify the company that provided, leased, or rented the vehicle.",
        "- Exclude the following:",
        "  - Company Name: 'DEABRU KALEA FILMEAK, S.L.U.'",
        "  - NIF: 'B75587808'",
        "- If the issuing company is not found, return null.",
        "",
        "### Output Format",
        "{",
        '  "CompanyName": "<CompanyName>",',
        '  "NIF": "<NIF>"',
        "}",
    )
)

s_company_bill = create_react_agent(
    model,
    tools=[get_document],
    prompt=make_prompt(_company_bill_prompt),
    name="company_bill",
)


# Agent "report_bill": compiles the extracted values into the final report.
# `tools` is a required argument of create_react_agent; omitting it makes the
# call fail before the agent is built, so an explicit (empty) list is passed.
# NOTE(review): the prompt refers to a 'generate_final_json' tool that is not
# defined in the visible part of this notebook. Add it to `tools` once it
# exists — until then this agent has no tool to call.
s_report_bill = create_react_agent(
    model,
    tools=[],
    prompt=make_prompt(
        "### Role\n"
        "You are responsible for compiling invoice data into a final report.\n\n"
        "### Objective\n"
        "Use the provided values to generate a final JSON report.\n\n"
        "### Instructions\n"
        "- Utilize the 'generate_final_json' tool with the following fields:\n"
        "  - Date\n"
        "  - InvoiceNumber\n"
        "  - VehiclePlate\n"
        "  - TotalAmount\n"
        "  - CompanyName\n"
        "  - NIF\n"
        "- Call the tool exactly once.\n"
        "- Return only the file path provided by the tool.\n\n"
        "### Output Format\n"
        "<file_path>"
    ),
    name="report_bill",
)


# Agent "validate_report_bill": validates and corrects the JSON report.
# `tools` is a required argument of create_react_agent; with the original list
# commented out the call had no `tools` at all, so an explicit (empty) list is
# passed to let the agent be built.
# NOTE(review): the prompt relies on 'read_json_file' and 'update_json_file',
# which are not defined in the visible part of this notebook. Restore the
# commented list below once they exist.
s_validate_report_bill = create_react_agent(
    model,
    # tools=[read_json_file, update_json_file],
    tools=[],
    prompt=make_prompt(
        "### Role\n"
        "You are responsible for validating and correcting invoice JSON reports.\n\n"
        "### Objective\n"
        "Ensure the JSON file at the provided path adheres to the required format and content standards.\n\n"
        "### Instructions\n"
        "1. Read the JSON content using 'read_json_file'.\n"
        "2. Validate and correct the following:\n"
        "   - 'Date' should be in 'DD-MM-YYYY' format.\n"
        "   - Keys must be exactly: Date, InvoiceNumber, VehiclePlate, TotalAmount, CompanyName, NIF.\n"
        "   - 'TotalAmount' must use a comma as the decimal separator and include the '€' symbol.\n"
        "3. If corrections are made, update the file using 'update_json_file'.\n"
        "4. Return the final JSON content without additional text or tool calls.\n\n"
        "### Output Format\n"
        "{\n"
        "  \"Date\": \"DD-MM-YYYY\",\n"
        "  \"InvoiceNumber\": \"<InvoiceNumber>\",\n"
        "  \"VehiclePlate\": \"<VehiclePlate>\",\n"
        "  \"TotalAmount\": \"<Amount> €\",\n"
        "  \"CompanyName\": \"<CompanyName>\",\n"
        "  \"NIF\": \"<NIF>\"\n"
        "}"
    ),
    name="validate_report_bill",
)


# Supervisor over the seven agents defined above, using the same chat model.
# The prompt is an indented triple-quoted literal passed through make_prompt,
# which does not strip it: the leading newline and the indentation of every
# line are part of the text sent to the model.
# NOTE(review): step 3 of the prompt asks agents to save results "using their
# respective tools", but the extraction agents in this cell are only given
# get_document — confirm which saving tools were intended.
supervisor = create_supervisor(
    [s_date_bill, s_number_bill, s_vehicle_plate_bill, s_total_amount_bill, s_company_bill, s_report_bill, s_validate_report_bill],
    model=model,
    prompt=make_prompt(
        """
        ### Role
        You are a supervisor coordinating the work of several specialized agents to extract and validate invoice information.

        ### Objective
        Ensure the accurate extraction of the following fields from an invoice:
        - Date
        - InvoiceNumber
        - VehiclePlate
        - TotalAmount
        - CompanyName
        - NIF

        ### Instructions
        1. **Planning**: Before initiating any agent calls, plan the sequence of actions required to extract all necessary information.
        2. **Agent Coordination**:
            - Assign each agent to its specific task:
                - `date_bill`: Extract the invoice date.
                - `number_bill`: Extract the invoice number.
                - `vehicle_plate_bill`: Extract the vehicle plate number.
                - `total_amount_bill`: Extract the total amount.
                - `company_bill`: Extract the company name and NIF.
            - Do not ask agents to perform tasks outside their designated responsibilities.
        3. **Data Saving**: Instruct agents to save their results using their respective tools, even if the result is `null`.
        4. **Report Generation**:
            - Once all individual data points are collected and saved, call the `report_bill` agent to compile the data into a single JSON file.
        5. **Validation**:
            - After the report is generated, call the `validate_report_bill` agent with the path to the JSON file to ensure all data is correctly formatted and complete.
        6. **Final Output**:
            - Present only the validated JSON content to the user, without any additional commentary or text.

        ### Agent Descriptions
        - `date_bill`: Extracts the invoice date.
        - `number_bill`: Extracts the invoice number.
        - `vehicle_plate_bill`: Extracts the vehicle plate number.
        - `total_amount_bill`: Extracts the total amount.
        - `company_bill`: Extracts the company name and NIF.
        - `report_bill`: Compiles all extracted data into a single JSON file.
        - `validate_report_bill`: Validates and corrects the compiled JSON file.

        ### Output Format
        The final JSON should have the following structure:
        {
            "Date": "DD-MM-YYYY",
            "InvoiceNumber": "InvoiceNumber",
            "VehiclePlate": "VehiclePlate",
            "TotalAmount": "TotalAmount",
            "CompanyName": "CompanyName",
            "NIF": "NIF"
        }

        ### Final Notes
        - Do not terminate your process until all fields are accurately extracted, compiled, and validated.
        - Ensure all agents perform only their designated tasks.
        - Maintain a clear and structured approach throughout the process.
        """
    )
)


# Compile the supervisor workflow into the app that the run cell streams from.
# compile() is called with no arguments, so no checkpointer is passed here
# (MemorySaver is imported in this cell but not used in the visible code).
s_app = supervisor.compile()

In [35]:
# Run configuration for this invocation. Supplying the invoice text through
# the configuration is currently disabled:
#     "document": md_content,
config = {"configurable": {"user_id": "mesmo"}}

# Initial question sent to the supervisor.
initial_messages = [
    {
        "role": "user",
        "content": "Cuál es el número, fecha, matrícula, importe y empresa (y NIF) de la factura?",
    }
]

# subgraphs=True also streams the updates produced inside each agent's graph,
# so the internal hand-offs are visible in the printed output.
event_stream = s_app.stream(
    {"messages": initial_messages},
    config,
    subgraphs=True,
)
print_stream(event_stream)

Namespace '('supervisor:5426dcaf-4bb6-8cbd-a960-7df1e1eb1902',)'
Update from node 'agent'
Name: supervisor
Tool Calls:
  transfer_to_date_bill (call_RhVAI6ijMTAjQzeLewo3GZxG)
 Call ID: call_RhVAI6ijMTAjQzeLewo3GZxG
  Args:



Namespace '()'
Update from node 'supervisor'
Name: transfer_to_date_bill

Successfully transferred to date_bill



Namespace '('date_bill:45d86808-1a44-0342-0201-fa83ea01a36d',)'
Update from node 'agent'
Name: date_bill
Tool Calls:
  get_document (call_YmVzcjmKM1NoEke1ba180MJb)
 Call ID: call_YmVzcjmKM1NoEke1ba180MJb
  Args:
    document: <Insert the invoice document here>
  get_document (call_GyqmmVFwR9VLHnXT1h1kMxIn)
 Call ID: call_GyqmmVFwR9VLHnXT1h1kMxIn
  Args:
    document: <Insert the invoice document here>
  get_document (call_jk9TQ7iTXwY4NOQ8QuEofimt)
 Call ID: call_jk9TQ7iTXwY4NOQ8QuEofimt
  Args:
    document: <Insert the invoice document here>
  get_document (call_aWlxlClcQ7jDomYUPyvb2CVV)
 Call ID: call_aWlxlClcQ7jDomYUPyvb2CVV
  Args:
    document: <