In [1]:
# Import path configuration
from pathlib import Path
import sys
import logging
from dotenv import load_dotenv

from langchain_core.messages import (
    AIMessage,
    AnyMessage,
    HumanMessage,
    SystemMessage,
    ToolMessage,
)
# Load variables from a .env file into the process environment before any
# client object is created.
# NOTE(review): presumably this is where the model API key comes from — confirm.
load_dotenv()
# Module-level logger named after the running module (__name__).
logger  = logging.getLogger(__name__)

def _find_project_root(start: Path) -> Path:
    """Return the nearest directory at or above ``start`` holding ``pyproject.toml``.

    ``start`` is resolved to an absolute path before the walk begins. Without
    that step a relative path such as ``Path(".")`` exposes no parents, so the
    search would inspect only the literal path and never its real ancestors.

    Args:
        start: Directory from which the upward search begins.

    Returns:
        The first directory, walking from ``start`` towards the filesystem
        root, that contains a ``pyproject.toml`` entry.

    Raises:
        RuntimeError: If no directory on that walk contains ``pyproject.toml``.
    """
    origin = start.resolve()
    for candidate in (origin, *origin.parents):
        if (candidate / "pyproject.toml").exists():
            return candidate
    raise RuntimeError("Project root not found")

# Locate the project root by walking up from the kernel's current working
# directory, so the notebook can be started from any folder inside the project.
ROOT = _find_project_root(Path.cwd())
# Source directory whose packages are imported by the following cells.
SRC = ROOT / "src"

# Put SRC at the front of sys.path; the membership check keeps re-running this
# cell from inserting the same entry again.
if str(SRC) not in sys.path:
    sys.path.insert(0, str(SRC)) 

In [2]:

# import inspect
from app.utils.test import print_something
from app.adapters.invoice_parsing_agent import InvoiceParsingAgent, MODEL
from app.config import configure_logging
# print(inspect.getfile(InvoiceParsingAgent))

In [3]:
# Apply the project's logging setup (imported from app.config).
# NOTE(review): the recorded run further down shows DEBUG-level httpcore records,
# so this presumably enables DEBUG on the root logger — confirm in app.config.
configure_logging()

In [4]:
from pypdf import PdfReader

def pdf_to_text(pdf_path: str, max_chars: int = 40_000) -> str:
    """Extract the text of a PDF page by page, capped at ``max_chars`` characters.

    Every page contributes a ``--- Page N ---`` separator (N is 1-based)
    followed by the page's extracted text. Pages are read in order and reading
    stops as soon as the accumulated text, separators included, reaches
    ``max_chars``; later pages are not extracted.

    Args:
        pdf_path: Path of the PDF file, handed to ``PdfReader``.
        max_chars: Maximum length of the returned string. Must not be negative.

    Returns:
        The concatenated page chunks, truncated to at most ``max_chars``
        characters.

    Raises:
        ValueError: If ``max_chars`` is negative. A negative value would
            otherwise be used as a slice bound and silently drop characters
            from the end of the first page.
    """
    if max_chars < 0:
        raise ValueError(f"max_chars must be non-negative, got {max_chars}")

    reader = PdfReader(pdf_path)
    chunks = []
    # Running length of everything collected so far; avoids re-summing every
    # chunk on each page.
    collected = 0
    for page_number, page in enumerate(reader.pages, start=1):
        # extract_text() may yield None/empty for a page; fall back to "".
        page_text = page.extract_text() or ""
        chunk = f"\n\n--- Page {page_number} ---\n{page_text}"
        chunks.append(chunk)
        collected += len(chunk)
        if collected >= max_chars:
            break
    # The last chunk can overshoot the cap, so trim to the exact limit.
    return "".join(chunks)[:max_chars]

In [5]:
# Extract the text of the sample invoice. The path is relative, so it resolves
# against the kernel's current working directory, not against ROOT.
# NOTE(review): the rendered output of this cell contains personal data from the
# invoice (customer name, DNI, address); clear the output before sharing or committing.
invoice_text = pdf_to_text("./invoices/invoice01.pdf")
invoice_text

'\n\n--- Page 1 ---\nIntegratel Perú S.A.A.   |   R.U.C. 20100017491   |   Jr. Domingo Martínez Luján N° 1130   |   Lima \x96 Lima - SurquilloPágina 1/3\nMovistar HogarRecibo EneroESTHER RENE PEREZ CASTILLODNI: 06198154 ---..Mz.S/Lt.14URBANIZACIONRESIDENCIALLOSJAZMINESDELNARANJAL, LOS OLIVOS, LIMA, LIMACuenta financiera: 603306704Teléfonos asociados: 15233543\nTotal a pagarS/159.90Último día de pago: 05/02N° Fijo / Cód. pago: 15233543\n Pagaatiempotureciboymantentesiempreconectado.No esperes hasta el último día de pago.\nCiclo de facturación - Enero 202618/01 05/02Fecha deEmisiónÚltimo díade pago\n¡AHORRA!¡PagatureciboMovistardeformadigital,evitacomisionesytenmásbeneficios!esrápido,fácilyseguro.Ahorapuedes pagar con YAPE\nResumen del recibo - Nº S5AA-0077300973\nTrio Mov. Voz Int. Full HD RA AG24S/ 159.90\nDescuentos y Bonificaciones Inafectos S/ 0.00\nRedondeo S/ 0.00Devoluciones S/ 0.00Débitos S/ 0.00Deuda pasada S/ 0.00Total a pagarS/ 159.90\n\n--- Page 2 ---\nMovistar Hogar\nIntegr

In [None]:

from langchain_google_genai import ChatGoogleGenerativeAI
from langgraph.checkpoint.sqlite import SqliteSaver

# memory = SqliteSaver.from_conn_string(":memory:")
# System prompt for the invoice parser. A trailing backslash joins a line with
# the next one inside the string; the field list deliberately has none, so each
# field sits on its own line in the text the model receives.
prompt = """You are a smart invoice parsing assistant. \
You will be provided with text extracted from an invoice. \
The original invoice may have been in PDF or image format, \
but you will only receive the text extracted from the original format. \
What you will return is a JSON formatted string with the following information from the \
extracted invoice's text:
    - date: The date of the invoice with format DD-MM-YYYY or MM-YYYY if day is not present
    - expires: The expiry date of the invoice (for payment)
    - amount: The amount to pay (float-type, IMPORTANT: do not round!)
IMPORTANT: Make sure the output is a valid JSON string and nothing else, just the \
invoice's requested information in JSON format.
"""
# Chat model client; the model name comes from the MODEL constant imported from
# app.adapters.invoice_parsing_agent.
model = ChatGoogleGenerativeAI(model=MODEL)

# Conversation/thread identifier handed to the agent in the next cell.
thread_id = 1

In [7]:
# Pre-bind the result name so it exists even if the block below raises.
agent_state = None
# In-memory SQLite checkpointer; it is closed when the with-block exits.
with SqliteSaver.from_conn_string(":memory:") as memory:
    # No tools are passed (empty list); `prompt` is supplied as the system message.
    abot = InvoiceParsingAgent(model, [], thread_id=thread_id,system=prompt, checkpointer=memory)
    # NOTE(review): the recorded run of this cell ended with
    # KeyError: 'max_revisions'. The cause is not visible in this notebook —
    # presumably the agent's state expects a 'max_revisions' entry; verify in
    # InvoiceParsingAgent.
    # NOTE(review): if query_stream returns a lazy iterator, it must be consumed
    # inside this with-block, before the checkpointer is closed — confirm.
    agent_state = abot.query_stream(invoice_text)

2026-02-02 10:37:38,371::INFO::google_genai.models::AFC is enabled with max remote calls: 10.
2026-02-02 10:37:38,373::DEBUG::httpcore.connection::connect_tcp.started host='generativelanguage.googleapis.com' port=443 local_address=None timeout=None socket_options=None
2026-02-02 10:37:38,451::DEBUG::httpcore.connection::connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x7661807f1d90>
2026-02-02 10:37:38,451::DEBUG::httpcore.connection::start_tls.started ssl_context=<ssl.SSLContext object at 0x76617b01c710> server_hostname='generativelanguage.googleapis.com' timeout=None
2026-02-02 10:37:38,495::DEBUG::httpcore.connection::start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x76617b0eab50>
2026-02-02 10:37:38,495::DEBUG::httpcore.http11::send_request_headers.started request=<Request [b'POST']>
2026-02-02 10:37:38,496::DEBUG::httpcore.http11::send_request_headers.complete
2026-02-02 10:37:38,497::DEBUG::httpcore.http11::send_reques

KeyError: 'max_revisions'