In [None]:
# Import path configuration
import json
import logging
import sys
from pathlib import Path

from dotenv import load_dotenv
from langchain_core.messages import (
    AIMessage,
    AnyMessage,
    HumanMessage,
    SystemMessage,
    ToolMessage,
)
# Load variables from a .env file into the process environment before later cells read them.
load_dotenv()
# Logger for this notebook, named after the current module.
logger  = logging.getLogger(__name__)

def _find_project_root(start: Path) -> Path:
    """Return the nearest directory at or above ``start`` containing ``pyproject.toml``.

    ``start`` itself is checked first, then each of its ancestors from the
    closest to the farthest.

    Args:
        start: Directory from which the upward search begins.

    Returns:
        The first directory in the search order that holds a ``pyproject.toml``.

    Raises:
        RuntimeError: If neither ``start`` nor any ancestor contains the file.
    """
    candidates = (start, *start.parents)
    # Lazily evaluated, so the search stops at the first (closest) match.
    root = next(
        (directory for directory in candidates
         if (directory / "pyproject.toml").exists()),
        None,
    )
    if root is None:
        raise RuntimeError("Project root not found")
    return root

# Resolve the project root by searching upward from the current working directory.
ROOT = _find_project_root(Path.cwd())
SRC = ROOT / "src"

# Put src/ at the front of the import path, at most once so re-running this cell
# does not add duplicates. Presumably this is what makes the `app` package
# imported in later cells resolvable — confirm against the project layout.
if str(SRC) not in sys.path:
    sys.path.insert(0, str(SRC)) 

In [None]:

# import inspect
from app.utils.test import print_something
from app.adapters.invoice_parsing_agent import InvoiceParsingAgent, MODEL
from app.config import configure_logging
# print(inspect.getfile(InvoiceParsingAgent))

In [None]:
# Apply the project's logging setup (configure_logging is imported from app.config).
configure_logging()

In [None]:
from pypdf import PdfReader

def pdf_to_text(pdf_path: str, max_chars: int = 40_000) -> str:
    """Extract the text of a PDF page by page, capped at ``max_chars`` characters.

    Each page's text is preceded by a ``--- Page N ---`` marker (N is 1-based).
    A page for which ``extract_text()`` returns nothing contributes only its
    marker. Pages are read until the accumulated length reaches ``max_chars``;
    later pages are not extracted at all.

    Args:
        pdf_path: Path of the PDF file, passed straight to ``PdfReader``.
        max_chars: Upper bound on the length of the returned string.

    Returns:
        The concatenated page markers and page texts, truncated to
        ``max_chars`` characters.
    """
    reader = PdfReader(pdf_path)
    chunks = []
    # Running length of everything collected so far; avoids re-summing every
    # chunk on every page (which made the loop quadratic in the page count).
    total_chars = 0
    for page_number, page in enumerate(reader.pages, start=1):
        page_text = page.extract_text() or ""
        chunk = f"\n\n--- Page {page_number} ---\n{page_text}"
        chunks.append(chunk)
        total_chars += len(chunk)
        if total_chars >= max_chars:
            break
    # The last chunk may overshoot the cap, so trim the joined text.
    return "".join(chunks)[:max_chars]

In [None]:
# Extract the sample invoice's text (path is relative to the current working
# directory; length is capped by pdf_to_text's default max_chars).
invoice_text = pdf_to_text("./invoices/invoice01.pdf")
# Bare last expression so the notebook displays the extracted text.
invoice_text

In [None]:

from langchain_google_genai import ChatGoogleGenerativeAI
from langgraph.checkpoint.sqlite import SqliteSaver

# memory = SqliteSaver.from_conn_string(":memory:")
# System prompt for the invoice parsing agent. Trailing backslashes join the
# prose into single lines; the field list is kept one bullet per line so each
# field's description stays attached to the right key.
prompt = """You are a smart invoice parsing assistant. \
You will be provided with text extracted from an invoice. \
The original invoice may have been in PDF or image format, \
but you will only receive the text extracted from the original format. \
What you will return is a JSON formatted string with the following information from the \
extracted invoice's text:
    - date: The date of the invoice with format DD-MM-YYYY or MM-YYYY if day is not present
    - expires: The expiry date of the invoice (for payment)
    - amount: The amount to pay (float-type, IMPORTANT: do not round!)
IMPORTANT: Make sure the output is a valid JSON string and nothing else, just the \
invoice's requested information in JSON format.
"""
# Chat model handed to the agent; MODEL is imported from app.adapters.invoice_parsing_agent.
model = ChatGoogleGenerativeAI(model=MODEL)

# Passed to InvoiceParsingAgent as `thread_id`; presumably identifies the
# conversation thread for the checkpointer — TODO confirm against the agent.
thread_id = 1

In [None]:
# Bind the name up front so it exists even if the block below raises.
agent_state = None
# Checkpointer backed by an in-memory SQLite database, scoped to this `with` block.
with SqliteSaver.from_conn_string(":memory:") as memory:
    # Second positional argument is an empty list (presumably the agent's tool
    # list — confirm); `prompt` is supplied as the `system` argument.
    abot = InvoiceParsingAgent(model, [], thread_id=thread_id,system=prompt, checkpointer=memory)
    # NOTE(review): later cells read `agent_state.values[...]`, so query_stream
    # is expected to return an object exposing `.values` — confirm.
    agent_state = abot.query_stream(invoice_text)

In [None]:
# Display the object returned by query_stream for inspection.
agent_state

In [None]:
# String stored under 'invoice_json' in the agent state.
invoice_json = agent_state.values['invoice_json']

# Parse it into a Python object; json.loads raises json.JSONDecodeError when
# the string is not valid JSON.
invoice = json.loads(invoice_json)
# Bare last expression so the notebook displays the parsed invoice.
invoice
