In [None]:
!pip install pymupdf4llm

In [None]:
# %%
import logging
import os

import openai
import pymupdf4llm
from dotenv import load_dotenv
from openai import OpenAI
from rich import print

logging.basicConfig(level=logging.WARNING)
load_dotenv()

# %%
# Chat model name handed to the completion request further down.
COMPLETIONS_MODEL = "gpt-4o"

# The key is looked up under the lowercase `openai_api_key` variable, after
# load_dotenv() has already run; the lookup yields None when it is unset.
api_key = os.environ.get("openai_api_key")
client = OpenAI(api_key=api_key)

# %%
# Define models for Structured Outputs
from pydantic import BaseModel


class Item(BaseModel):
    # One line item of a receipt; used as the element type of `Receipt.items`.
    #
    # Documented with `#` comments rather than a docstring on purpose: pydantic
    # copies a model's docstring into its generated JSON schema as
    # `description`, which would alter the schema sent with the request.
    product: str  # product name/description as text
    price: float  # price as a float — NOTE(review): unit price vs. line total is not specified; confirm
    quantity: int  # whole-number quantity for this line


class Receipt(BaseModel):
    # Top-level structure requested from the model: this class is passed as
    # `response_format` to the completion call below, so its fields define
    # what gets extracted from the receipt text.
    #
    # Documented with `#` comments rather than a docstring on purpose: pydantic
    # copies a model's docstring into its generated JSON schema as
    # `description`, which would alter the schema sent with the request.
    total_paid_amount: float  # total amount paid — presumably in the receipt's own currency; no currency field exists
    supplier: str  # supplier / merchant name as text
    items: list[Item]  # the individual line items, each parsed as an `Item`
    order_reference_number: str  # kept as a string, so non-numeric references are accepted


# Convert the receipt PDF to markdown text so it can be sent as a plain chat
# message. The path is relative to the current working directory.
md_text = pymupdf4llm.to_markdown("../data/raw/transaction/receipt.pdf")

# Send request using Structured Outputs/Response Format: passing the pydantic
# class as `response_format` makes the SDK return the reply already parsed
# into a `Receipt` instance on `message.parsed`.
completion = client.beta.chat.completions.parse(
    model=COMPLETIONS_MODEL,
    messages=[
        {"role": "system", "content": "Extract the information from the receipt"},
        {"role": "user", "content": md_text},
    ],
    response_format=Receipt,
)

message = completion.choices[0].message
output = message.parsed
if output is None:
    # `parsed` is None when the model refuses to answer; the reason (if any)
    # is on `message.refusal`. Fail with that reason instead of letting a
    # validation error on `None` hide it.
    reason = message.refusal or "no refusal reason provided"
    raise ValueError(f"Model did not return a parsed receipt: {reason}")

# `output` is already a validated `Receipt`, so no second validation pass is
# needed.
receipt = output
print(receipt)
