In [1]:
import base64
import getpass
import json
import mimetypes
import os
from pprint import pprint

from langchain_core.messages import HumanMessage
from langchain_google_genai import ChatGoogleGenerativeAI

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Ask for the Google AI API key only when it is not already present in the
# environment, so "Restart Kernel -> Run All" can run unattended when the key
# has been exported beforehand. The key is never written into the notebook.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Masukkan Google AI API key kamu: ")

In [3]:
# Initialize the model
# A single chat-model client reused by the inference cell; the model name and
# temperature are fixed here.
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0.0)

In [4]:
# Prompt Template
# Instruction text that is sent as the text part of the message, next to the
# receipt image. It is a plain (non-f) string: the <...> markers are
# placeholders described to the model and are not substituted by Python.
# The requested top-level keys are store_name, date, menus (a list of
# name/count/price objects), subtotal and total.
PROMPT = """
You are an intelligent OCR and reasoning model. 
You are given an image of an Indonesian receipt. 
Read and structure the content into this JSON format:

{
    "store_name": <store_name_if_any>,
    "date": <purchase_date_if_any>,
    "menus": [
        {
            "name": <item_name>,
            "count": <purchased_count>,
            "price": <price_each_or_total>
        }
    ],
    "subtotal": <subtotal_value_if_any>,
    "total": <final_total_value_if_any>
}

- Return only valid JSON.
- If some fields are missing, leave them as null.
- Make sure the numbers are in integer format (no commas).
"""

In [5]:
# Load Image and Convert to Base64
# NOTE(review): this is an absolute, machine-specific path; point it at your
# own copy of the receipt image before running the notebook elsewhere.
IMAGE_PATH = "/Users/mhdfarhanali/Documents/SmartSplitBill AI/modules/data/receipt1.jpg"

with open(IMAGE_PATH, "rb") as f:
    image_bytes = f.read()

# Derive the MIME type from the file extension so a PNG/WebP receipt is not
# labelled as JPEG in the data URI. Fall back to the previous hard-coded
# "image/jpeg" when the extension is unknown or does not map to an image type.
guessed_type = mimetypes.guess_type(IMAGE_PATH)[0]
if guessed_type and guessed_type.startswith("image/"):
    mime_type = guessed_type
else:
    mime_type = "image/jpeg"

# Base64-encode the raw bytes and wrap them in a data URI for the image part
# of the prompt message.
encoded = base64.b64encode(image_bytes).decode("utf-8")
data_uri = f"data:{mime_type};base64,{encoded}"

In [6]:
# Create Prompt Message (Text + Image)
# The message content is a two-element list: the instruction text first, then
# the receipt image passed as a base64 data URI.
text_part = {"type": "text", "text": PROMPT}
image_part = {"type": "image_url", "image_url": data_uri}

message = HumanMessage(content=[text_part, image_part])

In [7]:
# Run Gemini Inference
print("Running Gemini inference... please wait.")

# The model is invoked with a one-element list holding the text+image message;
# the reply object is kept in `response` for the parsing cell.
conversation = [message]
response = llm.invoke(conversation)

print("\nGemini response received!\n")

Running Gemini inference... please wait.

Gemini response received!



In [8]:
# Clean JSON Output
raw_output = response.content

# NOTE(review): LangChain message content may be a list of parts (strings or
# dicts) instead of a plain string. Flatten the text parts so the string
# clean-up below cannot fail with an AttributeError outside the try block.
if not isinstance(raw_output, str):
    text_parts = []
    for part in raw_output:
        if isinstance(part, str):
            text_parts.append(part)
        elif isinstance(part, dict):
            text_parts.append(str(part.get("text", "")))
    raw_output = "".join(text_parts)

# Drop the Markdown code fences the model may wrap around its JSON answer.
clean_json = raw_output.replace("```json", "").replace("```", "").strip()

# Parse safely
try:
    receipt_dict = json.loads(clean_json)
except json.JSONDecodeError as e:
    # Reset the result so later cells never see a stale value left over from
    # an earlier successful run of this cell.
    receipt_dict = None
    print("Failed to parse JSON:", e)
    # Only the first 500 characters are shown to keep the cell output short.
    print("\nRaw output:\n", raw_output[:500])
else:
    print("Parsed Receipt JSON:\n")
    pprint(receipt_dict)

Parsed Receipt JSON:

{'date': None,
 'menus': [{'count': 1, 'name': 'Matcha Float', 'price': 23000},
           {'count': 1, 'name': 'Red Velvet Ice', 'price': 20000},
           {'count': 1, 'name': 'Coklat Float', 'price': 23000},
           {'count': 1, 'name': 'Korean Strawberry', 'price': 25000},
           {'count': 1, 'name': 'Americano Ice', 'price': 15000}],
 'store_name': None,
 'subtotal': 106000,
 'total': 106000}
