# Code for Receipt Scanner
Content should be commented throughout :)

In [1]:
# setup

import os
import base64
import json
import time
import re
from datetime import datetime
from dateutil.parser import parse as date_parse
from dotenv import load_dotenv
from openai import OpenAI
import backoff
from collections import defaultdict
import textwrap

load_dotenv()

True

In [2]:
# PROMPTS
# Single-letter category codes the model is asked to emit, mapped to readable labels.
# The "code for label" hints injected into the prompt are built from this dict.
CATEGORY_MAP = {
    "F": "Fruit", "V": "Vegetable", "G": "Grain",
    "D": "Dairy", "M": "Meat",      "S": "Snack",
    "O": "Other"
}
# Single-letter storage-location codes, used the same way as CATEGORY_MAP.
LOCATION_MAP = {"P": "Pantry", "F": "Fridge", "Z": "Freezer"}

# Plain triple-quoted string (NOT an f-string) + dedent makes edits much cleaner.
# Single-brace placeholders ({current_date}, {category_keys}, {location_keys}) are
# filled in later via str.format(); doubled braces ({{ }}) are escapes that come out
# as literal { } in the final prompt text.
PROMPT_TEMPLATE = textwrap.dedent("""
    Today's date is {current_date}. You are given a receipt image. Please parse each item on the receipt. 
    For each item, do the following:
    1. Guess the cleaned-up item name (if uncertain, do your best or put N/A).
    2. Provide the quantity always as `1` for each line-item—do not try to combine duplicates. We’ll consolidate later.
    3. Guess a reasonable expiration date for this food item based on today's date (if unsure, put N/A).
    4. Classify it from {{ {category_keys} }} (if unsure, put N/A).
    5. Provide a storage location from {{ {location_keys} }} (if unsure, put N/A).
    6. Provide the cost as a float (e.g., 5.99). If unsure, put N/A.

    If the entire image is not a valid receipt or you cannot parse any items, return 'INVALID'.

    The final response format must be strictly:
    {{(item, quantity, expiration, category, location, cost), ...}}
""")


In [3]:
def encode_image(image_path: str) -> str:
    """
    Read the file at ``image_path`` and return its bytes as base64 text.

    The file is opened in binary mode, so any file type works; the result
    is a plain ``str`` (base64 output decoded as UTF-8).
    """
    with open(image_path, "rb") as fh:
        raw = fh.read()
    encoded = base64.b64encode(raw)
    return encoded.decode("utf-8")

In [4]:
def get_enhanced_receipt_items_gpt4o(image_path: str, current_date: str) -> str:
    """
    Send a receipt image to the gpt-4o-mini model and return its raw text reply.

    The model is instructed (via PROMPT_TEMPLATE) to answer with one tuple per
    receipt line, in the format:

      {
        (cleaned_item1, quantity1, expiration1, category1, location1, cost1),
        (cleaned_item2, quantity2, expiration2, category2, location2, cost2),
        ...
      }

    or with the single word "INVALID" when the image is not a readable receipt.

    Args:
        image_path: Path to the image file; it is read and sent inline as a
            base64 data URL.
        current_date: Today's date as text, injected into the prompt so the
            model can make a more educated guess about expiration dates.

    Returns:
        The model's reply text, unparsed. An empty string is returned when the
        API reports no text content, so callers always receive a ``str``.
    """
    import mimetypes  # local import keeps this cell self-contained

    # Build the prompt by injecting the dynamic values; the code hints read
    # like "F for Fruit, V for Vegetable, ...".
    prompt = PROMPT_TEMPLATE.format(
        current_date=current_date,
        category_keys=", ".join(f"{k} for {v}" for k, v in CATEGORY_MAP.items()),
        location_keys=", ".join(f"{k} for {v}" for k, v in LOCATION_MAP.items()),
    )

    # Label the data URL with the file's real image type (e.g. image/jpeg for
    # .jpg) instead of always claiming PNG; fall back to PNG when the extension
    # is missing or not an image type.
    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/png"

    # Send image + prompt in a single user message.
    base64_image = encode_image(image_path)
    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
    chat = client.chat.completions.create(
        model="gpt-4o-mini",
        max_tokens=1000,  # reducing can speed up runtime/cost
        temperature=0.0,  # lower, less creative (note, 0.0 makes better expiration dates)
        messages=[{
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}}
            ]
        }],
    )

    # message.content may be None (e.g. on a refusal); normalise to "" so the
    # declared return type holds and downstream string handling cannot crash.
    return chat.choices[0].message.content or ""

In [5]:
def _normalize_expiration(raw: str) -> str:
    """Return ``raw`` as a ``YYYY-MM-DD`` string, or "N/A" if it is not a date."""
    # The prompt tells the model to answer "N/A" when unsure; short-circuit
    # instead of relying on a parser exception for the common case.
    if raw.upper() in ("", "N/A"):
        return "N/A"
    # Strict ISO dates (what the model normally emits) need no lenient parsing.
    try:
        return datetime.strptime(raw, "%Y-%m-%d").date().isoformat()
    except ValueError:
        pass
    # Anything else ("May 13, 2026", "13/05/2026", ...) goes through dateutil.
    try:
        return date_parse(raw).date().isoformat()
    except (ValueError, TypeError, OverflowError):
        return "N/A"


def parse_enhanced_gpt_output(gpt_output: str) -> list:
    """
    Parse the model's reply into a list of item dictionaries.

    Expected input is either the word "INVALID" (any case) or text containing
    tuples of the form:
      {
        (cleaned_item, quantity, expiration, category, location, cost),
        ...
      }
    Text around the tuples (braces, Markdown code fences) is ignored.

    Returns:
        An empty list if the reply is empty, "INVALID", or contains no
        six-field tuple. Otherwise one dict per tuple, in order:
          {
            "item": string,
            "quantity": int (or "N/A" if unparseable),
            "expiration": ISO 8601 date string (or "N/A"),
            "category": one of [F, V, G, D, M, S, O] (unknown -> "O"),
            "location": one of [P, F, Z] (unknown -> "P"),
            "cost": float (or "N/A" if unparseable)
          }
    """
    # Empty/None reply, or the whole text is just "INVALID" (case-insensitive).
    if not gpt_output or gpt_output.strip().upper() == "INVALID":
        return []

    # One field is either a complete quoted string (which may itself contain
    # commas, e.g. "Milk, 2%") or, failing that, a run of non-comma characters.
    field = r'("[^"]*"|\'[^\']*\'|[^,]+)'
    # Six comma-separated fields wrapped in parentheses.
    pattern = r'\(\s*' + r'\s*,\s*'.join([field] * 6) + r'\s*\)'
    matches = re.findall(pattern, gpt_output)

    if not matches:
        return []

    # Keep in sync with the codes offered in the prompt ("D" = Dairy included).
    valid_categories = {"F", "V", "G", "D", "M", "S", "O"}
    valid_locations = {"P", "F", "Z"}

    items_list = []

    for match in matches:
        # match is a tuple of 6 raw strings; drop surrounding whitespace/quotes.
        (item_val, quantity_str, expiration_str,
         category_str, location_str, cost_str) = (
            part.strip().strip("\"' ") for part in match
        )

        # QUANTITY (int or "N/A")
        try:
            quantity_val = int(quantity_str)
        except ValueError:
            quantity_val = "N/A"

        # EXPIRATION (ISO 8601 date or "N/A")
        expiration_val = _normalize_expiration(expiration_str)

        # CATEGORY / LOCATION: accept either letter case, fall back to the
        # catch-all codes ("O" = Other, "P" = Pantry) for anything unknown.
        category_code = category_str.upper()
        category_val = category_code if category_code in valid_categories else "O"
        location_code = location_str.upper()
        location_val = location_code if location_code in valid_locations else "P"

        # COST (float or "N/A"); tolerate a currency sign such as "$5.99".
        try:
            cost_val = float(cost_str.replace("$", ""))
        except ValueError:
            cost_val = "N/A"

        items_list.append({
            "item": item_val,
            "quantity": quantity_val,
            "expiration": expiration_val,
            "category": category_val,
            "location": location_val,
            "cost": cost_val
        })

    return items_list

In [6]:
def _coerce_quantity(value) -> int:
    """Return ``value`` as an int, treating missing/zero/unparseable as 1."""
    try:
        return int(value or 1)
    except (TypeError, ValueError, OverflowError):
        # e.g. the "N/A" placeholder used when a quantity could not be parsed:
        # each receipt line still represents one purchased item.
        return 1


def consolidate_items(items_list: list) -> list:
    """
    Merge duplicate item-dicts into a single dict with a summed quantity.

    Two entries are duplicates when every field except 'quantity' matches
    (item, expiration, category, location, cost).

    Args:
        items_list: Item dicts containing the keys "item", "expiration",
            "category", "location" and "cost"; "quantity" is optional.
            The input dicts are not modified.

    Returns:
        A new list with one dict per distinct item, in first-seen order.
        "quantity" is always an int; a missing, zero or non-numeric quantity
        (such as "N/A") counts as 1.
    """
    grouped = {}

    for itm in items_list:
        # Build a key that captures everything except quantity.
        key = (
            itm["item"],
            itm["expiration"],
            itm["category"],
            itm["location"],
            itm["cost"],
        )
        quantity = _coerce_quantity(itm.get("quantity", 1))

        if key in grouped:
            grouped[key]["quantity"] += quantity
        else:
            # Copy so the caller's dict is left untouched.
            grouped[key] = {**itm, "quantity": quantity}

    return list(grouped.values())

In [7]:
def write_json_to_file(data: list, filename: str = "receipt_output.json"):
    """
    Serialize ``data`` as JSON (2-space indent) into ``filename``.

    The file is opened for writing as UTF-8, so existing content is replaced.
    """
    with open(filename, "w", encoding="utf-8") as out_file:
        json.dump(data, fp=out_file, indent=2)

In [8]:
def main(image_path, output_path="receipt_output.json"):
    """
    Run the full receipt pipeline on one image and report progress on stdout.

    Steps: ask the model for the raw item tuples, parse them, merge duplicate
    lines into quantities, and write the consolidated result as JSON.

    Args:
        image_path: Path to the receipt image to scan.
        output_path: Where the consolidated JSON is written. Nothing is
            written when no valid items are found.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments.
    start_time = time.perf_counter()

    # We supply today's date (e.g. "2025-01-15") to help GPT estimate expiration.
    current_date = datetime.now().strftime("%Y-%m-%d")

    # 1. Get the raw enhanced text from GPT
    raw_gpt_output = get_enhanced_receipt_items_gpt4o(image_path, current_date)
    print("=======\nGPT Raw Enhanced Output:")
    print(raw_gpt_output)

    # 2. Parse the GPT output into a structured Python list
    items_json = parse_enhanced_gpt_output(raw_gpt_output)

    # 3. Check if it's invalid or empty
    if not items_json:
        print("\n=======\nNo valid items found or GPT returned 'INVALID'.")
    else:
        print("\n=======\nParsed JSON:")
        print(json.dumps(items_json, indent=2))

        # 4. Consolidate duplicates
        consolidated = consolidate_items(items_json)
        print("\n=======\nConsolidated items (with quantities):")
        print(json.dumps(consolidated, indent=2))

        # 5. Write the JSON to a file
        write_json_to_file(consolidated, output_path)
        print(f"\n=======\nThe parsed items have been written to '{output_path}'.")

    # Report total elapsed time
    runtime_seconds = time.perf_counter() - start_time
    print(f"\nScript runtime: {runtime_seconds:.2f} seconds")

In [9]:
# Negative check: presumably a photo of notes rather than a receipt (going by the
# file name); the recorded run for this cell shows the model answering INVALID.
main("test_receipts/notes_test.jpg")

GPT Raw Enhanced Output:
INVALID

No valid items found or GPT returned 'INVALID'.

Script runtime: 3.07 seconds


In [10]:
# Negative check: presumably a cat picture (going by the file name); the recorded
# run for this cell shows the model answering INVALID.
main("test_receipts/cat_test.jpg")

GPT Raw Enhanced Output:
INVALID

No valid items found or GPT returned 'INVALID'.

Script runtime: 1.20 seconds


In [11]:
# Negative check: presumably a photo of a person (going by the file name); the
# recorded run for this cell shows the model answering INVALID.
main("test_receipts/human_test.jpg")

GPT Raw Enhanced Output:
INVALID

No valid items found or GPT returned 'INVALID'.

Script runtime: 3.33 seconds


In [12]:
# Positive check: presumably a Target receipt (going by the file name); the
# recorded raw output for this cell lists one tuple per receipt line.
main("test_receipts/target_test.png")

GPT Raw Enhanced Output:
```plaintext
{ 
    ("Oreo Cookie", 1, "2026-05-13", "S", "P", 5.98), 
    ("NATVAL ENER", 1, "2026-05-13", "O", "F", 5.89), 
    ("Motts Frtsn", 1, "2026-05-13", "O", "F", 1.50), 
    ("Motts Frtsn", 1, "2026-05-13", "O", "F", 4.50), 
    ("V8", 1, "2026-05-13", "O", "F", 4.19), 
    ("Oscar Mayer", 1, "2026-05-13", "M", "F", 2.50), 
    ("Oscar Mayer", 1, "2026-05-13", "M", "F", 3.07), 
    ("Naked", 1, "2026-05-13", "O", "F", 2.50), 
    ("Tomato", 1, "2026-05-13", "V", "F", 1.99), 
    ("Lettuce", 1, "2026-05-13", "V", "F", 1.49), 
    ("KC Masterpiece", 1, "2026-05-13", "O", "P", 2.99), 
    ("Market Pant", 1, "2026-05-13", "O", "P", 2.99), 
    ("BC Sndly Sl", 1, "2026-05-13", "O", "P", 1.99), 
    ("Pepperidge", 1, "2026-05-13", "O", "P", 2.99), 
    ("Smuckers", 1, "2026-05-13", "O", "P", 2.50), 
    ("Jennie-O", 1, "2026-05-13", "M", "F", 2.50), 
    ("Lean Cuisin", 1, "2026-05-13", "M", "F", 2.49), 
    ("Lean Cuisin", 1, "2026-05-13", "M", "F", 2.49)

In [13]:
# Positive check: presumably a Costco receipt (going by the file name); the
# recorded raw output for this cell lists one tuple per receipt line.
main("test_receipts/costco_test.png")

GPT Raw Enhanced Output:
{ 
    ("ST LOUIS RIB", 1, "2026-05-13", "M", "F", 21.63), 
    ("ORG FR EGGS", 1, "2025-06-13", "O", "F", 7.99), 
    ("HADDOCK", 1, "2025-06-13", "M", "F", 20.48), 
    ("THAI JASMINE", 1, "2026-05-13", "G", "P", 19.99), 
    ("SILK ORG.ALM", 1, "2026-05-13", "D", "F", 10.99), 
    ("NY THIN CUT", 1, "2025-06-13", "M", "F", 39.12), 
    ("BRUSSEL SPT", 1, "2025-06-13", "V", "F", 4.89), 
    ("GREEK YOGURT", 1, "2025-06-13", "D", "F", 7.49), 
    ("ASPARAGUS", 1, "2025-06-13", "V", "F", 7.49), 
    ("OMG3 FSHOIL", 1, "2026-05-13", "O", "P", 28.49), 
    ("PUMPKIN SEED", 1, "2026-05-13", "O", "P", 3.60), 
    ("ORG BANANAS", 1, "2025-06-13", "F", "F", 2.99), 
    ("3 BERRY", 1, "2025-06-13", "F", "F", 3.99), 
    ("BLUEBERRIES", 1, "2025-06-13", "F", "F", 3.99), 
    ("AVOCADOS", 1, "2025-06-13", "F", "F", 4.99), 
    ("V8 VEG.JUICE", 1, "2026-05-13", "O", "P", 15.79), 
    ("V8 VEG.JUICE", 1, "2026-05-13", "O", "P", 15.79), 
    ("BATH*", 1, "N/A", "O", "P", 5