<a href="https://colab.research.google.com/github/samarjahanahmedburney/Recepitiq/blob/main/Receipt_IQ.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
import os

# LLM client libraries (imported unconditionally, so both must be installed)
import openai
import google.generativeai as genai

In [None]:
# -------------------------------
# 1. IMAGE PREPROCESSING
# -------------------------------

def preprocess_image(image_path):
    """Load an image from disk and binarize it for OCR.

    The image is converted to grayscale, smoothed with a 5x5 Gaussian blur,
    and binarized with Gaussian adaptive thresholding (block size 11,
    constant 2).

    Args:
        image_path: Path of the image file to load.

    Returns:
        The thresholded image returned by ``cv2.adaptiveThreshold``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not load ``image_path``.
    """
    image = cv2.imread(image_path)
    # cv2.imread reports failure by returning None rather than raising;
    # without this check the failure surfaces later inside cvtColor with an
    # error message that does not mention the path.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path!r}")

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Noise reduction
    gray = cv2.GaussianBlur(gray, (5, 5), 0)

    # Adaptive threshold
    thresh = cv2.adaptiveThreshold(
        gray, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11, 2
    )

    return thresh

In [None]:
# -------------------------------
# 2. OCR
# -------------------------------

def extract_text(image_path):
    """Return the text that Tesseract reads from the image at *image_path*.

    The image is first passed through ``preprocess_image``; the result is
    handed to ``pytesseract.image_to_string``.
    """
    # Option string forwarded verbatim to Tesseract.
    tesseract_flags = r'--oem 3 --psm 6'
    return pytesseract.image_to_string(
        preprocess_image(image_path),
        config=tesseract_flags,
    )


# -------------------------------
# 3. DATA PARSING
# -------------------------------

def parse_receipt(text):
    """Extract priced line items from OCR'd receipt text.

    A line counts as an item when, after stripping surrounding whitespace,
    it ends with a price of the form ``<digits>[.,]<two digits>`` preceded
    by whitespace. Everything before that whitespace becomes the item name.
    A comma decimal separator is normalized to a dot.

    Args:
        text: Receipt text, one entry per line.

    Returns:
        A DataFrame with columns ``item``, ``price`` and ``quantity``
        (quantity is always 1). The columns are present even when no line
        matches, so downstream column lookups do not raise ``KeyError``.
    """
    # Compile once instead of re-resolving the pattern for every line.
    price_line = re.compile(r'(.+?)\s+(\d+[.,]\d{2})$')
    items = []

    for raw_line in text.split("\n"):
        match = price_line.search(raw_line.strip())
        if match is None:
            continue

        items.append({
            "item": match.group(1),
            "price": float(match.group(2).replace(",", ".")),
            "quantity": 1,
        })

    # Explicit columns keep the schema stable for an empty result; a bare
    # pd.DataFrame([]) would have no columns at all.
    return pd.DataFrame(items, columns=["item", "price", "quantity"])

In [None]:
# -------------------------------
# 4. CATEGORIZATION
# -------------------------------

def categorize_item(name):
    """Map an item name to a spending category by keyword lookup.

    Matching is a case-insensitive substring test. Categories are tried in
    the order listed below and the first one with a matching keyword wins;
    names matching no keyword are categorized as "Other".
    """
    # Order matters: earlier categories take precedence over later ones.
    keyword_table = (
        ("Dairy", ("milk", "cheese", "yogurt")),
        ("Bakery", ("bread", "cake", "bun")),
        ("Meat", ("chicken", "beef", "meat")),
        ("Snacks", ("chips", "snack", "chocolate")),
        ("Grains", ("rice", "pasta", "flour")),
    )

    lowered = name.lower()
    for category, keywords in keyword_table:
        for keyword in keywords:
            if keyword in lowered:
                return category
    return "Other"

In [None]:
# -------------------------------
# 5. SPENDING ANALYSIS
# -------------------------------

def analyze_spending(df):
    """Summarize spending per category.

    Adds a ``category`` column to *df* in place (derived from ``item`` via
    ``categorize_item``), then totals ``price`` per category.

    Returns:
        A tuple ``(df, summary, total_spending)`` where ``df`` is the same
        frame with the added column, ``summary`` has one row per category
        with columns ``category``, ``price`` and ``percentage``, and
        ``total_spending`` is the sum of the ``price`` column.
    """
    df["category"] = df["item"].apply(categorize_item)

    grand_total = df["price"].sum()

    # One row per category with the summed price as a regular column.
    per_category = df.groupby("category")["price"].sum().reset_index()
    share = per_category["price"] / grand_total
    per_category["percentage"] = share * 100

    return df, per_category, grand_total

In [None]:
# -------------------------------
# 6. LLM ADVICE (OPENAI)
# -------------------------------

def generate_advice_openai(summary, total):
    """Ask an OpenAI chat model for budgeting advice on a spending summary.

    The API key is read from the ``OPENAI_API_KEY`` environment variable.
    Works with both openai-python 1.x (client object API) and the legacy
    0.x module-level ``ChatCompletion`` API, which was removed in 1.0.

    Args:
        summary: Object with a ``to_string(index=False)`` method (e.g. a
            pandas DataFrame) holding the per-category spending summary.
        total: Total spending, interpolated into the prompt as-is.

    Returns:
        The text content of the model's first reply choice.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    # Still set the module-level key, as the original did, so other code
    # using the module-level configuration keeps working.
    openai.api_key = api_key

    prompt = f"""
    Here is a spending summary:
    {summary.to_string(index=False)}

    Total spending: {total}

    Give personalized budgeting advice.
    Highlight overspending areas.
    """
    messages = [{"role": "user", "content": prompt}]

    if hasattr(openai, "OpenAI"):
        # openai>=1.0: ChatCompletion no longer exists; use the client API.
        client = openai.OpenAI(api_key=api_key)
        response = client.chat.completions.create(
            model="gpt-4",
            messages=messages,
        )
        return response.choices[0].message.content

    # Legacy openai<1.0 path.
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=messages
    )

    return response["choices"][0]["message"]["content"]