In [1]:
!pip install gradio easyocr pandas matplotlib -q
!pip uninstall torch torchvision torchaudio -y
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu

import gradio as gr
import easyocr
import pandas as pd
import re
import os
import matplotlib.pyplot as plt
from datetime import datetime

# Module-level EasyOCR reader configured for English ('en') text; created once
# here and used by extract_receipt via reader.readtext().
reader = easyocr.Reader(['en'])

def extract_store_name(ocr_lines):
    """Guess the store name from the OCR text lines of a receipt.

    Known brands are looked up first: every keyword is compared against the
    whole receipt text with spaces and dots removed, so spacing/punctuation
    differences in the OCR output do not prevent a match. If no brand
    matches, the first of the top five lines that contains no digit, is at
    least five characters long and (except for the very first line) contains
    no blacklisted address/registration word is used instead.

    Args:
        ocr_lines: iterable of text lines as produced by the OCR step.

    Returns:
        The canonical brand name, or the fallback line in title case, or
        ``None`` when nothing suitable is found.
    """
    lines_cleaned = [line.strip().lower() for line in ocr_lines if line.strip()]
    full_combined = " ".join(lines_cleaned)
    blacklist = ['registration', 'co.reg', 'hypermall', 'plt', 'lot', 'ground floor', 'invoice']

    # The first matching keyword wins (dicts keep insertion order), so a more
    # specific keyword must come BEFORE any shorter keyword it contains:
    # 'ccklocal' has to precede 'cck', otherwise it can never be reached.
    brand_keywords = {
        'ai-cha': 'AI-CHA', 'kiss my bowl': 'KISS MY BOWL', 'mr.diy': 'MR. D.I.Y',
        'mr d.i.y': 'MR. D.I.Y', 'mr diy': 'MR. D.I.Y', 'guardian': 'GUARDIAN',
        'padini': 'PADINI', 'watsons': 'WATSONS', 'parkson': 'PARKSON',
        'everrise': 'EVERRISE', 'teppanya': 'TEPPANYA CAFE', 'marks & spencer': 'MARKS & SPENCER',
        'ccklocal': 'CCKLOCAL - KK', 'cck': 'CCK FRESH MART', 'popular': 'POPULAR BOOKSTORE',
        'h&m': 'H&M', 'h & m': 'H&M', 'hem': 'H&M', 'daiso': 'DAISO JAPAN', 'kfc': 'KFC',
        '15 minutes bake': '15 MINUTES BAKE CAFE', 'uniqlo': 'UNIQLO', 'ilaollao': 'LLAOLLAO',
        'llaollao': 'LLAOLLAO', 'caring': 'CARING PHARMACY', 'sushihan': 'SUSHIHAN',
        'u$h|han': 'SUSHIHAN', 'u $ h | h a n': 'SUSHIHAN', 'tea co': 'TEA CO.', 'teaco': 'TEA CO.'
    }

    # Normalise the receipt text once instead of once per keyword.
    haystack = full_combined.replace(" ", "").replace(".", "")
    for keyword, brand in brand_keywords.items():
        if keyword.replace(" ", "").replace(".", "") in haystack:
            return brand

    # Fallback: take a plausible header line from the top of the receipt.
    for i, line in enumerate(lines_cleaned[:5]):
        if i != 0 and any(bad in line for bad in blacklist):
            continue
        if not any(char.isdigit() for char in line) and len(line) >= 5:
            return line.title()
    return None

def extract_receipt(image_path):
    """Run OCR on a receipt image and extract store name, date and total.

    The image is read with the module-level ``reader``; each OCR result is
    unpacked as a 3-tuple whose middle element is the recognised text. The
    grand total is located by a cascade of heuristics (label/amount on
    neighbouring lines, inline "total: 12.34", a total split into two OCR
    fragments, largest amount on the receipt, a bare "RM" next to a number)
    and finally sanity-checked against the subtotal. The date is searched in
    numeric and "12-Jan-2024" style formats, and the store name comes from
    ``extract_store_name`` followed by a few OCR clean-ups.

    Args:
        image_path: value passed straight to ``reader.readtext``.

    Returns:
        A ``(store_name, date, amount)`` tuple of strings. ``date`` is
        formatted ``YYYY-MM-DD`` and ``amount`` with two decimals; each
        element is ``""`` when the value could not be determined.
    """
    results = reader.readtext(image_path)
    ocr_texts = [text.lower() for (_, text, _) in results]
    ocr_lines_raw = [text.strip() for (_, text, _) in results]

    grand_total_amount = None
    total_candidates = []

    # Lower-cased lines with ALL spaces removed, so "Grand Total" and
    # "GrandTotal" look the same to the keyword checks below.
    lines = [line.strip().lower().replace(" ", "") for line in ocr_lines_raw]
    exclude_keywords = ['saving', 'cash', 'change', 'points', 'qty', 'subtotal', 'item', 'unit', 'original', 'price', 'discount']
    include_keywords = ['total', 'grand total', 'total amount', 'amount due']
    total_keywords = ['grandtotal', 'total', 'totalamount', 'amountdue']

    # Pass 1: total label spread over two consecutive lines, amount on the third.
    for i in range(len(lines) - 2):
        line1 = lines[i]
        line2 = lines[i + 1]
        line3 = lines[i + 2]
        combined = line1 + line2

        if any(k in combined for k in total_keywords) and not any(b in combined for b in exclude_keywords):
            if re.match(r'^[rm]*\d{1,5}[.,]\d{2}$', line3):
                try:
                    value = float(re.findall(r'[\d.,]+', line3)[0].replace(",", "."))
                except ValueError:
                    continue
                if 0.5 <= value <= 1000:
                    total_candidates.append((i + 2, value))

    # Pass 2: total label on one line, amount on the next line or inline.
    for i in range(len(lines) - 1):
        current = lines[i]
        next_line = lines[i + 1]

        if any(k in current for k in total_keywords) and not any(b in current for b in exclude_keywords):
            if re.match(r'^[rm\s]*\d{1,5}[.,]\d{2}$', next_line):
                try:
                    value = float(next_line.replace("rm", "").replace(",", ".").strip())
                except ValueError:
                    continue
                if 0.5 <= value <= 1000:
                    total_candidates.append((i + 1, value))

            # `current` has had its spaces stripped, so "amount due" arrives
            # here as "amountdue"; \s* lets the pattern match either form.
            inline_match = re.search(r'(total|amount\s*due)[:\s]*rm?\s*([\d.,]+)', current)
            if inline_match:
                try:
                    value = float(inline_match.group(2).replace(",", "."))
                except ValueError:
                    continue
                if 0.5 <= value <= 1000:
                    total_candidates.append((i, value))

            # NOTE(review): this branch cannot fire where it is nested: a
            # line that is purely an amount can never also contain a total
            # keyword, which the enclosing `if` requires. It looks intended
            # for "amount printed before its label"; moving it to loop level
            # changes which candidate wins, so verify against sample
            # receipts before enabling it.
            if re.match(r'^[rm\s]*\d{1,5}[.,]\d{2}$', current) and any(k in next_line for k in total_keywords):
                if not any(b in next_line for b in exclude_keywords):
                    try:
                        value = float(current.replace("rm", "").replace(",", ".").strip())
                    except ValueError:
                        continue
                    if 0.5 <= value <= 1000:
                        total_candidates.append((i, value))

    # Prefer the candidate found furthest down the receipt.
    if total_candidates:
        total_candidates.sort(key=lambda x: x[0], reverse=True)
        grand_total_amount = total_candidates[0][1]

    amounts = []

    # Pass 3: scan the un-normalised OCR fragments for a total label with an
    # amount in the same or the following fragment. The loop stops as soon as
    # a total is known (including one already found above).
    for i, (_, text, _) in enumerate(results):
        lower = text.lower().strip()
        if any(x in lower for x in exclude_keywords):
            continue

        if any(k in lower for k in include_keywords):
            found = re.findall(r'\d{1,5}[.,]\d{2}', text)
            if not found and i + 1 < len(results):
                next_text = results[i + 1][1]
                found = re.findall(r'\d{1,5}[.,]\d{2}', next_text)
            for f in found:
                try:
                    value = float(f.replace(",", ".").replace(" ", ""))
                except ValueError:
                    continue
                if 0.5 <= value <= 1000:
                    grand_total_amount = value
                    break

        if grand_total_amount:
            break

    # Pass 4: total split by the OCR into "<whole>" and "<cents>" fragments
    # right after a line containing "total".
    for i in range(len(results) - 1):
        line = results[i][1].lower()
        if 'total' in line and i + 2 < len(results):
            num1 = results[i+1][1].strip().replace(",", ".")
            num2 = results[i+2][1].strip().replace(",", ".")
            if re.match(r'^\d{1,3}$', num1) and re.match(r'^\d{2}$', num2):
                try:
                    # Keep the cents as text: int("05") would turn
                    # 12 + "05" into 12.5 instead of 12.05.
                    combined = float(f"{int(num1)}.{num2}")
                except ValueError:
                    continue
                if 0.5 <= combined <= 500:
                    grand_total_amount = combined
                    break

    # Fallback A: collect every plausible amount and take the largest.
    if not grand_total_amount:
        for (_, text, _) in results:
            if any(x in text.lower() for x in exclude_keywords):
                continue
            found = re.findall(r'\d{1,5}[.,]\d{2}', text)
            for f in found:
                try:
                    value = float(f.replace(",", "."))
                except ValueError:
                    continue
                if 0.5 <= value <= 1000:
                    amounts.append(value)

    if not grand_total_amount and amounts:
        grand_total_amount = max(amounts)

    # Fallback B: a fragment that is just "RM" directly before/after a number.
    if not grand_total_amount:
        for i in range(len(results) - 1):
            current = results[i][1].lower().strip()
            next_line = results[i + 1][1].strip()

            if any(x in current for x in exclude_keywords):
                continue

            if re.match(r'^rm$', current) and re.match(r'^\d{1,5}(\.\d{1,2})?$', next_line):
                try:
                    value = float(next_line)
                except ValueError:
                    continue
                if 0.5 <= value <= 1000:
                    grand_total_amount = value
                    break

            if re.match(r'^\d{1,5}(\.\d{1,2})?$', current) and re.match(r'^rm$', next_line.lower()):
                try:
                    value = float(current)
                except ValueError:
                    continue
                if 0.5 <= value <= 1000:
                    grand_total_amount = value
                    break

    # Sanity check: if both the chosen total and the value following the
    # subtotal label exceed 100, replace the total with a later amount <= 100.
    if grand_total_amount and grand_total_amount > 100:
        for i in range(len(ocr_lines_raw) - 1):
            line = ocr_lines_raw[i].lower()
            if "sub" in line and "total" in line:
                # Lower-case first: the raw OCR text keeps its original case,
                # so an upper-case "RM" prefix was previously never stripped
                # and the value failed to parse.
                after_subtotal = ocr_lines_raw[i + 1].lower()
                try:
                    suspicious_value = float(after_subtotal.replace(",", ".").replace("rm", "").replace("rl", "").strip())
                except ValueError:
                    continue
                if suspicious_value > 100:
                    # NOTE(review): the inner `break` only leaves the
                    # per-line match loop, so every later line with a
                    # qualifying amount overwrites the total again and the
                    # last one wins. Confirm this is the intended choice.
                    for j in range(i + 2, len(ocr_lines_raw)):
                        next_line = ocr_lines_raw[j].lower()
                        next_matches = re.findall(r'\d{1,5}[.,]\d{2}', next_line)
                        for match in next_matches:
                            try:
                                corrected = float(match.replace(",", "."))
                            except ValueError:
                                continue
                            if 0.5 <= corrected <= 100:
                                print(f"Final Fix: Overwritten grand total {grand_total_amount:.2f} → {corrected:.2f}")
                                grand_total_amount = corrected
                                break
                # Only the first subtotal line with a parseable value is used.
                break

    # Date, attempt 1: a line containing "date" with Y-M-D or D-M-Y digits.
    date_found = None
    for line in ocr_texts:
        if 'date' in line:
            match = re.search(r'(\d{4})[/-](\d{1,2})[/-](\d{1,2})', line)
            if not match:
                match = re.search(r'(\d{1,2})[/-](\d{1,2})[/-](\d{4})', line)
            if match:
                # Reorder to (year, month, day) when the day-first pattern matched.
                y, m, d = match.groups() if len(match.groups()[0]) == 4 else (match.groups()[2], match.groups()[1], match.groups()[0])
                try:
                    year = int(y)
                    if 2000 <= year <= datetime.now().year + 2:
                        date_found = f"{year:04d}-{int(m):02d}-{int(d):02d}"
                        break
                    elif 2195 <= year <= 2210:
                        # Years in this range are mapped to the fixed year 2025.
                        date_found = f"2025-{int(m):02d}-{int(d):02d}"
                        break
                except ValueError:
                    continue

    # Date, attempt 2: any line with D-M-Y digits (two-digit years get "20").
    if not date_found:
        for line in ocr_texts:
            match = re.search(r'(\d{1,2})[/-](\d{1,2})[/-](\d{2,4})', line)
            if match:
                d, m, y = match.groups()
                if len(y) == 2:
                    y = '20' + y
                try:
                    year = int(y)
                    if 2000 <= year <= datetime.now().year + 2:
                        date_found = f"{year:04d}-{int(m):02d}-{int(d):02d}"
                        break
                    elif 2195 <= year <= 2210:
                        date_found = f"2025-{int(m):02d}-{int(d):02d}"
                        break
                except ValueError:
                    continue

    # Date, attempt 3: day, month name (or abbreviation), year.
    if not date_found:
        mon_map = {
            'jan': '01', 'feb': '02', 'mar': '03', 'apr': '04', 'may': '05', 'jun': '06',
            'jul': '07', 'aug': '08', 'sep': '09', 'oct': '10', 'nov': '11', 'dec': '12'
        }
        for line in ocr_texts:
            match = re.search(r'(\d{1,2})[/-](Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*[/-](\d{2,4})', line, re.IGNORECASE)
            if match:
                d, mon, y = match.groups()
                mon_num = mon_map.get(mon[:3].lower())
                if mon_num:
                    if len(y) == 2:
                        y = '20' + y
                    try:
                        date_found = f"{int(y):04d}-{mon_num}-{int(d):02d}"
                        break
                    except ValueError:
                        continue

    # Store name plus clean-up of recurring OCR misreadings.
    store_name = extract_store_name(ocr_lines_raw)
    if store_name:
        store_name = store_name.upper()
        if any(bad in store_name for bad in ['D. .Y', 'D .Y', 'MR D', 'MR. D']):
            store_name = 'MR. D.I.Y'
        elif 'DIY' in store_name.replace(" ", ""):
            store_name = 'MR. D.I.Y'
        store_name = (store_name.replace('(EN)', '(EM)')
                                 .replace('(En)', '(EM)')
                                 .replace('SDN,BHD', 'SDN. BHD')
                                 .replace('SDN.BHD.', 'SDN. BHD')
                                 .replace('BHD_', 'BHD'))
        if 'SDN' in store_name and 'BHD' not in store_name:
            store_name += ' BHD'

    amount_text = f"{grand_total_amount:.2f}" if grand_total_amount else ""
    return store_name or "", date_found or "", amount_text

def save_record(name, date, amount, csv_path="/content/receipt_records.csv"):
    """Append one expense record to the CSV file.

    Args:
        name: store name.
        date: date text; ``DD/MM/YYYY`` or ``YYYY-MM-DD`` input is stored as
            ``YYYY-MM-DD``, anything else is stored exactly as given.
        amount: amount as text or number; an empty/falsy value is stored as
            an empty string, otherwise it is stored with two decimals.
        csv_path: CSV file to append to (created when missing).

    Returns:
        A status message: the success text, or an error text when ``amount``
        is not a number (nothing is written in that case).
    """
    # Normalise the date when it matches one of the two supported formats.
    try:
        for fmt in ('%d/%m/%Y', '%Y-%m-%d'):
            parsed_date = pd.to_datetime(date, format=fmt, errors='coerce')
            if not pd.isna(parsed_date):
                date = parsed_date.strftime('%Y-%m-%d')
                break
    except (TypeError, ValueError):
        # Best effort only: an unparseable date is saved as the user typed it.
        pass

    # The amount box is user-editable, so reject non-numeric text with a
    # message instead of letting float() raise inside the UI callback.
    amount_text = ""
    if amount:
        try:
            amount_text = f"{float(amount):.2f}"
        except (TypeError, ValueError):
            return "❌ Invalid amount: please enter a number such as 23.60"

    if os.path.exists(csv_path):
        df = pd.read_csv(csv_path)
    else:
        df = pd.DataFrame(columns=["Store Name", "Date", "Amount (RM)"])

    new_row = pd.DataFrame([{"Store Name": name, "Date": date, "Amount (RM)": amount_text}])
    # Skip the concat when there is nothing to append to; concatenating with
    # an empty frame is deprecated behaviour in recent pandas releases.
    df = new_row if df.empty else pd.concat([df, new_row], ignore_index=True)
    df.to_csv(csv_path, index=False)
    return "✅ Record saved successfully!"

def load_records(path="/content/receipt_records.csv"):
    """Load the saved expense records for display.

    Leftover index-like columns in the file ("Index", "Index.1",
    "Unnamed: 0", matched case-insensitively) are discarded, dates are
    stripped of surrounding whitespace, amounts are rendered with two
    decimals (empty string when not numeric), and a fresh 0-based "Index"
    column is inserted as the first column.

    Args:
        path: CSV file to read.

    Returns:
        A DataFrame with the columns "Index", "Store Name", "Date" and
        "Amount (RM)"; an empty frame with those columns when the file does
        not exist.
    """
    if not os.path.exists(path):
        return pd.DataFrame(columns=["Index", "Store Name", "Date", "Amount (RM)"])

    df = pd.read_csv(path)
    # Column names are lower-cased before the comparison, so the names in the
    # list must be lower-case too (a mixed-case "Index.1" entry never matched).
    stale = df.columns.str.lower().isin(["index", "index.1", "unnamed: 0"])
    df = df.loc[:, ~stale].copy()
    df["Date"] = df["Date"].astype(str).str.strip()
    df["Amount (RM)"] = pd.to_numeric(df["Amount (RM)"], errors="coerce").map(lambda x: f"{x:.2f}" if pd.notna(x) else "")
    df.insert(0, "Index", range(len(df)))
    return df

def delete_record(index_to_delete):
    """Delete the record at the given 0-based row index and return the table.

    Args:
        index_to_delete: row index as shown in the "Index" column of
            ``load_records()``; ``None`` (nothing entered) or an out-of-range
            value leaves the file untouched.

    Returns:
        The freshly reloaded records table.
    """
    df = load_records()
    # An empty number box yields None, which cannot be range-compared.
    if index_to_delete is not None and 0 <= int(index_to_delete) < len(df):
        df = df.drop(index=int(index_to_delete)).reset_index(drop=True)
        # "Index" is a display-only column added by load_records(); keep it
        # out of the stored file so the CSV schema stays stable.
        df = df.drop(columns=["Index"], errors="ignore")
        df.to_csv("/content/receipt_records.csv", index=False)
    return load_records()

def analyze_summary():
    """Summarise all saved expenses as display text.

    Returns:
        A multi-line string with total, average, highest and lowest amount,
        or a "no data" notice when there are no records or none of them has
        a numeric amount.
    """
    df = load_records()
    if df.empty:
        return "⚠️ No data available for analysis"

    # Only numeric amounts take part; without any, the statistics would be
    # rendered as "nan", so report the absence of data instead.
    amounts = pd.to_numeric(df["Amount (RM)"], errors='coerce').dropna()
    if amounts.empty:
        return "⚠️ No data available for analysis"

    total = amounts.sum()
    avg = amounts.mean()
    max_ = amounts.max()
    min_ = amounts.min()
    return f"📊 Expense Summary\n\n💰 Total Expense：RM {total:.2f}\n📉 Average Spending：RM {avg:.2f}\n🔺 Highest Expense：RM {max_:.2f}\n🔻 Lowest Expense：RM {min_:.2f}"

def analyze_monthly_table():
    """Build a table of total spending per calendar month.

    Rows whose date or amount cannot be parsed are ignored.

    Returns:
        A DataFrame with a "Month" column (monthly period rendered as text)
        and an "Amount (RM)" column (monthly sum formatted with two
        decimals). An empty frame with those two columns is returned when
        there are no records or a required column is missing.
    """
    records = load_records()
    required = ("Date", "Amount (RM)")
    if records.empty or any(col not in records for col in required):
        return pd.DataFrame(columns=["Month", "Amount (RM)"])

    records["Date"] = pd.to_datetime(records["Date"], errors='coerce')
    records["Amount (RM)"] = pd.to_numeric(records["Amount (RM)"], errors='coerce')

    # Keep only fully parseable rows, then bucket them by month.
    usable = records.dropna(subset=["Date", "Amount (RM)"])
    usable = usable.assign(Month=usable["Date"].dt.to_period("M"))
    totals = usable.groupby("Month")["Amount (RM)"].sum().reset_index()

    totals["Month"] = totals["Month"].astype(str)
    totals["Amount (RM)"] = totals["Amount (RM)"].map("{:.2f}".format)
    return totals

def analyze_monthly_plot():
    """Plot total spending per calendar month as a line chart.

    Rows whose date or amount cannot be parsed are ignored.

    Returns:
        A matplotlib ``Figure`` with the monthly totals, or ``None`` when
        there are no records or a required column is missing.
    """
    df = load_records()
    if df.empty or "Date" not in df or "Amount (RM)" not in df:
        return None

    df["Date"] = pd.to_datetime(df["Date"], errors='coerce')
    df["Amount (RM)"] = pd.to_numeric(df["Amount (RM)"], errors='coerce')
    df = df.dropna(subset=["Date", "Amount (RM)"])

    df["Month"] = df["Date"].dt.to_period("M")
    monthly = df.groupby("Month")["Amount (RM)"].sum().reset_index()
    monthly["Month"] = monthly["Month"].astype(str)

    # Draw on an explicit Figure/Axes instead of pyplot's implicit "current
    # figure", so each call works on its own figure object.
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.plot(monthly["Month"], monthly["Amount (RM)"], marker='o')
    ax.set_title("Monthly Expense Trend Chart")
    ax.set_xlabel("Month")
    ax.set_ylabel("Total Expenses (RM)")
    ax.tick_params(axis='x', labelrotation=45)
    fig.tight_layout()

    # Deregister the figure from pyplot so repeated clicks do not pile up
    # open figures; the returned Figure object itself can still be rendered.
    plt.close(fig)
    return fig

# Gradio UI: one tab per feature, wired to the functions defined above.
with gr.Blocks(title="🧾 Personal Expense Management System") as demo:
    gr.Markdown("## 🧾 Personal Expense Management System")

    with gr.Tab("📸 Upload and Recognize Receipt"):
        with gr.Row():
            with gr.Column():
                image_input = gr.Image(type="filepath", label="Upload Receipt Image")
                extract_btn = gr.Button("📤 Extract Information")
            with gr.Column():
                name = gr.Textbox(label="🏪 Store Name")
                date = gr.Textbox(label="📅 Date")
                amount = gr.Textbox(label="💰 Total Amount (RM)")
                save_btn = gr.Button("✅ Confirm and Save Record")
                save_result = gr.Textbox(label="", interactive=False)
        extract_btn.click(fn=extract_receipt, inputs=image_input, outputs=[name, date, amount])
        save_btn.click(fn=save_record, inputs=[name, date, amount], outputs=save_result)

        # Clear the form when the image is removed. The handler feeds four
        # outputs, so it must return four values in BOTH branches: a single
        # gr.update() is not accepted for a multi-output event.
        image_input.change(
            fn=lambda x: ("", "", "", "") if x is None else tuple(gr.update() for _ in range(4)),
            inputs=image_input,
            outputs=[name, date, amount, save_result],
        )

    with gr.Tab("📜 View Expense Records"):
        refresh_btn = gr.Button("🔄 Refresh Records")
        record_table = gr.Dataframe(interactive=False)
        with gr.Row():
            delete_index = gr.Number(label="🗑️ Enter Row Index to Delete (starting from 0)", precision=0)
            delete_btn = gr.Button("Delete Selected Record")
        refresh_btn.click(fn=load_records, outputs=record_table)
        delete_btn.click(fn=delete_record, inputs=delete_index, outputs=record_table)

    with gr.Tab("📅 Monthly Expense Table"):
        monthly_table = gr.Dataframe(label="📅 Monthly Total Expenses")
        monthly_btn = gr.Button("📥 Generate Table")
        monthly_btn.click(fn=analyze_monthly_table, outputs=monthly_table)

    with gr.Tab("📈 Monthly Expense Trend"):
        trend_plot = gr.Plot(label="📈 Monthly Expense Trend Chart")
        trend_btn = gr.Button("📊 Generate Chart")
        trend_btn.click(fn=analyze_monthly_plot, outputs=trend_plot)

    with gr.Tab("📊 Expense Summary"):
        summary_box = gr.Textbox(lines=8, label="📊 Expense Summary")
        summary_btn = gr.Button("🔍 Analyze All Records")
        summary_btn.click(fn=analyze_summary, outputs=summary_box)

demo.launch(debug=True)

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.1/54.1 MB[0m [31m14.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m322.9/322.9 kB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.9/2.9 MB[0m [31m58.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.2/95.2 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.5/11.5 MB[0m [31m58.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.0/72.0 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m97.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━



Progress: |██████████████████████████████████████████████████| 100.0% Complete



Progress: |██████████████████████████████████████████████████| 100.0% CompleteIt looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://6775b974691a6e381a.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




Final Fix: Overwritten grand total 233.60 → 28.60
Final Fix: Overwritten grand total 28.60 → 23.60


ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/uvicorn/protocols/http/h11_impl.py", line 403, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
    return await self.app(scope, receive, send)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/fastapi/applications.py", line 1054, in __call__
    await super().__call__(scope, receive, send)
  File "/usr/local/lib/python3.11/dist-packages/starlette/applications.py", line 112, in __call__
    await self.middleware_stack(scope, receive, send)
  File "/usr/local/lib/python3.11/dist-packages/starlette/middleware/errors.py", line 187, in __call__
    raise exc
  File "/usr/local/lib/python3.11/dist-packages/starlette/middleware/errors.py",

Final Fix: Overwritten grand total 233.60 → 28.60
Final Fix: Overwritten grand total 28.60 → 23.60




Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://6775b974691a6e381a.gradio.live


