In [1]:
pip install paddleocr paddlepaddle flask python-multipart fastapi uvicorn

Collecting paddleocr
  Using cached paddleocr-3.1.0-py3-none-any.whl.metadata (22 kB)
Collecting paddlepaddle
  Using cached paddlepaddle-3.1.0-cp311-cp311-win_amd64.whl.metadata (8.7 kB)
Collecting flask
  Using cached flask-3.1.1-py3-none-any.whl.metadata (3.0 kB)
Collecting fastapi
  Using cached fastapi-0.116.0-py3-none-any.whl.metadata (28 kB)
Collecting uvicorn
  Using cached uvicorn-0.35.0-py3-none-any.whl.metadata (6.5 kB)
Collecting paddlex>=3.1.0 (from paddlex[ie,multimodal,ocr,trans]>=3.1.0->paddleocr)
  Using cached paddlex-3.1.2-py3-none-any.whl.metadata (78 kB)
Collecting httpx (from paddlepaddle)
  Using cached httpx-0.28.1-py3-none-any.whl.metadata (7.1 kB)
Collecting numpy>=1.21 (from paddlepaddle)
  Using cached numpy-2.3.1-cp311-cp311-win_amd64.whl.metadata (60 kB)
Collecting opt-einsum==3.3.0 (from paddlepaddle)
  Using cached opt_einsum-3.3.0-py3-none-any.whl.metadata (6.5 kB)
Collecting networkx (from paddlepaddle)
  Using cached networkx-3.5-py3-none-any.whl.meta

In [1]:
pip install paddleocr==2.6.1 opencv-python

Collecting paddleocr==2.6.1
  Downloading paddleocr-2.6.1.0-py3-none-any.whl.metadata (26 kB)
Collecting opencv-python
  Using cached opencv_python-4.12.0.88-cp37-abi3-win_amd64.whl.metadata (19 kB)
Collecting scikit-image (from paddleocr==2.6.1)
  Downloading scikit_image-0.25.2-cp311-cp311-win_amd64.whl.metadata (14 kB)
Collecting imgaug (from paddleocr==2.6.1)
  Using cached imgaug-0.4.0-py2.py3-none-any.whl.metadata (1.8 kB)
Collecting lmdb (from paddleocr==2.6.1)
  Downloading lmdb-1.7.2-cp311-cp311-win_amd64.whl.metadata (1.3 kB)
Collecting visualdl (from paddleocr==2.6.1)
  Using cached visualdl-2.5.3-py3-none-any.whl.metadata (25 kB)
Collecting rapidfuzz (from paddleocr==2.6.1)
  Downloading rapidfuzz-3.13.0-cp311-cp311-win_amd64.whl.metadata (12 kB)
Collecting cython (from paddleocr==2.6.1)
  Downloading cython-3.1.2-cp311-cp311-win_amd64.whl.metadata (6.0 kB)
Collecting attrdict (from paddleocr==2.6.1)
  Using cached attrdict-2.0.1-py2.py3-none-any.whl.metadata (6.7 kB)
Colle

ERROR: Could not install packages due to an OSError: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Users\\User\\AppData\\Local\\Temp\\pip-unpack-2wvlgnjj\\opencv_python_headless-4.12.0.88-cp37-abi3-win_amd64.whl'
Check the permissions.



In [7]:
from flask import Flask, request, jsonify
from paddleocr import PaddleOCR
import tempfile
import os
import cv2
import re

app = Flask(__name__)
ocr = PaddleOCR(use_angle_cls=True, lang='id')  # atau 'en' kalau struk Inggris

# -----------------------
# 🔧 PREPROCESS IMAGE
# -----------------------
def preprocess_image(image_path):
    image = cv2.imread(image_path)

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    processed_path = image_path.replace('.jpg', '_processed.jpg')
    cv2.imwrite(processed_path, thresh)

    return processed_path

# -----------------------
# 🔎 PARSE TEXT KE JSON STRUK
# -----------------------
def parse_struk(lines):
    result = {
        "tanggal": None,
        "waktu": None,
        "items": [],
        "total": None,
        "tunai": None,
        "kembali": None
    }

    item_pattern = re.compile(r'(.+?)\s+(\d+)\s+(\d{3,})\s+(\d{3,})')
    date_pattern = re.compile(r'(\d{2}.\d{2}.\d{2})[ -]+(\d{2}:\d{2})')
    money_pattern = re.compile(r'(\d{1,3}(?:,\d{3})*|\d+)')

    for line in lines:
        # Tanggal & Waktu
        date_match = date_pattern.search(line)
        if date_match:
            result["tanggal"] = date_match.group(1)
            result["waktu"] = date_match.group(2)

        # Item belanja
        item_match = item_pattern.match(line)
        if item_match:
            nama = item_match.group(1).strip()
            qty = int(item_match.group(2))
            harga = int(item_match.group(3).replace(",", ""))
            subtotal = int(item_match.group(4).replace(",", ""))
            result["items"].append({
                "nama": nama,
                "qty": qty,
                "harga": harga,
                "subtotal": subtotal
            })

        # Total
        if "TOTAL" in line.upper():
            found = money_pattern.findall(line)
            if found:
                result["total"] = int(found[-1].replace(",", ""))

        # Tunai
        if "TUNAI" in line.upper():
            found = money_pattern.findall(line)
            if found:
                result["tunai"] = int(found[-1].replace(",", ""))

        # Kembali
        if "KEMBALI" in line.upper():
            found = money_pattern.findall(line)
            if found:
                result["kembali"] = int(found[-1].replace(",", ""))

    return result

# -----------------------
# 📤 OCR ENDPOINT
# -----------------------
@app.route('/ocr', methods=['POST'])
def ocr_image():
    if 'image' not in request.files:
        return jsonify({"error": "Image file required"}), 400

    file = request.files['image']

    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as tmp:
            file.save(tmp.name)
            image_path = tmp.name

        processed_path = preprocess_image(image_path)

        result = ocr.ocr(processed_path)

        os.remove(image_path)
        os.remove(processed_path)

        # 🔍 Tampilkan hasil OCR mentah ke terminal
        lines = [line[1][0] for line in result[0]]
        print("=== OCR TEXT LINES ===")
        for line in lines:
            print(line)

        # Parsing hasil ke JSON struk
        parsed = parse_struk(lines)
        return jsonify(parsed)

    except Exception as e:
        return jsonify({"error": "OCR failed", "detail": str(e)}), 500

# -----------------------
# 🚀 RUN SERVER
# -----------------------
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5001)


  ocr = PaddleOCR(use_angle_cls=True, lang='id')  # atau 'en' kalau struk Inggris
[32mCreating model: ('PP-LCNet_x1_0_doc_ori', None)[0m
[32mUsing official model (PP-LCNet_x1_0_doc_ori), the model files will be automatically downloaded and saved in C:\Users\User\.paddlex\official_models.[0m
[32mCreating model: ('UVDoc', None)[0m
[33mThe model(UVDoc) is not supported to run in MKLDNN mode! Using `paddle` instead![0m
[32mUsing official model (UVDoc), the model files will be automatically downloaded and saved in C:\Users\User\.paddlex\official_models.[0m
[32mCreating model: ('PP-LCNet_x1_0_textline_ori', None)[0m
[32mUsing official model (PP-LCNet_x1_0_textline_ori), the model files will be automatically downloaded and saved in C:\Users\User\.paddlex\official_models.[0m
[32mCreating model: ('PP-OCRv5_server_det', None)[0m
[32mUsing official model (PP-OCRv5_server_det), the model files will be automatically downloaded and saved in C:\Users\User\.paddlex\official_models.[0

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5001
 * Running on http://10.13.231.41:5001
[2025-07-11 10:17:50,391] [    INFO] _internal.py:97 - [33mPress CTRL+C to quit[0m
  result = ocr.ocr(processed_path)
[2025-07-11 10:24:19,480] [    INFO] _internal.py:97 - 127.0.0.1 - - [11/Jul/2025 10:24:19] "POST /ocr HTTP/1.1" 200 -


=== OCR TEXT LINES ===
n
a
o
t
o
e
e
e
e
e
e
i
e
e


In [11]:
from flask import Flask, request, jsonify
from paddleocr import PaddleOCR
import tempfile
import os
import re
import cv2

app = Flask(__name__)
ocr = PaddleOCR(use_textline_orientation=True, lang='id')  # ✅ Tidak pakai cls=True

# 🔧 Preprocessing (opsional)
def preprocess_image(image_path):
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 180, 255, cv2.THRESH_BINARY)
    cv2.imwrite(image_path, thresh)
    return image_path

@app.route('/ocr', methods=['POST'])
def ocr_struk():
    if 'image' not in request.files:
        return jsonify({'error': 'No image uploaded'}), 400

    file = request.files['image']
    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp:
        filepath = tmp.name
        file.save(filepath)
        preprocess_image(filepath)

        result = ocr.ocr(filepath)
        os.remove(filepath)

    if not result:
        return jsonify({'error': 'OCR failed'}), 500

    # Ekstrak teks dari hasil OCR
    text_lines = [line[1][0] for box in result for line in box]
    items = []
    total = 0

    for line in text_lines:
        line = line.strip()

        # 🔍 Parsing item: contoh "2x Indomie Goreng 6.000"
        match = re.match(r'(\d+)x\s+(.+?)\s+([\d.,]+)', line)
        if match:
            qty = int(match.group(1))
            name = match.group(2).strip()
            price_str = match.group(3).replace('.', '').replace(',', '')  # Hapus pemisah ribuan
            try:
                price = int(price_str)
                items.append({"nama": name, "jumlah": qty, "harga": price})
            except ValueError:
                pass  # Abaikan jika format angka tidak valid

        # 🔍 Parsing total
        elif re.search(r'total', line, re.IGNORECASE):
            num = re.findall(r'[\d.,]+', line)
            if num:
                total_str = num[-1].replace('.', '').replace(',', '')
                try:
                    total = int(total_str)
                except ValueError:
                    pass

    return jsonify({
        "total": total,
        "items": items,
        "raw": text_lines  # Debugging hasil OCR mentah
    })

if __name__ == '__main__':
    app.run(debug=True)


[32mCreating model: ('PP-LCNet_x1_0_doc_ori', None)[0m
[32mUsing official model (PP-LCNet_x1_0_doc_ori), the model files will be automatically downloaded and saved in C:\Users\User\.paddlex\official_models.[0m


[32mCreating model: ('UVDoc', None)[0m
[33mThe model(UVDoc) is not supported to run in MKLDNN mode! Using `paddle` instead![0m
[32mUsing official model (UVDoc), the model files will be automatically downloaded and saved in C:\Users\User\.paddlex\official_models.[0m
[32mCreating model: ('PP-LCNet_x1_0_textline_ori', None)[0m
[32mUsing official model (PP-LCNet_x1_0_textline_ori), the model files will be automatically downloaded and saved in C:\Users\User\.paddlex\official_models.[0m
[32mCreating model: ('PP-OCRv5_server_det', None)[0m
[32mUsing official model (PP-OCRv5_server_det), the model files will be automatically downloaded and saved in C:\Users\User\.paddlex\official_models.[0m
[32mCreating model: ('latin_PP-OCRv5_mobile_rec', None)[0m
[32mUsing official model (latin_PP-OCRv5_mobile_rec), the model files will be automatically downloaded and saved in C:\Users\User\.paddlex\official_models.[0m


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
[2025-07-11 10:28:35,223] [    INFO] _internal.py:97 - [33mPress CTRL+C to quit[0m
[2025-07-11 10:28:35,224] [    INFO] _internal.py:97 -  * Restarting with stat


SystemExit: 1