In [1]:
import torch
# Pick the compute device in one step: CUDA when PyTorch reports a usable GPU,
# otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Tell the reader which device was selected.
if device.type == "cuda":
    print(f"Using GPU: {torch.cuda.get_device_name(device)}")
else:
    print("CUDA not available. Using CPU.")

Using GPU: NVIDIA GeForce RTX 3060 Laptop GPU


In [None]:
from ollama_ocr import OCRProcessor

# Create the OCR processor. Any vision model available on Ollama can be
# named here.
ocr = OCRProcessor(model_name='llama3.2-vision:11b')

# Run OCR on the receipt image and show the text it returns.
# format_type options: markdown, text, json, structured, key_value
result = ocr.process_image(
    preprocess=True,
    format_type="markdown",
    image_path="cstcbill.png",
)
print(result)


**Receipt Header**
---------------

Costco Wholesale
Dallas, TX 75251

**Store Information**

* Store Name: Dallas #1266
* Address: 8055 Churchill Way, Dallas, TX 75251

**Receipt Details**

* Date: 08/10/2023
* Time: 17:46
* Amount: $81.83

**Items Purchased**

* Vita Coco 1 (1218891) - $20.99
	+ Member Number: 111974448781
* Hadley Orgn (1168335) - $8.99
	+ Price: $8.99
* Organic Gre (1059995) - $7.99
	+ Price: $7.99
* Seedless WA (4032) - $5.99
	+ Price: $5.99
* Vita Coco 1 (1218891) - $20.99
	+ Member Number: 111974448781

**Subtotal**, **Taxes**, and **Total**

* Subtotal: $81.83
* Tax (8%): $6.55
* Total: $88.38

**Payment Method**

* Payment Type: Visa
* Credit Card Number: 1234-5678-9012-3456
* Expiration Date: 12/2025
* Security Code: 987

**Approval Status**

* Approved - Purchase


In [7]:
from ollama_ocr import OCRProcessor
import torch

# Report which device PyTorch can see. This is informational only: the
# OCRProcessor below is created without any device argument.
if torch.cuda.is_available():
    print(f"Using GPU: {torch.cuda.get_device_name(torch.cuda.current_device())}")
else:
    print("CUDA not available. The code will use CPU.")

# Drop any value left over from an earlier run of this or another cell, so a
# failure below cannot be masked by stale OCR text being picked up downstream.
result = None

# Initialize OCR processor (no explicit device configuration)
try:
    ocr = OCRProcessor(
        model_name='llama3.2-vision:11b'  # Model initialization
    )
except Exception as e:
    print(f"Error initializing OCR Processor: {e}")
else:
    print("OCR Processor initialized successfully.")

    # Process the image only when initialization succeeded. Running this step
    # after a failed initialization would either hit an undefined `ocr` or
    # silently reuse a processor created by an earlier cell.
    try:
        result = ocr.process_image(
            image_path="cstcbill.png",
            format_type="key_value",  # Options: markdown, text, json, structured, key_value
            preprocess=True
        )
    except Exception as e:
        print(f"Error during OCR processing: {e}")
    else:
        print("OCR Result:")
        print(result)


Using GPU: NVIDIA GeForce RTX 3060 Laptop GPU
OCR Processor initialized successfully.
OCR Result:
Here is the extracted text:

* **COSTCO WHOLESALE**: DALLAS #1266, 8055 CHURCHILL WAY, DALLAS, TX 75251
* **SUBTOTAL TAX**: $81.83
* **APPROVED PURCHASE AMOUNT**: $81.83
* **VISA**: 08/10/2023, 17:46, 12669396
* **CHIP READ**: None
* **MEMBER ID**: 111974448781
* **ITEMS**:
	+ VITA COCO 1: $20.99
	+ HADLEY ORGANIC: $8.99
	+ ORGANIC COCONUT: $7.89
	+ SEEDLESS WATERMELON: $5.99
	+ VITA COCO 1: $20.99
	+ ORGANIC GREEN TEA: $7.99
	+ KANSAS ORGANIC WHOLE: $8.99

Please note that the "CHIP READ" field is blank, indicating no chip read data was captured.


In [8]:
import json

# Raw OCR result: alias the `result` variable assigned by the OCR cell that ran
# before this one, so the parsing code works on a descriptively named value.
# This cell therefore requires that OCR cell to have been executed first.
ocr_result = result

# Parsing the OCR result
def parse_ocr_result(ocr_text):
    """Parse key/value style OCR text of a receipt into a dictionary.

    Lines are recognised by the bold markers they contain (for example
    ``**SUBTOTAL TAX**``); the text after the first ``": "`` on such a line
    becomes the value. Lines whose first non-whitespace characters are
    ``"+ "`` are treated as purchased items of the form ``+ NAME: PRICE``.
    Every other line is ignored.

    Args:
        ocr_text: The OCR output as a single string, one entry per line.

    Returns:
        A dict with the string fields ``store``, ``location``,
        ``subtotal_tax``, ``approved_purchase_amount``, ``visa``,
        ``chip_read`` and ``member_id`` (empty string when not found), plus
        ``items``, a list of ``{"name": ..., "price": ...}`` dicts in the
        order they appear. A recognised line without a ``": "`` separator
        yields an empty value instead of raising.
    """
    # Marker text -> output key for the plain "label: value" lines. The order
    # mirrors the precedence used when a line contains several markers.
    field_markers = (
        ("**SUBTOTAL TAX**", "subtotal_tax"),
        ("**APPROVED PURCHASE AMOUNT**", "approved_purchase_amount"),
        ("**VISA**", "visa"),
        ("**CHIP READ**", "chip_read"),
        ("**MEMBER ID**", "member_id"),
    )
    item_bullet = "+ "

    data = {
        "store": "",
        "location": "",
        "subtotal_tax": "",
        "approved_purchase_amount": "",
        "visa": "",
        "chip_read": "",
        "member_id": "",
        "items": []
    }

    def value_after_separator(text):
        # Keep everything after the FIRST ": " so values that contain ": "
        # themselves are not truncated; a missing separator gives "".
        _, _, value = text.partition(": ")
        return value.strip()

    for raw_line in ocr_text.strip().splitlines():
        # Strip indentation first: item lines may be indented with tabs, and a
        # leading tab would otherwise keep the "+ " bullet from being removed.
        line = raw_line.strip()

        if "**COSTCO WHOLESALE**" in line:
            # The store name is fixed; only the location is read from the line.
            data["store"] = "COSTCO WHOLESALE"
            data["location"] = value_after_separator(line)
            continue

        matched_key = next(
            (key for marker, key in field_markers if marker in line), None
        )
        if matched_key is not None:
            data[matched_key] = value_after_separator(line)
            continue

        if line.startswith(item_bullet):
            entry = line[len(item_bullet):]
            # Split on the LAST ": " so a name containing ": " stays intact.
            name, separator, price = entry.rpartition(": ")
            if not separator:
                # No price on this line: keep the name, leave the price empty.
                name, price = entry, ""
            data["items"].append({"name": name.strip(), "price": price.strip()})

    return data

# Turn the raw OCR text into a structured record.
parsed_data = parse_ocr_result(ocr_result)

# Write the record to disk as JSON indented by four spaces.
with open("costco_bill.json", "w") as json_file:
    json_file.write(json.dumps(parsed_data, indent=4))

print("Parsed data saved to costco_bill.json")


Parsed data saved to costco_bill.json
