# Code snippet to check if your local machine has GPU


In [1]:
import torch
device = None
if torch.cuda.is_available():
    device = torch.device("cuda")  # Use GPU if available
    print(f"Using GPU: {torch.cuda.get_device_name(device)}")
else:
    device = torch.device("cpu")  # Fallback to CPU
    print("CUDA not available. Using CPU.")

Using GPU: NVIDIA GeForce RTX 3060 Laptop GPU


In [4]:
import subprocess

def execute_command(command: str):
    """
    Executes a command in the command prompt (Windows) or shell (Linux/macOS) and returns the output.

    :param command: The command to execute as a string.
    :return: The command output as a string.
    """
    try:
        result = subprocess.run(command, shell=True, text=True, capture_output=True)
        return result.stdout if result.stdout else result.stderr
    except Exception as e:
        return str(e)


NAME                   ID              SIZE      MODIFIED     
llama3.2-vision:11b    085a1fdae525    7.9 GB    8 hours ago     
deepseek-r1:14b        ea35dfe18182    9.0 GB    17 hours ago    
phi4:latest            ac896e5b8b34    9.1 GB    17 hours ago    



In [6]:
import requests
import socket
from ollama_ocr import OCRProcessor

def get_local_machine_details():
    """
    Retrieve details about the local machine.
    """
    try:
        hostname = socket.gethostname()
        ip_address = socket.gethostbyname(hostname)
        print(f"üìü Local Machine Name: {hostname}")
        print(f"üåê Local IP Address: {ip_address}")
        return hostname, ip_address
    except Exception as e:
        print(f"‚ùå Error retrieving local machine details: {e}")
        return None, None

def process_image_with_ollama(image_path, model_name="llama3.2-vision:11b"):
    """
    Process an image using the OCRProcessor after verifying the Ollama server.
    """
    # Retrieve local machine details
    get_local_machine_details()

    try:
        # Initialize OCRProcessor
        print(f"üöÄ Using model: {model_name}")
        ocr = OCRProcessor(model_name=model_name)

        # Process the image
        result = ocr.process_image(
            image_path=image_path,
            format_type="key_value",  # Output format: markdown
            preprocess=True          # Preprocess the image
        )
        print("üìù OCR Result:")
        print(result)
        return  result

    except requests.exceptions.ConnectionError:
        print("‚ùå Could not connect to the Ollama server. Please ensure it is running and accessible.")
    except Exception as e:
        print(f"‚ùå An error occurred during image processing: {e}")

# Run the enhanced code
result = process_image_with_ollama("cstcbill.png")


üìü Local Machine Name: SriluRoop
üåê Local IP Address: 100.64.52.177
üöÄ Using model: llama3.2-vision:11b
üìù OCR Result:
Error processing image: 404 Client Error: Not Found for url: http://localhost:11434/api/generate


In [8]:
import json

# Raw OCR result
ocr_result = result

# Parsing the OCR result
def parse_ocr_result(ocr_text):
    # Split lines and initialize data structure
    lines = ocr_text.strip().split("\n")
    data = {
        "store": "",
        "location": "",
        "subtotal_tax": "",
        "approved_purchase_amount": "",
        "visa": "",
        "chip_read": "",
        "member_id": "",
        "items": []
    }

    # Loop through lines to parse data
    for line in lines:
        if "**COSTCO WHOLESALE**" in line:
            parts = line.split(": ")
            data["store"] = "COSTCO WHOLESALE"
            data["location"] = parts[1].strip()
        elif "**SUBTOTAL TAX**" in line:
            data["subtotal_tax"] = line.split(": ")[1].strip()
        elif "**APPROVED PURCHASE AMOUNT**" in line:
            data["approved_purchase_amount"] = line.split(": ")[1].strip()
        elif "**VISA**" in line:
            data["visa"] = line.split(": ")[1].strip()
        elif "**CHIP READ**" in line:
            data["chip_read"] = line.split(": ")[1].strip()
        elif "**MEMBER ID**" in line:
            data["member_id"] = line.split(": ")[1].strip()
        elif "+ " in line:
            # Parse items
            item_parts = line.split(": ")
            item_name = item_parts[0].strip("+ ").strip()
            item_price = item_parts[1].strip()
            data["items"].append({"name": item_name, "price": item_price})

    return data

# Parse the OCR result
parsed_data = parse_ocr_result(ocr_result)

# Save to JSON file
with open("costco_bill.json", "w") as json_file:
    json.dump(parsed_data, json_file, indent=4)

print("Parsed data saved to costco_bill.json")


Parsed data saved to costco_bill.json
