## DETECT OBJECTS

In [45]:
from inference_sdk import InferenceHTTPClient
from PIL import Image, ImageDraw
import pytesseract
import re

IMAGE_PATH = "image2.png"
MODEL_ID = "invoice-processing--githib/2"
API_KEY = "I9NtOOtiqQm0MWTk2dvN"
API_URL = "https://detect.roboflow.com"

CLIENT = InferenceHTTPClient(
    api_url=API_URL,
    api_key=API_KEY
)

response = CLIENT.infer(IMAGE_PATH, model_id=MODEL_ID)

print(response)

{'inference_id': '4d0fbf82-3b99-4d10-ba80-7ac59b0f980a', 'time': 0.018174281000028714, 'image': {'width': 750, 'height': 1061}, 'predictions': [{'x': 378.5, 'y': 461.5, 'width': 665.0, 'height': 255.0, 'confidence': 0.9568327069282532, 'class': 'table', 'class_id': 1, 'detection_id': 'a666be8c-2446-4fe4-a3be-422bad2da396'}, {'x': 144.0, 'y': 72.0, 'width': 178.0, 'height': 64.0, 'confidence': 0.9423655867576599, 'class': 'paragraph', 'class_id': 0, 'detection_id': '132374cc-6001-462a-b209-0664d282aa63'}, {'x': 577.0, 'y': 255.0, 'width': 242.0, 'height': 110.0, 'confidence': 0.9342994689941406, 'class': 'paragraph', 'class_id': 0, 'detection_id': '6bcf7400-cbc0-47fa-bac0-e2e2f1aa20fd'}, {'x': 112.5, 'y': 155.5, 'width': 113.0, 'height': 63.0, 'confidence': 0.9314045906066895, 'class': 'paragraph', 'class_id': 0, 'detection_id': 'e02680e3-5580-4cab-b7a5-9f0db8f667da'}, {'x': 323.0, 'y': 247.0, 'width': 128.0, 'height': 90.0, 'confidence': 0.9209174513816833, 'class': 'paragraph', 'class

## VISUALIZE

In [46]:
# Function to visualize the image with bounding boxes
def visualize_image_with_boxes(image, response):
    draw = ImageDraw.Draw(image)  # Create a drawing context

    for prediction in response['predictions']:
        x, y = prediction['x'], prediction['y']
        width, height = prediction['width'], prediction['height']
        label = prediction['class']

        # Calculate bounding box coordinates
        x0 = int(x - width / 2)
        y0 = int(y - height / 2)
        x1 = int(x + width / 2)
        y1 = int(y + height / 2)

        # Draw the bounding box (rectangle) around the detected object
        draw.rectangle([x0, y0, x1, y1], outline='red', width=3)
        draw.text((x0, y0 - 10), label, fill='red')  # Optionally display the label

    # Show the image with bounding boxes
    image.show()



image = Image.open(IMAGE_PATH)
visualize_image_with_boxes(image, response)

## EXTRACT TEXTS

In [47]:
# Define regex patterns for specific fields
patterns = {
    'invoice_number': r"Invoice No[: ]?\s*([A-Za-z0-9\-]+)",
    'gstin': r"GSTIN[: ]?\s*([0-9A-Z]{15})",
    'date': r"Date[: ]?\s*([\d]{2}/[\d]{2}/[\d]{4})",  # Match date format DD/MM/YYYY
    'amount': r"Total[: ]?\s*([₹$]?\d{1,3}(?:,\d{3})*(?:\.\d{2})?)"  # Match amounts like ₹15,345.00 or $1,000.00
}

# List to store all extracted data
extracted_data = {}

# Function to perform OCR and extract specific fields using regex
def extract_data_with_regex(response, patterns, image):
    texts = []  # List to store extracted texts
    for prediction in response['predictions']:
        x, y = prediction['x'], prediction['y']
        width, height = prediction['width'], prediction['height']
        label = prediction['class']

        # Calculate bounding box coordinates
        x0 = int(x - width / 2)
        y0 = int(y - height / 2)
        x1 = int(x + width / 2)
        y1 = int(y + height / 2)

        # Crop the region of interest (ROI)
        roi = image.crop((x0, y0, x1, y1))

        # Perform OCR on the cropped region
        extracted_text = pytesseract.image_to_string(roi, lang='eng')  # Change 'eng' if using other languages
        texts.append({'class': label, 'bounding_box': (x0, y0, x1, y1), 'text': extracted_text.strip()})

        # Extract specific data using regex patterns
        for key, pattern in patterns.items():
            match = re.search(pattern, extracted_text)
            if match:
                extracted_data[key] = match.group(1)
                # Print extracted data here
                print(f"{key}: {match.group(1)}")

    return texts


texts = extract_data_with_regex(response, patterns, image)
for text in texts:
    print(f"Class: {text['class']}, Bounding Box: {text['bounding_box']}")
    print(f"Extracted Text:\n{text['text']}")
    print("-" * 50)

Class: table, Bounding Box: (46, 334, 711, 589)
Extracted Text:
DESCRIPTION UNIT PRICE AMOUNT

Frontend design restructure 9,999.00 9,999.00
Custom icon package 975.00 41,950.00
Gandhi mouse pad 99.00 297.00
Subtotal 12,246.00

GST 12.0% 1,469.52

TOTAL 13,715.52
--------------------------------------------------
Class: paragraph, Bounding Box: (55, 40, 233, 104)
Extracted Text:

--------------------------------------------------
Class: paragraph, Bounding Box: (456, 200, 698, 310)
Extracted Text:

--------------------------------------------------
Class: paragraph, Bounding Box: (56, 124, 169, 187)
Extracted Text:
paffron Design

17 Namrata Bldg
Delhi, Delhi 400077
--------------------------------------------------
Class: paragraph, Bounding Box: (259, 202, 387, 292)
Extracted Text:

--------------------------------------------------
Class: paragraph, Bounding Box: (606, 44, 700, 141)
Extracted Text:

--------------------------------------------------
Class: paragraph, Bounding Box: (