## DETECT OBJECTS

In [1]:
from inference_sdk import InferenceHTTPClient
from PIL import Image, ImageDraw
import pytesseract
import re

IMAGE_PATH = "image2.png"
MODEL_ID = "invoice-processing--githib/2"
API_KEY = "I9NtOOtiqQm0MWTk2dvN"
API_URL = "https://detect.roboflow.com"

CLIENT = InferenceHTTPClient(
    api_url=API_URL,
    api_key=API_KEY
)

response = CLIENT.infer(IMAGE_PATH, model_id=MODEL_ID)

print(response)

{'inference_id': 'd96c5282-f7b7-4b4a-bf4d-46c2be85723d', 'time': 0.017839892000665714, 'image': {'width': 750, 'height': 1061}, 'predictions': [{'x': 378.5, 'y': 461.5, 'width': 665.0, 'height': 255.0, 'confidence': 0.9568327069282532, 'class': 'table', 'class_id': 1, 'detection_id': '0df36607-245c-4b5c-ac5d-dbf54c6d3525'}, {'x': 144.0, 'y': 72.0, 'width': 178.0, 'height': 64.0, 'confidence': 0.9423655867576599, 'class': 'paragraph', 'class_id': 0, 'detection_id': '3d65625b-152c-447f-bebe-9e3e831e98f7'}, {'x': 577.0, 'y': 255.0, 'width': 242.0, 'height': 110.0, 'confidence': 0.9342994689941406, 'class': 'paragraph', 'class_id': 0, 'detection_id': '37dd277a-f0a9-4dad-ac02-70ba1dd23940'}, {'x': 112.5, 'y': 155.5, 'width': 113.0, 'height': 63.0, 'confidence': 0.9314045906066895, 'class': 'paragraph', 'class_id': 0, 'detection_id': 'a4e29a0a-71bd-4ff1-bc67-b376cb055c12'}, {'x': 323.0, 'y': 247.0, 'width': 128.0, 'height': 90.0, 'confidence': 0.9209174513816833, 'class': 'paragraph', 'class

## VISUALIZE

In [10]:
response["predictions"]

[{'x': 378.5,
  'y': 461.5,
  'width': 665.0,
  'height': 255.0,
  'confidence': 0.9568327069282532,
  'class': 'table',
  'class_id': 1,
  'detection_id': '0df36607-245c-4b5c-ac5d-dbf54c6d3525'},
 {'x': 144.0,
  'y': 72.0,
  'width': 178.0,
  'height': 64.0,
  'confidence': 0.9423655867576599,
  'class': 'paragraph',
  'class_id': 0,
  'detection_id': '3d65625b-152c-447f-bebe-9e3e831e98f7'},
 {'x': 577.0,
  'y': 255.0,
  'width': 242.0,
  'height': 110.0,
  'confidence': 0.9342994689941406,
  'class': 'paragraph',
  'class_id': 0,
  'detection_id': '37dd277a-f0a9-4dad-ac02-70ba1dd23940'},
 {'x': 112.5,
  'y': 155.5,
  'width': 113.0,
  'height': 63.0,
  'confidence': 0.9314045906066895,
  'class': 'paragraph',
  'class_id': 0,
  'detection_id': 'a4e29a0a-71bd-4ff1-bc67-b376cb055c12'},
 {'x': 323.0,
  'y': 247.0,
  'width': 128.0,
  'height': 90.0,
  'confidence': 0.9209174513816833,
  'class': 'paragraph',
  'class_id': 0,
  'detection_id': 'bd0ff91e-29b9-4bc5-80c5-636d9f0f1040'},
 {'

In [20]:
# Function to visualize the image with bounding boxes
def visualize_image_with_boxes(image, response):
    draw = ImageDraw.Draw(image)  # Create a drawing context

    for prediction in response['predictions']:
        x, y = prediction['x'], prediction['y']
        width, height = prediction['width'], prediction['height']
        label = prediction['class']

        # Calculate bounding box coordinates
        padding = 0.05    # adding padding since the ocr was missing a few characters sometimes so extending the dimensions of the box
        x0 = int(x - width / 2 - width*padding)
        y0 = int(y - height / 2 - height*padding)
        x1 = int(x + width / 2 + width*padding)
        y1 = int(y + height / 2 + height*padding)

        # Draw the bounding box (rectangle) around the detected object
        # red if paragraph, yellow if table
        draw.rectangle([x0, y0, x1, y1], outline='red' if label=="paragraph" else "light-green", width=3)
        draw.text((x0, y0 - 10), label, fill='red' if label=="paragraph" else "yellow")  

    # Show the image with bounding boxes
    image.show()



image = Image.open(IMAGE_PATH)
visualize_image_with_boxes(image, response)

ValueError: unknown color specifier: 'light-green'

## EXTRACT TEXTS

In [13]:
# Define regex patterns for specific fields
patterns = {
    'invoice_number': r"Invoice No[: ]?\s*([A-Za-z0-9\-]+)",
    'gstin': r"GSTIN[: ]?\s*([0-9A-Z]{15})",
    'date': r"Date[: ]?\s*([\d]{2}/[\d]{2}/[\d]{4})",  # Match date format DD/MM/YYYY
    'amount': r"Total[: ]?\s*([₹$]?\d{1,3}(?:,\d{3})*(?:\.\d{2})?)"  # Match amounts like ₹15,345.00 or $1,000.00
}

# List to store all extracted data
extracted_data = {}

# Function to perform OCR and extract specific fields using regex
def extract_data_with_regex(response, patterns, image):
    texts = []  # List to store extracted texts
    for prediction in response['predictions']:
        x, y = prediction['x'], prediction['y']
        width, height = prediction['width'], prediction['height']
        label = prediction['class']

        # Calculate bounding box coordinates
        padding = 0.05    # adding padding since the ocr was missing a few characters sometimes so extending the dimensions of the box
        x0 = int(x - width / 2 - width*padding)
        y0 = int(y - height / 2 - height*padding)
        x1 = int(x + width / 2 + width*padding)
        y1 = int(y + height / 2 + height*padding)

        # Crop the region of interest (ROI)
        roi = image.crop((x0, y0, x1, y1))

        # Perform OCR on the cropped region
        extracted_text = pytesseract.image_to_string(roi, lang='eng')  # Change 'eng' if using other languages
        texts.append({'class': label, 'bounding_box': (x0, y0, x1, y1), 'text': extracted_text.strip()})

        # Extract specific data using regex patterns
        for key, pattern in patterns.items():
            match = re.search(pattern, extracted_text)
            if match:
                extracted_data[key] = match.group(1)
                # Print extracted data here
                print(f"{key}: {match.group(1)}")

    return texts


texts = extract_data_with_regex(response, patterns, image)
for text in texts:
    print(f"Class: {text['class']}, Bounding Box: {text['bounding_box']}")
    print(f"Extracted Text:\n{text['text']}")
    print("-" * 50)

Class: table, Bounding Box: (12, 321, 744, 601)
Extracted Text:
DESCRIPTION UNIT PRICE AMOUNT

Frontend design restructure 9,999.00 9,999.00
Custom icon package 975.00 41,950.00
Gandhi mouse pad 99.00 297.00
Subtotal 12,246.00

GST 12.0% 1,469.52

TOTAL 13,715.52
--------------------------------------------------
Class: paragraph, Bounding Box: (46, 36, 241, 107)
Extracted Text:

--------------------------------------------------
Class: paragraph, Bounding Box: (443, 194, 710, 315)
Extracted Text:
INVOICE # 1N-001
INVOICE DATE 29/01/2019

POE 2430/2019
DUE DATE 26/04/2019
--------------------------------------------------
Class: paragraph, Bounding Box: (50, 120, 174, 190)
Extracted Text:

--------------------------------------------------
Class: paragraph, Bounding Box: (252, 197, 393, 296)
Extracted Text:
SHIPTO

Kavindra Mannan

264, Abdul Rehman
Mumbai, Bihar 40009
--------------------------------------------------
Class: paragraph, Bounding Box: (601, 39, 704, 145)
Extracted Text: