In [2]:
import cv2
import numpy as np

from openai import OpenAI
import os
import base64

from dotenv import load_dotenv

load_dotenv()
client = OpenAI(api_key=os.getenv("TMG_OpenAI_API"))

def encode_image(image_path):
    _, buffer = cv2.imencode(".jpg", image_path)
    return base64.b64encode(buffer).decode('utf-8')


def readNumber (base64_image):
    response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
        "role": "user",
        "content": [
            {"type": "text", "text": "This image might have a number in it. Can you tell me what number it is and only that number? if there is no number can you respond with no number. Numbers maybe rotated, if the numbers are rotated can you respond with no number?"},
            {
            "type": "image_url",
            "image_url": {
                "url": f"data:image/jpeg;base64,{base64_image}",
            },
            },
        ],
        }
    ],
    max_tokens=300,
    )

    # print(response.choices[0].message.content)
    return (response.choices[0].message.content)


# Function to convert normalized coordinates to pixel coordinates
def normalized_to_pixel_coords(x_center, y_center, width, height, img_width, img_height):
    x_center_pixel = int(x_center * img_width)
    y_center_pixel = int(y_center * img_height)
    width_pixel = int(width * img_width)
    height_pixel = int(height * img_height)
    
    x_min = int(x_center_pixel - width_pixel / 2)
    y_min = int(y_center_pixel - height_pixel / 2)
    x_max = int(x_center_pixel + width_pixel / 2)
    y_max = int(y_center_pixel + height_pixel / 2)
    
    return x_min, y_min, x_max, y_max

# Open the file and read the bounding box coordinates
bounding_boxes_file = 'datasets/dataset9/labels/train/02483012_CAD_D3-005.024-0001_00_EN_-00001.txt'

# Load your image
image_path = 'datasets/dataset9/images/train/02483012_CAD_D3-005.024-0001_00_EN_-00001.png'
image = cv2.imread(image_path)
img_height, img_width = image.shape[:2]

dimensions = []
dimCount = 1

with open(bounding_boxes_file, 'r') as file:
    for line in file:
        parts = line.strip().split()
        class_id, x_center, y_center, width, height = map(float, parts)
        
        # Convert normalized coordinates to pixel coordinates
        x_min, y_min, x_max, y_max = normalized_to_pixel_coords(
            x_center, y_center, width, height, img_width, img_height
        )
        
        # Crop the region of interest (ROI) from the image
        roi = image[y_min:y_max, x_min:x_max]

        # Resize image 
        resized_image = cv2.resize(roi, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
        # print("Image shape:", resized_image.shape)

        #do thresholding
        gray = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
        _, thresholded_image = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        # encode image
        encoded_img = encode_image(thresholded_image)

        # print the value that openai reads
        dim = readNumber(encoded_img)
        
        try:
        # Try to convert the result to a float
            number = float(dim)
            # print(number)
            dimensions.append(number)  # Append to the array if successful
        except ValueError:
        # Handle the case where dim is not a number (e.g., it's a string)
            # print(f"Value returned is not a number: {dim}")
            # dimensions.append(f"{label_gen}_n/a")

        # rotating the image

            rotatedImg = cv2.rotate(thresholded_image, cv2.ROTATE_90_CLOCKWISE)
            encoded_img_rot = encode_image(rotatedImg)

            dim_rotate = readNumber(encoded_img_rot)
            dimensions.append(dim_rotate)

        # Draw the bounding box on the image
        color = (0, 255, 0)  # Green color for the bounding box
        thickness = 2       # Thickness of the bounding box line
        cv2.rectangle(image, (x_min, y_min), (x_max, y_max), color, thickness)

        # Define label position (slightly above the top-left corner of the bounding box)
        label_position_gen = (x_min, y_min - 10) if y_min - 10 > 10 else (x_min, y_min + 20)
        label_gen = f'{dimCount}'
        # print(label_gen)
        dimCount +=1

        # Add label to the image
        cv2.putText(image, label_gen, label_position_gen, cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

# Save or display the result
cv2.imwrite('image_with_boxes.jpg', image)
print(dimensions)
# cv2.imshow('Image with Bounding Boxes', image)
# cv2.waitKey(0)
# cv2.destroyAllWindows()


['2', 7.0, '45', '25', 1.5, '2', 25.0, 6698.0, '5', 35.0, 0.3, 7.0, 5.0, 45.48, 0.0, '56249', '555', '530', '480', 430.0, 9.5, '2', '260', '210', 101.0, '25', '0', 0.0, 12.0, 47.98, 48.98, 66.98, 'No number.', 'No number.']


put into excel

In [21]:
from openpyxl import load_workbook

# Load the existing Excel file and select the worksheet
file_path = "template.xlsx"
wb = load_workbook(filename=file_path)
ws = wb.active

print(dimensions)
# Arrays to populate
ids = list(range(1, len(dimensions) + 1))

# Define the starting row and columns
start_row = 8  # Starting from row 5
id_col =1     # Column B for 'ID'
value_col = 2  # Column C for 'Value'

# Populate the columns with a blank row in between
for i, (id_val, value) in enumerate(zip(ids, dimensions)):
    current_row = start_row + i * 2  # Increment by 2 for each entry
    print(f"{current_row}")
    
    ws.cell(row=current_row, column=id_col, value=id_val)   # Write ID in column B
    
    ws.cell(row=current_row, column=value_col, value=value) # Write Value in column C

# Save the workbook
wb.save(file_path)



8
10
12
14
16
18
20
22
24
26
28
30
32
34
36
38
40
42
44
46
48
50
52
54
56
58
60
62
64
66
68
70
72
74
