In [1]:
import cv2
import numpy as np
import os
import cv2
from ultralytics import YOLO

In [2]:
def load_images(directory):
    """
    Load every ``.png`` / ``.jpg`` image found directly inside ``directory``.

    Files are visited in sorted filename order. ``os.listdir`` alone returns
    names in arbitrary order, which would make the position of an image in the
    returned list unpredictable; the notebook pairs this list index-by-index
    with other per-file lists, so a stable order is required.

    directory should be the path of the folder to scan (not recursive).

    Returns a list with one ``cv2.imread`` result per readable image. Files for
    which ``cv2.imread`` returns ``None`` are skipped.
    """
    images = []
    for filename in sorted(os.listdir(directory)):
        # str.endswith accepts a tuple of suffixes (case-sensitive, as before).
        if filename.endswith((".png", ".jpg")):
            img = cv2.imread(os.path.join(directory, filename))
            if img is not None:
                images.append(img)
    return images

def load_queries(directory):
    """
    Load the integer query files (``*.txt``) found directly inside ``directory``.

    Every non-blank line of a file is parsed as one integer. Each file yields a
    tuple ``(first_value, remaining_values)`` where ``remaining_values`` is a
    list of the integers that follow the first one.

    Files are visited in sorted filename order (``os.listdir`` gives no order
    guarantee) so the result can be paired index-by-index with other per-file
    lists. Blank lines are ignored instead of crashing ``int('')``.

    Raises ValueError if a line is not an integer or a file contains no values.
    """
    queries = []
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".txt"):
            continue
        path = os.path.join(directory, filename)
        with open(path, 'r') as file:
            values = [int(line) for line in file if line.strip()]
        if not values:
            # Silently skipping would shift every later query by one position.
            raise ValueError(f"Query file contains no values: {path}")
        queries.append((values[0], values[1:]))
    return queries

def load_results(directory):
    """
    Load result files from ``directory`` as one comparable string per file.

    Only files whose name ends with ``"t.txt"`` are read (this matches, for
    example, the ``NN_pt.txt`` prediction files written by this notebook). For
    each file the first non-blank line is dropped and the remaining lines are
    stripped and joined with ``" | "``.

    Files are visited in sorted filename order (``os.listdir`` gives no order
    guarantee) so that two result folders using the same numbering line up
    index-by-index. Blank lines are ignored so a trailing empty line does not
    make two otherwise equal files compare unequal.

    Returns a list of strings, one per matching file.
    """
    results = []
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith("t.txt"):
            continue
        with open(os.path.join(directory, filename), 'r') as file:
            lines = [line.strip() for line in file if line.strip()]
        # lines[0] is the header line; only the entries after it are compared.
        results.append(" | ".join(lines[1:]))
    return results

def load_lane_pins(directory):
    """
    Load comma-separated integer rows from every ``*.txt`` file in ``directory``.

    Each non-blank line such as ``"1,2,3,4"`` becomes a tuple of ints, and each
    file becomes a list of those tuples. The notebook uses the tuples as
    (xmin, ymin, xmax, ymax) boxes; this function does not check the number of
    values per line.

    Files are visited in sorted filename order (``os.listdir`` gives no order
    guarantee) so that the position of a file's rows in the result is stable.
    Blank lines are ignored instead of crashing ``int('')``.

    Returns a list (one entry per file) of lists of int tuples.
    """
    pins_list = []
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".txt"):
            continue
        with open(os.path.join(directory, filename), 'r') as file:
            pins = [
                tuple(int(num) for num in line.strip().split(','))
                for line in file
                if line.strip()
            ]
        pins_list.append(pins)
    return pins_list

def display_image(image):
    """
    Show ``image`` in an OpenCV window titled 'Image'.

    Waits for a key press before closing every OpenCV window again.
    """
    window_title = 'Image'
    cv2.imshow(window_title, image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

In [3]:
def predict(chosen_model, img, classes=None, conf=0.5):
    """
    Run ``chosen_model.predict`` on ``img`` and return whatever it returns.

    chosen_model should expose ``predict(img, conf=..., classes=...)``.
    classes is an optional collection of class ids to restrict detection to;
    when it is empty or None the ``classes`` keyword is not passed at all.
    conf is forwarded unchanged as the ``conf`` keyword.
    """
    # None instead of a shared mutable [] default; both mean "no class filter".
    options = {"conf": conf}
    if classes:
        options["classes"] = classes
    return chosen_model.predict(img, **options)

def predict_and_detect(chosen_model, img, classes=[], conf=0.5):
    """
    Run detection on ``img`` and draw every detected box and its label onto it.

    The drawing happens directly on ``img`` (it is modified in place): one
    rectangle per box plus the class name 10 pixels above the box's top-left
    corner.

    Returns the tuple ``(img, results)`` where ``results`` is what ``predict``
    returned.
    """
    draw_colour = (255, 0, 0)
    detections = predict(chosen_model, img, classes, conf=conf)

    for detection in detections:
        for box in detection.boxes:
            corners = box.xyxy[0]
            x_min, y_min = int(corners[0]), int(corners[1])
            x_max, y_max = int(corners[2]), int(corners[3])
            cv2.rectangle(img, (x_min, y_min), (x_max, y_max), draw_colour, 2)

            label = f"{detection.names[int(box.cls[0])]}"
            cv2.putText(img, label, (x_min, y_min - 10),
                        cv2.FONT_HERSHEY_PLAIN, 1, draw_colour, 1)
    return img, detections

def get_sorted_pins_position(model, image, classes=[], conf=0.5, verbose=0):
    """
    Detect objects in ``image`` and return their boxes as sorted int tuples.

    With ``verbose == 1`` the detections are drawn via ``predict_and_detect``
    and shown with ``display_image``; otherwise only ``predict`` is run.

    Returns a list of ``(left, top, right, bottom)`` tuples (coordinates
    truncated with ``int``), ordered by descending bottom edge and, for equal
    bottoms, by descending left edge.
    """
    if verbose == 1:
        annotated, detections = predict_and_detect(model, image, classes, conf)
        display_image(annotated)
    else:
        detections = predict(model, image, classes, conf)

    boxes = []
    for detection in detections:
        for box in detection.boxes:
            x_min, y_min, x_max, y_max = box.xyxy[0]
            boxes.append((int(x_min), int(y_min), int(x_max), int(y_max)))

    # Sort key is (bottom, left); reverse=True puts the largest bottom first.
    boxes.sort(key=lambda b: (b[3], b[0]), reverse=True)
    return boxes

In [4]:
def template_matching(image, template):
    """
    Match ``template`` against ``image`` with ``cv2.TM_CCOEFF_NORMED``.

    Returns ``(max_val, max_loc)``: the second and fourth values reported by
    ``cv2.minMaxLoc`` for the matching result.
    """
    score_map = cv2.matchTemplate(image, template, cv2.TM_CCOEFF_NORMED)
    # Only the maximum and its location are of interest here.
    _min_val, best_score, _min_loc, best_location = cv2.minMaxLoc(score_map)
    return best_score, best_location

def classify_lane(image, lane1_template, lane2_template, lane3_template, lane4_template):
    """
    Decide which of the four lane templates matches ``image`` best.

    Every template is scored with ``template_matching`` and the score is
    printed as ``Matching score for lane N: ...``. The lane with the highest
    score wins; on a tie the lowest lane number wins.

    A NaN score never beats a real score, so the function always returns a
    result (the previous if/elif chain could fall through and return ``None``
    when the first score was NaN, breaking callers that unpack three values).

    Returns ``(lane_number, score, location)`` with ``lane_number`` in 1..4 and
    ``score`` / ``location`` exactly as returned by ``template_matching``.
    """
    import math

    templates = (lane1_template, lane2_template, lane3_template, lane4_template)

    best = None  # (rank, lane, score, location) of the best candidate so far
    for lane, template in enumerate(templates, start=1):
        score, location = template_matching(image, template)
        print(f"Matching score for lane {lane}: {score}")

        # Rank NaN below every real score so it can only win if all are NaN.
        rank = float("-inf") if math.isnan(score) else score
        # Strict '>' keeps the earliest lane on ties, like the original chain.
        if best is None or rank > best[0]:
            best = (rank, lane, score, location)

    _, best_lane, best_score, best_location = best
    return best_lane, best_score, best_location

In [5]:
def rectangle_area(rect):
    """
    Return the area covered by an axis-aligned rectangle.

    rect is a 4-item tuple or list laid out as (xmin, ymin, xmax, ymax).
    A negative width or height is clamped to 0, so such a rectangle has area 0.
    """
    left, top, right, bottom = rect
    width = max(0, right - left)
    height = max(0, bottom - top)
    return width * height

def intersection_area(rect1, rect2):
    """
    Return the area of the overlap between two axis-aligned rectangles.

    rect1 and rect2 are 4-item tuples or lists laid out as
    (xmin, ymin, xmax, ymax). Rectangles that do not overlap yield 0.
    """
    a_left, a_top, a_right, a_bottom = rect1
    b_left, b_top, b_right, b_bottom = rect2

    # Overlap extent per axis; clamped at 0 when the rectangles are disjoint.
    overlap_width = max(0, min(a_right, b_right) - max(a_left, b_left))
    overlap_height = max(0, min(a_bottom, b_bottom) - max(a_top, b_top))

    return overlap_width * overlap_height

def find_best_matching_rectangle(target_rect, rectangles):
    """
    Find the rectangle from the list that shares the most area with the target rectangle.

    target_rect should be a tuple or list in the format (xmin, ymin, xmax, ymax).
    rectangles should be a list of tuples/lists in the same format.

    Always returns a 4-tuple
    ``(best_match, max_intersection_area, shared_percentage, lambda_buttom)``:

    - best_match: the rectangle with the largest overlap, or (0, 0, 0, 0) when
      nothing overlaps the target.
    - max_intersection_area: the overlap area with best_match.
    - shared_percentage: overlap as a percentage of the target's area; 0 when
      the target has zero area.
    - lambda_buttom: absolute difference between the bottom (ymax) edges of the
      target and best_match.

    The zero-area case used to return only three values, which made callers
    that unpack four values fail with ValueError.
    """
    max_intersection_area = 0
    best_match = (0, 0, 0, 0)

    for rect in rectangles:
        area = intersection_area(target_rect, rect)
        # Strict '>' keeps the first of several equally good candidates.
        if area > max_intersection_area:
            max_intersection_area = area
            best_match = rect

    lambda_buttom = abs(target_rect[3] - best_match[3])

    target_area = rectangle_area(target_rect)
    if target_area == 0:
        # Avoid dividing by zero for a degenerate target rectangle.
        shared_percentage = 0
    else:
        shared_percentage = (max_intersection_area / target_area) * 100

    return best_match, max_intersection_area, shared_percentage, lambda_buttom

In [6]:
def ensure_directory_exists(path):
    """
    Create the directory ``path`` (including missing parents) if needed.

    An empty ``path`` (what ``os.path.dirname`` returns for a bare file name)
    refers to the current directory and is treated as already existing.

    Uses ``exist_ok=True`` rather than a separate existence check, so there is
    no window between checking and creating. Raises ``FileExistsError`` if
    ``path`` exists but is not a directory.
    """
    if not path:
        return
    os.makedirs(path, exist_ok=True)

In [7]:
def calculate_accuracy(predicted_truths, ground_truths):
    """
    Return the fraction of predictions that equal their ground truth.

    Items are compared pairwise by position with ``==``. The denominator is
    ``len(predicted_truths)``, so predictions without a ground-truth partner
    count as wrong.

    Returns 0.0 when there are no predictions (instead of dividing by zero).
    """
    total = len(predicted_truths)
    if total == 0:
        return 0.0

    correct = sum(
        1 for prediction, truth in zip(predicted_truths, ground_truths)
        if prediction == truth
    )
    return correct / total

In [24]:
# Task 1 inputs: the training photos with their query files, and the
# per-lane full-configuration templates with their pin boxes.
task_dir = 'train/Task1'
templates_dir = f'{task_dir}/full-configuration-templates'

train_images, train_queries = load_images(task_dir), load_queries(task_dir)
lane_images, lane_pins = load_images(templates_dir), load_lane_pins(templates_dir)

In [9]:
# Build the YOLO detector from the "yolov8x.pt" weights file.
# NOTE(review): the recorded output reports detections as "bottles" and "vases",
# so the weights look like a general-purpose pretrained model — confirm.
model = YOLO("yolov8x.pt")

In [112]:
# Preview the stored pin boxes of the third lane template.
# Draw on a copy: lane_images[2] is also used as a matching template by the
# prediction loop, so painting rectangles onto it would change later results.
lane_preview = lane_images[2].copy()
for pin in lane_pins[2]:
    cv2.rectangle(lane_preview, (pin[0], pin[1]),
                  (pin[2], pin[3]), (255, 0, 0), 2)

# Show the result once, after all boxes are drawn (the window blocks until a
# key is pressed, so showing it inside the loop required one key press per pin).
display_image(lane_preview)

In [25]:
# A queried pin counts as present when the best-overlapping detection covers
# more than this percentage of the pin's reference box.
MIN_SHARED_PERCENTAGE = 27

# Built with os.path.join so the folder is the same on every OS and matches the
# "train/Task1/predited-truth" folder read back later in the notebook.
PREDICTION_DIR = os.path.join("train", "Task1", "predited-truth")
ensure_directory_exists(PREDICTION_DIR)

# Files are numbered from 1 in the order the images/queries were loaded.
for i, (image, query) in enumerate(zip(train_images, train_queries), start=1):
    # Find which lane the image was taken from.
    lane = classify_lane(image, lane_images[0], lane_images[1], lane_images[2], lane_images[3])[0]

    # Reference pin boxes of the full configuration for that lane.
    full_pins_pos = lane_pins[lane - 1]

    # Detect candidate pins in the current image. Classes 39 and 75 are the ones
    # reported as "bottles" and "vases" in the recorded output.
    pins_pos = get_sorted_pins_position(model, image, classes=[39, 75], conf=0.02, verbose=0)

    detection_list = []
    for input_pin in query[1]:
        # Queried pin numbers are 1-based indices into the lane's reference boxes.
        input_pin_pos = full_pins_pos[input_pin - 1]

        # Index 2 of the result is the shared percentage; indexing (rather than
        # unpacking) does not depend on how many values are returned.
        match = find_best_matching_rectangle(input_pin_pos, pins_pos)
        shared_percentage = match[2]

        # The additional bottom-edge offset check (< 10 px) is currently disabled.
        detection_list.append(1 if shared_percentage > MIN_SHARED_PERCENTAGE else 0)

    # Zero-padded to two digits: 01_pt.txt, 02_pt.txt, ..., 10_pt.txt, ...
    file_path = os.path.join(PREDICTION_DIR, f"{i:02d}_pt.txt")

    with open(file_path, 'w') as file:
        file.write(f"{query[0]}\n")
        for q, response in zip(query[1], detection_list):
            file.write(f"{q} {response}\n")



Matching score for lane 1: 0.5485676527023315
Matching score for lane 2: 0.5191406011581421
Matching score for lane 3: 0.6252387166023254
Matching score for lane 4: 0.5294371843338013

0: 384x640 7 bottles, 6 vases, 1867.9ms
Speed: 2.0ms preprocess, 1867.9ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)
Matching score for lane 1: 0.6415997743606567
Matching score for lane 2: 0.5492364168167114
Matching score for lane 3: 0.6739968657493591
Matching score for lane 4: 0.6053139567375183

0: 384x640 1 vase, 1893.2ms
Speed: 1.0ms preprocess, 1893.2ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)
Matching score for lane 1: 0.5710817575454712
Matching score for lane 2: 0.5454027056694031
Matching score for lane 3: 0.5569331645965576
Matching score for lane 4: 0.6997135877609253

0: 384x640 2 bottles, 5 vases, 1890.3ms
Speed: 3.1ms preprocess, 1890.3ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)
Matching score for lane 1: 0.56280285120

In [26]:
# Read back one result string per file for the predictions and the references.
results_root = "train/Task1"
predicted_truths = load_results(f"{results_root}/predited-truth")
ground_truths = load_results(f"{results_root}/ground-truth")

In [27]:
# Fraction of predicted result strings that equal the ground-truth string at
# the same list position.
acc = calculate_accuracy(predicted_truths, ground_truths)

In [28]:
# Show the accuracy as this cell's output.
acc

0.8