## import used libraries

In [1]:
import cv2
import os
import json
import numpy as np
import random

## Handy function for showing images

In [2]:
def show(file, img):
    cv2.imshow(file, img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


## Get images and initialze result with empty array

In [240]:
path = "./data/train" # path for the images in the data
expected = "./data.json"
margin_error = 10 # error when comparing bounding boxes
files = [f for f in os.listdir(path) if f.endswith(".png")] # filter out only .png files in the given path
cnt = len(files) - 1 # Run the algorithm on all the data, change this when testing


## Function to compute the iou between two bounding boxes

In [234]:
def compute_iou(box1, box2):
    # Compute the coordinates of the intersection rectangle
    left = max(box1["left"], box2["left"])
    top = max(box1["top"], box2["top"])
    right = min(box1["left"] + box1["width"], box2["left"] + box2["width"])
    bottom = min(box1["top"] + box1["height"], box2["top"] + box2["height"])

    # If the intersection is empty, return 0
    if right <= left or bottom <= top:
        return 0.0

    # Compute the area of the intersection and union rectangles
    intersection = (right - left) * (bottom - top)
    union = box1["width"] * box1["height"] + box2["width"] * box2["height"] - intersection

    # Compute the IoU
    iou = intersection / union

    return iou

## Function to perform NMS on an array of boxes

In [235]:
def non_max_suppression(boxes, iou_threshold=0.4):
    # Sort boxes by their confidence score (e.g., area)
    boxes = sorted(boxes, key=lambda x: x["width"] * x["height"], reverse=True)

    # Initialize a list of non-overlapping boxes
    picked_boxes = []

    # Loop over boxes
    while boxes:
        # Pick the box with the highest confidence score
        picked_box = boxes.pop(0)
        picked_boxes.append(picked_box)

        # Compute the IoU between the picked box and all other boxes
        iou_scores = [compute_iou(picked_box, box) for box in boxes]

        # Remove all boxes with an IoU greater than the threshold
        boxes = [boxes[i] for i in range(len(boxes)) if iou_scores[i] < iou_threshold]

    return picked_boxes

## Detect digits using k-means clusetring

In [236]:
def k_means_clustering(img, boxes):
    # Reshape the image to a 2D array of pixels and 3 color values (RGB)
    pixel_values = img.reshape((-1, 3))
    # Convert to float type
    pixel_values = np.float32(pixel_values)

    # Define the criteria for stopping the algorithm
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 0.2)

    # Number of clusters (K)
    k = 3
    # Perform k-means clustering
    _, labels, centers = cv2.kmeans(pixel_values, k, None, criteria, 100, cv2.KMEANS_RANDOM_CENTERS)

    # Convert back to 8 bit values
    centers = np.uint8(centers)

    # Flatten the labels array
    labels = labels.flatten()

    # Convert all pixels to the color of the centroids
    segmented_image = centers[labels]

    # Reshape back to the original image dimension
    segmented_image = segmented_image.reshape(img.shape)
    
    # Find the contours of the segments
    gray = cv2.cvtColor(segmented_image, cv2.COLOR_BGR2GRAY)

    # Sharpen the image using the unsharp masking technique
    blurred = cv2.GaussianBlur(gray, (0, 0), 3)
    sharpened = cv2.addWeighted(gray, 1.5, blurred, -0.5, 0)
            
    thresh = cv2.adaptiveThreshold(sharpened, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)
    inverted = 255 - thresh
        
    # Dilate to connect nearby contours
    kernel = np.ones((1,1),np.uint8)
    dilated = cv2.dilate(thresh, kernel, iterations=1)

    # Erode to separate connected contours
    eroded = cv2.erode(dilated, kernel, iterations=1)
       
    contours, hierarchy = cv2.findContours(eroded, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    contours_inv, _ = cv2.findContours(inverted, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    
    contours = contours + contours_inv
    min_contour_area = 50
    contours = [c for c in contours if cv2.contourArea(c) > min_contour_area]
    
    # Define minimum and maximum aspect ratio for digits
    min_aspect_ratio = 0.2
    max_aspect_ratio = 2.5
    
    # Calculate image dimensions
    img_height, img_width = img.shape[:2]


    # Draw the bounding rectangle of each contour on the original image
    for cnt in contours:
        left,top,width,height = cv2.boundingRect(cnt)
        
        aspect_ratio = width / float(height)

        # Filter out regions with aspect ratio outside the desired range
        if aspect_ratio < min_aspect_ratio or aspect_ratio > max_aspect_ratio:
            continue

            
        std_dev_threshold = 15
        std_dev = cv2.meanStdDev(
            gray[top:top+height, left:left+width])[1][0][0]
        if std_dev < std_dev_threshold:
            continue
            
        # Define minimum and maximum dimensions for digits
        minWidth = int(0.02 * img_width)
        minHeight = int(0.02 * img_height)
        maxWidth = int(0.5 * img_width)
        maxHeight = int(0.95 * img_height)
        
        # filter out too small or too big objects that are not likely to contain digits
        if width < minWidth or height < minHeight or width > maxWidth or height > maxHeight:
            continue
                       
        # Append bounding box to list
        boxes.append({
            "label": 0,
            "left": left,
            "top": top,
            "width": width,
            "height": height
        })        
        
    # Append results for current file to overall results
    results.append({
        "filename": file[len(path)+1:],
        "boxes": boxes
    })



## Loop on dataset images to test the algorithms

Note: you can uncomment the show(file, img) command to see the ouput image of the algorithm but before doing that consider changing the cnt because it will perform the algorithm on the whole dataset which takes alot of time so for testing using a cnt of 10 is sufficient to see a sample output of the algorithm instead of the whole dataset

In [237]:
results = []
with open(expected) as f:
    data = json.load(f)


for i in range(cnt):
    file = f'{path}/{files[i]}'
    # Read the image
    img = cv2.imread(file)
    boxes = []    
    k_means_clustering(img, boxes)
    filtered_boxes = non_max_suppression(boxes)
    for box in boxes:
        cv2.rectangle(img,(box["left"], box["top"]),(box["left"] + box["width"], box["top"] + box["height"]),(0,0,255),1)
        
#     show(file, img)



        
# Save the data
with open("results.json", "w") as f:
    json.dump(results, f)

# Close cv2 windows if any are open
cv2.destroyAllWindows()



## Test algorithm accuracy

In [238]:
def test(file, i, correct, actual):
    expected = data[int(file[:-4]) - 1]
    res = results[i]
    actual[0] += len(expected["boxes"])
    # loop over all the boxes in the expected data and see if any matches
    # the boxes generated by the algorithm
    for exp in expected["boxes"]:
        seen = False
        for box in res["boxes"]:
            if abs(box["left"] - exp["left"]) <= margin_error \
            and abs(box["top"] - exp["top"]) <= margin_error \
            and abs(box["width"] - exp["width"]) <= margin_error \
            and abs(box["height"] - exp["height"]) <= margin_error:
                seen = True
        correct[0] += seen


            
correct_boxes = [0]
actual_boxes = [0]

for i in range(cnt):
    file = files[i]
    test(file, i, correct_boxes, actual_boxes)
        
print(f"Algorithm accuracy: {round(((correct_boxes[0]/actual_boxes[0]) * 100), 2)}%")
    

Algorithm accuracy: 59.92%
