## Import the required libraries

In [1]:
import cv2
import os
import json
import numpy as np
import random

## Handy function for showing images

In [2]:
def show(file, img):
    """Display an image in an OpenCV window and block until a key is pressed.

    Args:
        file: Text used as the window name.
        img: Image array handed to ``cv2.imshow``.
    """
    window_title, pixels = file, img
    cv2.imshow(window_title, pixels)
    # A delay of 0 makes waitKey wait indefinitely for a key press.
    cv2.waitKey(0)
    cv2.destroyAllWindows()


## Get the image list and define the run configuration

In [50]:
# Directory holding the training images and the JSON file with the expected boxes.
path = "./data/train"
expected = "./data/data.json"

# Maximum absolute difference (in pixels) allowed on each of left/top/width/height
# for a detected box to count as matching an expected box.
margin_error = 10

# os.listdir returns entries in arbitrary order; sort so the starting order is stable.
files = sorted(f for f in os.listdir(path) if f.endswith(".png"))

# Number of images to process. Capped at the number of available images so that
# later `files[i] for i in range(cnt)` lookups cannot run past the end of the list.
cnt = min(2000, len(files))


## Define the Algorithm to detect digits using contours

In [28]:
def detect_digits(file, boxes):
    """Detect digit-like regions in an image using contour analysis.

    The image is converted to grayscale, histogram-equalized, sharpened with an
    unsharp mask and adaptively thresholded. External contours of the
    thresholded image and of its inverse are filtered by area, aspect ratio,
    horizontal position, size and local contrast. Each surviving bounding
    rectangle is appended to ``boxes``, and one record for the file is appended
    to the module-level ``results`` list.

    Args:
        file: Path of the image to process.
        boxes: List that receives one dict per detected region with the keys
            "label", "left", "top", "width" and "height". Mutated in place.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot load ``file``.
    """
    # Load the image and convert to grayscale
    img = cv2.imread(file)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise FileNotFoundError(f"Could not read image: {file}")
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Increase image contrast using histogram equalization
    gray = cv2.equalizeHist(gray)

    # Sharpen the image using the unsharp masking technique
    blurred = cv2.GaussianBlur(gray, (0, 0), 3)
    sharpened = cv2.addWeighted(gray, 1.5, blurred, -0.5, 0)

    # Apply adaptive thresholding to the image
    binary = cv2.adaptiveThreshold(
        sharpened, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 11, 2)
    inverted = 255 - binary

    # Find contours in both polarities so that dark-on-light and light-on-dark
    # regions are both considered
    contours, _ = cv2.findContours(
        binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours_inv, _ = cv2.findContours(
        inverted, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Filter out small contours
    min_contour_area = 50
    candidates = [
        c for c in list(contours) + list(contours_inv)
        if cv2.contourArea(c) > min_contour_area
    ]

    # Minimum and maximum aspect ratio (width / height) for digits
    min_aspect_ratio = 0.2
    max_aspect_ratio = 2.5

    # Minimum standard deviation of the gray levels inside a box
    std_dev_threshold = 4

    # All limits below depend only on the image size, so compute them once
    img_height, img_width = img.shape[:2]

    # The digits are likely to be in the middle of the image, so contours that
    # reach into the outer 10% on the left or right are discarded
    left_limit = int(0.1 * img_width)
    right_limit = int(0.9 * img_width)

    # Minimum and maximum dimensions for digits
    min_width = int(0.04 * img_width)
    min_height = int(0.04 * img_height)
    max_width = int(0.27 * img_width)
    max_height = int(0.95 * img_height)

    for contour in candidates:
        left, top, width, height = cv2.boundingRect(contour)

        # The filters are independent rejections, so the cheap geometric checks
        # run first and the pixel statistics are only computed for survivors.
        aspect_ratio = width / float(height)
        if aspect_ratio < min_aspect_ratio or aspect_ratio > max_aspect_ratio:
            continue

        if left < left_limit or (left + width) > right_limit:
            continue

        if width < min_width or height < min_height or width > max_width or height > max_height:
            continue

        # Reject nearly uniform regions
        std_dev = cv2.meanStdDev(
            gray[top:top+height, left:left+width])[1][0][0]
        if std_dev < std_dev_threshold:
            continue

        # Append bounding box to list
        boxes.append({
            "label": 0,
            "left": left,
            "top": top,
            "width": width,
            "height": height
        })

        # Draw the accepted box on the image (only visible via the debug view below)
        cv2.rectangle(img, (left, top), (left + width,
                      top + height), (0, 0, 255), 1)

    # Debug view: uncomment to display the image with the accepted boxes
    # show(file, img)

    # Append results for current file to overall results. The file name is taken
    # from the path itself, so this does not depend on the global `path`.
    results.append({
        "filename": os.path.basename(file),
        "boxes": boxes
    })

    

## Define the algorithm to detect digits using k-means clustering

In [51]:
def k_means_clustering(file, boxes, seed=None):
    """Detect digit-like regions after quantizing the image colors with k-means.

    The pixels are clustered into three colors, the quantized image is
    converted to grayscale, sharpened with an unsharp mask and adaptively
    thresholded. Contours of the thresholded image are filtered by aspect
    ratio, size and local contrast. Each surviving bounding rectangle is
    appended to ``boxes``, and one record for the file is appended to the
    module-level ``results`` list.

    Args:
        file: Path of the image to process.
        boxes: List that receives one dict per detected region with the keys
            "label", "left", "top", "width" and "height". Mutated in place.
        seed: Optional integer. When given, it is passed to
            ``cv2.setRNGSeed`` before clustering so that the randomly chosen
            initial centers are repeatable. The default ``None`` leaves
            OpenCV's random state untouched.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot load ``file``.
    """
    # Read the image
    img = cv2.imread(file)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise FileNotFoundError(f"Could not read image: {file}")

    if seed is not None:
        cv2.setRNGSeed(seed)

    # Reshape the image to a 2D array of pixels with 3 color values each,
    # as float32 (the type cv2.kmeans is given here)
    pixel_values = np.float32(img.reshape((-1, 3)))

    # Stop after 10 iterations or when the requested accuracy (0.2) is reached
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 0.2)

    # Number of clusters (K)
    k = 3
    # Perform k-means clustering (100 attempts with random initial centers)
    _, labels, centers = cv2.kmeans(pixel_values, k, None, criteria, 100, cv2.KMEANS_RANDOM_CENTERS)

    # Convert the centers back to 8 bit values and paint every pixel with the
    # color of its centroid, restoring the original image shape
    centers = np.uint8(centers)
    segmented_image = centers[labels.flatten()].reshape(img.shape)

    gray = cv2.cvtColor(segmented_image, cv2.COLOR_BGR2GRAY)

    # Sharpen the image using the unsharp masking technique
    blurred = cv2.GaussianBlur(gray, (0, 0), 3)
    sharpened = cv2.addWeighted(gray, 1.5, blurred, -0.5, 0)

    thresh = cv2.adaptiveThreshold(sharpened, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)

    # Find the contours of the segments
    contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    # Minimum and maximum aspect ratio (width / height) for digits
    min_aspect_ratio = 0.2
    max_aspect_ratio = 2.5

    # Minimum standard deviation of the gray levels inside a box
    std_dev_threshold = 15

    # The size limits depend only on the image size, so compute them once
    img_height, img_width = img.shape[:2]
    min_width = int(0.04 * img_width)
    min_height = int(0.04 * img_height)
    max_width = int(0.5 * img_width)
    max_height = int(0.95 * img_height)

    for contour in contours:
        left, top, width, height = cv2.boundingRect(contour)

        # The filters are independent rejections, so the cheap geometric checks
        # run first and the pixel statistics are only computed for survivors.
        aspect_ratio = width / float(height)
        if aspect_ratio < min_aspect_ratio or aspect_ratio > max_aspect_ratio:
            continue

        if width < min_width or height < min_height or width > max_width or height > max_height:
            continue

        # Reject nearly uniform regions
        std_dev = cv2.meanStdDev(
            gray[top:top+height, left:left+width])[1][0][0]
        if std_dev < std_dev_threshold:
            continue

        # Append bounding box to list
        boxes.append({
            "label": 0,
            "left": left,
            "top": top,
            "width": width,
            "height": height
        })

        # Draw the accepted box on the image (only visible via the debug view below)
        cv2.rectangle(img, (left, top), (left + width, top + height), (0, 0, 255), 1)

    # Append results for current file to overall results. The file name is taken
    # from the path itself, so this does not depend on the global `path`.
    results.append({
        "filename": os.path.basename(file),
        "boxes": boxes
    })

    # Debug view: uncomment to display the image with the accepted boxes
    # show(file, img)



## Loop on some files to test the algorithms

In [52]:
results = []
rand_files = []  # not used by the active code path; kept so the cell defines the same names

# Make the sample reproducible: os.listdir order is arbitrary, so sort first,
# then shuffle with a fixed seed. Change the seed to draw a different sample.
files.sort()
random.seed(0)
random.shuffle(files)

# Expected boxes, used by the accuracy test further down
with open(expected) as f:
    data = json.load(f)

# Slicing (instead of indexing with range(cnt)) cannot run past the end of
# `files` when fewer than `cnt` images are available.
for name in files[:cnt]:
    file = f'{path}/{name}'
    boxes = []
    k_means_clustering(file, boxes)
    # Alternative detector:
    # detect_digits(file, boxes)

# Save the detected boxes of every processed file
with open("results.json", "w") as f:
    json.dump(results, f)

cv2.destroyAllWindows()



## Test algorithm accuracy

In [49]:
def test(file, i, correct, actual):
    expected = data[int(file[:-4]) - 1]
    res = results[i]
    actual[0] += len(expected["boxes"])
    

    for exp in expected["boxes"]:
        seen = False
        for box in res["boxes"]:
            if abs(box["left"] - exp["left"]) <= margin_error \
            and abs(box["top"] - exp["top"]) <= margin_error \
            and abs(box["width"] - exp["width"]) <= margin_error \
            and abs(box["height"] - exp["height"]) <= margin_error:
                seen = True
        correct[0] += seen


            
# One-element lists so that test() can update the counters in place
correct_boxes = [0]
actual_boxes = [0]

# Only score images that were actually processed: never index past the end of
# `files` or `results`.
n_evaluated = min(cnt, len(files), len(results))
for i in range(n_evaluated):
    file = files[i]
    test(file, i, correct_boxes, actual_boxes)

print(correct_boxes[0], actual_boxes[0])

# Share of expected boxes that were found; guard against an empty evaluation
if actual_boxes[0]:
    print(f"Algorithm accuracy: {(correct_boxes[0]/actual_boxes[0]) * 100}%")
else:
    print("Algorithm accuracy: n/a (no expected boxes were evaluated)")
    

586 1078
Algorithm accuracy: 54.35992578849722%
