## import used libraries

In [3]:
import cv2
import os
import json
import numpy as np
import random

## Handy function for showing images

In [4]:
def show(file, img):
    cv2.imshow(file, img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


## Define some important global variables including the image files

In [5]:
path = os.path.join(".", "data", "train")
expected = os.path.join(".", "data.json")
margin_error = 10 # error when comparing bounding boxes
files = [f for f in os.listdir(path) if f.endswith(".png")] # filter out only .png files in the given path
cnt = len(files) - 1 # Run the algorithm on all the data, change this when testing


## Function to compute the iou between two bounding boxes

In [4]:
def compute_iou(box1, box2):
    # Compute the coordinates of the intersection rectangle
    left = max(box1["left"], box2["left"])
    top = max(box1["top"], box2["top"])
    right = min(box1["left"] + box1["width"], box2["left"] + box2["width"])
    bottom = min(box1["top"] + box1["height"], box2["top"] + box2["height"])

    # If the intersection is empty, return 0
    if right <= left or bottom <= top:
        return 0.0

    # Compute the area of the intersection and union rectangles
    intersection = (right - left) * (bottom - top)
    union = box1["width"] * box1["height"] + box2["width"] * box2["height"] - intersection

    # Compute the IoU
    iou = intersection / union

    return iou

## Function to perform NMS on an array of boxes

In [5]:
def non_max_suppression(boxes, iou_threshold=0.4):
    # Sort boxes by their confidence score (e.g., area)
    boxes = sorted(boxes, key=lambda x: x["width"] * x["height"], reverse=True)

    # Initialize a list of non-overlapping boxes
    picked_boxes = []

    # Loop over boxes
    while boxes:
        # Pick the box with the highest confidence score
        picked_box = boxes.pop(0)
        picked_boxes.append(picked_box)

        # Compute the IoU between the picked box and all other boxes
        iou_scores = [compute_iou(picked_box, box) for box in boxes]

        # Remove all boxes with an IoU greater than the threshold
        boxes = [boxes[i] for i in range(len(boxes)) if iou_scores[i] < iou_threshold]

    return picked_boxes

## Detect digits using k-means clusetring

In [6]:
def k_means_clustering(img, boxes):
    # Reshape the image to a 2D array of pixels and 3 color values (RGB)
    pixel_values = img.reshape((-1, 3))
    # Convert to float type
    pixel_values = np.float32(pixel_values)

    # Define the criteria for stopping the algorithm
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 0.2)

    # Number of clusters (K)
    k = 3
    # Perform k-means clustering
    _, labels, centers = cv2.kmeans(pixel_values, k, None, criteria, 100, cv2.KMEANS_RANDOM_CENTERS)

    # Convert back to 8 bit values
    centers = np.uint8(centers)

    # Flatten the labels array
    labels = labels.flatten()

    # Convert all pixels to the color of the centroids
    segmented_image = centers[labels]

    # Reshape back to the original image dimension
    segmented_image = segmented_image.reshape(img.shape)
    
    # Find the contours of the segments
    gray = cv2.cvtColor(segmented_image, cv2.COLOR_BGR2GRAY)

    # Sharpen the image using the unsharp masking technique
    blurred = cv2.GaussianBlur(gray, (0, 0), 3)
    sharpened = cv2.addWeighted(gray, 1.5, blurred, -0.5, 0)
            
    thresh = cv2.adaptiveThreshold(sharpened, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)
    inverted = 255 - thresh
        
    # Dilate to connect nearby contours
    kernel = np.ones((1,1),np.uint8)
    dilated = cv2.dilate(thresh, kernel, iterations=1)

    # Erode to separate connected contours
    eroded = cv2.erode(dilated, kernel, iterations=1)
       
    contours, hierarchy = cv2.findContours(eroded, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    contours_inv, _ = cv2.findContours(inverted, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    
    contours = contours + contours_inv
    min_contour_area = 50
    contours = [c for c in contours if cv2.contourArea(c) > min_contour_area]
    
    # Define minimum and maximum aspect ratio for digits
    min_aspect_ratio = 0.2
    max_aspect_ratio = 2.5
    
    # Calculate image dimensions
    img_height, img_width = img.shape[:2]


    # Draw the bounding rectangle of each contour on the original image
    for cnt in contours:
        left,top,width,height = cv2.boundingRect(cnt)
        
        aspect_ratio = width / float(height)

        # Filter out regions with aspect ratio outside the desired range
        if aspect_ratio < min_aspect_ratio or aspect_ratio > max_aspect_ratio:
            continue

            
        std_dev_threshold = 15
        std_dev = cv2.meanStdDev(
            gray[top:top+height, left:left+width])[1][0][0]
        if std_dev < std_dev_threshold:
            continue
            
        # Define minimum and maximum dimensions for digits
        minWidth = int(0.02 * img_width)
        minHeight = int(0.02 * img_height)
        maxWidth = int(0.5 * img_width)
        maxHeight = int(0.95 * img_height)
        
        # filter out too small or too big objects that are not likely to contain digits
        if width < minWidth or height < minHeight or width > maxWidth or height > maxHeight:
            continue
                       
        # Append bounding box to list
        boxes.append({
            "label": 0,
            "left": left,
            "top": top,
            "width": width,
            "height": height
        })        
        
    # Append results for current file to overall results
    results.append({
        "filename": file[len(path)+1:],
        "boxes": boxes
    })



## Loop on dataset images to test the algorithms

Note: you can uncomment the show(file, img) command to see the ouput image of the algorithm but before doing that consider changing the cnt because it will perform the algorithm on the whole dataset which takes alot of time so for testing using a cnt of 10 is sufficient to see a sample output of the algorithm instead of the whole dataset

In [237]:
results = []
with open(expected) as f:
    data = json.load(f)


for i in range(cnt):
    file = f'{path}/{files[i]}'
    # Read the image
    img = cv2.imread(file)
    boxes = []    
    k_means_clustering(img, boxes)
    filtered_boxes = non_max_suppression(boxes)
    for box in boxes:
        cv2.rectangle(img,(box["left"], box["top"]),(box["left"] + box["width"], box["top"] + box["height"]),(0,0,255),1)
        
#     show(file, img)



        
# Save the data
with open("results.json", "w") as f:
    json.dump(results, f)

# Close cv2 windows if any are open
cv2.destroyAllWindows()



## Test algorithm accuracy

In [238]:
def test(file, i, correct, actual):
    expected = data[int(file[:-4]) - 1]
    res = results[i]
    actual[0] += len(expected["boxes"])
    # loop over all the boxes in the expected data and see if any matches
    # the boxes generated by the algorithm
    for exp in expected["boxes"]:
        seen = False
        for box in res["boxes"]:
            if abs(box["left"] - exp["left"]) <= margin_error \
            and abs(box["top"] - exp["top"]) <= margin_error \
            and abs(box["width"] - exp["width"]) <= margin_error \
            and abs(box["height"] - exp["height"]) <= margin_error:
                seen = True
        correct[0] += seen


            
correct_boxes = [0]
actual_boxes = [0]

for i in range(cnt):
    file = files[i]
    test(file, i, correct_boxes, actual_boxes)
        
print(f"Algorithm accuracy: {round(((correct_boxes[0]/actual_boxes[0]) * 100), 2)}%")
    

Algorithm accuracy: 59.92%


# Phase 2
---

## Labeling digits using hit or miss

In [230]:
template_paths = ['HighwayGothic_{}.png', 'Clearview_{}.png',
                  'Frutiger_{}.png', 'Eurostile_{}.png']

with open(expected) as f:
    data = json.load(f)

def label_digits(img_name):
    # Get the index corresponding to the image name
    img_index = int(img_name.split('.')[0]) - 1

    # Get the boxes for the image
    boxes = data[img_index]['boxes']
    
    file = f"{path}/{img_name}"
    
    print(img_index, file)
    # Read the image
    img = cv2.imread(file)

    
    g_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    orb = cv2.ORB_create()

    sift = cv2.xfeatures2d.SIFT_create()



    # Iterate over each box in the image
    for box in boxes:
        left = int(box['left'])
        top = int(box['top'])
        width = int(box['width'])
        height = int(box['height'])

        # Crop the image based on the box coordinates
        digit_img = g_img[top:top + height, left:left + width]
                
        # Resize the digit image to match the template size (32x32)
        resized_digit_img = cv2.resize(digit_img, (32, 32))
            
        # Perform feature extraction using ORB
        kp1, des1 = orb.detectAndCompute(resized_digit_img, None)
        
        # Convert digit image to binary
        _, binary_img = cv2.threshold(resized_digit_img, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
#         show(file, binary_img)
        
        # Perform feature matching with the templates
        max_match_count = 0
        best_template_index = 0


#         # Step 3: Detect and compute the keypoints and descriptors for both images
        keypoints1, descriptors1 = sift.detectAndCompute(binary_img, None)

        
        # Match the resized digit image with the templates
        for template_path in template_paths:
            for label in range(10):
                new_path = f"./templates/{template_path.format(label)}"
                template_img = cv2.imread(new_path, cv2.IMREAD_GRAYSCALE)  # Load as grayscale


               # Perform feature extraction using ORB
                kp2, des2 = orb.detectAndCompute(template_img, None)

                # Create a brute-force matcher
                bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

                # Perform feature matching
                matches = bf.match(des1, des2)

                # Calculate the match count
                match_count = len(matches)
                
                print(match_count)

                # Check if the current template has a higher match count
                if match_count > max_match_count:
                    max_match_count = match_count
                    best_template_index = template_index




            # If a match is found, do something with the result
            if max_match_count > 0:
                # For example, you can draw a bounding box around the digit
                label_text = str(label)
                label_size, _ = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)
                cv2.putText(img, label_text, (left, top + height + label_size[1] + 5),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 1, cv2.LINE_AA)

    # Display the image with bounding boxes
    cv2.imshow("Labeled Image", img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    

In [231]:
for i in range(10):
    img_name = f"{i+1}.png"
    label_digits(img_name)
    print("------------------------")

0 ./data/train/1.png
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
------------------------
1 ./data/train/2.png
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
------------------------
2 ./data/train/3.png
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
------------------------
3 ./data/train/4.png
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
------------------------
4 ./data/train/5.png
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

In [47]:
import cv2
import json
import numpy as np

# Load the JSON file and templates
with open('data.json') as file:
    data = json.load(file)

template_paths = ['HighwayGothic_{}.png', 'Clearview_{}.png',
                  'Frutiger_{}.png', 'Eurostile_{}.png', "SourceSansPro-Bold_{}.png", 
                   "SourceSansPro-Italic_{}.png", "Roboto-Bold_{}.png", "Roboto-Italic_{}.png"]


def label_digits_hom(img_name, total_digits, detected_digits):    
    # Get the index corresponding to the image name
    img_index = int(img_name.split('.')[0]) - 1

    # Get the boxes for the image
    boxes = data[img_index]['boxes']

    file = f"{path}/{img_name}"
        
    # Read the image
    img = cv2.imread(file)
    
    g_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    
    total_digits[0] += len(boxes)    
    


    # Iterate over each box in the image
    for box in boxes:
        data_label = int(box['label'])
        left = int(box['left'])
        top = int(box['top'])
        width = int(box['width'])
        height = int(box['height'])
        
                # Validate the ROI coordinates
        if left < 0 or top < 0 or left + width > img.shape[1] or top + height > img.shape[0]:
            print(f"Invalid ROI coordinates for box: {box}")
            continue

        # Crop the image based on the box coordinates
        digit_img = g_img[top:top + height, left:left + width]
        
                # Resize the digit image to match the template size (32x32)
        if digit_img.shape[0] == 0 or digit_img.shape[1] == 0:
            print(f"Failed to crop digit image for box: {box}")
            continue
        

        # Resize the digit image to match the template size (32x32)
        resized_digit_img = cv2.resize(digit_img, (32, 32))
        
        
        label_results = [0] * 10
        


        # Match the resized digit image with the templates
        for template_path in template_paths:
            for label in range(10):

                new_path = f"./templates/{template_path.format(label)}"
                template_img = cv2.imread(new_path, cv2.IMREAD_GRAYSCALE)  # Load as grayscale


                # Perform template matching
                result = cv2.matchTemplate(resized_digit_img, template_img, cv2.TM_CCOEFF_NORMED)
                
                label_results[label] += result
                
                threshold = 0.2  # Adjust as needed

                locations = np.where(result >= threshold)
            
        best_label = label_results.index(max(label_results))
        detected_digits[0] += best_label == data_label

        cv2.rectangle(img,(left, top),(left + width, top + height),(255,0,0),1)
            
        label_text = str(best_label)
        label_size, _ = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)

        # Calculate the position of the label text
        label_x = left + - label_size[0] - 5  # Position to the right of the bounding box
        label_y = top + label_size[1] + 5

        # Calculate the middle point of the bounding box
        box_center_x = left + width // 2
        box_center_y = top + height // 2



        # Draw the label text on the image
        cv2.putText(img, label_text, (label_x, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2, cv2.LINE_AA)

        # Draw a line from the middle of the bounding box to the label text
        cv2.line(img, (box_center_x, box_center_y), (label_x, label_y), (0, 255, 0), 1)
    # Display the image with bounding boxes
#     cv2.imshow("Labeled Image", img)
#     cv2.waitKey(0)
#     cv2.destroyAllWindows()



In [None]:
total_digits = [0]
detected_digits = [0]
for i in range(len(data)):
    img_name = f"{i+1}.png"
    label_digits_hom(img_name, total_digits, detected_digits)
    
print("Final Accuracy: ", (detected_digits[0] / total_digits[0]) * 100)

#     print("------------------------")

Invalid ROI coordinates for box: {'height': 36.0, 'label': 8.0, 'left': 32.0, 'top': 15.0, 'width': 16.0}
Invalid ROI coordinates for box: {'height': 34.0, 'label': 3.0, 'left': -1.0, 'top': 26.0, 'width': 16.0}
Invalid ROI coordinates for box: {'height': 49.0, 'label': 1.0, 'left': 51.0, 'top': 6.0, 'width': 21.0}
Invalid ROI coordinates for box: {'height': 31.0, 'label': 1.0, 'left': 38.0, 'top': 3.0, 'width': 13.0}
Invalid ROI coordinates for box: {'height': 31.0, 'label': 3.0, 'left': 2.0, 'top': 32.0, 'width': 23.0}
Invalid ROI coordinates for box: {'height': 33.0, 'label': 5.0, 'left': 24.0, 'top': 5.0, 'width': 17.0}
Invalid ROI coordinates for box: {'height': 108.0, 'label': 2.0, 'left': -1.0, 'top': 57.0, 'width': 44.0}
Invalid ROI coordinates for box: {'height': 21.0, 'label': 1.0, 'left': 21.0, 'top': 6.0, 'width': 3.0}
Invalid ROI coordinates for box: {'height': 56.0, 'label': 2.0, 'left': 62.0, 'top': 34.0, 'width': 29.0}
Invalid ROI coordinates for box: {'height': 17.0, '

Invalid ROI coordinates for box: {'height': 59.0, 'label': 6.0, 'left': 86.0, 'top': 7.0, 'width': 24.0}
Invalid ROI coordinates for box: {'height': 15.0, 'label': 2.0, 'left': 22.0, 'top': 5.0, 'width': 7.0}
Invalid ROI coordinates for box: {'height': 35.0, 'label': 3.0, 'left': 43.0, 'top': 4.0, 'width': 22.0}
Invalid ROI coordinates for box: {'height': 32.0, 'label': 1.0, 'left': 23.0, 'top': 3.0, 'width': 16.0}
Invalid ROI coordinates for box: {'height': 43.0, 'label': 2.0, 'left': 52.0, 'top': 66.0, 'width': 42.0}
Invalid ROI coordinates for box: {'height': 48.0, 'label': 7.0, 'left': 40.0, 'top': 46.0, 'width': 39.0}
Invalid ROI coordinates for box: {'height': 17.0, 'label': 3.0, 'left': -1.0, 'top': 4.0, 'width': 8.0}
Invalid ROI coordinates for box: {'height': 40.0, 'label': 4.0, 'left': 40.0, 'top': 17.0, 'width': 21.0}
Invalid ROI coordinates for box: {'height': 42.0, 'label': 8.0, 'left': 29.0, 'top': 5.0, 'width': 29.0}
Invalid ROI coordinates for box: {'height': 16.0, 'lab

hello


In [None]:
#20.455383103321186