## Design Overview

This workflow is designed for automated OMR (Optical Mark Recognition) processing using image analysis and statistical methods. The main phases are:

1. **Template Analysis (`detect_circles_template`)**
    - Analyzes a clean template image to find optimal circle detection parameters (`a_t`, `b_t`).
    - Statistically determines the most common bubble radius (`mode_radius`).

2. **Answer Sheet Processing (`detect_circles`)**
    - Uses template-derived parameters as a starting point.
    - Adapts detection with ±20% tolerance for real-world variations.
    - Efficiently locates answer bubbles without exhaustive parameter search.

3. **Pixel Counting**
    - Counts black and white pixels inside each detected bubble for template, student, and key images.

4. **Grouping Bubbles**
    - Groups detected bubbles by rows using clustering (DBSCAN) or tolerance-based methods.

5. **Mark Detection**
    - Compares student and template bubbles to identify marked/unmarked answers using statistical deviation.

6. **Scoring**
    - Compares detected answers with the key and calculates the total score and per-question correctness.

## Steps


### 1. Preprocessing

In [4]:
import cv2
import numpy as np


def preprocess_image(image_path, blur_ksize=(5, 5)):
    """
    Preprocess the input image by converting it to grayscale and applying Gaussian blur and convert it to binary image.
    
    Args:
        image_path (str): Path to the input image.
        blur_ksize (tuple): Kernel size for the Gaussian blur (default is (5, 5)).
        
    Returns:
        gray_blurred (numpy array): Blurred grayscale version of the image.
        original_img (numpy array): Original color image.
        gray (numpy array): Grayscale version of the image.
        binary_img (numpy array): Binary version of the image
    """
    # Read the image.
    original_img = cv2.imread(image_path, cv2.IMREAD_COLOR)

    if original_img is None:
        raise ValueError(f"Image at {image_path} not found.")
    
    # Convert to grayscale.
    gray = cv2.cvtColor(original_img, cv2.COLOR_BGR2GRAY)

    # Apply Gaussian blur.
    gray_blurred = cv2.GaussianBlur(gray, blur_ksize, 0)

    # Convert the original image to a binary image
    _, binary_img = cv2.threshold(gray_blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    return gray_blurred, original_img, gray, binary_img

In [5]:
# Preprocess the images
gray_blurred_s, original_img_s, gray_s, binary_img_s = preprocess_image('./Test/7_S.jpg')
gray_blurred_t, original_img_t, gray_t, binary_img_t = preprocess_image('./Test/7_T.jpg')
gray_blurred_k, original_img_k, gray_k, binary_img_k = preprocess_image('./Test/7_K.jpg')

questions = 40
options=4

### 2. Circle Detection

In [6]:
from collections import Counter
def detect_circles_template(img, gray_blurred, questions, options):
    """
    Detect circles in the preprocessed grayscale image using the Hough Circle Transform.
    
    Args:
        img (numpy array): Original image for visualization.
        gray_blurred (numpy array): Blurred grayscale image.
        num_questions (int): Number of questions in the OMR.
        num_options (int): Number of options per question.
        dp (float): Inverse ratio of the accumulator resolution to the image resolution (default is 1).
        min_dist (int): Minimum distance between the centers of detected circles (default is 1).
        param1 (float): First method-specific parameter (gradient threshold for Canny edge detection).
        param2 (float): Second method-specific parameter (accumulator threshold for circle detection).
        min_radius_range (tuple): Tuple for minimum and maximum radius values for circle detection.
    
    Returns:
        detected_circles (numpy array): Array of detected circles, if found.
    """
    
    for b in range(15,5,-1): 
        for a in range(5,35):  
            detected_circles = cv2.HoughCircles(gray_blurred, cv2.HOUGH_GRADIENT, 1, a, 
                            param1 = b,param2 = 3*b, minRadius = a//2, maxRadius = a) 
            if detected_circles is not None:
                detected_circles=np.uint16(np.around(detected_circles))
                print(f"circles:{detected_circles.shape[1]},a:{a},b:{b}")
                if detected_circles.shape[1]==questions*options:

                    radius_list = [pt[2] for pt in detected_circles[0, :]]
                    
                    # Find the most common radius
                    most_common_radius = Counter(radius_list).most_common(1)[0][0]
                    print(f"Most common radius: {most_common_radius}")



                    for pt in detected_circles[0, :]: 
                        x, y, r = pt[0], pt[1], pt[2] 

                        # Draw the circumference of the circle. 
                        cv2.circle(img, (x, y), r, (0, 255, 0), 3) 

                        # Draw a small circle (of radius 1) to show the center. 
                        cv2.circle(img, (x, y), 2, (0, 0, 255), 3) 
                    cv2.imwrite(f'./output/detected_circles_template.jpg', img)
                    cv2.imshow("Detected Circle", img) 
                    cv2.waitKey(0) 
                    return detected_circles, most_common_radius,a,b

    cv2.destroyAllWindows()
    return None

In [7]:
import math
def detect_circles(img, gray_blurred, questions, options, mode_radius, a_t, b_t):
    """
    Detect circles in the preprocessed grayscale image using the Hough Circle Transform.
    
    Args:
        img (numpy array): Original image for visualization.
        gray_blurred (numpy array): Blurred grayscale image.
        num_questions (int): Number of questions in the OMR.
        num_options (int): Number of options per question.
        dp (float): Inverse ratio of the accumulator resolution to the image resolution (default is 1).
        min_dist (int): Minimum distance between the centers of detected circles (default is 1).
        param1 (float): First method-specific parameter (gradient threshold for Canny edge detection).
        param2 (float): Second method-specific parameter (accumulator threshold for circle detection).
        min_radius_range (tuple): Tuple for minimum and maximum radius values for circle detection.
    
    Returns:
        detected_circles (numpy array): Array of detected circles, if found.
    """
    #b=b_t
    # a=math.ceil(mode_radius+mode_radius*0.20)

    upper_a = max(a_t, mode_radius)
    lower_a = min(a_t, mode_radius)
    upper_a_threshold = math.ceil(upper_a + upper_a * 0.20)
    lower_a_threshold = math.floor(lower_a - lower_a * 0.20)


    for b in range(b_t, b_t-10, -1):
        for a in range(upper_a_threshold, lower_a_threshold, -1):
                
            detected_circles = cv2.HoughCircles(gray_blurred, cv2.HOUGH_GRADIENT, 1, a, 
                            param1 = b,param2 = 3*b, minRadius = a//2, maxRadius = a) 
            if detected_circles is not None:
                print(f"detect_circles: {detected_circles.shape[1]}, a: {a}, b: {b}")
                detected_circles=np.uint16(np.around(detected_circles))
                if detected_circles.shape[1]==questions*options:
                    for pt in detected_circles[0, :]: 
                        x, y, r = pt[0], pt[1], pt[2] 

                        cv2.circle(img, (x, y), r, (0, 255, 0), 3) 
                        cv2.circle(img, (x, y), 2, (0, 0, 255), 3) 
                    cv2.imshow("Detected Circle", img) 
                    cv2.imwrite(f'./output/detected_circles{a}b{b}.jpg', img)
                    cv2.waitKey(0) 
                    return detected_circles

    cv2.destroyAllWindows()
    return None

In [8]:
detected_circles_t,mode_radius,a_t,b_t = detect_circles_template(
    original_img_t, gray_blurred_t, 
    questions, options,
)

circles:81,a:15,b:15
circles:125,a:16,b:15
circles:142,a:17,b:15
circles:157,a:18,b:15
circles:157,a:19,b:15
circles:156,a:20,b:15
circles:157,a:21,b:15
circles:157,a:22,b:15
circles:157,a:23,b:15
circles:158,a:24,b:15
circles:158,a:25,b:15
circles:155,a:26,b:15
circles:154,a:27,b:15
circles:157,a:28,b:15
circles:158,a:29,b:15
circles:158,a:30,b:15
circles:159,a:31,b:15
circles:149,a:32,b:15
circles:147,a:33,b:15
circles:137,a:34,b:15
circles:98,a:15,b:14
circles:141,a:16,b:14
circles:150,a:17,b:14
circles:159,a:18,b:14
circles:159,a:19,b:14
circles:159,a:20,b:14
circles:159,a:21,b:14
circles:160,a:22,b:14
Most common radius: 14


In [9]:
detected_circles_s = detect_circles(
    original_img_s, gray_blurred_s, 
    questions, options, mode_radius, a_t, b_t
)

detected_circles_k = detect_circles(
    original_img_k, gray_blurred_k, 
    questions, options, mode_radius, a_t, b_t
)

detect_circles: 157, a: 27, b: 14
detect_circles: 157, a: 26, b: 14
detect_circles: 156, a: 25, b: 14
detect_circles: 155, a: 24, b: 14
detect_circles: 155, a: 23, b: 14
detect_circles: 152, a: 22, b: 14
detect_circles: 150, a: 21, b: 14
detect_circles: 151, a: 20, b: 14
detect_circles: 152, a: 19, b: 14
detect_circles: 149, a: 18, b: 14
detect_circles: 122, a: 17, b: 14
detect_circles: 103, a: 16, b: 14
detect_circles: 91, a: 15, b: 14
detect_circles: 16, a: 14, b: 14
detect_circles: 160, a: 27, b: 13
detect_circles: 157, a: 27, b: 14
detect_circles: 157, a: 26, b: 14
detect_circles: 159, a: 25, b: 14
detect_circles: 159, a: 24, b: 14
detect_circles: 158, a: 23, b: 14
detect_circles: 158, a: 22, b: 14
detect_circles: 158, a: 21, b: 14
detect_circles: 158, a: 20, b: 14
detect_circles: 159, a: 19, b: 14
detect_circles: 157, a: 18, b: 14
detect_circles: 128, a: 17, b: 14
detect_circles: 110, a: 16, b: 14
detect_circles: 104, a: 15, b: 14
detect_circles: 21, a: 14, b: 14
detect_circles: 1

### 3. Pixel counts


In [10]:
import cv2
import numpy as np

def count_black_and_white_pixels(img, circles):
    """
    Count black and white pixels in the detected circles.

    Parameters:
    - img: The binary image (where black indicates marked areas).
    - circles: List of detected circles in the format (x, y, r).

    Returns:
    - A list of tuples containing counts of (black_pixels, white_pixels) for each circle.
    """
    counts = []

    for (x, y, r) in circles:
        # Create a mask for the current circle
        mask = np.zeros_like(img, dtype=np.uint8)
        cv2.circle(mask, (x, y), r, (255), thickness=-1)  # Fill the circle in the mask

        # Apply the mask to get the pixel values within the circle
        masked_area = cv2.bitwise_and(img, mask)

        # Count black and white pixels
        black_pixels = np.sum(masked_area == 0)   # Count of black pixels (0)
        white_pixels = np.sum(masked_area == 255) # Count of white pixels (255)

        counts.append((x,y,r,black_pixels, white_pixels))

    return counts


In [11]:
circles_s = detected_circles_s[0, :]
circles_t = detected_circles_t[0, :]  
circles_k = detected_circles_k[0, :]

# Count black and white pixels in the detected circles
pixel_counts_s = count_black_and_white_pixels(binary_img_s, circles_s)
pixel_counts_t = count_black_and_white_pixels(binary_img_t, circles_t)
pixel_counts_k = count_black_and_white_pixels(binary_img_k, circles_k)

# Print the results
print("students answer sheet")
for idx, (x, y,r, black_count, white_count) in enumerate(pixel_counts_s, start=1):
    print(f"Circle {idx}: Coordinates = ({x}, {y}), Black Pixels = {black_count}, White Pixels = {white_count}")

print("template")
for idx, (x, y,r, black_count, white_count) in enumerate(pixel_counts_t, start=1):
    print(f"Circle {idx}: Coordinates = ({x}, {y}), Black Pixels = {black_count}, White Pixels = {white_count}")

print("key")
for idx, (x, y,r, black_count, white_count) in enumerate(pixel_counts_k, start=1):
    print(f"Circle {idx}: Coordinates = ({x}, {y}), Black Pixels = {black_count}, White Pixels = {white_count}")


# Draw the circles on the binary image for visualization
# for (x, y, r) in circles_s:
#     cv2.circle(binary_img_s, (x, y), r, (127), 2)
# cv2.imshow("Detected Circles in Binary Image student", binary_img_s)
# cv2.waitKey(0)


# for (x, y, r) in circles_t:
#     cv2.circle(binary_img_t, (x, y), r, (127), 2)  
# cv2.imshow("Detected Circles in Binary Image template", binary_img_t)
# cv2.waitKey(0)

# for (x, y, r) in circles_k:
#     cv2.circle(binary_img_k, (x, y), r, (127), 2)
# cv2.imshow("Detected Circles in Binary Image key(scheme)", binary_img_k)
# cv2.waitKey(0)

cv2.destroyAllWindows()

students answer sheet
Circle 1: Coordinates = (1108, 738), Black Pixels = 1721669, White Pixels = 481
Circle 2: Coordinates = (1046, 804), Black Pixels = 1722148, White Pixels = 2
Circle 3: Coordinates = (984, 544), Black Pixels = 1721631, White Pixels = 519
Circle 4: Coordinates = (230, 274), Black Pixels = 1722075, White Pixels = 75
Circle 5: Coordinates = (1340, 228), Black Pixels = 1721644, White Pixels = 506
Circle 6: Coordinates = (168, 206), Black Pixels = 1721412, White Pixels = 738
Circle 7: Coordinates = (1108, 804), Black Pixels = 1721667, White Pixels = 483
Circle 8: Coordinates = (730, 608), Black Pixels = 1722135, White Pixels = 15
Circle 9: Coordinates = (348, 610), Black Pixels = 1721553, White Pixels = 597
Circle 10: Coordinates = (612, 412), Black Pixels = 1721604, White Pixels = 546
Circle 11: Coordinates = (730, 542), Black Pixels = 1721576, White Pixels = 574
Circle 12: Coordinates = (1346, 544), Black Pixels = 1721696, White Pixels = 454
Circle 13: Coordinates = (

### additional

#### Pixel count list to json

In [12]:
# Convert the list to a JSON string
import json
input_list_as_int = [tuple(int(x) for x in tpl) for tpl in pixel_counts_t]
json_data = json.dumps(input_list_as_int, indent=4)

In [13]:
# Save the JSON string to a file
save_path = "./output/pixel_counts_template.json"
with open(save_path, "w") as file:
    file.write(json_data)

#### draw detected circles using pixel count


In [53]:
import cv2
import json
import numpy as np

def draw_detected_circles(image_path, pixel_count_json, output_path):
    """
    Draws detected circles on the given image based on pixel count data.
    
    Parameters:
    - image_path (str): Path to the original image.
    - pixel_count_json (str): Path to the JSON file containing circle coordinates and radius.
    - output_path (str): Path to save the output image with drawn circles.
    """
    # Load the image
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if image is None:
        raise ValueError(f"Error: Unable to load image from {image_path}")
    
    # Load the JSON data
    with open(pixel_count_json, 'r') as file:
        circles = json.load(file)
    
    # Draw each circle
    for circle in circles:
        x, y, r, _, _ = circle  # Extract circle parameters
        cv2.circle(image, (x, y), r, (0, 255, 0), 2)  # Draw the circle
        cv2.circle(image, (x, y), 2, (0, 0, 255), 3)  # Mark the center
    
    # Save the image with drawn circles
    cv2.imwrite(output_path, image)
    print(f"Output image saved at {output_path}")

# Example usage:
draw_detected_circles("./Test/7_T.jpg", "./output/pixel_counts_template.json", "./output/detect_from_json.jpg")


Output image saved at ./output/detect_from_json.jpg


### 4. Divide the circles by rows

#### method using tolerance

In [54]:
from collections import defaultdict
import numpy as np

def group_circles_by_rows(circles, percentage=0.5):
    # Sort circles by y-coordinate (vertical position)
    sorted_circles = sorted(circles, key=lambda x: x[1])

    # Calculate the average vertical distance between consecutive circles
    y_distances = [sorted_circles[i+1][1] - sorted_circles[i][1] for i in range(len(sorted_circles) - 1)]
    avg_distance = np.mean(y_distances)
    print(f"Average distance between circles: {avg_distance}")
    # Set the tolerance as a percentage of the average distance
    tolerance = percentage * avg_distance

    # Group circles into rows based on y-coordinate proximity
    grouped_by_rows = defaultdict(list)
    for circle in sorted_circles:
        x, y, r = circle[0], circle[1], circle[2]

        # Find the row where this circle belongs based on y-coordinate
        assigned = False
        for row_y in grouped_by_rows:
            if abs(row_y - y) < tolerance:
                grouped_by_rows[row_y].append((x, y, r))
                assigned = True
                break
        
        # If not assigned to any existing row, create a new row
        if not assigned:
            grouped_by_rows[y].append((x, y, r))

    # For each row, sort circles by x-coordinate (left to right)
    grouped_circles = []
    for row_y in sorted(grouped_by_rows.keys()):
        grouped_by_rows[row_y].sort(key=lambda c: c[0])  # Sort by x (horizontal position)
        grouped_circles.append(grouped_by_rows[row_y])

    return grouped_circles

# Example usage:
if pixel_counts is not None:
    #circles = detected_circles[0, :]  # Extract the circles from detected_circles
    
    # Group the circles by rows with a 50% tolerance of average row distance
    grouped_circles = group_circles_by_rows(pixel_counts, percentage=0.5)

    # For visualization
    question_num = 1
    for group in grouped_circles:
        print(f"Row No {question_num}:")
        for idx, (x, y, r) in enumerate(group, 1):
            print(f"circle {idx}: (x={x}, y={y}, r={r})")
            cv2.circle(original_img, (x, y), r, (0, 255, 0), 2)   # Draw the circle
            cv2.circle(original_img, (x, y), 2, (0, 0, 255), 3)   # Draw the center
        question_num += 1

    # Display the result
    cv2.imshow("Detected Circles", original_img)
    cv2.waitKey(0)

cv2.destroyAllWindows()


NameError: name 'pixel_counts' is not defined

#### Using clustering method DBSCAN


In [14]:
import numpy as np
from sklearn.cluster import DBSCAN


# Function to group circles by rows using DBSCAN (y-coordinate clustering)
def group_by_row_dbscan(pixel_counts, eps=10):
    # Extract the y-coordinates of circles from the pixel_counts
    y_coords = np.array([[y] for (x, y, r, black_count, white_count) in pixel_counts])
    
    # Apply DBSCAN clustering based on y-coordinates
    clustering = DBSCAN(eps=eps, min_samples=1).fit(y_coords)
    
    # Get cluster labels
    labels = clustering.labels_

    # Group circles by the row labels
    rows = {}
    for label, circle_data in zip(labels, pixel_counts):
        if label not in rows:
            rows[label] = []
        rows[label].append(circle_data)

    sorted_rows = sorted(rows.values(), key=lambda row: np.mean([c[1] for c in row]))
    sorted_rows = [sorted(row, key=lambda c: c[0]) for row in sorted_rows]
    
    return sorted_rows


In [15]:
# Group the circles by rows using the group_by_row_dbscan function
grouped_rows_s = group_by_row_dbscan(pixel_counts_s, eps=20)

# Print the grouped results
for row_idx, row in enumerate(grouped_rows_s, start=1):
    print(f"Row {row_idx}:")
    for idx, (x, y, r, black_count, white_count) in enumerate(row, start=1):
        print(f"  Circle {idx}: Coordinates = ({x}, {y}), Black Pixels = {black_count}, White Pixels = {white_count}")

Row 1:
  Circle 1: Coordinates = (168, 206), Black Pixels = 1721412, White Pixels = 738
  Circle 2: Coordinates = (232, 208), Black Pixels = 1722088, White Pixels = 62
  Circle 3: Coordinates = (294, 208), Black Pixels = 1721531, White Pixels = 619
  Circle 4: Coordinates = (360, 210), Black Pixels = 1721612, White Pixels = 538
  Circle 5: Coordinates = (552, 216), Black Pixels = 1722112, White Pixels = 38
  Circle 6: Coordinates = (614, 218), Black Pixels = 1721605, White Pixels = 545
  Circle 7: Coordinates = (674, 216), Black Pixels = 1721444, White Pixels = 706
  Circle 8: Coordinates = (734, 214), Black Pixels = 1721553, White Pixels = 597
  Circle 9: Coordinates = (928, 214), Black Pixels = 1721611, White Pixels = 539
  Circle 10: Coordinates = (986, 216), Black Pixels = 1722133, White Pixels = 17
  Circle 11: Coordinates = (1048, 218), Black Pixels = 1721669, White Pixels = 481
  Circle 12: Coordinates = (1106, 220), Black Pixels = 1721728, White Pixels = 422
  Circle 13: Coordi

In [16]:
grouped_rows_t = group_by_row_dbscan(pixel_counts_t, eps=10)

# Print the grouped results
for row_idx, row in enumerate(grouped_rows_t, start=1):
    print(f"Row {row_idx}:")
    for idx, (x, y, r, black_count, white_count) in enumerate(row, start=1):
        print(f"  Circle {idx}: Coordinates = ({x}, {y}), Black Pixels = {black_count}, White Pixels = {white_count}")

Row 1:
  Circle 1: Coordinates = (148, 246), Black Pixels = 1775882, White Pixels = 736
  Circle 2: Coordinates = (210, 244), Black Pixels = 1776113, White Pixels = 505
  Circle 3: Coordinates = (274, 244), Black Pixels = 1776051, White Pixels = 567
  Circle 4: Coordinates = (340, 244), Black Pixels = 1776052, White Pixels = 566
  Circle 5: Coordinates = (536, 246), Black Pixels = 1776008, White Pixels = 610
  Circle 6: Coordinates = (600, 248), Black Pixels = 1776067, White Pixels = 551
  Circle 7: Coordinates = (662, 246), Black Pixels = 1775955, White Pixels = 663
  Circle 8: Coordinates = (724, 244), Black Pixels = 1776043, White Pixels = 575
  Circle 9: Coordinates = (924, 242), Black Pixels = 1775992, White Pixels = 626
  Circle 10: Coordinates = (986, 244), Black Pixels = 1776190, White Pixels = 428
  Circle 11: Coordinates = (1046, 244), Black Pixels = 1776199, White Pixels = 419
  Circle 12: Coordinates = (1108, 246), Black Pixels = 1776201, White Pixels = 417
  Circle 13: Coo

In [17]:
grouped_rows_k = group_by_row_dbscan(pixel_counts_k, eps=10)

# Print the grouped results
for row_idx, row in enumerate(grouped_rows_k, start=1):
    print(f"Row {row_idx}:")
    for idx, (x, y, r, black_count, white_count) in enumerate(row, start=1):
        print(f"  Circle {idx}: Coordinates = ({x}, {y}), Black Pixels = {black_count}, White Pixels = {white_count}")

Row 1:
  Circle 1: Coordinates = (162, 212), Black Pixels = 1712369, White Pixels = 701
  Circle 2: Coordinates = (226, 212), Black Pixels = 1713011, White Pixels = 59
  Circle 3: Coordinates = (290, 214), Black Pixels = 1712571, White Pixels = 499
  Circle 4: Coordinates = (352, 216), Black Pixels = 1712540, White Pixels = 530
  Circle 5: Coordinates = (542, 218), Black Pixels = 1713050, White Pixels = 20
  Circle 6: Coordinates = (604, 220), Black Pixels = 1712588, White Pixels = 482
  Circle 7: Coordinates = (664, 220), Black Pixels = 1712323, White Pixels = 747
  Circle 8: Coordinates = (726, 218), Black Pixels = 1712548, White Pixels = 522
  Circle 9: Coordinates = (920, 220), Black Pixels = 1712521, White Pixels = 549
  Circle 10: Coordinates = (978, 224), Black Pixels = 1713055, White Pixels = 15
  Circle 11: Coordinates = (1038, 224), Black Pixels = 1712581, White Pixels = 489
  Circle 12: Coordinates = (1098, 226), Black Pixels = 1712634, White Pixels = 436
  Circle 13: Coordi

### 5. Idendifying Marked and unmarked bubbles

In [18]:
def subtract_grouped_rows(grouped_rows_s, grouped_rows_t):
    """
    Subtract the black and white pixel values of grouped_rows_t from grouped_rows_s.

    Args:
        grouped_rows_s (list): List of grouped circles with pixel counts for the student's answer sheet.
        grouped_rows_t (list): List of grouped circles with pixel counts for the template.

    Returns:
        list: List of grouped circles with subtracted pixel counts.
    """
    subtracted_rows = []

    for row_s, row_t in zip(grouped_rows_s, grouped_rows_t):
        subtracted_row = []
        for (x_s, y_s, r_s, black_s, white_s), (x_t, y_t, r_t, black_t, white_t) in zip(row_s, row_t):
            subtracted_black = black_s - black_t
            subtracted_white = white_s - white_t
            subtracted_row.append((x_s, y_s, r_s, subtracted_black, subtracted_white))
        subtracted_rows.append(subtracted_row)

    return subtracted_rows

# Subtract the grouped rows
subtracted_grouped_rows = subtract_grouped_rows(grouped_rows_t, grouped_rows_s)

# Print the results
for row_idx, row in enumerate(subtracted_grouped_rows, start=1):
    print(f"Row {row_idx}:")
    for idx, (x, y, r, subtracted_black, subtracted_white) in enumerate(row, start=1):
        print(f"  Circle {idx}: Coordinates = ({x}, {y}), Subtracted Black Pixels = {subtracted_black}, Subtracted White Pixels = {subtracted_white}")

Row 1:
  Circle 1: Coordinates = (148, 246), Subtracted Black Pixels = 54470, Subtracted White Pixels = -2
  Circle 2: Coordinates = (210, 244), Subtracted Black Pixels = 54025, Subtracted White Pixels = 443
  Circle 3: Coordinates = (274, 244), Subtracted Black Pixels = 54520, Subtracted White Pixels = -52
  Circle 4: Coordinates = (340, 244), Subtracted Black Pixels = 54440, Subtracted White Pixels = 28
  Circle 5: Coordinates = (536, 246), Subtracted Black Pixels = 53896, Subtracted White Pixels = 572
  Circle 6: Coordinates = (600, 248), Subtracted Black Pixels = 54462, Subtracted White Pixels = 6
  Circle 7: Coordinates = (662, 246), Subtracted Black Pixels = 54511, Subtracted White Pixels = -43
  Circle 8: Coordinates = (724, 244), Subtracted Black Pixels = 54490, Subtracted White Pixels = -22
  Circle 9: Coordinates = (924, 242), Subtracted Black Pixels = 54381, Subtracted White Pixels = 87
  Circle 10: Coordinates = (986, 244), Subtracted Black Pixels = 54057, Subtracted White 

In [19]:
subtract_grouped_rows_key = subtract_grouped_rows(grouped_rows_t,grouped_rows_k)
for row_idx, row in enumerate(subtract_grouped_rows_key, start=1):
    print(f"Row {row_idx}:")
    for idx, (x, y, r, subtracted_black, subtracted_white) in enumerate(row, start=1):
        print(f"  Circle {idx}: Coordinates = ({x}, {y}), Subtracted Black Pixels = {subtracted_black}, Subtracted White Pixels = {subtracted_white}")

Row 1:
  Circle 1: Coordinates = (148, 246), Subtracted Black Pixels = 63513, Subtracted White Pixels = 35
  Circle 2: Coordinates = (210, 244), Subtracted Black Pixels = 63102, Subtracted White Pixels = 446
  Circle 3: Coordinates = (274, 244), Subtracted Black Pixels = 63480, Subtracted White Pixels = 68
  Circle 4: Coordinates = (340, 244), Subtracted Black Pixels = 63512, Subtracted White Pixels = 36
  Circle 5: Coordinates = (536, 246), Subtracted Black Pixels = 62958, Subtracted White Pixels = 590
  Circle 6: Coordinates = (600, 248), Subtracted Black Pixels = 63479, Subtracted White Pixels = 69
  Circle 7: Coordinates = (662, 246), Subtracted Black Pixels = 63632, Subtracted White Pixels = -84
  Circle 8: Coordinates = (724, 244), Subtracted Black Pixels = 63495, Subtracted White Pixels = 53
  Circle 9: Coordinates = (924, 242), Subtracted Black Pixels = 63471, Subtracted White Pixels = 77
  Circle 10: Coordinates = (986, 244), Subtracted Black Pixels = 63135, Subtracted White P

In [20]:
def detect_marked_and_unmarked_bubbles(subtracted_grouped_rows, num_options, deviation_threshold=50):
    """
    Detect marked and unmarked bubbles based on white pixel values and a deviation threshold.

    Args:
        subtracted_grouped_rows (list): List of grouped circles with subtracted pixel counts.
        num_options (int): The number of options per question.
        deviation_threshold (float): The threshold below which a question is considered unmarked.

    Returns:
        list: A 2D array where each element is:
              - 1 for the most deviated (marked) bubble.
              - 0 for unmarked bubbles.
    """
    marked_bubbles = []

    for row in subtracted_grouped_rows:
        for question_start in range(0, len(row), num_options):
            question_group = row[question_start:question_start + num_options]

            # Extract white pixel values for the current question
            white_pixel_values = [subtracted_white for (_, _, _, _, subtracted_white) in question_group]

            # Calculate the mean and standard deviation of the white pixel values
            mean_white = np.mean(white_pixel_values)
            std_dev_white = np.std(white_pixel_values)
            print(f"Mean: {mean_white}, Standard Deviation: {std_dev_white}")
            # If the standard deviation is less than the threshold, consider the question unmarked
            if std_dev_white < deviation_threshold:
                marked_bubbles.append([0] * num_options)  
            else:
                # Calculate absolute deviations from the mean
                deviations = [abs(white_value - mean_white) for white_value in white_pixel_values]

                # Create a 1D array for the question (options), initialized to zeros
                marked_question = np.zeros(num_options, dtype=int)

                # Find the index of the maximum deviation
                max_deviation_index = np.argmax(deviations)

                # Mark the option with the highest deviation as 1 (marked bubble)
                marked_question[max_deviation_index] = 1

                marked_bubbles.append(marked_question)

    return np.array(marked_bubbles)


answer_student = detect_marked_and_unmarked_bubbles(subtracted_grouped_rows, options)

# Print the marked bubbles in a readable format
for row_idx, row in enumerate(answer_student, start=1):
    print(f"Question {row_idx}: Marked Bubble = {row.tolist()}")

Mean: 104.25, Standard Deviation: 197.65421194601444
Mean: 128.25, Standard Deviation: 256.78821526697834
Mean: 107.75, Standard Deviation: 182.97455424183985
Mean: 106.25, Standard Deviation: 229.224098863972
Mean: 184.0, Standard Deviation: 278.90589810902173
Mean: 92.5, Standard Deviation: 268.134760894592
Mean: 144.25, Standard Deviation: 180.35572488834393
Mean: 133.75, Standard Deviation: 217.87539443452536
Mean: 154.75, Standard Deviation: 250.45196645265136
Mean: 65.25, Standard Deviation: 239.49778182688874
Mean: 111.25, Standard Deviation: 196.2350312762734
Mean: 104.0, Standard Deviation: 242.61492122291241
Mean: 124.0, Standard Deviation: 231.50269976827485
Mean: 130.0, Standard Deviation: 234.49626862702954
Mean: 124.0, Standard Deviation: 205.6586978466994
Mean: 89.5, Standard Deviation: 178.8442059447272
Mean: 134.25, Standard Deviation: 241.28859794859764
Mean: 158.0, Standard Deviation: 263.8569688297052
Mean: 141.25, Standard Deviation: 213.7350871990839
Mean: 130.5, 

In [21]:
answer_key = detect_marked_and_unmarked_bubbles(subtract_grouped_rows_key, options)
for row_idx, row in enumerate(answer_key, start=1):
    print(f"Question {row_idx}: Marked Bubble = {row.tolist()}")

Mean: 146.25, Standard Deviation: 173.56897044114768
Mean: 157.0, Standard Deviation: 256.9678968275999
Mean: 100.25, Standard Deviation: 188.12147006655036
Mean: 81.0, Standard Deviation: 231.73044685582428
Mean: 119.0, Standard Deviation: 260.80740020175807
Mean: 100.75, Standard Deviation: 175.62370995967487
Mean: 104.5, Standard Deviation: 205.67267684357103
Mean: 114.5, Standard Deviation: 234.54690362484004
Mean: 137.5, Standard Deviation: 259.20503467332577
Mean: 93.5, Standard Deviation: 234.05394677296087
Mean: 97.5, Standard Deviation: 224.35741574550192
Mean: 66.25, Standard Deviation: 233.1655795781187
Mean: 92.0, Standard Deviation: 253.03359460751452
Mean: 65.0, Standard Deviation: 223.0044842598462
Mean: 111.0, Standard Deviation: 227.705950734714
Mean: 64.0, Standard Deviation: 226.02765317544666
Mean: 129.25, Standard Deviation: 250.35012981822078
Mean: 94.25, Standard Deviation: 144.2781601629297
Mean: 122.0, Standard Deviation: 186.25654350921474
Mean: 105.25, Standa

### 6. Calculating score

In [22]:
def calculate_score(answer_key, answer_student):
    """
    Compare the student's answers with the answer key and calculate the score.

    Args:
        answer_key (np.ndarray): The correct answer key with marked bubbles (1 for correct option, 0 for others).
        answer_student (np.ndarray): The student's marked answers with bubbles (1 for marked option, 0 for others).

    Returns:
        int: The total score of the student.
        list: A list indicating correctness for each question (1 for correct, 0 for incorrect).
    """
    total_score = 0
    result_per_question = []

    for key, student in zip(answer_key, answer_student):
        if np.array_equal(key, student):
            total_score += 1
            result_per_question.append(1)  # Correct answer
        else:
            result_per_question.append(0)  # Incorrect answer

    return total_score, result_per_question


total_score, result_per_question = calculate_score(answer_key, answer_student)

# Print the final score and results
print(f"Total Score: {total_score}")
for idx, result in enumerate(result_per_question, start=1):
    status = "Correct" if result == 1 else "Incorrect"
    print(f"Question {idx}: {status}")


Total Score: 18
Question 1: Correct
Question 2: Correct
Question 3: Correct
Question 4: Correct
Question 5: Correct
Question 6: Incorrect
Question 7: Correct
Question 8: Incorrect
Question 9: Correct
Question 10: Incorrect
Question 11: Incorrect
Question 12: Incorrect
Question 13: Incorrect
Question 14: Incorrect
Question 15: Correct
Question 16: Incorrect
Question 17: Incorrect
Question 18: Incorrect
Question 19: Incorrect
Question 20: Incorrect
Question 21: Correct
Question 22: Incorrect
Question 23: Incorrect
Question 24: Correct
Question 25: Correct
Question 26: Correct
Question 27: Incorrect
Question 28: Correct
Question 29: Incorrect
Question 30: Incorrect
Question 31: Incorrect
Question 32: Incorrect
Question 33: Correct
Question 34: Correct
Question 35: Correct
Question 36: Incorrect
Question 37: Correct
Question 38: Correct
Question 39: Incorrect
Question 40: Incorrect


## extra verification step

### Group Circle by radius

this can be used to verify that the detected circles are answer circles. Now we are not using this. we will use this after we finish the core of the function.

In [None]:
import cv2
import numpy as np
from collections import defaultdict

def group_circles_by_radius(circles, img, tolerance_percentage=20):
    """
    Groups circles based on their radius with a given tolerance percentage of the y-coordinate.
    
    Args:
        circles (array): List of detected circles [(x, y, r)].
        img (numpy array): The original image.
        tolerance_percentage (float): Tolerance as a percentage of the y-coordinate (default is 20%).
        
    Returns:
        dict: Groups of circles based on their radius.
    """
    radius_groups = defaultdict(list)
    
    # Iterate over the detected circles and group them by radius
    for circle in circles:
        x, y, r = circle[0], circle[1], circle[2]

        if not radius_groups:
            # If no groups exist yet, start a new group with the first circle
            radius_groups[r].append((x, y, r))
        else:
            matched_group = None
            for existing_radius in radius_groups.keys():
                diff = abs(int(r) - int(existing_radius))
                
                # Calculate tolerance based on the radius
                tol = (tolerance_percentage / 100.0) * existing_radius

                if diff <= tol:
                    matched_group = existing_radius
                    break

            if matched_group is not None:
                radius_groups[matched_group].append((x, y, r))
            else:
                radius_groups[r].append((x, y, r))

    # Print the radius groups for debugging
    for radius, grouped_circles in radius_groups.items():
        print(f"Radius: {radius}, Circles: {grouped_circles}")
        
    # Visualize each group of circles in separate images
    for radius, grouped_circles in radius_groups.items():
        img_copy = img.copy()  # Create a copy of the original image to visualize each group
        
        for x, y, r in grouped_circles:
            cv2.circle(img_copy, (x, y), r, (0, 255, 0), 2)  # Draw outer circle
            cv2.circle(img_copy, (x, y), 2, (0, 0, 255), 3)  # Draw center

        # Display the grouped circles in a separate window
        cv2.imshow(f'Circles with radius ~ {radius}', img_copy)
        cv2.waitKey(0)
    
    return radius_groups



# if detected_circles is not None:
#     detected_circles = np.uint16(np.around(detected_circles))

#     # Call the function with the detected circles and image
#     group_circles_by_radius(detected_circles[0, :], img, tolerance_percentage=20)

cv2.destroyAllWindows()
