## Design Overview

This notebook implements an automated Optical Mark Recognition (OMR) workflow for processing scanned answer sheets. The main steps are:

1. **Preprocessing**: Images are loaded, converted to grayscale, blurred, and binarized to enhance bubble detection.
2. **Circle Detection**: Hough Circle Transform is used to detect answer bubbles. Both sequential and parallel approaches are provided for robustness and speed.
3. **Pixel Counting**: For each detected bubble, black and white pixel counts are computed to distinguish marked from unmarked bubbles.
4. **Row Grouping**: Detected bubbles are grouped by rows using DBSCAN clustering, enabling question-wise analysis.
5. **Marked Bubble Identification**: Statistical analysis (mean and standard deviation) of pixel counts is used to identify marked bubbles per question.
6. **Scoring**: Student answers are compared to the answer key, and the total score is calculated

## Steps


### 1. Preprocessing

In [1]:
import cv2
import numpy as np
import time

def preprocess_image(image_path, blur_ksize=(5, 5)):
    """
    Preprocess the input image by converting it to grayscale and applying Gaussian blur and convert it to binary image.
    
    Args:
        image_path (str): Path to the input image.
        blur_ksize (tuple): Kernel size for the Gaussian blur (default is (5, 5)).
        
    Returns:
        gray_blurred (numpy array): Blurred grayscale version of the image.
        original_img (numpy array): Original color image.
        gray (numpy array): Grayscale version of the image.
        binary_img (numpy array): Binary version of the image
    """
    # Read the image.
    original_img = cv2.imread(image_path, cv2.IMREAD_COLOR)

    if original_img is None:
        raise ValueError(f"Image at {image_path} not found.")
    
    # Convert to grayscale.
    gray = cv2.cvtColor(original_img, cv2.COLOR_BGR2GRAY)

    # Apply Gaussian blur.
    gray_blurred = cv2.GaussianBlur(gray, blur_ksize, 0)

    # Convert the original image to a binary image
    _, binary_img = cv2.threshold(gray_blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    return gray_blurred, original_img, gray, binary_img

In [2]:
# Preprocess the images
start_1= time.time()
gray_blurred_s, original_img_s, gray_s, binary_img_s = preprocess_image('./Test/8df22.jpg')
gray_blurred_t, original_img_t, gray_t, binary_img_t = preprocess_image('./Test/9_T.jpg')
gray_blurred_k, original_img_k, gray_k, binary_img_k = preprocess_image('./Test/79109.jpg')
end_1= time.time()
print("Preprocessing time:", end_1-start_1)
questions = 40
options=4

Preprocessing time: 0.05321168899536133


### 2.0 Circle Detection

In [9]:
from collections import Counter
def detect_circles_template(img, gray_blurred, questions, options):
    """
    Detect circles in the preprocessed grayscale image using the Hough Circle Transform.
    
    Args:
        img (numpy array): Original image for visualization.
        gray_blurred (numpy array): Blurred grayscale image.
        num_questions (int): Number of questions in the OMR.
        num_options (int): Number of options per question.
        dp (float): Inverse ratio of the accumulator resolution to the image resolution (default is 1).
        min_dist (int): Minimum distance between the centers of detected circles (default is 1).
        param1 (float): First method-specific parameter (gradient threshold for Canny edge detection).
        param2 (float): Second method-specific parameter (accumulator threshold for circle detection).
        min_radius_range (tuple): Tuple for minimum and maximum radius values for circle detection.
    
    Returns:
        detected_circles (numpy array): Array of detected circles, if found.
    """
    
    for b in range(15,5,-1): 
        for a in range(5,35):  
            detected_circles = cv2.HoughCircles(gray_blurred, cv2.HOUGH_GRADIENT, 1, a, 
                            param1 = b,param2 = 3*b, minRadius = a//2, maxRadius = a) 
            if detected_circles is not None:
                
                print(f"circles:{detected_circles.shape[1]},a:{a},b:{b}")
                if detected_circles.shape[1]==questions*options:
                    detected_circles=np.uint16(np.around(detected_circles))
                    radii_list = [pt[2] for pt in detected_circles[0, :]]
                    
                    # Find the most common radius
                    most_common_radius = Counter(radii_list).most_common(1)[0][0]
                    print(f"Most common radius: {most_common_radius}")

                    for pt in detected_circles[0, :]: 
                        x, y, r = pt[0], pt[1], pt[2] 

                        # Draw the circumference of the circle. 
                        cv2.circle(img, (x, y), r, (0, 255, 0), 3) 

                        # Draw a small circle (of radius 1) to show the center. 
                        cv2.circle(img, (x, y), 2, (0, 0, 255), 3) 
                    #cv2.imwrite(f'./testoutput/detected_circlesf7.jpg', img)
                    cv2.imshow("Detected Circle", img) 
                    cv2.waitKey(0) 
                    return detected_circles, most_common_radius,a,b

    cv2.destroyAllWindows()
    return None

In [7]:
import math
def detect_circles(img, gray_blurred, questions, options, mode_radius, a_t, b_t):
    """
    Detect circles in the preprocessed grayscale image using the Hough Circle Transform.
    
    Args:
        img (numpy array): Original image for visualization.
        gray_blurred (numpy array): Blurred grayscale image.
        num_questions (int): Number of questions in the OMR.
        num_options (int): Number of options per question.
        dp (float): Inverse ratio of the accumulator resolution to the image resolution (default is 1).
        min_dist (int): Minimum distance between the centers of detected circles (default is 1).
        param1 (float): First method-specific parameter (gradient threshold for Canny edge detection).
        param2 (float): Second method-specific parameter (accumulator threshold for circle detection).
        min_radius_range (tuple): Tuple for minimum and maximum radius values for circle detection.
    
    Returns:
        detected_circles (numpy array): Array of detected circles, if found.
    """
    #b=b_t
    # a=math.ceil(mode_radius+mode_radius*0.20)

    upper_a = max(a_t, mode_radius)
    lower_a = min(a_t, mode_radius)
    upper_a_threshold = math.ceil(upper_a + upper_a * 0.20)
    lower_a_threshold = math.floor(lower_a - lower_a * 0.20)

    # Ensure b_t and lower bound are not less than 0
    for b in range(max(b_t, 0), max(b_t - 10, 0), -1):
        for a in range(max(upper_a_threshold, 0), max(lower_a_threshold, 0) - 1, -1):
            # Your processing logic here

            detected_circles = cv2.HoughCircles(gray_blurred, cv2.HOUGH_GRADIENT, 1, a, 
                            param1 = b,param2 = 3*b, minRadius = a//2, maxRadius = a) 
            if detected_circles is not None:
                print(f"detect_circles: {detected_circles.shape[1]}, a: {a}, b: {b}")
                
                if detected_circles.shape[1]==questions*options:
                    detected_circles=np.uint16(np.around(detected_circles))
                    for pt in detected_circles[0, :]: 
                        a, b, r = pt[0], pt[1], pt[2] 
    
                        cv2.circle(img, (a, b), r, (0, 255, 0), 3) 
                        cv2.circle(img, (a, b), 2, (0, 0, 255), 3) 
                    cv2.imshow("Detected Circle", img) 
                    #cv2.imwrite(f'./testoutput/detected_circles{a}b{b}.jpg', img)
                    cv2.waitKey(0) 
                    return detected_circles

    cv2.destroyAllWindows()
    return None

In [10]:
start_2= time.time()
detected_circles_t,mode_radius,a_t,b_t = detect_circles_template(
    original_img_t, gray_blurred_t, 
    questions, options,
)
end_2= time.time()
print("Detecting circles time template:", end_2-start_2)

circles:1,a:11,b:15
circles:24,a:12,b:15
circles:107,a:13,b:15
circles:87,a:14,b:15
circles:92,a:15,b:15
circles:95,a:16,b:15
circles:94,a:17,b:15
circles:90,a:18,b:15
circles:90,a:19,b:15
circles:79,a:20,b:15
circles:78,a:21,b:15
circles:37,a:22,b:15
circles:37,a:23,b:15
circles:10,a:24,b:15
circles:10,a:25,b:15
circles:1,a:27,b:15
circles:1,a:10,b:14
circles:3,a:11,b:14
circles:44,a:12,b:14
circles:120,a:13,b:14
circles:109,a:14,b:14
circles:111,a:15,b:14
circles:117,a:16,b:14
circles:115,a:17,b:14
circles:115,a:18,b:14
circles:116,a:19,b:14
circles:109,a:20,b:14
circles:109,a:21,b:14
circles:63,a:22,b:14
circles:63,a:23,b:14
circles:26,a:24,b:14
circles:26,a:25,b:14
circles:1,a:26,b:14
circles:1,a:27,b:14
circles:6,a:10,b:13
circles:11,a:11,b:13
circles:70,a:12,b:13
circles:130,a:13,b:13
circles:127,a:14,b:13
circles:122,a:15,b:13
circles:126,a:16,b:13
circles:129,a:17,b:13
circles:129,a:18,b:13
circles:130,a:19,b:13
circles:121,a:20,b:13
circles:120,a:21,b:13
circles:89,a:22,b:13
c

In [11]:
start_3= time.time()
detected_circles_s = detect_circles(
    original_img_s, gray_blurred_s, 
    questions, options, mode_radius, a_t, b_t
)

detected_circles_k = detect_circles(
    original_img_k, gray_blurred_k, 
    questions, options, mode_radius, a_t, b_t
)
end_3= time.time()
print("Detecting circles time:", end_3-start_3)

detect_circles: 156, a: 20, b: 10
detect_circles: 159, a: 19, b: 10
detect_circles: 158, a: 18, b: 10
detect_circles: 158, a: 17, b: 10
detect_circles: 158, a: 16, b: 10
detect_circles: 153, a: 15, b: 10
detect_circles: 153, a: 14, b: 10
detect_circles: 144, a: 13, b: 10
detect_circles: 153, a: 12, b: 10
detect_circles: 103, a: 11, b: 10
detect_circles: 26, a: 10, b: 10
detect_circles: 12, a: 9, b: 10
detect_circles: 159, a: 20, b: 9
detect_circles: 160, a: 19, b: 9
detect_circles: 155, a: 20, b: 10
detect_circles: 156, a: 19, b: 10
detect_circles: 155, a: 18, b: 10
detect_circles: 156, a: 17, b: 10
detect_circles: 157, a: 16, b: 10
detect_circles: 152, a: 15, b: 10
detect_circles: 149, a: 14, b: 10
detect_circles: 148, a: 13, b: 10
detect_circles: 149, a: 12, b: 10
detect_circles: 135, a: 11, b: 10
detect_circles: 38, a: 10, b: 10
detect_circles: 3, a: 9, b: 10
detect_circles: 159, a: 20, b: 9
detect_circles: 158, a: 19, b: 9
detect_circles: 158, a: 18, b: 9
detect_circles: 159, a: 17

In [12]:
# Draw detected circles on the original images and display them
for title, img, circles in [
    ("Student Sheet", original_img_s, detected_circles_s),
    ("Template",    original_img_t, detected_circles_t),
    ("Answer Key",  original_img_k, detected_circles_k),
]:
    img_drawn = img.copy()
    # circles is of shape (1, N, 3)
    if circles is None:
        print(f"No circles detected in {title}.")
        continue
    for (x, y, r) in circles[0]:
        # outer circle
        cv2.circle(img_drawn, (int(x), int(y)), int(r), (0, 255, 0), 2)
        # center point
        cv2.circle(img_drawn, (int(x), int(y)), 2, (0, 0, 255), 3)
    cv2.imshow(f"Detected Circles - {title}", img_drawn)
    cv2.waitKey(0)

cv2.destroyAllWindows()

### 2.1 circle detection using parallel processing

In [20]:
import concurrent.futures
from collections import Counter

def hough_for_params(params):
    a, b, gray_blurred, questions, options = params
    detected_circles = cv2.HoughCircles(gray_blurred, cv2.HOUGH_GRADIENT, 1, a,
                                        param1=b, param2=3*b, minRadius=a//2, maxRadius=a)
    if detected_circles is not None and detected_circles.shape[1] == questions * options:
        detected_circles = np.uint16(np.around(detected_circles))
        radii_list = [pt[2] for pt in detected_circles[0, :]]
        most_common_radius = Counter(radii_list).most_common(1)[0][0]
        return detected_circles, most_common_radius, a, b
    return None

def detect_circles_template_parallel(img, gray_blurred, questions, options):
    # Generate all (a,b) pairs, same ranges as before
    param_combinations = [(a, b, gray_blurred, questions, options) 
                          for b in range(15, 5, -1) 
                          for a in range(5, 35)]
    
    with concurrent.futures.ThreadPoolExecutor() as executor:
        results = list(executor.map(hough_for_params, param_combinations))
        
    # Filter out None results and return the first valid detection
    for result in results:
        if result is not None:
            detected_circles, most_common_radius, a, b = result
            print(f"circles:{detected_circles.shape[1]}, a:{a}, b:{b}")
            print(f"Most common radius: {most_common_radius}")
            return detected_circles, most_common_radius, a, b

    return None
if __name__ == "__main__":
    start_2= time.time()
    detected_circles_t,mode_radius,a_t,b_t = detect_circles_template_parallel(
        original_img_t, gray_blurred_t, 
        questions, options,
    )
    end_2= time.time()
    print("Detecting circles time template:", end_2-start_2)

circles:160, a:16, b:10
Most common radius: 12
Detecting circles time template: 2.2580857276916504


### 3. Pixel counts


In [21]:
import cv2
import numpy as np

def count_black_and_white_pixels(img, circles):
    """
    Count black and white pixels in the detected circles.

    Parameters:
    - img: The binary image (where black indicates marked areas).
    - circles: List of detected circles in the format (x, y, r).

    Returns:
    - A list of tuples containing counts of (black_pixels, white_pixels) for each circle.
    """
    counts = []

    for (x, y, r) in circles:
        # Create a mask for the current circle
        mask = np.zeros_like(img, dtype=np.uint8)
        cv2.circle(mask, (x, y), r, (255), thickness=-1)  # Fill the circle in the mask

        # Apply the mask to get the pixel values within the circle
        masked_area = cv2.bitwise_and(img, mask)

        # Count black and white pixels
        black_pixels = np.sum(masked_area == 0)   # Count of black pixels (0)
        white_pixels = np.sum(masked_area == 255) # Count of white pixels (255)

        counts.append((x,y,r,black_pixels, white_pixels))

    return counts

In [22]:
circles_s = detected_circles_s[0, :]
circles_t = detected_circles_t[0, :]  
circles_k = detected_circles_k[0, :]
start = time.time()
# Count black and white pixels in the detected circles
pixel_counts_s = count_black_and_white_pixels(binary_img_s, circles_s)
pixel_counts_t = count_black_and_white_pixels(binary_img_t, circles_t)
pixel_counts_k = count_black_and_white_pixels(binary_img_k, circles_k)
end = time.time()
print("Counting pixels time:", end-start)
# Print the results
print("students answer sheet")
for idx, (x, y,r, black_count, white_count) in enumerate(pixel_counts_s, start=1):
    print(f"Circle {idx}: Coordinates = ({x}, {y}), Black Pixels = {black_count}, White Pixels = {white_count}")

print("template")
for idx, (x, y,r, black_count, white_count) in enumerate(pixel_counts_t, start=1):
    print(f"Circle {idx}: Coordinates = ({x}, {y}), Black Pixels = {black_count}, White Pixels = {white_count}")

print("key")
for idx, (x, y,r, black_count, white_count) in enumerate(pixel_counts_k, start=1):
    print(f"Circle {idx}: Coordinates = ({x}, {y}), Black Pixels = {black_count}, White Pixels = {white_count}")

# Display the binary image
#cv2.imshow("Binary Image", binary_img)

# Draw the circles on the binary image for visualization
for (x, y, r) in circles_s:
    cv2.circle(binary_img_s, (x, y), r, (127), 2)  # Draw circles in a different color (grey)
cv2.imshow("Detected Circles in Binary Image student", binary_img_s)
cv2.waitKey(0)


for (x, y, r) in circles_t:
    cv2.circle(binary_img_t, (x, y), r, (127), 2)  
cv2.imshow("Detected Circles in Binary Image template", binary_img_t)
cv2.waitKey(0)

for (x, y, r) in circles_k:
    cv2.circle(binary_img_k, (x, y), r, (127), 2)
cv2.imshow("Detected Circles in Binary Image key(scheme)", binary_img_k)
cv2.waitKey(0)

cv2.destroyAllWindows()

Counting pixels time: 1.6478755474090576
students answer sheet
Circle 1: Coordinates = (762, 554), Black Pixels = 1228645, White Pixels = 155
Circle 2: Coordinates = (594, 516), Black Pixels = 1228657, White Pixels = 143
Circle 3: Coordinates = (594, 234), Black Pixels = 1228594, White Pixels = 206
Circle 4: Coordinates = (760, 512), Black Pixels = 1228644, White Pixels = 156
Circle 5: Coordinates = (634, 516), Black Pixels = 1228665, White Pixels = 135
Circle 6: Coordinates = (800, 512), Black Pixels = 1228793, White Pixels = 7
Circle 7: Coordinates = (838, 432), Black Pixels = 1228677, White Pixels = 123
Circle 8: Coordinates = (876, 236), Black Pixels = 1228646, White Pixels = 154
Circle 9: Coordinates = (384, 606), Black Pixels = 1228639, White Pixels = 161
Circle 10: Coordinates = (556, 474), Black Pixels = 1228776, White Pixels = 24
Circle 11: Coordinates = (838, 472), Black Pixels = 1228782, White Pixels = 18
Circle 12: Coordinates = (876, 354), Black Pixels = 1228684, White Pix

### 4. Divide the circles by rows

#### Using clustering method DBSCAN


In [23]:
import numpy as np
from sklearn.cluster import DBSCAN
from collections import defaultdict

def group_by_row_dbscan(pixel_counts, eps=20, fallback_eps=10):
    # Extract y-coordinates
    y_coords = np.array([[y] for x, y, r, bc, wc in pixel_counts])

    def _cluster(eps_val):
        labels = DBSCAN(eps=eps_val, min_samples=1).fit(y_coords).labels_
        groups = defaultdict(list)
        for lbl, data in zip(labels, pixel_counts):
            groups[lbl].append(data)
        # Sort rows by mean y and then sort circles in each row by x
        sorted_rows = sorted(groups.values(), key=lambda row: np.mean([c[1] for c in row]))
        return [sorted(row, key=lambda c: c[0]) for row in sorted_rows]

    rows = _cluster(eps)
    if len({len(r) for r in rows}) != 1:
        print(f"Warning: inconsistent row lengths, retrying with eps={fallback_eps}")
        rows = _cluster(fallback_eps)
    return rows

In [24]:
# Group the circles by rows using the group_by_row_dbscan function
start = time.time()
grouped_rows_s = group_by_row_dbscan(pixel_counts_s, eps=10)
end = time.time()
print("Grouping by row time:", end-start)
# Print the grouped results
for row_idx, row in enumerate(grouped_rows_s, start=1):
    print(f"Row {row_idx}:")
    for idx, (x, y, r, black_count, white_count) in enumerate(row, start=1):
        print(f"  Circle {idx}: Coordinates = ({x}, {y}), Black Pixels = {black_count}, White Pixels = {white_count}")

Grouping by row time: 0.002513885498046875
Row 1:
  Circle 1: Coordinates = (262, 228), Black Pixels = 1228589, White Pixels = 211
  Circle 2: Coordinates = (302, 228), Black Pixels = 1228634, White Pixels = 166
  Circle 3: Coordinates = (344, 228), Black Pixels = 1228795, White Pixels = 5
  Circle 4: Coordinates = (388, 230), Black Pixels = 1228632, White Pixels = 168
  Circle 5: Coordinates = (514, 232), Black Pixels = 1228800, White Pixels = 0
  Circle 6: Coordinates = (556, 234), Black Pixels = 1228662, White Pixels = 138
  Circle 7: Coordinates = (594, 234), Black Pixels = 1228594, White Pixels = 206
  Circle 8: Coordinates = (634, 234), Black Pixels = 1228605, White Pixels = 195
  Circle 9: Coordinates = (760, 232), Black Pixels = 1228629, White Pixels = 171
  Circle 10: Coordinates = (800, 234), Black Pixels = 1228798, White Pixels = 2
  Circle 11: Coordinates = (838, 234), Black Pixels = 1228668, White Pixels = 132
  Circle 12: Coordinates = (876, 236), Black Pixels = 1228646, 

In [25]:
start_2= time.time()
grouped_rows_t = group_by_row_dbscan(pixel_counts_t, eps=10)
end_2= time.time()
print("Grouping by row time:", end_2-start_2)
# Print the grouped results
for row_idx, row in enumerate(grouped_rows_t, start=1):
    print(f"Row {row_idx}:")
    for idx, (x, y, r, black_count, white_count) in enumerate(row, start=1):
        print(f"  Circle {idx}: Coordinates = ({x}, {y}), Black Pixels = {black_count}, White Pixels = {white_count}")

Grouping by row time: 0.0021054744720458984
Row 1:
  Circle 1: Coordinates = (222, 244), Black Pixels = 1228580, White Pixels = 220
  Circle 2: Coordinates = (264, 246), Black Pixels = 1228566, White Pixels = 234
  Circle 3: Coordinates = (308, 246), Black Pixels = 1228618, White Pixels = 182
  Circle 4: Coordinates = (352, 248), Black Pixels = 1228596, White Pixels = 204
  Circle 5: Coordinates = (484, 254), Black Pixels = 1228560, White Pixels = 240
  Circle 6: Coordinates = (526, 256), Black Pixels = 1228634, White Pixels = 166
  Circle 7: Coordinates = (566, 256), Black Pixels = 1228606, White Pixels = 194
  Circle 8: Coordinates = (606, 256), Black Pixels = 1228624, White Pixels = 176
  Circle 9: Coordinates = (736, 258), Black Pixels = 1228597, White Pixels = 203
  Circle 10: Coordinates = (776, 258), Black Pixels = 1228649, White Pixels = 151
  Circle 11: Coordinates = (816, 260), Black Pixels = 1228652, White Pixels = 148
  Circle 12: Coordinates = (856, 260), Black Pixels = 12

In [26]:
start = time.time()
grouped_rows_k = group_by_row_dbscan(pixel_counts_k, eps=10)
end = time.time()
print("Grouping by row time:", end-start)
# Print the grouped results
for row_idx, row in enumerate(grouped_rows_k, start=1):
    print(f"Row {row_idx}:")
    for idx, (x, y, r, black_count, white_count) in enumerate(row, start=1):
        print(f"  Circle {idx}: Coordinates = ({x}, {y}), Black Pixels = {black_count}, White Pixels = {white_count}")

Grouping by row time: 0.003481626510620117
Row 1:
  Circle 1: Coordinates = (234, 272), Black Pixels = 1228593, White Pixels = 207
  Circle 2: Coordinates = (272, 272), Black Pixels = 1228679, White Pixels = 121
  Circle 3: Coordinates = (314, 272), Black Pixels = 1228800, White Pixels = 0
  Circle 4: Coordinates = (356, 272), Black Pixels = 1228661, White Pixels = 139
  Circle 5: Coordinates = (476, 276), Black Pixels = 1228766, White Pixels = 34
  Circle 6: Coordinates = (516, 278), Black Pixels = 1228669, White Pixels = 131
  Circle 7: Coordinates = (552, 278), Black Pixels = 1228660, White Pixels = 140
  Circle 8: Coordinates = (590, 278), Black Pixels = 1228666, White Pixels = 134
  Circle 9: Coordinates = (710, 280), Black Pixels = 1228644, White Pixels = 156
  Circle 10: Coordinates = (746, 280), Black Pixels = 1228800, White Pixels = 0
  Circle 11: Coordinates = (784, 282), Black Pixels = 1228691, White Pixels = 109
  Circle 12: Coordinates = (820, 282), Black Pixels = 1228677,

### 5. Idendifying Marked and unmarked bubbles

In [27]:
def mean_sd(grouped_rows, num_options):

    temp=[]
    for row in grouped_rows:
        for question_start in range(0, len(row), num_options):
            question_group = row[question_start:question_start + num_options]

            # Extract white pixel values for the current question
            white_pixel_values = [white for (_, _, _, _, white) in question_group]

            # Calculate the mean and standard deviation of the white pixel values
            mean_white = np.mean(white_pixel_values)
            std_dev_white = np.std(white_pixel_values)
            print(f"Mean: {mean_white}, Standard Deviation: {std_dev_white}")
            temp.append((mean_white,std_dev_white))

    return temp

start = time.time()
stat_template = mean_sd(grouped_rows_t, options)
end = time.time()
print("Mean and SD time:", end-start)

Mean: 210.0, Standard Deviation: 19.339079605813716
Mean: 194.0, Standard Deviation: 28.39013913315678
Mean: 159.0, Standard Deviation: 26.201145013147805
Mean: 137.0, Standard Deviation: 18.76166303929372
Mean: 184.0, Standard Deviation: 30.124740662784138
Mean: 172.25, Standard Deviation: 17.239127008059313
Mean: 148.75, Standard Deviation: 18.14352501582865
Mean: 140.75, Standard Deviation: 15.07274029498286
Mean: 215.5, Standard Deviation: 19.20286436967152
Mean: 172.75, Standard Deviation: 14.042346669983617
Mean: 157.75, Standard Deviation: 16.7388022271607
Mean: 132.0, Standard Deviation: 6.96419413859206
Mean: 226.25, Standard Deviation: 19.62619423117992
Mean: 207.75, Standard Deviation: 16.48294573187693
Mean: 161.5, Standard Deviation: 14.857657958103626
Mean: 148.25, Standard Deviation: 14.148763196831021
Mean: 202.25, Standard Deviation: 15.481844205391036
Mean: 180.5, Standard Deviation: 39.16950344336777
Mean: 151.75, Standard Deviation: 26.621185172715357
Mean: 135.25, 

In [28]:
max_tuple = max(stat_template, key=lambda x: x[1])
max_sd = max_tuple[1]
print("Maximum Standard Deviation:", max_sd)

Maximum Standard Deviation: 40.38873605350878


In [29]:
start = time.time()
mean_sd(grouped_rows_s, options)
end = time.time()
print("Mean and SD time:", end-start)

Mean: 137.5, Standard Deviation: 78.58275892331599
Mean: 134.75, Standard Deviation: 81.96760030646256
Mean: 114.75, Standard Deviation: 66.54838465357368
Mean: 111.0, Standard Deviation: 56.47565847336355
Mean: 139.75, Standard Deviation: 59.767779781417346
Mean: 130.25, Standard Deviation: 62.61938597591005
Mean: 98.5, Standard Deviation: 57.67798540171111
Mean: 104.0, Standard Deviation: 49.462106708064915
Mean: 132.75, Standard Deviation: 77.89536250637775
Mean: 141.0, Standard Deviation: 52.407060593015515
Mean: 121.75, Standard Deviation: 65.94078783272157
Mean: 111.0, Standard Deviation: 64.07417576528005
Mean: 148.0, Standard Deviation: 105.00714261420507
Mean: 125.25, Standard Deviation: 64.85898164479612
Mean: 105.5, Standard Deviation: 62.2434735534578
Mean: 100.25, Standard Deviation: 59.14970414127192
Mean: 129.5, Standard Deviation: 65.5190811901388
Mean: 108.5, Standard Deviation: 64.24367673164419
Mean: 108.0, Standard Deviation: 51.176166327695945
Mean: 104.25, Standar

In [30]:
start = time.time()
mean_sd(grouped_rows_k, options)
end = time.time()
print("Mean and SD time:", end-start)

Mean: 116.75, Standard Deviation: 74.6470863463538
Mean: 109.75, Standard Deviation: 43.85416171813115
Mean: 97.0, Standard Deviation: 58.54485459884583
Mean: 91.0, Standard Deviation: 42.84273567362383
Mean: 130.25, Standard Deviation: 55.670346684747706
Mean: 113.75, Standard Deviation: 72.17470124635086
Mean: 88.0, Standard Deviation: 39.15992849840255
Mean: 86.5, Standard Deviation: 51.14929129518805
Mean: 131.75, Standard Deviation: 79.74451391788654
Mean: 109.75, Standard Deviation: 62.174653195655225
Mean: 71.75, Standard Deviation: 36.88749788207381
Mean: 89.0, Standard Deviation: 52.54997621312497
Mean: 125.0, Standard Deviation: 64.40108694734896
Mean: 109.0, Standard Deviation: 55.87486017879597
Mean: 93.0, Standard Deviation: 55.76289088632332
Mean: 70.25, Standard Deviation: 40.43126883984721
Mean: 115.25, Standard Deviation: 60.30495419117736
Mean: 102.25, Standard Deviation: 50.3158772158451
Mean: 77.25, Standard Deviation: 31.72045869781835
Mean: 73.25, Standard Deviati

In [35]:
def detect_marked_and_unmarked_bubbles(subtracted_grouped_rows, num_options, deviation_threshold=50):
    """
    Detect marked and unmarked bubbles based on white pixel values and a deviation threshold.

    Args:
        subtracted_grouped_rows (list): List of grouped circles with subtracted pixel counts.
        num_options (int): The number of options per question.
        deviation_threshold (float): The threshold below which a question is considered unmarked.

    Returns:
        list: A 2D array where each element is:
              - 1 for the most deviated (marked) bubble.
              - 0 for unmarked bubbles.
    """
    marked_bubbles = []

    for row in subtracted_grouped_rows:
        for question_start in range(0, len(row), num_options):
            question_group = row[question_start:question_start + num_options]

            # Extract white pixel values for the current question
            white_pixel_values = [subtracted_white for (_, _, _, _, subtracted_white) in question_group]

            # Calculate the mean and standard deviation of the white pixel values
            mean_white = np.mean(white_pixel_values)
            std_dev_white = np.std(white_pixel_values)
            print(f"Mean: {mean_white}, Standard Deviation: {std_dev_white}")
            # If the standard deviation is less than the threshold, consider the question unmarked
            if std_dev_white < deviation_threshold:
                marked_bubbles.append([0] * num_options)  
            else:

                # Create a 1D array for the question (options), initialized to zeros
                marked_question = np.zeros(num_options, dtype=int)

                min_white_index = np.argmin(white_pixel_values)

                # Mark the option with the highest deviation as 1 (marked bubble)
                marked_question[min_white_index] = 1

                marked_bubbles.append(marked_question)

    return np.array(marked_bubbles)
start = time.time()
answer_student = detect_marked_and_unmarked_bubbles(grouped_rows_s, options, deviation_threshold=max_sd)
end = time.time()
print("Detecting marked and unmarked bubbles time:", end-start)
# Print the marked bubbles in a readable format
for row_idx, row in enumerate(answer_student, start=1):
    print(f"Question {row_idx}: Marked Bubble = {row.tolist()}")

Mean: 137.5, Standard Deviation: 78.58275892331599
Mean: 134.75, Standard Deviation: 81.96760030646256
Mean: 114.75, Standard Deviation: 66.54838465357368
Mean: 111.0, Standard Deviation: 56.47565847336355
Mean: 139.75, Standard Deviation: 59.767779781417346
Mean: 130.25, Standard Deviation: 62.61938597591005
Mean: 98.5, Standard Deviation: 57.67798540171111
Mean: 104.0, Standard Deviation: 49.462106708064915
Mean: 132.75, Standard Deviation: 77.89536250637775
Mean: 141.0, Standard Deviation: 52.407060593015515
Mean: 121.75, Standard Deviation: 65.94078783272157
Mean: 111.0, Standard Deviation: 64.07417576528005
Mean: 148.0, Standard Deviation: 105.00714261420507
Mean: 125.25, Standard Deviation: 64.85898164479612
Mean: 105.5, Standard Deviation: 62.2434735534578
Mean: 100.25, Standard Deviation: 59.14970414127192
Mean: 129.5, Standard Deviation: 65.5190811901388
Mean: 108.5, Standard Deviation: 64.24367673164419
Mean: 108.0, Standard Deviation: 51.176166327695945
Mean: 104.25, Standar

In [36]:
start = time.time()
answer_key = detect_marked_and_unmarked_bubbles(grouped_rows_k, options, deviation_threshold=max_sd)
end = time.time()
print("Detecting marked and unmarked bubbles time:", end-start)
for row_idx, row in enumerate(answer_key, start=1):
    print(f"Question {row_idx}: Marked Bubble = {row.tolist()}")

Mean: 116.75, Standard Deviation: 74.6470863463538
Mean: 109.75, Standard Deviation: 43.85416171813115
Mean: 97.0, Standard Deviation: 58.54485459884583
Mean: 91.0, Standard Deviation: 42.84273567362383
Mean: 130.25, Standard Deviation: 55.670346684747706
Mean: 113.75, Standard Deviation: 72.17470124635086
Mean: 88.0, Standard Deviation: 39.15992849840255
Mean: 86.5, Standard Deviation: 51.14929129518805
Mean: 131.75, Standard Deviation: 79.74451391788654
Mean: 109.75, Standard Deviation: 62.174653195655225
Mean: 71.75, Standard Deviation: 36.88749788207381
Mean: 89.0, Standard Deviation: 52.54997621312497
Mean: 125.0, Standard Deviation: 64.40108694734896
Mean: 109.0, Standard Deviation: 55.87486017879597
Mean: 93.0, Standard Deviation: 55.76289088632332
Mean: 70.25, Standard Deviation: 40.43126883984721
Mean: 115.25, Standard Deviation: 60.30495419117736
Mean: 102.25, Standard Deviation: 50.3158772158451
Mean: 77.25, Standard Deviation: 31.72045869781835
Mean: 73.25, Standard Deviati

### 6. Calculating score

In [38]:
def calculate_score(answer_key, answer_student):
    """
    Compare the student's answers with the answer key and calculate the score.

    Args:
        answer_key (np.ndarray): The correct answer key with marked bubbles (1 for correct option, 0 for others).
        answer_student (np.ndarray): The student's marked answers with bubbles (1 for marked option, 0 for others).

    Returns:
        int: The total score of the student.
        list: A list indicating correctness for each question (1 for correct, 0 for incorrect).
    """
    total_score = 0
    result_per_question = []

    for key, student in zip(answer_key, answer_student):
        if np.array_equal(key, student):
            total_score += 1
            result_per_question.append(1)  # Correct answer
        else:
            result_per_question.append(0)  # Incorrect answer

    return total_score, result_per_question

start = time.time()
total_score, result_per_question = calculate_score(answer_key, answer_student)
end = time.time()
print("Calculating score time:", end-start)
# Print the final score and results
print(f"Total Score: {total_score}")
for idx, result in enumerate(result_per_question, start=1):
    status = "Correct" if result == 1 else "Incorrect"
    print(f"Question {idx}: {status}")


Calculating score time: 0.0010046958923339844
Total Score: 9
Question 1: Correct
Question 2: Correct
Question 3: Correct
Question 4: Incorrect
Question 5: Incorrect
Question 6: Incorrect
Question 7: Incorrect
Question 8: Incorrect
Question 9: Incorrect
Question 10: Correct
Question 11: Incorrect
Question 12: Correct
Question 13: Incorrect
Question 14: Incorrect
Question 15: Incorrect
Question 16: Correct
Question 17: Incorrect
Question 18: Incorrect
Question 19: Incorrect
Question 20: Incorrect
Question 21: Incorrect
Question 22: Correct
Question 23: Correct
Question 24: Incorrect
Question 25: Incorrect
Question 26: Incorrect
Question 27: Incorrect
Question 28: Incorrect
Question 29: Correct
Question 30: Incorrect
Question 31: Incorrect
Question 32: Incorrect
Question 33: Incorrect
Question 34: Incorrect
Question 35: Incorrect
Question 36: Incorrect
Question 37: Incorrect
Question 38: Incorrect
Question 39: Incorrect
Question 40: Incorrect


## extra verification step

### Group Circle by radius

this can be used to verify that the detected circles are answer circles. Now we are not using this. we will use this after we finish the core of the function.

In [None]:
import cv2
import numpy as np
from collections import defaultdict

def group_circles_by_radius(circles, img, tolerance_percentage=20):
    """
    Groups circles based on their radius with a given tolerance percentage of the y-coordinate.
    
    Args:
        circles (array): List of detected circles [(x, y, r)].
        img (numpy array): The original image.
        tolerance_percentage (float): Tolerance as a percentage of the y-coordinate (default is 20%).
        
    Returns:
        dict: Groups of circles based on their radius.
    """
    radius_groups = defaultdict(list)
    
    # Iterate over the detected circles and group them by radius
    for circle in circles:
        x, y, r = circle[0], circle[1], circle[2]

        if not radius_groups:
            # If no groups exist yet, start a new group with the first circle
            radius_groups[r].append((x, y, r))
        else:
            matched_group = None
            for existing_radius in radius_groups.keys():
                diff = abs(int(r) - int(existing_radius))
                
                # Calculate tolerance based on the radius
                tol = (tolerance_percentage / 100.0) * existing_radius

                if diff <= tol:
                    matched_group = existing_radius
                    break

            if matched_group is not None:
                radius_groups[matched_group].append((x, y, r))
            else:
                radius_groups[r].append((x, y, r))

    # Print the radius groups for debugging
    for radius, grouped_circles in radius_groups.items():
        print(f"Radius: {radius}, Circles: {grouped_circles}")
        
    # Visualize each group of circles in separate images
    for radius, grouped_circles in radius_groups.items():
        img_copy = img.copy()  # Create a copy of the original image to visualize each group
        
        for x, y, r in grouped_circles:
            cv2.circle(img_copy, (x, y), r, (0, 255, 0), 2)  # Draw outer circle
            cv2.circle(img_copy, (x, y), 2, (0, 0, 255), 3)  # Draw center

        # Display the grouped circles in a separate window
        cv2.imshow(f'Circles with radius ~ {radius}', img_copy)
        cv2.waitKey(0)
    
    return radius_groups



# if detected_circles is not None:
#     detected_circles = np.uint16(np.around(detected_circles))

#     # Call the function with the detected circles and image
#     group_circles_by_radius(detected_circles[0, :], img, tolerance_percentage=20)

cv2.destroyAllWindows()
