## OMR without Template Image


### 1. preprocessing

In [39]:
import cv2
import numpy as np


def preprocess_image(image_path, blur_ksize=(5, 5)):
    """
    Preprocess the input image by converting it to grayscale and applying Gaussian blur and convert it to binary image.
    
    Args:
        image_path (str): Path to the input image.
        blur_ksize (tuple): Kernel size for the Gaussian blur (default is (5, 5)).
        
    Returns:
        gray_blurred (numpy array): Blurred grayscale version of the image.
        original_img (numpy array): Original color image.
        gray (numpy array): Grayscale version of the image.
        binary_img (numpy array): Binary version of the image
    """
    # Read the image.
    original_img = cv2.imread(image_path, cv2.IMREAD_COLOR)

    if original_img is None:
        raise ValueError(f"Image at {image_path} not found.")
    
    # Convert to grayscale.
    gray = cv2.cvtColor(original_img, cv2.COLOR_BGR2GRAY)

    # Apply Gaussian blur.
    gray_blurred = cv2.GaussianBlur(gray, blur_ksize, 0)

    # Convert the original image to a binary image
    _, binary_img = cv2.threshold(gray_blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    return gray_blurred, original_img, gray, binary_img




### 

In [40]:
# Preprocess the images
gray_blurred_s, original_img_s, gray_s, binary_img_s = preprocess_image('./Test/5_S.jpg')
#gray_blurred_t, original_img_t, gray_t, binary_img_t = preprocess_image('./Test/7_T.jpg')
gray_blurred_k, original_img_k, gray_k, binary_img_k = preprocess_image('./Test/5_K.jpg')

questions = 50
options=5

### 2. circle detection

In [41]:
from collections import Counter
def detect_circles(img, gray_blurred, questions, options):
    """
    Detect circles in the preprocessed grayscale image using the Hough Circle Transform.
    
    Args:
        img (numpy array): Original image for visualization.
        gray_blurred (numpy array): Blurred grayscale image.
        num_questions (int): Number of questions in the OMR.
        num_options (int): Number of options per question.
        dp (float): Inverse ratio of the accumulator resolution to the image resolution (default is 1).
        min_dist (int): Minimum distance between the centers of detected circles (default is 1).
        param1 (float): First method-specific parameter (gradient threshold for Canny edge detection).
        param2 (float): Second method-specific parameter (accumulator threshold for circle detection).
        min_radius_range (tuple): Tuple for minimum and maximum radius values for circle detection.
    
    Returns:
        detected_circles (numpy array): Array of detected circles, if found.
    """
    
    for b in range(15,5,-1): 
        for a in range(5,35):  
            detected_circles = cv2.HoughCircles(gray_blurred, cv2.HOUGH_GRADIENT, 1, a, 
                            param1 = b,param2 = 3*b, minRadius = a//2, maxRadius = a) 
            if detected_circles is not None:
                detected_circles=np.uint16(np.around(detected_circles))
                print(f"circles:{detected_circles.shape[1]},a:{a},b:{b}")
                if detected_circles.shape[1]==questions*options:

                    radii_list = [pt[2] for pt in detected_circles[0, :]]
                    
                    # Find the most common radius
                    most_common_radius = Counter(radii_list).most_common(1)[0][0]
                    print(f"Most common radius: {most_common_radius}")



                    for pt in detected_circles[0, :]: 
                        x, y, r = pt[0], pt[1], pt[2] 

                        # Draw the circumference of the circle. 
                        cv2.circle(img, (x, y), r, (0, 255, 0), 3) 

                        # Draw a small circle (of radius 1) to show the center. 
                        cv2.circle(img, (x, y), 2, (0, 0, 255), 3) 
                    #cv2.imwrite(f'./testoutput/detected_circlesf7.jpg', img)
                    cv2.imshow("Detected Circle", img) 
                    cv2.waitKey(0) 
                    return detected_circles

    cv2.destroyAllWindows()
    return None



In [42]:
# Detect circles in the preprocessed image
detected_circles_s = detect_circles(
    original_img_s, gray_blurred_s, 
    questions, options
)

detected_circles_k = detect_circles(
    original_img_k, gray_blurred_k, 
    questions, options
)

circles:32,a:16,b:15
circles:145,a:17,b:15
circles:152,a:18,b:15
circles:165,a:19,b:15
circles:223,a:20,b:15
circles:235,a:21,b:15
circles:235,a:22,b:15
circles:238,a:23,b:15
circles:238,a:24,b:15
circles:240,a:25,b:15
circles:240,a:26,b:15
circles:238,a:27,b:15
circles:238,a:28,b:15
circles:239,a:29,b:15
circles:241,a:30,b:15
circles:242,a:31,b:15
circles:241,a:32,b:15
circles:243,a:33,b:15
circles:241,a:34,b:15
circles:1,a:15,b:14
circles:47,a:16,b:14
circles:162,a:17,b:14
circles:172,a:18,b:14
circles:180,a:19,b:14
circles:230,a:20,b:14
circles:242,a:21,b:14
circles:239,a:22,b:14
circles:241,a:23,b:14
circles:243,a:24,b:14
circles:245,a:25,b:14
circles:244,a:26,b:14
circles:243,a:27,b:14
circles:242,a:28,b:14
circles:244,a:29,b:14
circles:244,a:30,b:14
circles:245,a:31,b:14
circles:246,a:32,b:14
circles:247,a:33,b:14
circles:247,a:34,b:14
circles:1,a:15,b:13
circles:61,a:16,b:13
circles:173,a:17,b:13
circles:187,a:18,b:13
circles:197,a:19,b:13
circles:236,a:20,b:13
circles:246,a:21,

### 3.pixelcount

In [43]:
import cv2
import numpy as np

def count_black_and_white_pixels(img, circles):
    """
    Count black and white pixels in the detected circles.

    Parameters:
    - img: The binary image (where black indicates marked areas).
    - circles: List of detected circles in the format (x, y, r).

    Returns:
    - A list of tuples containing counts of (black_pixels, white_pixels) for each circle.
    """
    counts = []

    for (x, y, r) in circles:
        # Create a mask for the current circle
        mask = np.zeros_like(img, dtype=np.uint8)
        cv2.circle(mask, (x, y), r, (255), thickness=-1)  # Fill the circle in the mask

        # Apply the mask to get the pixel values within the circle
        masked_area = cv2.bitwise_and(img, mask)

        # Count black and white pixels
        black_pixels = np.sum(masked_area == 0)   # Count of black pixels (0)
        white_pixels = np.sum(masked_area == 255) # Count of white pixels (255)

        counts.append((x,y,r,black_pixels, white_pixels))

    return counts


In [44]:
circles_s = detected_circles_s[0, :]
circles_k = detected_circles_k[0, :]

# Count black and white pixels in the detected circles
pixel_counts_s = count_black_and_white_pixels(binary_img_s, circles_s)
#pixel_counts_t = count_black_and_white_pixels(binary_img_t, circles_t)
pixel_counts_k = count_black_and_white_pixels(binary_img_k, circles_k)

# Print the results
print("students answer sheet")
for idx, (x, y,r, black_count, white_count) in enumerate(pixel_counts_s, start=1):
    print(f"Circle {idx}: Coordinates = ({x}, {y}), Black Pixels = {black_count}, White Pixels = {white_count}")

# print("template")
# for idx, (x, y,r, black_count, white_count) in enumerate(pixel_counts_t, start=1):
#     print(f"Circle {idx}: Coordinates = ({x}, {y}), Black Pixels = {black_count}, White Pixels = {white_count}")

print("key")
for idx, (x, y,r, black_count, white_count) in enumerate(pixel_counts_k, start=1):
    print(f"Circle {idx}: Coordinates = ({x}, {y}), Black Pixels = {black_count}, White Pixels = {white_count}")



# Draw the circles on the binary image for visualization
for (x, y, r) in circles_s:
    cv2.circle(binary_img_s, (x, y), r, (127), 2)  # Draw circles in a different color (grey)
cv2.imshow("Detected Circles in Binary Image student", binary_img_s)
cv2.waitKey(0)


# for (x, y, r) in circles_t:
#     cv2.circle(binary_img_t, (x, y), r, (127), 2)  
# cv2.imshow("Detected Circles in Binary Image template", binary_img_t)
# cv2.waitKey(0)

for (x, y, r) in circles_k:
    cv2.circle(binary_img_k, (x, y), r, (127), 2)
cv2.imshow("Detected Circles in Binary Image key(scheme)", binary_img_k)
cv2.waitKey(0)




cv2.destroyAllWindows()


students answer sheet
Circle 1: Coordinates = (938, 1336), Black Pixels = 2061503, White Pixels = 585
Circle 2: Coordinates = (498, 1446), Black Pixels = 2061503, White Pixels = 585
Circle 3: Coordinates = (500, 1010), Black Pixels = 2061463, White Pixels = 625
Circle 4: Coordinates = (500, 1118), Black Pixels = 2061538, White Pixels = 550
Circle 5: Coordinates = (1042, 254), Black Pixels = 2061459, White Pixels = 629
Circle 6: Coordinates = (1110, 574), Black Pixels = 2061481, White Pixels = 607
Circle 7: Coordinates = (492, 374), Black Pixels = 2061411, White Pixels = 677
Circle 8: Coordinates = (440, 748), Black Pixels = 2061452, White Pixels = 636
Circle 9: Coordinates = (1052, 738), Black Pixels = 2061461, White Pixels = 627
Circle 10: Coordinates = (1054, 1116), Black Pixels = 2061466, White Pixels = 622
Circle 11: Coordinates = (864, 152), Black Pixels = 2061447, White Pixels = 641
Circle 12: Coordinates = (258, 1174), Black Pixels = 2061488, White Pixels = 600
Circle 13: Coordi

### 4. sorting the bubbles

In [45]:
import numpy as np
from sklearn.cluster import DBSCAN


# Function to group circles by rows using DBSCAN (y-coordinate clustering)
def group_by_row_dbscan(pixel_counts, eps=10):
    # Extract the y-coordinates of circles from the pixel_counts
    y_coords = np.array([[y] for (x, y, r, black_count, white_count) in pixel_counts])
    
    # Apply DBSCAN clustering based on y-coordinates
    clustering = DBSCAN(eps=eps, min_samples=1).fit(y_coords)
    
    # Get cluster labels
    labels = clustering.labels_

    # Group circles by the row labels
    rows = {}
    for label, circle_data in zip(labels, pixel_counts):
        if label not in rows:
            rows[label] = []
        rows[label].append(circle_data)

    sorted_rows = sorted(rows.values(), key=lambda row: np.mean([c[1] for c in row]))
    sorted_rows = [sorted(row, key=lambda c: c[0]) for row in sorted_rows]
    
    return sorted_rows


In [46]:
# Group the circles by rows using the group_by_row_dbscan function
grouped_rows_s = group_by_row_dbscan(pixel_counts_s, eps=20)

# Print the grouped results
for row_idx, row in enumerate(grouped_rows_s, start=1):
    print(f"Row {row_idx}:")
    for idx, (x, y, r, black_count, white_count) in enumerate(row, start=1):
        print(f"  Circle {idx}: Coordinates = ({x}, {y}), Black Pixels = {black_count}, White Pixels = {white_count}")

Row 1:
  Circle 1: Coordinates = (246, 152), Black Pixels = 2061335, White Pixels = 753
  Circle 2: Coordinates = (306, 152), Black Pixels = 2061431, White Pixels = 657
  Circle 3: Coordinates = (366, 154), Black Pixels = 2061394, White Pixels = 694
  Circle 4: Coordinates = (428, 156), Black Pixels = 2061442, White Pixels = 646
  Circle 5: Coordinates = (488, 156), Black Pixels = 2062050, White Pixels = 38
  Circle 6: Coordinates = (864, 152), Black Pixels = 2061447, White Pixels = 641
  Circle 7: Coordinates = (922, 152), Black Pixels = 2061425, White Pixels = 663
  Circle 8: Coordinates = (980, 150), Black Pixels = 2062026, White Pixels = 62
  Circle 9: Coordinates = (1040, 150), Black Pixels = 2061525, White Pixels = 563
  Circle 10: Coordinates = (1096, 148), Black Pixels = 2061467, White Pixels = 621
Row 2:
  Circle 1: Coordinates = (248, 210), Black Pixels = 2061393, White Pixels = 695
  Circle 2: Coordinates = (308, 212), Black Pixels = 2062062, White Pixels = 26
  Circle 3: Co

In [47]:
grouped_rows_k = group_by_row_dbscan(pixel_counts_k, eps=10)

# Print the grouped results
for row_idx, row in enumerate(grouped_rows_k, start=1):
    print(f"Row {row_idx}:")
    for idx, (x, y, r, black_count, white_count) in enumerate(row, start=1):
        print(f"  Circle {idx}: Coordinates = ({x}, {y}), Black Pixels = {black_count}, White Pixels = {white_count}")

Row 1:
  Circle 1: Coordinates = (238, 132), Black Pixels = 2042816, White Pixels = 1661
  Circle 2: Coordinates = (294, 136), Black Pixels = 2043335, White Pixels = 1142
  Circle 3: Coordinates = (352, 140), Black Pixels = 2043794, White Pixels = 683
  Circle 4: Coordinates = (408, 142), Black Pixels = 2043929, White Pixels = 548
  Circle 5: Coordinates = (464, 142), Black Pixels = 2043945, White Pixels = 532
  Circle 6: Coordinates = (820, 142), Black Pixels = 2044391, White Pixels = 86
  Circle 7: Coordinates = (876, 142), Black Pixels = 2043950, White Pixels = 527
  Circle 8: Coordinates = (932, 142), Black Pixels = 2043889, White Pixels = 588
  Circle 9: Coordinates = (986, 140), Black Pixels = 2044043, White Pixels = 434
  Circle 10: Coordinates = (1042, 142), Black Pixels = 2044035, White Pixels = 442
Row 2:
  Circle 1: Coordinates = (238, 190), Black Pixels = 2043853, White Pixels = 624
  Circle 2: Coordinates = (296, 190), Black Pixels = 2043949, White Pixels = 528
  Circle 3:

### 5. detecting marked and unmarked bubble

In [48]:


def detect_marked_and_unmarked_bubbles(subtracted_grouped_rows, num_options, deviation_threshold=50):
    """
    Detect marked and unmarked bubbles based on white pixel values and a deviation threshold.

    Args:
        subtracted_grouped_rows (list): List of grouped circles with subtracted pixel counts.
        num_options (int): The number of options per question.
        deviation_threshold (float): The threshold below which a question is considered unmarked.

    Returns:
        list: A 2D array where each element is:
              - 1 for the most deviated (marked) bubble.
              - 0 for unmarked bubbles.
    """
    marked_bubbles = []

    for row in subtracted_grouped_rows:
        for question_start in range(0, len(row), num_options):
            question_group = row[question_start:question_start + num_options]

            # Extract white pixel values for the current question
            white_pixel_values = [subtracted_white for (_, _, _, _, subtracted_white) in question_group]

            # Calculate the mean and standard deviation of the white pixel values
            mean_white = np.mean(white_pixel_values)
            std_dev_white = np.std(white_pixel_values)
            print(f"Mean: {mean_white}, Standard Deviation: {std_dev_white}")
            # If the standard deviation is less than the threshold, consider the question unmarked
            if std_dev_white < deviation_threshold:
                marked_bubbles.append([0] * num_options)  
            else:
                # Calculate absolute deviations from the mean
                deviations = [abs(white_value - mean_white) for white_value in white_pixel_values]

                # Create a 1D array for the question (options), initialized to zeros
                marked_question = np.zeros(num_options, dtype=int)

                # Find the index of the maximum deviation
                max_deviation_index = np.argmax(deviations)

                # Mark the option with the highest deviation as 1 (marked bubble)
                marked_question[max_deviation_index] = 1

                marked_bubbles.append(marked_question)

    return np.array(marked_bubbles)


  
answer_student = detect_marked_and_unmarked_bubbles(grouped_rows_s, options)

# Print the marked bubbles in a readable format
for row_idx, row in enumerate(answer_student, start=1):
    print(f"Question {row_idx}: Marked Bubble = {row.tolist()}")


Mean: 557.6, Standard Deviation: 262.47483688917686
Mean: 510.0, Standard Deviation: 226.45264405610283
Mean: 556.0, Standard Deviation: 265.7404748998541
Mean: 516.4, Standard Deviation: 200.47204293866017
Mean: 523.4, Standard Deviation: 232.13926854369123
Mean: 519.4, Standard Deviation: 193.75819982648477
Mean: 506.2, Standard Deviation: 221.31642505697585
Mean: 512.2, Standard Deviation: 231.25691341017247
Mean: 579.8, Standard Deviation: 253.3214558619147
Mean: 513.0, Standard Deviation: 245.9447092335999
Mean: 548.6, Standard Deviation: 221.09871098674455
Mean: 485.2, Standard Deviation: 240.9335178010731
Mean: 534.0, Standard Deviation: 199.09394767295163
Mean: 511.2, Standard Deviation: 245.1623135802075
Mean: 506.4, Standard Deviation: 263.9868178527102
Mean: 524.6, Standard Deviation: 233.81411420185907
Mean: 561.2, Standard Deviation: 297.8720530697702
Mean: 557.8, Standard Deviation: 193.72495967221158
Mean: 503.6, Standard Deviation: 248.02790165624512
Mean: 515.0, Standa

In [49]:
answer_key = detect_marked_and_unmarked_bubbles(grouped_rows_k, options)
for row_idx, row in enumerate(answer_key, start=1):
    print(f"Question {row_idx}: Marked Bubble = {row.tolist()}")

Mean: 913.2, Standard Deviation: 434.3341570726392
Mean: 415.4, Standard Deviation: 174.24304864183247
Mean: 522.2, Standard Deviation: 212.06829088762893
Mean: 469.2, Standard Deviation: 198.13167338918834
Mean: 605.2, Standard Deviation: 130.4996551719582
Mean: 415.2, Standard Deviation: 178.961895385582
Mean: 431.2, Standard Deviation: 207.29823925928557
Mean: 538.25, Standard Deviation: 71.13499490405549
Mean: 490.0, Standard Deviation: 203.9186112153572
Mean: 678.6, Standard Deviation: 253.83585247163174
Mean: 992.8, Standard Deviation: 563.1267708074266
Mean: 696.8, Standard Deviation: 298.1123278229198
Mean: 463.6, Standard Deviation: 215.02241743595016
Mean: 812.8, Standard Deviation: 529.270970297824
Mean: 486.2, Standard Deviation: 134.2466386916261
Mean: 672.8, Standard Deviation: 230.93583524433794
Mean: 440.6, Standard Deviation: 207.5115418476765
Mean: 526.4, Standard Deviation: 264.6330289287412
Mean: 663.2, Standard Deviation: 520.165896613763
Mean: 443.4, Standard Devi

### 6. calculate score

In [50]:
def calculate_score(answer_key, answer_student):
    """
    Compare the student's answers with the answer key and calculate the score.

    Args:
        answer_key (np.ndarray): The correct answer key with marked bubbles (1 for correct option, 0 for others).
        answer_student (np.ndarray): The student's marked answers with bubbles (1 for marked option, 0 for others).

    Returns:
        int: The total score of the student.
        list: A list indicating correctness for each question (1 for correct, 0 for incorrect).
    """
    total_score = 0
    result_per_question = []

    for key, student in zip(answer_key, answer_student):
        if np.array_equal(key, student):
            total_score += 1
            result_per_question.append(1)  # Correct answer
        else:
            result_per_question.append(0)  # Incorrect answer

    return total_score, result_per_question


total_score, result_per_question = calculate_score(answer_key, answer_student)

# Print the final score and results
print(f"Total Score: {total_score}")
for idx, result in enumerate(result_per_question, start=1):
    status = "Correct" if result == 1 else "Incorrect"
    print(f"Question {idx}: {status}")


Total Score: 11
Question 1: Incorrect
Question 2: Incorrect
Question 3: Incorrect
Question 4: Incorrect
Question 5: Correct
Question 6: Correct
Question 7: Incorrect
Question 8: Correct
Question 9: Incorrect
Question 10: Incorrect
Question 11: Incorrect
Question 12: Incorrect
Question 13: Correct
Question 14: Incorrect
Question 15: Incorrect
Question 16: Incorrect
Question 17: Incorrect
Question 18: Incorrect
Question 19: Incorrect
Question 20: Incorrect
Question 21: Incorrect
Question 22: Correct
Question 23: Incorrect
Question 24: Correct
Question 25: Incorrect
Question 26: Incorrect
Question 27: Incorrect
Question 28: Correct
Question 29: Incorrect
Question 30: Incorrect
Question 31: Incorrect
Question 32: Incorrect
Question 33: Incorrect
Question 34: Correct
Question 35: Incorrect
Question 36: Incorrect
Question 37: Incorrect
Question 38: Incorrect
Question 39: Incorrect
Question 40: Incorrect
Question 41: Incorrect
Question 42: Correct
Question 43: Incorrect
Question 44: Incorrect