<a href="https://colab.research.google.com/github/stevenbowler/EmployeeSurvey/blob/main/ParsePNG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Parse a single PNG of the Employee Survey to check whether the questions and answers can be read correctly.

In [None]:
!pip install opencv-python-headless numpy matplotlib
import cv2
import numpy as np
import matplotlib.pyplot as plt
from google.colab import files
import os

In [None]:
# Upload the scanned survey pages into the Colab runtime; the result of
# files.upload() is kept in `uploaded`.
uploaded = files.upload()  # This opens a file picker: upload your two PNGs (e.g., page1.png, page2.png)
# Or drag-drop them into the Colab files panel on the left.

Step 3: The Parsing Script (Run After Upload)
This code:

Loads the image(s).
Grayscales and thresholds to find dark (filled) areas.
Defines rough ROI (regions of interest) for each checkbox based on typical form layout (adjust x,y,w,h coords by inspecting your image—use the preview below).
For each question's 4 boxes (Strongly Agree, Agree, Disagree, Strongly Disagree), counts the dark pixels inside the box; if the count reaches `min_pixels` (an absolute pixel count, not a percentage of the box area), the box is treated as selected.
Outputs a list of answers.

In [None]:
# Load images (replace with your filenames)
page1_path = 'Accounting_1_page001 (1).png'  # First page PNG
img1 = cv2.imread(page1_path)
# cv2.imread reports failure by returning None rather than raising, so a wrong
# filename would otherwise only surface later as an obscure error when the
# image is sliced. Fail here, with a message that names the file.
if img1 is None:
    if not os.path.isfile(page1_path):
        raise FileNotFoundError(
            f"Image not found: {page1_path!r} (upload it or fix the filename)"
        )
    raise ValueError(
        f"File exists but could not be decoded as an image: {page1_path!r}"
    )
# img2 = cv2.imread('Accounting_2_page001.png')  # Second page PNG

# Function to detect filled checkbox in a region
# ─────── HYPER-SENSITIVE MARK DETECTOR ───────
def is_checkbox_filled(img, x, y, w, h,
                       dark_threshold=240, min_pixels=8):
    """Return True when a box region holds at least ``min_pixels`` dark pixels.

    Args:
        img: Image array indexed ``[row, col]``: either 2-D grayscale, or
            3-D with 1, 3 (BGR) or 4 (BGRA) channels.
        x, y: Left / top pixel coordinate of the region.
        w, h: Width / height of the region in pixels.
        dark_threshold: Gray values strictly below this count as dark.
        min_pixels: Minimum number of dark pixels for a positive result.

    Returns:
        bool: True if the dark-pixel count reaches ``min_pixels``. False
        otherwise, including when the region lies entirely outside the
        image or has no area.

    Raises:
        ValueError: If ``img`` is None (e.g. the image failed to load).
    """
    if img is None:
        raise ValueError(
            "img is None; the image was not loaded (check the filename)"
        )

    # Clamp the box to the image. Plain slicing would wrap negative
    # coordinates around to the far edge and sample the wrong pixels.
    img_h, img_w = img.shape[:2]
    top, left = max(y, 0), max(x, 0)
    bottom, right = min(y + h, img_h), min(x + w, img_w)
    if bottom <= top or right <= left:
        # Nothing to inspect; cv2.cvtColor would raise on an empty array.
        return False

    roi = img[top:bottom, left:right]
    if roi.ndim == 2:
        gray = roi  # already single-channel
    elif roi.shape[2] == 1:
        gray = roi[:, :, 0]
    elif roi.shape[2] == 4:
        gray = cv2.cvtColor(roi, cv2.COLOR_BGRA2GRAY)
    else:
        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

    dark_pixels = int(np.count_nonzero(gray < dark_threshold))
    return dark_pixels >= min_pixels

# # Ultra-sensitive (catches pencil dots)
# is_checkbox_filled(..., dark_threshold=245, min_pixels=5)

# # Normal (still catches everything you saw by eye)
# is_checkbox_filled(..., dark_threshold=240, min_pixels=8)

# # Paranoid (ignores dust)
# is_checkbox_filled(..., dark_threshold=220, min_pixels=15)

# ─────────────────────────────────────────────

# Preview image to get coords (run this first to zoom in and note box positions)
def preview_image(img, title='Preview'):
    """Display ``img`` in a 12x8-inch matplotlib figure with the axes hidden.

    The image is converted with ``cv2.COLOR_BGR2RGB`` before being handed to
    matplotlib, and ``title`` is drawn above it.
    """
    plt.figure(figsize=(12, 8))
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.imshow(rgb)
    plt.axis('off')
    plt.title(title)
    plt.show()

# Render page 1 so checkbox pixel positions can be read off the plot.
preview_image(img1, 'Page 1 Preview')
# Enable once img2 is loaded:
# preview_image(img2, 'Page 2 Preview')

# Define ROIs for checkboxes: [x, y, width, height] for each column per question.
# Columns: 0=Strongly Agree, 1=Agree, 2=Disagree, 3=Strongly Disagree
# You'll need to adjust these based on your preview (e.g., use plt to measure pixels).
# Example for Page 1 (Q1-12 or so; extend for full):
# One row per question; each row holds four [x, y, width, height] boxes, one
# per answer column. Every box is 47 px wide and the four column x-positions
# are shared by all questions, so only (y, height) is listed per question.
#
# NOTE(review): Q4 through Q23 all use y=1168, so they sample the same strip
# of the page. These look like placeholders still to be measured from the
# preview (and split between page 1 and page 2). Confirm before trusting
# the answers reported for those questions.
rois_page1 = [
    [[col_x, row_y, 47, box_h] for col_x in (2174, 2234, 2301, 2371)]
    for row_y, box_h in (
        (856, 85),    # Q1
        (956, 85),    # Q2
        (1108, 36),   # Q3
        (1168, 36),   # Q4
        (1168, 36),   # Q5
        (1168, 36),   # Q6
        (1168, 36),   # Q7
        (1168, 36),   # Q8
        (1168, 36),   # Q9
        (1168, 36),   # Q10
        (1168, 36),   # Q11
        (1168, 36),   # Q12
        (1168, 36),   # Q13
        (1168, 36),   # Q14
        (1168, 36),   # Q15
        (1168, 36),   # Q16
        (1168, 36),   # Q17
        (1168, 85),   # Q18
        (1168, 36),   # Q19
        (1168, 36),   # Q20
        (1168, 36),   # Q21
        (1168, 36),   # Q22
        (1168, 85),   # Q23
    )
]

# For Page 2, similar structure starting from Q13 or whatever the split is.

# Detect answers
options = ['Strongly Agree', 'Agree', 'Disagree', 'Strongly Disagree']
answers = []
# Question number -> labels of every marked box, recorded only when more than
# one box of the question is detected as filled.
ambiguous = {}

for q_num, q_rois in enumerate(rois_page1, 1):  # Adjust for page2
    # Check all four boxes instead of stopping at the first hit. Stopping
    # early would silently report the leftmost column for a question with
    # several marks (or with a stray mark / printed outline in an earlier box).
    marked = [
        col
        for col, roi in enumerate(q_rois)
        if is_checkbox_filled(img1, *roi, dark_threshold=220, min_pixels=15)  # Use img2 for page2
    ]
    # The recorded answer is still the leftmost marked column, as before.
    answers.append(options[marked[0]] if marked else 'No selection')
    if len(marked) > 1:
        ambiguous[q_num] = [options[col] for col in marked]

# Output
print("Answers:")
for i, ans in enumerate(answers, 1):
    if i in ambiguous:
        # Flag the question for manual review rather than trusting one pick.
        print(f"Q{i}: {ans}  [WARNING: multiple boxes marked: {', '.join(ambiguous[i])}]")
    else:
        print(f"Q{i}: {ans}")