<a href="https://colab.research.google.com/github/stevenbowler/Basic-Portfolio/blob/master/GenerateLocatorsAndROIs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

For a given survey PDF, this notebook produces a printable form with registration marks, a PNG preview, and the ROI coordinates of every answer box:

1. Add the 4 black witness crosses (✕) to the corners of the blank PDF (with margins to avoid cropping).
2. Outline all 92 answer boxes (23 questions × 4 options) in red on a copy.
3. Generate a high-res (4K-ish) PNG of the outlined form for visual confirmation.
4. Output a CSV file with the ROI coordinates (X, Y, Width, Height in PDF points). Since 72 pt = 1 inch, convert to pixels for your scan with pixels = points × DPI / 72.

In [7]:
!pip install PyMuPDF



In [8]:
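# Quick Setup (1 min)

# This cell is written for Google Colab and reads/writes its files in Google Drive.
# 1. Install the library: run pip install pymupdf (it's free and lightweight; the cell above does this).
# 2. Place "Employee Survey v4-3 Spanish.pdf" in the MyDrive/ColabStuff/ folder of your Google Drive.
# 3. Run this cell.
# To run locally instead, save the script below as process_survey.py, remove the Drive mount,
# point target_folder at the folder holding the PDF, and run python process_survey.py.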

import fitz  # pymupdf
import csv
from google.colab import drive

# Mount Google Drive so the notebook can read the source PDF from it and
# write the generated files back to it.
drive.mount('/content/drive')
# Folder inside the mounted Drive that holds the input PDF and receives all
# outputs. The trailing slash matters: later code builds file paths by plain
# string concatenation (target_folder + "name.ext").
target_folder = "/content/drive/MyDrive/ColabStuff/"
# Function to add a cross mark
def add_cross(page, x, y, half_size=10, color=(0, 0, 0), width=2):
    """Draw an X-shaped witness mark centred on (x, y).

    The mark consists of the two diagonals of a square whose sides are
    ``2 * half_size`` long and whose centre is (x, y).

    Args:
        page: Page object to draw on; must provide ``draw_line``.
        x: Horizontal centre of the cross, in page coordinates.
        y: Vertical centre of the cross, in page coordinates.
        half_size: Distance from the centre to each arm tip along each axis.
            Defaults to 10, the size that was previously hard-coded.
        color: Stroke colour as an RGB tuple of floats; defaults to black.
        width: Stroke width; defaults to 2.
    """
    top_left = fitz.Point(x - half_size, y - half_size)
    bottom_right = fitz.Point(x + half_size, y + half_size)
    bottom_left = fitz.Point(x - half_size, y + half_size)
    top_right = fitz.Point(x + half_size, y - half_size)

    # One stroke per diagonal.
    page.draw_line(top_left, bottom_right, color=color, width=width)
    page.draw_line(bottom_left, top_right, color=color, width=width)

# Open the blank PDF
source_pdf = target_folder + "Employee Survey v4-3 Spanish.pdf"
doc = fitz.open(source_pdf)
page = doc[0]
rect = page.rect
margin = 30  # Safe margin to avoid scanner cropping

# Add 4 crosses to corners
add_cross(page, margin, margin)  # Top-left
add_cross(page, rect.width - margin, margin)  # Top-right
add_cross(page, margin, rect.height - margin)  # Bottom-left
add_cross(page, rect.width - margin, rect.height - margin)  # Bottom-right

# Save the PDF with crosses (print this version for employees)
doc.save(target_folder + "blank_with_crosses.pdf")

# Now extract the table grid from the page's vector drawings. Only straight
# line items that are (nearly) horizontal or vertical are kept; diagonal
# strokes fail both tests below and are ignored.
LINE_TOLERANCE = 1  # max deviation for a line to count as axis-aligned
drawings = page.get_drawings()
horizontal_lines = []  # (x_start, x_end, y)
vertical_lines = []  # (x, y_start, y_end)
for d in drawings:
    for item in d["items"]:
        if item[0] != "l":  # Only line items carry two end points
            continue
        p1, p2 = item[1], item[2]
        if abs(p1.y - p2.y) < LINE_TOLERANCE:  # Horizontal line
            x1, x2 = min(p1.x, p2.x), max(p1.x, p2.x)
            horizontal_lines.append((x1, x2, p1.y))
        elif abs(p1.x - p2.x) < LINE_TOLERANCE:  # Vertical line
            y1, y2 = min(p1.y, p2.y), max(p1.y, p2.y)
            vertical_lines.append((p1.x, y1, y2))


def _merge_close(values, tolerance):
    """Return the values sorted, dropping any closer than tolerance to the last kept one."""
    merged = []
    for value in sorted(values):
        if not merged or value - merged[-1] >= tolerance:
            merged.append(value)
    return merged


# Unique sorted coordinates for the grid. Coordinates are floats, so an exact
# set() would keep near-identical values (e.g. 100.0 and 100.02) as separate
# grid lines, creating sliver cells and shifting the "last 4 columns" below.
x_coords = _merge_close((l[0] for l in vertical_lines), LINE_TOLERANCE)
y_coords = _merge_close((l[2] for l in horizontal_lines), LINE_TOLERANCE)

# Last 4 columns are the answer boxes
num_cols = len(x_coords) - 1
start_col = num_cols - 4
if start_col < 0:
    # A negative start index would silently wrap around x_coords.
    print(f"Warning: only {max(num_cols, 0)} grid column(s) detected; "
          "need at least 4 answer columns, so no ROIs were extracted.")
    answer_columns = []
    questions_to_scan = range(0)
else:
    answer_columns = [(x_coords[c], x_coords[c + 1])
                      for c in range(start_col, num_cols)]
    questions_to_scan = range(1, 24)

# Find ROIs for each question's 4 boxes.
# question_numbers[i] is the question that question_rois[i] belongs to, so a
# skipped question does not shift the numbering of the ones after it.
question_rois = []
question_numbers = []
for q in questions_to_scan:
    # NOTE(review): "1." is also a substring of "11." and "21."; the first
    # hit is used, which presumably is the question's own label -- verify
    # against the outlined PNG.
    rects = page.search_for(f"{q}.")
    if not rects:
        print(f"Warning: label for question {q} not found; question skipped.")
        continue
    q_rect = rects[0]
    # Find the grid row that fully contains the label
    for row_top, row_bottom in zip(y_coords, y_coords[1:]):
        if row_top <= q_rect.y0 and q_rect.y1 <= row_bottom:
            break
    else:
        print(f"Warning: no grid row contains the label of question {q}; "
              "question skipped.")
        continue
    # Each ROI is (left, top, right, bottom)
    rois = [(left, row_top, right, row_bottom)
            for left, right in answer_columns]
    question_rois.append(rois)
    question_numbers.append(q)

# Output CSV with ROIs
options = ["Strongly Agree", "Agree", "Disagree", "Strongly Disagree"]
with open(target_folder + "roi_coordinates.csv", "w", newline="") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["Question", "Option", "X", "Y", "Width", "Height"])
    for q, rois in zip(question_numbers, question_rois):
        for option, (left, top, right, bottom) in zip(options, rois):
            writer.writerow([q, option, left, top, right - left, bottom - top])

# Draw red outlines on a fresh copy (without crosses) for visual confirmation
doc_outline = fitz.open(source_pdf)
page_outline = doc_outline[0]
for rois in question_rois:
    for r in rois:
        page_outline.draw_rect(fitz.Rect(r), color=(1, 0, 0), width=1.5)

# Save outlined PDF and high-res PNG
doc_outline.save(target_folder + "blank_with_outlines.pdf")
pix = page_outline.get_pixmap(dpi=400)  # ~3400x4400 for US letter (4K-ish)
# target_folder already ends with "/", so no extra separator is needed.
pix.save(target_folder + "outlined.png")

print("Done! Files generated: blank_with_crosses.pdf (print this), roi_coordinates.csv, blank_with_outlines.pdf, outlined.png")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Done! Files generated: blank_with_crosses.pdf (print this), roi_coordinates.csv, blank_with_outlines.pdf, outlined.png


In [None]:
# Build a greeting from a variable part and a literal part, then print it.
variable_string = "Hello"
literal_string = ", World!"
concatenated_string = "".join((variable_string, literal_string))
print(concatenated_string)