In [68]:
import cv2
import numpy as np
import easyocr
import puz
import os
from itertools import product
import matplotlib.pyplot as plt

image_path = input("Enter the path to the crossword image: ").strip()
# === CONFIGURATION ===
CELL_THRESH = 150       # threshold to decide if cell is white vs black

# Paths
# image_path = "test.jpeg"  # update with your image path
output_puz = "converted_crossword.puz"

# === STEP 1: Load and Preprocess Image ===
image = cv2.imread(image_path)
if image is None:
    raise ValueError("Image not found. Check your image_path.")

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5, 5), 0)
thresh = cv2.adaptiveThreshold(
    blur, 255,
    cv2.ADAPTIVE_THRESH_MEAN_C,
    cv2.THRESH_BINARY_INV,
    15, 8
)

# === STEP 2: Detect the Grid ===
# Find contours and assume the largest contour is the grid.
contours, _ = cv2.findContours(
    thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
)
if not contours:
    raise ValueError("No contours found!")

max_area = 0
best_cnt = None
for cnt in contours:
    area = cv2.contourArea(cnt)
    if area > max_area:
        max_area = area
        best_cnt = cnt

x, y, w, h = cv2.boundingRect(best_cnt)
grid_img = gray[y:y+h, x:x+w]

# Optional debugging: Save or display extracted grid
# cv2.imwrite("extracted_grid.jpeg", grid_img)
# plt.imshow(grid_img, cmap='gray')
# plt.title("Extracted Grid")
# plt.show()

# === STEP 2.5: Determine GRID_SIZE and CELL_SIZE Dynamically by Detecting Horizontal Lines ===
edges = cv2.Canny(grid_img, 50, 150, apertureSize=3)
lines = cv2.HoughLines(edges, 1, np.pi / 180, threshold=200)
if lines is None:
    raise ValueError("No lines detected in the grid image.")

# Filter horizontal lines (approximately 90 degrees)
horizontal_lines = []
for line in lines:
    rho, theta = line[0]
    if abs(theta - np.pi / 2) < 0.1:  # horizontal (near 90 degrees)
        horizontal_lines.append(rho)

# Remove duplicates by rounding and merging very close lines
horizontal_lines = sorted(np.unique(np.round(horizontal_lines)))
merged_lines = []
line_spacing_threshold = 10  # pixels
for rho in horizontal_lines:
    if not merged_lines or abs(rho - merged_lines[-1]) > line_spacing_threshold:
        merged_lines.append(rho)

# GRID_SIZE: number of cells is one less than the number of detected horizontal grid lines
GRID_SIZE = len(merged_lines) - 1
if GRID_SIZE <= 0:
    raise ValueError("Invalid GRID_SIZE detected.")

# Calculate CELL_SIZE as the average difference between adjacent horizontal grid lines
CELL_SIZE = int(np.mean(np.diff(merged_lines)))

print(f"Detected GRID_SIZE: {GRID_SIZE}")
print(f"Detected CELL_SIZE: {CELL_SIZE}")

# Resize grid image to a standard size based on detected GRID_SIZE and CELL_SIZE
resized_grid = cv2.resize(
    grid_img, (GRID_SIZE * CELL_SIZE, GRID_SIZE * CELL_SIZE)
)

# === STEP 3: Split Grid into Cells and Classify Cells ===
grid_cells = []
for r in range(GRID_SIZE):
    row = []
    for c in range(GRID_SIZE):
        cell = resized_grid[r * CELL_SIZE:(r + 1) * CELL_SIZE, c * CELL_SIZE:(c + 1) * CELL_SIZE]
        mean_val = np.mean(cell)
        cell_is_white = mean_val > CELL_THRESH
        row.append(cell_is_white)
    grid_cells.append(row)
grid_cells = np.array(grid_cells)

# === STEP 4: Number the Grid ===
clue_numbers = np.full((GRID_SIZE, GRID_SIZE), -1, dtype=int)  # Initialize all cells to -1 (black tiles)

current_number = 1
for i, j in product(range(GRID_SIZE), range(GRID_SIZE)):
    if not grid_cells[i, j]:
        continue  # Skip black cells; already set to -1
    left_black = (j == 0) or (not grid_cells[i, j - 1])
    top_black = (i == 0) or (not grid_cells[i - 1, j])
    if left_black or top_black:
        clue_numbers[i, j] = current_number
        current_number += 1
    else:
        clue_numbers[i, j] = 0  # White cell, but no clue number


Detected GRID_SIZE: 15
Detected CELL_SIZE: 37


In [72]:
# === STEP 5: Create .puz File ===
# For this step, just fill with placeholder characters, as clues are not included in this conversion.
solution = ""
for i in range(GRID_SIZE):
    for j in range(GRID_SIZE):
        solution += "-" if grid_cells[i, j] else "."

puzzle = puz.Puzzle()
puzzle.width = GRID_SIZE
puzzle.height = GRID_SIZE
puzzle.fill = solution
puzzle.solution = solution
puzzle.clues = []  # No clues included as per request
puzzle.save(output_puz)
print(f".puz file saved as {output_puz}")


# this is my manually created .puz file. How to do below things
# Assuming clue_numbers is the 2D numpy array containing the matrix
for row in clue_numbers:
    # Format each cell to have a width of 4 (including spaces for alignment)
    print("".join(f"{cell:4}" for cell in row))

.puz file saved as converted_crossword.puz
   1   2   3  -1   4   5   6   7   8   9  -1  10  11  12  13
  14   0   0  -1  15   0   0   0   0   0  -1  16   0   0   0
  17   0   0  -1  18   0   0   0   0   0  -1  19   0   0   0
  20   0   0  21   0   0  -1  22   0   0  23   0   0   0   0
  24   0   0   0   0  -1  25   0   0   0   0   0   0   0   0
  -1  26   0   0   0  -1  27   0   0  -1  28   0   0   0   0
  -1  -1  29   0   0  30   0   0  -1  31   0   0  -1  -1  -1
  -1  -1  -1  32   0   0   0   0  33   0   0   0  34  -1  -1
  35  36  37  -1  38   0   0   0   0   0   0  -1  39  40  41
  42   0   0  43  -1  44   0   0   0  -1  -1  45   0   0   0
  46   0   0   0  47   0   0  -1  -1  48  49   0   0   0   0
  50   0   0   0   0   0  -1  51  52   0   0   0   0   0   0
  53   0   0   0   0   0  -1  54   0   0   0   0   0   0   0
  55   0   0   0   0   0  -1  56   0   0  -1  57   0   0   0
  -1  58   0   0   0   0  -1  59   0   0  -1  -1  60   0   0


In [37]:
from utils.extract_clues import extract_clues
import json
try:
    # Extract the clues as JSON
    clues_json = extract_clues(image_path)
    
    # Print the JSON result
    print("Extracted Clues:")
    print(json.dumps(clues_json, indent=4))
except Exception as e:
    print(f"An error occurred: {e}")

Extracted Clues:
"```json\n{\n  \"across\": {\n    \"1\": \"Do a chore that might involve maneuvering around lawn ornaments\",\n    \"4\": \"Brings down a notch\",\n    \"10\": \"London weather phenomenon\",\n    \"14\": \"Yoko in the collage 'Some Living American Women Artists'\",\n    \"15\": \"'I still can't find it'\",\n    \"16\": \"'Behold my creation!'\",\n    \"17\": \"'I'm onto your scheme!'\",\n    \"18\": \"Almost\",\n    \"19\": \"'Popstar: Never Stop Never Stopping' star Samberg\",\n    \"20\": \"It's easy to clear\",\n    \"22\": \"Stints in the Vatican\",\n    \"24\": \"Hard to get close to\",\n    \"25\": \"Paper bag?\",\n    \"26\": \"Automatic ___ (entrance that avoids the push/pull dilemma)\",\n    \"27\": \"Pocket contents\",\n    \"28\": \"Follow-up to 'F9'\",\n    \"29\": \"Supersonic ___ (velocities above Mach 1)\",\n    \"31\": \"Large, flat muscle\",\n    \"32\": \"Hypnotic ___ (statement that a person is unusually receptive to)\",\n    \"35\": \"Question texte

In [None]:
{
  "grid": [[1, 0, 2], [0, 3, 0], [4, 0, 5]],
  "across_clues": {
    "1": {"clue": "Opposite of out", "length": 3},
    "4": {"clue": "Not odd", "length": 3}
  },
  "down_clues": {
    "1": {"clue": "Opposite of cold", "length": 3},
    "2": {"clue": "Drinkable water", "length": 3}
  }
}


In [54]:
def print_clues(clues, title):
    """
    Prints the clues with their numbers.

    Args:
        clues (dict): A dictionary of clues where keys are clue numbers and values are the clues.
        title (str): The title to display above the clues (e.g., "Across Clues" or "Down Clues").
    """
    print(f"{title}:")
    for number, clue in clues.items():
        print(f"{number}: {clue}")
    print()  # Add a blank line for better readability


# Accessing and printing Across Clues
across_clues = clues_json["across"]
print_clues(across_clues, "Across Clues")

# Accessing and printing Down Clues
down_clues = clues_json["down"]
print_clues(down_clues, "Down Clues")

TypeError: string indices must be integers, not 'str'

In [67]:
# print(type(clues_json))
print(clues_json)
across_clues = clues_json["across"]
# print_clues(across_clues, "Across Clues")
clue_json = json.loads(clues_json)
# print(type(clue_json))
# print(json.dumps(clue_json, indent=4)) 

```json
{
  "across": {
    "1": "Do a chore that might involve maneuvering around lawn ornaments",
    "4": "Brings down a notch",
    "10": "London weather phenomenon",
    "14": "Yoko in the collage 'Some Living American Women Artists'",
    "15": "'I still can't find it'",
    "16": "'Behold my creation!'",
    "17": "'I'm onto your scheme!'",
    "18": "Almost",
    "19": "'Popstar: Never Stop Never Stopping' star Samberg",
    "20": "It's easy to clear",
    "22": "Stints in the Vatican",
    "24": "Hard to get close to",
    "25": "Paper bag?",
    "26": "Automatic ___ (entrance that avoids the push/pull dilemma)",
    "27": "Pocket contents",
    "28": "Follow-up to 'F9'",
    "29": "Supersonic ___ (velocities above Mach 1)",
    "31": "Large, flat muscle",
    "32": "Hypnotic ___ (statement that a person is unusually receptive to)",
    "35": "Question texted to a late friend",
    "38": "'Funky ___' (widely sampled James Brown song)",
    "39": "Org. concerned with pesticides

TypeError: string indices must be integers, not 'str'

In [38]:
solution = ""
for i in range(GRID_SIZE):
    for j in range(GRID_SIZE):
        solution += "-" if grid_cells[i, j] else "."

fill = "." * len(solution)  # User grid is blank


In [51]:
puzzle = puz.Puzzle()
puzzle.width = GRID_SIZE
puzzle.height = GRID_SIZE
puzzle.solution = solution
puzzle.fill = fill

numbering = puzzle.clue_numbering()

# Calculate the total number of Across and Down clues
total_clues = len(numbering["across"]) + len(numbering["down"])

print(f"Total number of clues: {total_clues}")


TypeError: 'DefaultClueNumbering' object is not subscriptable

In [44]:
ordered_clues = []

# Across clues
for entry in numbering.across:
    clue_num = str(entry['num'])
    clue_text = clues_json['across'].get(clue_num, '???')
    print(f"Clue {clue_num}: {clue_text}")
    clue_text = clue_text.rsplit(",", 1)[0].strip()  # Remove ", 5" if included
    ordered_clues.append(clue_text)

# Down clues
for entry in numbering.down:
    clue_num = str(entry['num'])
    clue_text = clues_json['down'].get(clue_num, '???')
    clue_text = clue_text.rsplit(",", 1)[0].strip()
    ordered_clues.append(clue_text)


In [42]:
import puz

puzzle = puz.Puzzle()
puzzle.width = GRID_SIZE
puzzle.height = GRID_SIZE
puzzle.solution = solution
puzzle.fill = fill
puzzle.clues = ordered_clues
puzzle.title = "Auto Generated Crossword"
puzzle.author = "CrosswordBot"
puzzle.notes = "Generated from image using OCR"

puzzle.save("final_with_clues.puz")
print("✅ .puz file saved as: final_with_clues.puz")


✅ .puz file saved as: final_with_clues.puz


In [43]:
import puz

puzzle = puz.read("final_with_clues.puz")
width, height = puzzle.width, puzzle.height
solution = puzzle.solution

grid = [solution[i * width:(i + 1) * width] for i in range(height)]

print("\n📦 Crossword Grid:")
for row in grid:
    print(" ".join(cell if cell != '.' else '█' for cell in row))
numbering = puzzle.clue_numbering()

print("\n🧩 Across Clues:")
for clue in numbering.across:
    print(f"{clue['num']}. {clue['clue']}")

print("\n🔻 Down Clues:")
for clue in numbering.down:
    print(f"{clue['num']}. {clue['clue']}")



📦 Crossword Grid:
- - - █ - - - - - - █ - - - -
- - - █ - - - - - - █ - - - -
- - - █ - - - - - - █ - - - -
- - - - - - █ - - - - - - - -
- - - - - █ - - - - - - - - -
█ - - - - █ - - - █ - - - - -
█ █ - - - - - - █ - - - █ █ █
█ █ █ - - - - - - - - - - █ █
- - - █ - - - - - - - █ - - -
- - - - █ - - - - █ █ - - - -
- - - - - - - █ █ - - - - - -
- - - - - - █ - - - - - - - -
- - - - - - █ - - - - - - - -
- - - - - - █ - - - █ - - - -
█ - - - - - █ - - - █ █ - - -

🧩 Across Clues:

🔻 Down Clues:


In [None]:

# === STEP 5: Create .puz File ===
# For this step, just fill with placeholder characters, as clues are not included in this conversion.
solution = ""
for i in range(GRID_SIZE):
    for j in range(GRID_SIZE):
        solution += "-" if grid_cells[i, j] else "."

puzzle = puz.Puzzle()
puzzle.width = GRID_SIZE
puzzle.height = GRID_SIZE
puzzle.fill = solution
puzzle.solution = solution
puzzle.clues = []  # No clues included as per request
puzzle.save(output_puz)
print(f".puz file saved as {output_puz}")


# Assuming `clue_numbers` is the 2D numpy array containing the matrix
for row in clue_numbers:
    # Format each cell to have a width of 4 (including spaces for alignment)
    print("".join(f"{cell:4}" for cell in row))

In [27]:
import puz

# Load the puzzle file
puzzle = puz.read("041425 - 3 through 5, too.puz")

# Print metadata
print("Title:", puzzle.title)
print("Author:", puzzle.author)
print("Copyright:", puzzle.copyright)

# Print dimensions
print("Size:", puzzle.width, "x", puzzle.height)

# Print notes (if any)
print("Notes:", puzzle.notes or "None")

# Print all clues
print("\nClues:")
for i, clue in enumerate(puzzle.clues, start=1):
    print(f"{i}. {clue}")

# Print the solution grid (row by row)
print("\nSolution Grid:")
for row in range(puzzle.height):
    start = row * puzzle.width
    end = start + puzzle.width
    print(puzzle.solution[start:end])


Title: 3 Through 5, Too
Author: Will Nediger
Copyright: 
Size: 15 x 15
Notes: None

Clues:
1. Do a chore that might involve maneuvering around lawn ornaments
2. Dough
3. Waiting for an agent
4. Drinks made with vodka, peach schnapps, and cranberry juice
5. Brings down a notch
6. Psychoanalyst who feuded with Melanie Klein
7. Dutch colonist
8. French phrase on a menu
9. "You choose"
10. Choux pastry, filled with cream and iced
11. Company that lost out to Zoom in 2020
12. London weather phenomenon
13. Choppy, to Chopin
14. Crazes
15. Least conventional
16. Main attraction of the films "L.A. Plays Itself" and "Boys in the Sand"
17. Yoko in the collage "Some Living American Women Artists"
18. "I still can't find it"
19. "Behold my creation!"
20. "I'm onto your scheme!"
21. Almost
22. "Popstar: Never Stop Never Stopping" star Samberg
23. It's easy to clear
24. Playfully hits on the snoot
25. Stints in the Vatican
26. Secret relationship
27. Hard to get close to
28. Paper bag?
29. Character

In [None]:
import puz

p = puz.read("041425 - 3 through 5, too.puz")

# Get structured clue data
clue_data = p.clue_numbering()

# Print across clues
print("Across:\n")
for entry in clue_data.across:
    print(f"{entry['num']}. {entry['clue']}")

# Print down clues
print("\nDown:\n")
for entry in clue_data.down:
    print(f"{entry['num']}. {entry['clue']}")


IndexError: list index out of range

In [69]:
import puz

puzzle = puz.read("converted_crossword.puz")
width = puzzle.width
height = puzzle.height
grid = puzzle.solution  # This is a flat string, row-major

grid_2d = [list(grid[i * width:(i + 1) * width]) for i in range(height)]


# Start with -1 for black squares
clue_grid = np.full((height, width), fill_value=-1, dtype=int)

# Set white cell placeholders
for r in range(height):
    for c in range(width):
        if grid_2d[r][c] != ".":
            clue_grid[r][c] = 0  # White square, no number yet

# Set clue numbers
numbered = puzzle.clue_numbering()
for entry in numbered.across + numbered.down:
    clue_grid[entry['y'], entry['x']] = entry['num']

# Print it
for row in clue_grid:
    print("".join(f"{num if num > 0 else ('██' if num == -1 else '  '):>4}" for num in row))


IndexError: list index out of range

# Finding Length of Each Clue