In [None]:
import ollama
from pydantic import BaseModel
import time
import os

# One crossword entry as returned by the model in structured-output mode.
# NOTE: documented with `#` comments on purpose -- this class's JSON schema is
# what gets passed as `format=` to the chat call (via CrosswordPuzzleEntries),
# and a pydantic class docstring would be embedded in that schema.
class NumberClueOrientation(BaseModel):
    # clue number (presumably the number printed in the grid -- confirm against screenshots)
    number:      int
    # clue text
    clue:        str
    # free-form string; presumably "across" / "down", but nothing here constrains or normalises it
    orientation: str

# Top-level structured-output container: its JSON schema is passed as `format=`
# to the chat call and the model's reply is validated back into this type.
# (`#` comments rather than a docstring so the generated schema stays unchanged.)
class CrosswordPuzzleEntries(BaseModel):
    # every entry the model extracted from one screenshot
    items: list[NumberClueOrientation]

# Ollama model used for every image prompt in this notebook; other candidates
# are left in the trailing comment.
model    = 'gemma3:27b' # 'gemma3:12b' 'llama3.2-vision:11b' #
client   = ollama.Client()
_prompt_ = """Extract the crossword entries."""

# Per-screenshot results, both keyed by screenshot file name:
#   _lu_           -> parsed CrosswordPuzzleEntries
#   _process_time_ -> wall-clock seconds spent in the chat call
_lu_           = {}
_process_time_ = {}

_screenshot_dir_ = '../../../data/crossword_puzzle_screenshots/'
# os.listdir() returns entries in arbitrary order; sort so that "first .jpeg" /
# "last matching file" selections made by later cells are reproducible.
_files_ = sorted(os.listdir(_screenshot_dir_))

# The extraction used to be disabled by an unconditional `break` as the first
# statement of the loop, which left the whole body unreachable.  The switch is
# now explicit: flip it to True to (re)run the model over every .jpeg.
# With the default (False) the cell leaves exactly the same state as before:
# `_lu_` / `_process_time_` untouched and `_file_` bound to the first listing entry.
_run_extraction_ = False

for _file_ in _files_:
    if not _run_extraction_: break
    if not _file_.endswith('.jpeg'): continue

    screenshots = [os.path.join(_screenshot_dir_, _file_)]
    print(screenshots)

    t0 = time.time()
    response = client.chat(model=model, messages=[
        {
            'role':    'user',
            'content': _prompt_,
            'images':  screenshots,
        },],
        # constrain the reply to the CrosswordPuzzleEntries JSON schema
        format=CrosswordPuzzleEntries.model_json_schema(),
        options={'num_ctx': 8192+2048}
    )
    t1 = time.time()

    # Raises a pydantic validation error if the reply does not match the schema.
    _items_ = CrosswordPuzzleEntries.model_validate_json(response['message']['content'])
    _lu_[_file_], _process_time_[_file_] = _items_, t1-t0

In [None]:
#
# Bucket every extracted entry by (number, orientation).  Collecting the clue
# texts and source files into sets makes duplicates / disagreements between
# screenshots visible (a key with more than one clue is a discrepancy).
#
entry_to_clues = {}
entry_to_files = {}
for x, _parsed_ in _lu_.items():
    for y in _parsed_.items:
        entry = (y.number, y.orientation)
        entry_to_clues.setdefault(entry, set()).add(y.clue)
        entry_to_files.setdefault(entry, set()).add(x)

In [None]:
#
# The grouped entries were dumped to a text file and corrected by hand;
# the commented-out lines below show how that dump was produced.
#
_tuples_ = list(entry_to_clues.keys())
#print('{')
#for x in sorted(_tuples_, key=lambda x: (x[1], x[0])): print(x, ':', entry_to_clues[x], ',')
#print('}')

# Read the hand-fixed file(s) back in.  If several files match, the last one
# wins; if none match, `crossword_entries` is left undefined.
for _file_ in _files_:
    if not _file_.endswith('_entries.txt'): continue
    if _file_.startswith('.'):              continue   # skip hidden files
    with open(os.path.join(_screenshot_dir_, _file_), 'rt') as f:
        _text_ = f.read()
    # SECURITY NOTE(review): eval() executes whatever the file contains.  Fine
    # for a file you edited yourself; consider ast.literal_eval if the file
    # only ever holds a plain literal -- confirm its contents first.
    crossword_entries = eval(_text_)

In [None]:
#
# None of the attempts with the smaller models yielded usable geometric results.
# Only the first .jpeg in the listing is queried (the loop stops after it), and
# `_file_` stays bound to that file afterwards.
#
_prompt_ = """How many rows are in the crossword puzzle?"""
for _file_ in _files_:
    screenshots = []
    if not _file_.endswith('.jpeg'):
        continue
    screenshots.append(os.path.join(_screenshot_dir_, _file_))
    print(screenshots)
    _message_ = {
        'role':    'user',
        'content': _prompt_,
        'images':  screenshots,
    }
    t0 = time.time()
    response = client.chat(model=model, messages=[_message_],
                           options={'num_ctx': 1024})
    t1 = time.time()
    print(response['message']['content'])
    break

In [None]:
from PIL import Image

In [None]:
# Preview a screenshot.  `_file_` is not set in this cell: it is whatever the
# most recently executed `for _file_ in _files_` loop left behind, so the image
# shown depends on which cell ran last.
Image.open(os.path.join(_screenshot_dir_, _file_))

In [None]:
#
# Try to find the grid lines with a Hough transform.  The raw transform produces
# a lot of noise, so only (near) axis-aligned lines are kept -- the screenshot
# is known to contain a grid.
#
# Fixes relative to the first version of this cell:
#   * the angle test was `abs(pi + theta) > 0.1`, which is always true because
#     OpenCV reports theta in [0, pi) -> every line was skipped, nothing drawn;
#   * lines were drawn with a BGR colour onto a single-channel image, so they
#     came out black instead of red; drawing now happens on a colour copy;
#   * a failed cv2.imread (returns None) is reported with the offending path;
#   * the drawn segment length scales with the image instead of a fixed 1000 px.
#
from math import pi
import cv2
import numpy as np

# NOTE: `_file_` is the leftover loop variable of the last executed
# `for _file_ in _files_` loop.
_image_path_ = os.path.join(_screenshot_dir_, _file_)

# Load the image (grayscale; `image` itself is left unmodified)
image = cv2.imread(_image_path_, cv2.IMREAD_GRAYSCALE)
if image is None:
    raise OSError(f'cv2.imread could not read {_image_path_!r}')

# Apply Canny edge detection
edges = cv2.Canny(image, 200, 220)
# Apply Hough Line Transform (1 px / 1 degree resolution, 200 votes minimum)
lines = cv2.HoughLines(edges, 1, np.pi/180, 200)

# Colour canvas to draw on, so the overlay is visible against the grayscale picture
annotated = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)

_angle_tol_   = 0.1                                # radians
_half_length_ = max(1000, sum(image.shape[:2]))    # long enough to cross the whole image

# Draw the axis-aligned lines
if lines is not None:
    for rho, theta in lines[:, 0]:
        # theta ~ 0 or ~ pi -> vertical line, theta ~ pi/2 -> horizontal line
        _off_axis_ = min(abs(theta), abs(theta - pi/2), abs(pi - theta))
        if _off_axis_ > _angle_tol_: continue
        a = np.cos(theta)
        b = np.sin(theta)
        # (x0, y0) is the point on the line closest to the origin; (-b, a) is its direction
        x0 = a * rho
        y0 = b * rho
        x1 = int(x0 + _half_length_*(-b))
        y1 = int(y0 + _half_length_*(a))
        x2 = int(x0 - _half_length_*(-b))
        y2 = int(y0 - _half_length_*(a))
        cv2.line(annotated, (x1, y1), (x2, y2), (0, 0, 255), 2)   # red in BGR

# Display the result (PIL expects RGB channel order)
#cv2.imshow('Hough Lines', annotated)
#cv2.waitKey(0)
#cv2.destroyAllWindows()
Image.fromarray(cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB))