In [3]:
!pip install easyocr numpy opencv-python matplotlib puzpy openai

Collecting easyocr
  Downloading easyocr-1.7.2-py3-none-any.whl.metadata (10 kB)
Collecting puzpy
  Downloading puzpy-0.2.6-py2.py3-none-any.whl.metadata (3.1 kB)
Collecting python-bidi (from easyocr)
  Downloading python_bidi-0.6.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)
Collecting pyclipper (from easyocr)
  Downloading pyclipper-1.3.0.post6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.0 kB)
Collecting ninja (from easyocr)
  Downloading ninja-1.11.1.4-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (5.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->easyocr)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->easyocr)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->easyocr)
  Downlo

In [4]:
import cv2
import numpy as np
import easyocr
import puz
import os
from itertools import product
import matplotlib.pyplot as plt

image_path = input("Enter the path to the crossword image: ").strip()
# === CONFIGURATION ===
CELL_THRESH = 150       # threshold to decide if cell is white vs black

# Paths
# image_path = "test.jpeg"  # update with your image path
output_puz = "converted_crossword.puz"

# === STEP 1: Load and Preprocess Image ===
image = cv2.imread(image_path)
if image is None:
    raise ValueError("Image not found. Check your image_path.")

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5, 5), 0)
thresh = cv2.adaptiveThreshold(
    blur, 255,
    cv2.ADAPTIVE_THRESH_MEAN_C,
    cv2.THRESH_BINARY_INV,
    15, 8
)

# === STEP 2: Detect the Grid ===
# Find contours and assume the largest contour is the grid.
contours, _ = cv2.findContours(
    thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
)
if not contours:
    raise ValueError("No contours found!")

max_area = 0
best_cnt = None
for cnt in contours:
    area = cv2.contourArea(cnt)
    if area > max_area:
        max_area = area
        best_cnt = cnt

x, y, w, h = cv2.boundingRect(best_cnt)
grid_img = gray[y:y+h, x:x+w]

# Optional debugging: Save or display extracted grid
# cv2.imwrite("extracted_grid.jpeg", grid_img)
# plt.imshow(grid_img, cmap='gray')
# plt.title("Extracted Grid")
# plt.show()

# === STEP 2.5: Determine GRID_SIZE and CELL_SIZE Dynamically by Detecting Horizontal Lines ===
edges = cv2.Canny(grid_img, 50, 150, apertureSize=3)
lines = cv2.HoughLines(edges, 1, np.pi / 180, threshold=200)
if lines is None:
    raise ValueError("No lines detected in the grid image.")

# Filter horizontal lines (approximately 90 degrees)
horizontal_lines = []
for line in lines:
    rho, theta = line[0]
    if abs(theta - np.pi / 2) < 0.1:  # horizontal (near 90 degrees)
        horizontal_lines.append(rho)

# Remove duplicates by rounding and merging very close lines
horizontal_lines = sorted(np.unique(np.round(horizontal_lines)))
merged_lines = []
line_spacing_threshold = 10  # pixels
for rho in horizontal_lines:
    if not merged_lines or abs(rho - merged_lines[-1]) > line_spacing_threshold:
        merged_lines.append(rho)

# GRID_SIZE: number of cells is one less than the number of detected horizontal grid lines
GRID_SIZE = len(merged_lines) - 1
if GRID_SIZE <= 0:
    raise ValueError("Invalid GRID_SIZE detected.")

# Calculate CELL_SIZE as the average difference between adjacent horizontal grid lines
CELL_SIZE = int(np.mean(np.diff(merged_lines)))

print(f"Detected GRID_SIZE: {GRID_SIZE}")
print(f"Detected CELL_SIZE: {CELL_SIZE}")

# Resize grid image to a standard size based on detected GRID_SIZE and CELL_SIZE
resized_grid = cv2.resize(
    grid_img, (GRID_SIZE * CELL_SIZE, GRID_SIZE * CELL_SIZE)
)

# === STEP 3: Split Grid into Cells and Classify Cells ===
grid_cells = []
for r in range(GRID_SIZE):
    row = []
    for c in range(GRID_SIZE):
        cell = resized_grid[r * CELL_SIZE:(r + 1) * CELL_SIZE, c * CELL_SIZE:(c + 1) * CELL_SIZE]
        mean_val = np.mean(cell)
        cell_is_white = mean_val > CELL_THRESH
        row.append(cell_is_white)
    grid_cells.append(row)
grid_cells = np.array(grid_cells)

# === STEP 4: Number the Grid ===
clue_numbers = np.full((GRID_SIZE, GRID_SIZE), -1, dtype=int)  # Initialize all cells to -1 (black tiles)

current_number = 1
for i, j in product(range(GRID_SIZE), range(GRID_SIZE)):
    if not grid_cells[i, j]:
        continue  # Skip black cells; already set to -1
    left_black = (j == 0) or (not grid_cells[i, j - 1])
    top_black = (i == 0) or (not grid_cells[i - 1, j])
    if left_black or top_black:
        clue_numbers[i, j] = current_number
        current_number += 1
    else:
        clue_numbers[i, j] = 0  # White cell, but no clue number


Enter the path to the crossword image: cwpuzzle.jpeg
Detected GRID_SIZE: 9
Detected CELL_SIZE: 62


In [5]:
# === STEP 5: Create .puz File ===
# For this step, just fill with placeholder characters, as clues are not included in this conversion.
solution = ""
for i in range(GRID_SIZE):
    for j in range(GRID_SIZE):
        solution += "-" if grid_cells[i, j] else "."

puzzle = puz.Puzzle()
puzzle.width = GRID_SIZE
puzzle.height = GRID_SIZE
puzzle.fill = solution
puzzle.solution = solution
puzzle.clues = []  # No clues included as per request
puzzle.save(output_puz)
print(f".puz file saved as {output_puz}")


# this is my manually created .puz file. How to do below things
# Assuming clue_numbers is the 2D numpy array containing the matrix
print(type(clue_numbers))
for row in clue_numbers:
    # Format each cell to have a width of 4 (including spaces for alignment)
    print("".join(f"{cell:4}" for cell in row))

.puz file saved as converted_crossword.puz
<class 'numpy.ndarray'>
   1   2   3   4  -1   5   6   7   8
   9   0   0   0  -1  10   0   0   0
  11   0   0   0  12   0   0   0   0
  13   0   0  -1  14   0   0   0  -1
  -1  -1  15  16   0   0   0  -1  -1
  -1  17   0   0   0  -1  18  19  20
  21   0   0   0   0  22   0   0   0
  23   0   0   0  -1  24   0   0   0
  25   0   0   0  -1  26   0   0   0


In [6]:
import os
import base64
from openai import OpenAI

# Hidden API configuration
TOKEN = "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"
# TOKEN="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"
ENDPOINT = "https://models.inference.ai.azure.com"
MODEL_NAME = "gpt-4o"

def get_image_data_url(image_file: str, image_format: str) -> str:
    """
    Helper function to converts an image file to a data URL string.

    Args:
        image_file (str): The path to the image file.
        image_format (str): The format of the image file.

    Returns:
        str: The data URL of the image.
    """
    try:
        with open(image_file, "rb") as f:
            image_data = base64.b64encode(f.read()).decode("utf-8")
    except FileNotFoundError:
        raise ValueError(f"Could not read '{image_file}'. Ensure the file exists.")
    return f"data:image/{image_format};base64,{image_data}"


def extract_clues(image_path: str, image_format: str = "jpeg") -> dict:
    """
    Extract crossword clues and their corresponding numbers from an image.

    Args:
        image_path (str): The path to the crossword image.
        image_format (str): The format of the image (default is 'jpeg').

    Returns:
        dict: A JSON object with "across" and "down" clues.
    """
    print("token= ", TOKEN)
    # Convert the image to a data URL
    image_data_url = get_image_data_url(image_path, image_format)

    # Initialize the OpenAI client
    client = OpenAI(
        base_url=ENDPOINT,
        api_key=TOKEN,
    )


    # Send the image and instructions to the GPT model
    response = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": """
You are tasked with extracting crossword clues and their corresponding clue numbers from an image of a crossword puzzle. The image contains a grid with numbered cells and a list of clues divided into "Across" and "Down" sections. Your goal is to produce a JSON object with the following structure:

The JSON should have two main keys: "across" and "down".

Under each key, include a sub-object where the keys are the clue numbers (as strings) and the values are the clue text.

Include all clues exactly as they appear in the image, preserving punctuation, capitalization, and special characters.
"""
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": image_data_url,
                            "detail": "high"
                        },
                    },
                ],
            },
        ],
        model=MODEL_NAME,
    )

    # Return the extracted JSON
    return response.choices[0].message.content

In [7]:
import json
try:
    # Extract the clues as JSON
    clues_json = extract_clues(image_path)

    # Print the JSON result
    print("Extracted Clues:")
    print(json.dumps(clues_json, indent=4))
except Exception as e:
    print(f"An error occurred: {e}")

token=  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
Extracted Clues:
"```json\n{\n  \"across\": {\n    \"1\": \"Part of a fast food combo\",\n    \"5\": \"Dalmatian feature\",\n    \"9\": \"Nonbinary pronoun\",\n    \"10\": \"Krusty ___ (burger restaurant on SpongeBob SquarePants)\",\n    \"11\": \"Important stand at a convention\",\n    \"13\": \"Succession surname\",\n    \"14\": \"Where ulnae and humeri are located, in the human body\",\n    \"15\": \"Pride letters\",\n    \"17\": \"Fire station fixture\",\n    \"18\": \"Model ___ (some school clubs)\",\n    \"21\": \"Euphemisms used to evade automated social media moderation (like 'panini' instead of 'pandemic')\",\n    \"23\": \"Luke\u2019s sister in Star Wars\",\n    \"24\": \"Luke in Star Wars, e.g.\",\n    \"25\": \"Science ___ (what science fiction might turn into)\",\n    \"26\": \"Blow a fuse\"\n  },\n  \"down\": {\n    \"1\": \"___ stick (coffee shop item)\",\n    \"2\": \"\

In [8]:
# print(type(clues_json))
print(clues_json)
print(type(clues_json))

```json
{
  "across": {
    "1": "Part of a fast food combo",
    "5": "Dalmatian feature",
    "9": "Nonbinary pronoun",
    "10": "Krusty ___ (burger restaurant on SpongeBob SquarePants)",
    "11": "Important stand at a convention",
    "13": "Succession surname",
    "14": "Where ulnae and humeri are located, in the human body",
    "15": "Pride letters",
    "17": "Fire station fixture",
    "18": "Model ___ (some school clubs)",
    "21": "Euphemisms used to evade automated social media moderation (like 'panini' instead of 'pandemic')",
    "23": "Luke’s sister in Star Wars",
    "24": "Luke in Star Wars, e.g.",
    "25": "Science ___ (what science fiction might turn into)",
    "26": "Blow a fuse"
  },
  "down": {
    "1": "___ stick (coffee shop item)",
    "2": "“That can’t be good”",
    "3": "Make zero sense",
    "4": "Bottoms star Edebiri",
    "5": "Portmanteau clothing item worn by tennis players",
    "6": "Title awarded at a high school dance",
    "7": "Cereal grains"

In [9]:
import json
import ast
import re
from typing import List, Dict, Any, Optional

def generate_crossword_input(
    grid: Any,
    clues_input: Any,
    output_path: Optional[str] = None
) -> Dict[str, Any]:
    """
    Convert a numbered-grid + clues into the BP-solver input schema.
    Handles numpy arrays, raw dicts, JSON strings (even with markdown fences),
    or Python‐literal strings with curly quotes.
    """
    # 1) Parse clues_input into a dict
    if isinstance(clues_input, dict):
        clues = clues_input
    elif isinstance(clues_input, str):
        # Strip markdown fences if present
        s = clues_input.strip()
        # remove ```json or ``` markers
        s = re.sub(r"^```(?:json)?\s*|\s*```$", "", s, flags=re.MULTILINE)
        # now try JSON
        try:
            clues = json.loads(s)
        except json.JSONDecodeError:
            # fallback to Python literal
            clues = ast.literal_eval(s)
    else:
        raise TypeError("clues_input must be a dict or string")

    # 2) Convert numpy array to list-of-lists
    if hasattr(grid, "tolist"):
        grid = grid.tolist()

    n_rows = len(grid)
    n_cols = len(grid[0]) if n_rows else 0
    if n_rows != n_cols:
        raise ValueError("Grid must be square")
    grid_size = n_rows

    # 3) Collect null squares
    null_squares = [
        [r, c]
        for r in range(n_rows)
        for c in range(n_cols)
        if grid[r][c] == -1
    ]

    # 4) Helper to follow an across/down run
    def collect_run(r: int, c: int, dr: int, dc: int) -> List[List[int]]:
        coords = []
        while 0 <= r < n_rows and 0 <= c < n_cols and grid[r][c] != -1:
            coords.append([r, c])
            r += dr
            c += dc
        return coords

    # 5) Build clue_positions
    clue_positions: Dict[str, List[List[int]]] = {}

    # Across
    for r in range(n_rows):
        for c in range(n_cols):
            num = grid[r][c]
            if num <= 0:
                continue
            if c == 0 or grid[r][c-1] == -1:
                key = f"{num}A"
                if str(num) in clues.get("across", {}):
                    clue_positions[key] = collect_run(r, c, 0, 1)

    # Down
    for r in range(n_rows):
        for c in range(n_cols):
            num = grid[r][c]
            if num <= 0:
                continue
            if r == 0 or grid[r-1][c] == -1:
                key = f"{num}D"
                if str(num) in clues.get("down", {}):
                    clue_positions[key] = collect_run(r, c, 1, 0)

    crossword_json = {
        "grid_size": grid_size,
        "null_squares": null_squares,
        "clue_positions": clue_positions
    }

    if output_path:
        with open(output_path, "w") as fout:
            json.dump(crossword_json, fout, indent=2)
        print(f"✅ crossword.json written to {output_path}")

    return crossword_json

# Example usage:
# crossword = generate_crossword_input(clue_numbers, clues_json, output_path="crossword.json")


In [10]:

crossword_dict = generate_crossword_input(clue_numbers, clues_json,
                                          output_path="crossword.json")

✅ crossword.json written to crossword.json


In [11]:
print(crossword_dict)
print(type(crossword_dict))

{'grid_size': 9, 'null_squares': [[0, 4], [1, 4], [3, 3], [3, 8], [4, 0], [4, 1], [4, 7], [4, 8], [5, 0], [5, 5], [7, 4], [8, 4]], 'clue_positions': {'1A': [[0, 0], [0, 1], [0, 2], [0, 3]], '5A': [[0, 5], [0, 6], [0, 7], [0, 8]], '9A': [[1, 0], [1, 1], [1, 2], [1, 3]], '10A': [[1, 5], [1, 6], [1, 7], [1, 8]], '11A': [[2, 0], [2, 1], [2, 2], [2, 3], [2, 4], [2, 5], [2, 6], [2, 7], [2, 8]], '13A': [[3, 0], [3, 1], [3, 2]], '14A': [[3, 4], [3, 5], [3, 6], [3, 7]], '15A': [[4, 2], [4, 3], [4, 4], [4, 5], [4, 6]], '17A': [[5, 1], [5, 2], [5, 3], [5, 4]], '18A': [[5, 6], [5, 7], [5, 8]], '21A': [[6, 0], [6, 1], [6, 2], [6, 3], [6, 4], [6, 5], [6, 6], [6, 7], [6, 8]], '23A': [[7, 0], [7, 1], [7, 2], [7, 3]], '24A': [[7, 5], [7, 6], [7, 7], [7, 8]], '25A': [[8, 0], [8, 1], [8, 2], [8, 3]], '26A': [[8, 5], [8, 6], [8, 7], [8, 8]], '1D': [[0, 0], [1, 0], [2, 0], [3, 0]], '2D': [[0, 1], [1, 1], [2, 1], [3, 1]], '3D': [[0, 2], [1, 2], [2, 2], [3, 2], [4, 2], [5, 2], [6, 2], [7, 2], [8, 2]], '4D': 

In [12]:
clue_lengths = {
    clue_id: len(positions)
    for clue_id, positions in crossword_dict["clue_positions"].items()
}

In [13]:
print(clue_lengths)

{'1A': 4, '5A': 4, '9A': 4, '10A': 4, '11A': 9, '13A': 3, '14A': 4, '15A': 5, '17A': 4, '18A': 3, '21A': 9, '23A': 4, '24A': 4, '25A': 4, '26A': 4, '1D': 4, '2D': 4, '3D': 9, '4D': 3, '5D': 5, '6D': 9, '7D': 4, '8D': 3, '12D': 5, '16D': 5, '17D': 4, '19D': 4, '20D': 4, '21D': 3, '22D': 3}


In [14]:
import random
import string
from collections import defaultdict

# Sample clue_lengths format (replace with your actual one)
# clue_lengths = {'1A': 4, '2D': 5, '3A': 6, ...}

def random_word(length):
    return ''.join(random.choices(string.ascii_uppercase, k=length))

# Simulate answers from clue_lengths
answers = defaultdict(list)
for clue_id, length in clue_lengths.items():
    direction = "across" if clue_id.endswith("A") else "down"
    answers[direction].append(random_word(length))

answers_dict = {
    "across": answers["across"],
    "down": answers["down"]
}

# Print as JSON
print(json.dumps(answers_dict, indent=2))


{
  "across": [
    "DWKG",
    "SEFJ",
    "VEUF",
    "ZRTB",
    "KJOLINKVN",
    "RPX",
    "FINV",
    "EFXPY",
    "UJJQ",
    "LZO",
    "JKTSWXAOQ",
    "UNLX",
    "SBVH",
    "SIHZ",
    "DCJS"
  ],
  "down": [
    "VWWH",
    "EZGV",
    "YDZYRMFLA",
    "AJM",
    "IZGZI",
    "XLWKZWJBV",
    "GUQN",
    "OJY",
    "NUPHE",
    "XDMLI",
    "PLKW",
    "SZVE",
    "GFCX",
    "HJN",
    "RPP"
  ]
}


In [15]:
import json
import re
import ast

# Step 1: Clean up markdown/codeblock fences if present
if isinstance(clues_json, str):
    cleaned = clues_json.strip()
    cleaned = re.sub(r"^```(?:json)?\s*", "", cleaned)
    cleaned = re.sub(r"\s*```$", "", cleaned)

    try:
        clues_dict = json.loads(cleaned)
    except json.JSONDecodeError:
        clues_dict = ast.literal_eval(cleaned)
else:
    clues_dict = clues_json  # already a dictionary

# ✅ Extract into separate across and down dictionaries
across = clues_dict.get("across", {})
down = clues_dict.get("down", {})

# ✅ Confirm they're dictionaries
print("Across clues:")
print(across)

print("\nDown clues:")
print(down)

Across clues:
{'1': 'Part of a fast food combo', '5': 'Dalmatian feature', '9': 'Nonbinary pronoun', '10': 'Krusty ___ (burger restaurant on SpongeBob SquarePants)', '11': 'Important stand at a convention', '13': 'Succession surname', '14': 'Where ulnae and humeri are located, in the human body', '15': 'Pride letters', '17': 'Fire station fixture', '18': 'Model ___ (some school clubs)', '21': "Euphemisms used to evade automated social media moderation (like 'panini' instead of 'pandemic')", '23': 'Luke’s sister in Star Wars', '24': 'Luke in Star Wars, e.g.', '25': 'Science ___ (what science fiction might turn into)', '26': 'Blow a fuse'}

Down clues:
{'1': '___ stick (coffee shop item)', '2': '“That can’t be good”', '3': 'Make zero sense', '4': 'Bottoms star Edebiri', '5': 'Portmanteau clothing item worn by tennis players', '6': 'Title awarded at a high school dance', '7': 'Cereal grains', '8': '“and i mean that”', '12': 'Term of endearment that might be used for one person or multiple

In [16]:
clues_formatted = {
    "across": [f"{num}. {text}" for num, text in sorted(across.items(), key=lambda x: int(x[0]))],
    "down":   [f"{num}. {text}" for num, text in sorted(down.items(), key=lambda x: int(x[0]))]
}

# Optional: preview
import json
print(json.dumps({"clues": clues_formatted}, indent=2))


{
  "clues": {
    "across": [
      "1. Part of a fast food combo",
      "5. Dalmatian feature",
      "9. Nonbinary pronoun",
      "10. Krusty ___ (burger restaurant on SpongeBob SquarePants)",
      "11. Important stand at a convention",
      "13. Succession surname",
      "14. Where ulnae and humeri are located, in the human body",
      "15. Pride letters",
      "17. Fire station fixture",
      "18. Model ___ (some school clubs)",
      "21. Euphemisms used to evade automated social media moderation (like 'panini' instead of 'pandemic')",
      "23. Luke\u2019s sister in Star Wars",
      "24. Luke in Star Wars, e.g.",
      "25. Science ___ (what science fiction might turn into)",
      "26. Blow a fuse"
    ],
    "down": [
      "1. ___ stick (coffee shop item)",
      "2. \u201cThat can\u2019t be good\u201d",
      "3. Make zero sense",
      "4. Bottoms star Edebiri",
      "5. Portmanteau clothing item worn by tennis players",
      "6. Title awarded at a high school d

In [17]:
import json

size = {
    "cols": GRID_SIZE,
    "rows": GRID_SIZE
}
print(json.dumps({"size": size}, indent=2))

{
  "size": {
    "cols": 9,
    "rows": 9
  }
}


In [18]:
# Convert to pure Python ints
gridnums = [int(0 if cell == -1 else cell) for row in clue_numbers for cell in row]

# Now safe to print as JSON
import json
print(json.dumps({"gridnums": gridnums}, indent=2))


{
  "gridnums": [
    1,
    2,
    3,
    4,
    0,
    5,
    6,
    7,
    8,
    9,
    0,
    0,
    0,
    0,
    10,
    0,
    0,
    0,
    11,
    0,
    0,
    0,
    12,
    0,
    0,
    0,
    0,
    13,
    0,
    0,
    0,
    14,
    0,
    0,
    0,
    0,
    0,
    0,
    15,
    16,
    0,
    0,
    0,
    0,
    0,
    0,
    17,
    0,
    0,
    0,
    0,
    18,
    19,
    20,
    21,
    0,
    0,
    0,
    0,
    22,
    0,
    0,
    0,
    23,
    0,
    0,
    0,
    0,
    24,
    0,
    0,
    0,
    25,
    0,
    0,
    0,
    0,
    26,
    0,
    0,
    0
  ]
}


In [19]:
import random
import string
import json

# Replace -1 with ".", others with a random letter A–Z
grid_alpha = [
    "." if cell == -1 else random.choice(string.ascii_uppercase)
    for row in clue_numbers
    for cell in row
]

# Preview
print(json.dumps({"grid": grid_alpha}, indent=2))



{
  "grid": [
    "B",
    "E",
    "R",
    "R",
    ".",
    "P",
    "J",
    "D",
    "E",
    "F",
    "L",
    "R",
    "L",
    ".",
    "W",
    "E",
    "J",
    "C",
    "Z",
    "G",
    "H",
    "Q",
    "E",
    "N",
    "T",
    "G",
    "Z",
    "Z",
    "T",
    "A",
    ".",
    "D",
    "B",
    "P",
    "H",
    ".",
    ".",
    ".",
    "H",
    "A",
    "I",
    "M",
    "M",
    ".",
    ".",
    ".",
    "J",
    "A",
    "Y",
    "B",
    ".",
    "M",
    "Q",
    "H",
    "Q",
    "A",
    "L",
    "G",
    "P",
    "B",
    "M",
    "J",
    "P",
    "P",
    "R",
    "O",
    "Q",
    ".",
    "F",
    "U",
    "K",
    "S",
    "V",
    "N",
    "A",
    "Z",
    ".",
    "D",
    "K",
    "O",
    "R"
  ]
}


In [20]:
merged_json = {
    "acrossmap": None,
    "admin": False,
    "answers": answers_dict,  # your existing {"across": [...], "down": [...]}
    "author": "John Doe",  # random author
    "autowrap": None,
    "bbars": None,
    "circles": None,
    "clues": clues_formatted,  # your formatted clue list {"across": [...], "down": [...]}
    "code": None,
    "copyright": "2024, Example Syndicate",
    "date": "5/8/2025",
    "dow": "Thursday",
    "downmap": None,
    "editor": "Jane Editor",
    "grid": grid_alpha,  # your 1D list of letters and dots
    "gridnums": gridnums,  # from clue_numbers as flattened ints
    "hold": None,
    "id": "xyz123",
    "id2": None,
    "interpretcolors": None,
    "jnotes": None,
    "key": None,
    "mini": None,
    "notepad": "This puzzle was generated as a sample.",
    "publisher": "Example Crossword Publisher",
    "rbars": None,
    "shadecircles": None,
    "size": {
        "cols": GRID_SIZE,
        "rows": GRID_SIZE
    },
    "title": "Sample Puzzle, Auto-Generated",
    "track": None,
    "type": "standard"
}

# Optional: write to file
import json
with open("merged_puzzle.json", "w") as f:
    json.dump(merged_json, f, indent=2)

print("✅ merged_puzzle.json written.")


✅ merged_puzzle.json written.


# Inference

In [2]:
!unzip fine_tuned_t5_crossword\(10epochs\).zip

Archive:  fine_tuned_t5_crossword(10epochs).zip
   creating: fine_tuned_t5_crossword/
  inflating: fine_tuned_t5_crossword/added_tokens.json  
  inflating: fine_tuned_t5_crossword/config.json  
  inflating: fine_tuned_t5_crossword/generation_config.json  
  inflating: fine_tuned_t5_crossword/model.safetensors  
  inflating: fine_tuned_t5_crossword/special_tokens_map.json  
  inflating: fine_tuned_t5_crossword/spiece.model  
  inflating: fine_tuned_t5_crossword/tokenizer_config.json  


In [43]:
BIGRAMS_DICT = {'t': {'h': 0.035562033867788265,
  'i': 0.013425788170089764,
  'e': 0.012048696340679248,
  'o': 0.010412665302782832,
  'a': 0.005298860706648515,
  'r': 0.004258201782026581,
  's': 0.0033748821262235386,
  'u': 0.0025490671946763704,
  'y': 0.0022727710093077376,
  't': 0.0017068330270812787,
  'l': 0.0009844918163128189,
  'w': 0.0008237223232097947,
  'm': 0.0002647908857415045,
  'c': 0.00026136282448192903,
  'n': 0.0001001065175042166,
  'f': 5.661194659804483e-05,
  'p': 4.294463459127656e-05,
  'z': 3.8489544871772874e-05,
  'b': 2.544494525651471e-05,
  'g': 1.969131766647826e-05,
  'd': 1.2958822338897517e-05,
  'v': 1.1634636011898067e-05,
  'k': 4.639558582150014e-06,
  'x': 1.1806014302411626e-06,
  'j': 1.1241265932976448e-06,
  'q': 8.928017741874646e-07},
 'h': {'e': 0.030747412428375958,
  'a': 0.009257635591819875,
  'i': 0.0076324181389379,
  'o': 0.004849020685694829,
  't': 0.0013018587622421166,
  'r': 0.0008439253556003175,
  'u': 0.0007369276170545124,
  'y': 0.000500890899862585,
  'n': 0.0002575797725545694,
  's': 0.00014684170882079659,
  'm': 0.0001274324149425344,
  'l': 0.00012591917480757334,
  'w': 4.774158645863246e-05,
  'b': 4.389827449001692e-05,
  'd': 2.8565229294317865e-05,
  'f': 2.2431732884890087e-05,
  'c': 1.228216023502966e-05,
  'p': 5.921321042846898e-06,
  'h': 5.224179540214263e-06,
  'q': 4.229354221105825e-06,
  'z': 3.8054131113845875e-06,
  'g': 2.762652274121417e-06,
  'k': 2.3391484507224056e-06,
  'v': 2.2318263999992716e-06,
  'j': 4.547955077320962e-07,
  'x': 9.362715102096135e-08},
 'i': {'n': 0.024327452898924896,
  's': 0.01128429882563713,
  't': 0.011232737362990464,
  'o': 0.008349318507738292,
  'c': 0.0069870748758413,
  'l': 0.004315346175614546,
  'e': 0.003846463877356222,
  'm': 0.0031775994649595714,
  'r': 0.0031517239753216133,
  'd': 0.002955039109640445,
  'v': 0.0028784821891635827,
  'a': 0.0028628243462715157,
  'g': 0.002549613798323326,
  'f': 0.002032565155377681,
  'b': 0.0009860286830313163,
  'p': 0.0008921120651337005,
  'z': 0.0006433975365040388,
  'k': 0.0004291274372183285,
  'i': 0.00022782299229399052,
  'x': 0.0002203199193791771,
  'u': 0.0001740899395268762,
  'q': 0.00011303759361811866,
  'h': 2.0912551259453547e-05,
  'j': 1.1109294481398722e-05,
  'w': 6.246125832261312e-06,
  'y': 2.2157216377184053e-06},
 'e': {'r': 0.020482648127014873,
  'n': 0.014542484564787907,
  's': 0.013393937507580644,
  'd': 0.011681233651308664,
  'a': 0.006881652898734964,
  'l': 0.0053030155892002185,
  'c': 0.004772827186326274,
  't': 0.004126082418439024,
  'e': 0.0037760540847057225,
  'm': 0.003736636382769038,
  'v': 0.0025478371547114024,
  'x': 0.0021404459030375522,
  'i': 0.0018335165422055663,
  'p': 0.001715737389512778,
  'f': 0.0016273211504883858,
  'y': 0.0014374572644723898,
  'g': 0.0011953613384758778,
  'w': 0.0011680577993843661,
  'o': 0.0007250045774136436,
  'q': 0.0005725195733425061,
  'u': 0.00031152734742272863,
  'b': 0.0002707357514431997,
  'h': 0.00026323716594431096,
  'k': 0.00016471802226078754,
  'j': 4.546450784000317e-05,
  'z': 4.523242832073589e-05},
 'a': {'n': 0.019851510793865493,
  't': 0.014867323009999081,
  'l': 0.010874495288754523,
  'r': 0.010748984742942346,
  's': 0.008710950725934656,
  'c': 0.004477721995513402,
  'd': 0.0036795641822092583,
  'i': 0.003164477518153824,
  'm': 0.0028486596900530927,
  'b': 0.0022978641720814404,
  'y': 0.0021736083500363624,
  'v': 0.002049326470620682,
  'g': 0.0020472490648100632,
  'p': 0.002028459081852819,
  'u': 0.0011903277430684266,
  'k': 0.0010469932031918583,
  'f': 0.000742072946387665,
  'w': 0.0005991626391450328,
  'x': 0.0001883937857903907,
  'h': 0.0001364329491555188,
  'e': 0.00012396519722856773,
  'z': 0.00011869129223989193,
  'j': 0.00011752630331449835,
  'o': 4.62616737123756e-05,
  'a': 2.8299407088468595e-05,
  'q': 2.2443811633926902e-05},
 'r': {'e': 0.018543231910521065,
  'i': 0.007276188663828457,
  'o': 0.007267244407620373,
  'a': 0.006856330314206551,
  's': 0.003965272774436477,
  't': 0.0036167641252256148,
  'y': 0.0024774012156725655,
  'd': 0.001893163848503906,
  'm': 0.001751329245044116,
  'n': 0.0016036105108033557,
  'u': 0.001283287568917398,
  'c': 0.0012138664054551589,
  'r': 0.0012074305480743342,
  'g': 0.0009977345013122607,
  'k': 0.0009703434723282896,
  'l': 0.0008626468395880397,
  'v': 0.000692833068035106,
  'p': 0.00041619943700407986,
  'f': 0.000322604150785616,
  'b': 0.00026712224389915793,
  'h': 0.00015111580775304897,
  'w': 0.00012757362047681434,
  'x': 1.1700197750827059e-05,
  'q': 9.984375594797058e-06,
  'z': 6.381304759506693e-06,
  'j': 5.531870226818162e-06},
 'o': {'n': 0.01758046422760261,
  'r': 0.012765390616045856,
  'f': 0.011749752810478072,
  'u': 0.008700023193609326,
  'm': 0.005462568850726722,
  't': 0.004421032981545556,
  'l': 0.0036549264841722333,
  'w': 0.0033047804182783813,
  's': 0.002899667676599962,
  'p': 0.002239110516949617,
  'o': 0.0021025921710596876,
  'd': 0.0019544942921995924,
  'v': 0.0017808654499976906,
  'c': 0.0016640508583674763,
  'b': 0.0009667081767183874,
  'g': 0.0009402421033527496,
  'i': 0.0008775074676657127,
  'k': 0.0006431180495439353,
  'a': 0.00057486066316008,
  'e': 0.00038630665183098036,
  'y': 0.00036181283908123666,
  'h': 0.00021354371492525182,
  'x': 0.0001857541269441267,
  'j': 6.97589276024244e-05,
  'z': 3.472223489736102e-05,
  'q': 1.0365656994416639e-05},
 'n': {'d': 0.013522814458699572,
  't': 0.010412511512410641,
  'g': 0.009530148421324101,
  'e': 0.006917222650205216,
  's': 0.005089374518664006,
  'o': 0.004645749577986965,
  'c': 0.004157458431293705,
  'a': 0.003472349728587426,
  'i': 0.0033921247759627383,
  'y': 0.0009791744504544292,
  'u': 0.0007864445487428901,
  'n': 0.0007276469489955757,
  'f': 0.0006718072826705972,
  'l': 0.000637839069480425,
  'v': 0.0005200714830640574,
  'k': 0.0005160546485769444,
  'm': 0.00027749485704436466,
  'j': 0.00011086395999094682,
  'h': 0.00010883711235245899,
  'r': 9.151747362162302e-05,
  'p': 6.035706136572198e-05,
  'q': 5.9500129128136294e-05,
  'w': 5.797005116452936e-05,
  'z': 4.36906610163499e-05,
  'b': 4.335938098470417e-05,
  'x': 2.6208656774648996e-05},
 's': {'t': 0.010534756583067857,
  'e': 0.009321145792569853,
  'i': 0.00550057242402309,
  's': 0.004050752088578574,
  'o': 0.003977321583723639,
  'h': 0.0031524000359351304,
  'u': 0.0031117653434659404,
  'a': 0.0021801744573863575,
  'p': 0.0019125422113792543,
  'c': 0.0015474937939797774,
  'm': 0.0006519902425265863,
  'y': 0.000568447139655051,
  'l': 0.0005588067998722335,
  'k': 0.0003946469241277682,
  'w': 0.00023528201313365683,
  'f': 0.00017164460994197454,
  'n': 9.174955952463224e-05,
  'b': 7.916276818864494e-05,
  'q': 7.443946555193908e-05,
  'r': 5.989948006394196e-05,
  'd': 5.258615288378493e-05,
  'g': 2.4679789239442523e-05,
  'v': 1.2414606566076536e-05,
  'j': 2.703105793352429e-06,
  'z': 2.497125493662587e-06,
  'x': 1.7866391331769582e-07},
 'l': {'e': 0.008292542349309354,
  'i': 0.0062435218432823885,
  'l': 0.00576571076300642,
  'a': 0.005275294441670981,
  'y': 0.0042501351601765104,
  'o': 0.0038688419999310334,
  'd': 0.002526063785288789,
  's': 0.0014151349134589523,
  'u': 0.001351893577122505,
  't': 0.001236370993149481,
  'f': 0.0005347688515523429,
  'v': 0.0003490591288531265,
  'm': 0.00023020919388787198,
  'k': 0.00019714520156759115,
  'p': 0.00019030486604223394,
  'w': 0.0001263889135912073,
  'c': 0.00011821911415753592,
  'r': 0.00010051268485779453,
  'b': 6.690295672480866e-05,
  'g': 6.087869252355049e-05,
  'n': 5.8698357385961136e-05,
  'h': 1.6187405494591476e-05,
  'z': 4.173474136118815e-06,
  'x': 3.0544297107023596e-06,
  'q': 1.2706944707990358e-06,
  'j': 1.1241886574553005e-06},
 'v': {'e': 0.008252805659945416,
  'i': 0.0026954434935998663,
  'a': 0.0013998007454339783,
  'o': 0.0007110718485686386,
  'y': 4.897224174539224e-05,
  'u': 2.2124959683862503e-05,
  'r': 8.760351597293566e-06,
  's': 5.767798787817088e-06,
  'l': 4.1214214590713575e-06,
  'd': 2.9898368819374584e-06,
  'c': 2.330365485783281e-06,
  't': 2.192436584375253e-06,
  'p': 1.9541506493423943e-06,
  'n': 1.5313078978855e-06,
  'm': 1.1271641904994861e-06,
  'g': 1.049865942031611e-06,
  'h': 9.54397081463754e-07,
  'v': 9.301009143293054e-07,
  'b': 8.852171794695919e-07,
  'f': 5.498235354544322e-07,
  'w': 2.632187030986915e-07,
  'j': 2.568995078923388e-07,
  'z': 1.7774784635069566e-07,
  'x': 1.5954176901776744e-07,
  'k': 1.1971112054822743e-07,
  'q': 7.548987623994875e-08},
 'c': {'o': 0.00793859724517974,
  'e': 0.006514173632205198,
  'h': 0.005977659776993852,
  'a': 0.005381640982859478,
  't': 0.004609717569833274,
  'i': 0.0028148480328139764,
  'u': 0.0016261397695323246,
  'r': 0.0014945583064235584,
  'l': 0.0014901135115518744,
  'k': 0.0011762612425566196,
  'c': 0.0008313828514739717,
  'y': 0.0004171861457499762,
  's': 0.00022822966999227055,
  'q': 5.4745391172903915e-05,
  'm': 2.665030461122328e-05,
  'd': 2.230972751842257e-05,
  'f': 1.4081226385830964e-05,
  'p': 1.3146047914100753e-05,
  'g': 8.85768060932945e-06,
  'n': 8.600191669265546e-06,
  'z': 7.30644125052819e-06,
  'b': 6.295765100206781e-06,
  'v': 2.081598865110163e-06,
  'w': 1.3864150433361575e-06,
  'j': 1.0504972231780532e-06,
  'x': 3.1093646472313536e-07},
 'm': {'e': 0.007930064859134691,
  'a': 0.0056526934547922415,
  'o': 0.0033687762253118664,
  'i': 0.0031769136939652737,
  'p': 0.002391752260327065,
  'u': 0.001146185325560156,
  'm': 0.0009606901207061564,
  's': 0.0009283316556428383,
  'b': 0.0009025552218191082,
  'y': 0.0006218641333591955,
  'n': 8.790070008427924e-05,
  'l': 4.60653782584987e-05,
  'c': 4.312266403157685e-05,
  'f': 3.818344692878059e-05,
  'r': 3.106055847393173e-05,
  't': 1.3667842921305478e-05,
  'g': 1.2248635659630283e-05,
  'd': 6.713220682779148e-06,
  'w': 5.839477215472309e-06,
  'h': 5.331806946593063e-06,
  'v': 2.898408575359161e-06,
  'k': 1.4033177733583133e-06,
  'j': 1.0367640210242988e-06,
  'x': 3.526428693631995e-07,
  'z': 3.4411277153498116e-07,
  'q': 2.512442218467389e-07},
 'd': {'e': 0.007648183905725693,
  'i': 0.0049296640450796365,
  'o': 0.0018823497105864835,
  'a': 0.0015106736394717565,
  'u': 0.0014846077100445331,
  's': 0.0012626067496179288,
  'r': 0.0008544990498779348,
  'y': 0.0005042273930825643,
  'd': 0.00042751454226135495,
  'l': 0.00032340266504614503,
  'g': 0.0003100328772623043,
  'v': 0.00019052697025543044,
  'm': 0.00018176736981177067,
  'w': 8.162407911681682e-05,
  'n': 7.569403327201283e-05,
  'h': 5.438395966416432e-05,
  'j': 4.781874391012554e-05,
  't': 2.895677114048111e-05,
  'f': 2.778612054529999e-05,
  'b': 2.773035182114443e-05,
  'c': 2.5190982269910315e-05,
  'p': 1.7326765824048826e-05,
  'q': 7.067476156284412e-06,
  'k': 3.367078791524467e-06,
  'z': 7.80484800056189e-07,
  'x': 4.597276576779233e-07},
 'b': {'e': 0.005762837161652202,
  'l': 0.002334001713419505,
  'o': 0.001954105307751377,
  'u': 0.001849441938716155,
  'y': 0.001764684295250406,
  'a': 0.0014620446497566957,
  'r': 0.0011155985184063137,
  'i': 0.0010659711855756885,
  's': 0.00045832404128730357,
  'j': 0.00023224515573657463,
  't': 0.00017103922160745399,
  'b': 0.00010933104622731352,
  'v': 3.843901719974211e-05,
  'm': 3.1290520718792114e-05,
  'd': 2.474095967322812e-05,
  'n': 2.094651915026475e-05,
  'c': 1.8895200855696378e-05,
  'h': 1.056790670625287e-05,
  'p': 5.471310149691573e-06,
  'w': 2.950660921668105e-06,
  'g': 2.5546511654583004e-06,
  'f': 1.4571586074509174e-06,
  'k': 9.361016317438012e-07,
  'z': 3.572675357393831e-07,
  'x': 2.761401060703075e-07,
  'q': 1.228891600725358e-07},
 'u': {'r': 0.005427477814999868,
  's': 0.004542570592090613,
  't': 0.004051512681993427,
  'n': 0.003944130463793296,
  'l': 0.0034582949383408857,
  'c': 0.0018765226217315315,
  'e': 0.001474803472298135,
  'm': 0.001383826156162727,
  'a': 0.001363332529810447,
  'p': 0.001360124828938564,
  'g': 0.001279075756509812,
  'i': 0.0010115331265358833,
  'd': 0.0009140148635053416,
  'b': 0.0008858033353629379,
  'f': 0.00018532281509504224,
  'o': 0.00010656740128132193,
  'k': 4.6040929945514914e-05,
  'y': 4.567302106663002e-05,
  'x': 3.934788366049721e-05,
  'v': 2.9170987885870377e-05,
  'z': 1.9106469730923158e-05,
  'h': 1.067402577627447e-05,
  'u': 7.804796575974118e-06,
  'j': 5.159490600668077e-06,
  'w': 2.7749785704841955e-06,
  'q': 2.5661674359005885e-06},
 'f': {'o': 0.004877535684515768,
  'i': 0.002845856271912309,
  'e': 0.002365731954439461,
  'r': 0.0021318861484722396,
  'a': 0.0016399978536375908,
  'f': 0.0014631657862762036,
  'u': 0.0009597491046570832,
  't': 0.000816643643941075,
  'l': 0.0006490488181701867,
  'y': 9.098073817573506e-05,
  's': 5.509522093998982e-05,
  'm': 6.8592512620014045e-06,
  'c': 5.764657277482714e-06,
  'g': 5.115691037979528e-06,
  'd': 4.953369504003438e-06,
  'n': 4.193077898282152e-06,
  'b': 2.741761124002132e-06,
  'w': 2.288043404699271e-06,
  'p': 2.2211503009254794e-06,
  'h': 1.8321924516346958e-06,
  'k': 1.1679438886026663e-06,
  'j': 7.325116887545325e-07,
  'x': 2.880188311933054e-07,
  'v': 2.863097616176284e-07,
  'z': 9.357856365182507e-08,
  'q': 5.299569759149632e-08},
 'p': {'e': 0.004779891850444238,
  'r': 0.00474470915872401,
  'o': 0.003613731820145834,
  'a': 0.0032357247098555595,
  'l': 0.002629870834216346,
  'p': 0.0013654559840341753,
  'i': 0.001230941047519584,
  't': 0.0010578213360448539,
  'u': 0.001045402050905247,
  'h': 0.0009439001974329991,
  's': 0.0005457118644518548,
  'm': 0.0001595616355773069,
  'y': 0.00011763752370362693,
  'f': 1.4548641132749975e-05,
  'b': 1.3087201162377793e-05,
  'w': 1.2071464477967291e-05,
  'd': 1.1986672780430216e-05,
  'n': 1.1893666270985813e-05,
  'c': 1.17590888340912e-05,
  'k': 8.192278007617351e-06,
  'g': 4.736364836652272e-06,
  'z': 2.263462097091935e-06,
  'v': 2.206680840535762e-06,
  'j': 9.968471920386997e-07,
  'x': 5.225688585876123e-07,
  'q': 3.812686321357193e-07},
 'w': {'a': 0.0038533707704842494,
  'h': 0.0037879343139092495,
  'i': 0.003744207031848012,
  'e': 0.0036089923262116784,
  'o': 0.0022175431498151384,
  'n': 0.0007898759689146779,
  's': 0.00035084111991287353,
  'r': 0.00030761160268095306,
  'l': 0.00015221175189898594,
  't': 6.541432485103197e-05,
  'd': 3.538276280280467e-05,
  'y': 2.4247208444323264e-05,
  'f': 1.6076585507942857e-05,
  'm': 1.0899257668364493e-05,
  'k': 1.0673521815314304e-05,
  'b': 1.0614429644179357e-05,
  'p': 7.441508816942119e-06,
  'u': 6.951773316363763e-06,
  'c': 6.741321560104064e-06,
  'w': 2.6164897642326007e-06,
  'g': 9.759262511237404e-07,
  'j': 3.2421571189517844e-07,
  'v': 1.380033783972149e-07,
  'x': 5.342376295315687e-08,
  'q': 0.0,
  'z': 0.0},
 'g': {'e': 0.0038518951302346054,
  'h': 0.0022750336049891886,
  'r': 0.0019677786609059254,
  'i': 0.001516365615010144,
  'a': 0.0014807706704570165,
  'o': 0.0013212780817870803,
  'u': 0.0008576950869440064,
  'n': 0.0006563910133406573,
  'l': 0.0006063676260676287,
  's': 0.0005119317272931717,
  'y': 0.0002593998156813537,
  'g': 0.00024754730610389725,
  't': 0.0001540263965080112,
  'm': 9.858149369095683e-05,
  'd': 3.159517026167088e-05,
  'f': 1.2044892506834687e-05,
  'w': 6.476265048634101e-06,
  'b': 4.945574245801865e-06,
  'p': 4.118557641510953e-06,
  'k': 2.6027558527741875e-06,
  'c': 2.289304193730508e-06,
  'z': 1.2168809449358004e-06,
  'v': 4.228753794712046e-07,
  'j': 3.8764492636711533e-07,
  'x': 1.9755198708017932e-07,
  'q': 1.2257635671795039e-07},
 'k': {'e': 0.0021376796971470896,
  'i': 0.0009787843031012254,
  'n': 0.0005143883089325302,
  's': 0.0004750889700828898,
  'a': 0.0001695576568906465,
  'l': 0.0001056981973638322,
  'o': 6.0760796868276776e-05,
  'y': 5.949708692045417e-05,
  'h': 3.18518637165598e-05,
  'u': 3.0256752445994783e-05,
  'r': 2.7217223452914765e-05,
  'g': 2.5629904957985015e-05,
  'w': 2.1781518269187193e-05,
  'm': 1.789193339253743e-05,
  'f': 1.5874099243002875e-05,
  't': 1.0331795499304272e-05,
  'b': 9.010369431385025e-06,
  'p': 7.267775989613261e-06,
  'd': 6.701069939319453e-06,
  'k': 4.533401564329163e-06,
  'c': 2.2435384385274225e-06,
  'v': 1.5657758484812462e-06,
  'j': 1.2310556892397347e-06,
  'x': 1.9698170613440454e-07,
  'q': 6.640581147311086e-08,
  'z': 5.346738518968069e-08},
 'y': {'o': 0.0014990161014310313,
  's': 0.0009683226243432593,
  'e': 0.0009266857606412607,
  'i': 0.00028819725500699606,
  'p': 0.0002491432422957806,
  'm': 0.00023674882112168774,
  't': 0.00016683904605210232,
  'a': 0.00015765816439119228,
  'l': 0.00014756455942544424,
  'c': 0.00013500567107078203,
  'n': 0.00013214179641135189,
  'r': 7.791586450541088e-05,
  'd': 6.818024879289267e-05,
  'b': 4.2991211791009456e-05,
  'w': 3.371689165411773e-05,
  'g': 2.5925958437702277e-05,
  'z': 1.8122008062186258e-05,
  'u': 1.3276847949034266e-05,
  'f': 7.5351693052820495e-06,
  'h': 5.2073200777763014e-06,
  'k': 3.295636562320667e-06,
  'v': 1.8143172649251557e-06,
  'x': 1.4935090526114474e-06,
  'y': 7.06828121707229e-07,
  'j': 1.9907841070618347e-07,
  'q': 1.3649929786796687e-07},
 'q': {'u': 0.0014754132569029317,
  'i': 3.1757757784894065e-06,
  'l': 2.3793337515214076e-06,
  's': 1.5777070621490045e-06,
  'a': 1.5243999798122404e-06,
  'n': 6.595859488565936e-07,
  'r': 6.186289245886825e-07,
  'q': 6.072424568989777e-07,
  't': 4.913725757311525e-07,
  'o': 4.696884230031911e-07,
  'c': 3.9983468158305136e-07,
  'b': 3.842455837892227e-07,
  'm': 3.5105899205982257e-07,
  'f': 3.3616749539804555e-07,
  'v': 2.253865205056695e-07,
  'e': 2.0301824343417607e-07,
  'd': 1.8439119378617653e-07,
  'p': 9.652927103294833e-08,
  'h': 9.345408068418392e-08,
  'j': 8.402706711475635e-08,
  'w': 5.791188818249761e-08,
  'x': 5.6568819810825274e-08,
  'g': 0.0,
  'k': 0.0,
  'y': 0.0,
  'z': 0.0},
 'x': {'p': 0.000668638321181966,
  't': 0.0004666052494304905,
  'i': 0.00039418316655567056,
  'a': 0.00029601048911654044,
  'c': 0.000264597695268772,
  'e': 0.00021794560324881292,
  'u': 4.771072567687967e-05,
  'h': 4.17137338851048e-05,
  'x': 2.8041737630368995e-05,
  'o': 2.6988043216440842e-05,
  'y': 2.5764015317761724e-05,
  'v': 1.9533085273860866e-05,
  'f': 1.8574645865145906e-05,
  'l': 5.932714958235521e-06,
  'q': 2.9172055034729726e-06,
  'm': 2.329789885052564e-06,
  'w': 2.2316586494474362e-06,
  'b': 7.677244092421487e-07,
  's': 6.400552450731251e-07,
  'n': 5.722708999953956e-07,
  'r': 3.218147156247212e-07,
  'g': 1.9241591204491164e-07,
  'j': 1.8125358462742683e-07,
  'k': 9.974774092281535e-08,
  'd': 5.967486491225286e-08,
  'z': 4.841749067047226e-08},
 'j': {'u': 0.0005870242886563383,
  'o': 0.0005378966907487205,
  'e': 0.0005188748751102307,
  'a': 0.00025861490978535843,
  'i': 2.7627374616109534e-05,
  'r': 2.4281548365428075e-06,
  'p': 2.1738226568634436e-06,
  'n': 1.472122098535575e-06,
  'm': 1.2978643857591143e-06,
  'h': 1.2561320914989996e-06,
  'c': 1.2226890861353984e-06,
  's': 1.1806510815672873e-06,
  'l': 1.1321662069542173e-06,
  'j': 1.0568462091800807e-06,
  't': 1.0348222995204949e-06,
  'd': 9.2746797543538e-07,
  'f': 8.588913370536327e-07,
  'b': 7.54031637453953e-07,
  'k': 6.171422220235781e-07,
  'v': 6.099048319362484e-07,
  'w': 4.1349411605227764e-07,
  'y': 3.9728898716217363e-07,
  'g': 3.670296957601522e-07,
  'z': 7.826290280401158e-08,
  'x': 5.736501429046731e-08,
  'q': 0.0},
 'z': {'e': 0.0004973256335470059,
  'a': 0.00024903661607292787,
  'i': 0.00012117448344352996,
  'o': 7.181018489679211e-05,
  'z': 2.6603391555734385e-05,
  'y': 2.3574870970960865e-05,
  'u': 2.152485921557423e-05,
  'l': 1.2574854802534591e-05,
  'h': 6.661331855107903e-06,
  's': 2.977626911538743e-06,
  't': 2.350404761007198e-06,
  'w': 2.22696340725769e-06,
  'b': 2.077628177629507e-06,
  'r': 1.8869341026441333e-06,
  'g': 1.6763184970082236e-06,
  'm': 1.6716920573703928e-06,
  'v': 1.6380344873474672e-06,
  'n': 1.5251901452022816e-06,
  'c': 7.450525497766359e-07,
  'd': 5.018387206259035e-07,
  'p': 4.829818562684127e-07,
  'q': 3.2381034428260345e-07,
  'k': 3.215054587934304e-07,
  'f': 2.1576657606901463e-07,
  'j': 1.0959785472129102e-07,
  'x': 3.8381893703688e-08}}


In [44]:
import numpy as np
import string
from scipy.special import logsumexp
import random

class VariableNode:
    def __init__(self, position):
        self.position = position
        self.letters = list(string.ascii_uppercase)
        self.log_probs = np.log(np.full(len(self.letters), 1.0 / len(self.letters)))
        self.neighbors = []

    def add_neighbor(self, neighbor):
        self.neighbors.append(neighbor)

    def receive_message(self, factor_node, message):
        self.log_probs += message

    def normalize(self):
        self.log_probs -= logsumexp(self.log_probs)

class FactorNode:
    def __init__(self, clue, length, candidates, confidence_ratings, bigram_probs):
        self.clue = clue
        self.length = length
        self.candidates = candidates
        self.confidence_ratings = confidence_ratings
        self.bigram_probs = bigram_probs
        self.neighbors = []
        self.messages = {}

    def add_neighbor(self, variable_node):
        self.neighbors.append(variable_node)
        self.messages[variable_node.position] = np.zeros(len(variable_node.letters))

    def send_message(self, variable_node, min_log_prob=-100):
        idx = self.neighbors.index(variable_node)
        other_vars = self.neighbors[:idx] + self.neighbors[idx + 1:]
        message = np.full(len(variable_node.letters), min_log_prob)  # Initialize with minimum log probability

        for candidate in self.candidates:
            candidate_indices = [string.ascii_uppercase.index(char) for char in candidate]
            candidate_log_prob = np.log(self.confidence_ratings[candidate])

            for other_var in other_vars:
                other_idx = self.neighbors.index(other_var)
                candidate_log_prob += other_var.log_probs[candidate_indices[other_idx]]

            # Incorporate bigram probabilities
            if idx > 0:  # Previous letter bigram
                prev_var = self.neighbors[idx - 1]
                prev_letter_idx = candidate_indices[idx - 1]
                prev_letter = prev_var.letters[prev_letter_idx].lower()
                curr_letter = candidate[idx].lower()
                if prev_letter in self.bigram_probs and curr_letter in self.bigram_probs[prev_letter]:
                    candidate_log_prob += np.log(max(self.bigram_probs[prev_letter][curr_letter], np.exp(min_log_prob)))

            if idx < self.length - 1:  # Next letter bigram
                next_var = self.neighbors[idx + 1]
                next_letter_idx = candidate_indices[idx + 1]
                next_letter = next_var.letters[next_letter_idx].lower()
                curr_letter = candidate[idx].lower()
                if curr_letter in self.bigram_probs and next_letter in self.bigram_probs[curr_letter]:
                    candidate_log_prob += np.log(max(self.bigram_probs[curr_letter][next_letter], np.exp(min_log_prob)))

            message[candidate_indices[idx]] = logsumexp([message[candidate_indices[idx]], candidate_log_prob])

        self.messages[variable_node.position] = message - logsumexp(message)
        variable_node.receive_message(self, self.messages[variable_node.position])


class CrosswordSolvingGrid:
    def __init__(self, variables, factors):
        self.variables = variables
        self.factors = factors

    def run_belief_propagation(self, num_iterations=10):
        for _ in range(num_iterations):
            factor_list = list(self.factors.values())
            random.shuffle(factor_list)
            for factor in factor_list:
                for neighbor in factor.neighbors:
                    factor.send_message(neighbor)

            variable_list = list(self.variables.values())
            random.shuffle(variable_list)
            for variable in variable_list:
                variable.normalize()

    def get_solution(self):
        solution = {}
        for position, variable in self.variables.items():
            best_letter = variable.letters[np.argmax(variable.log_probs)]
            solution[position] = best_letter
        return solution


In [45]:
from sentence_transformers import SentenceTransformer, util
import numpy as np

bi_encoder = SentenceTransformer('paraphrase-MiniLM-L6-v2')

def biencoder(clue, answers):
    def encode_texts(bi_encoder, texts):
        return bi_encoder.encode(texts)

    def calculate_similarity(clue_embedding, answer_embeddings):
        return util.dot_score(clue_embedding, answer_embeddings)[0].cpu().numpy()

    def softmax(x):
        e_x = np.exp(x - np.max(x))
        return e_x / e_x.sum()

    clue_embedding = encode_texts(bi_encoder, [clue])[0]
    answer_embeddings = encode_texts(bi_encoder, answers)

    similarity_scores = calculate_similarity(clue_embedding, answer_embeddings)
    probabilities = softmax(similarity_scores)

    answer_probabilities = {answer: prob for answer, prob in zip(answers, probabilities)}

    return answer_probabilities


In [46]:
import json
import os
import numpy as np



def read_json(file_path):
    if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
        try:
            # Open the JSON file and load its data
            with open(file_path, 'r') as file:
                data = json.load(file)
            # Now 'data' contains the JSON data as a Python dictionary
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON: {e}")
        except Exception as e:
            print(f"An error occurred: {e}")
    else:
        print("File does not exist or is empty.")

    return data

class Crossword:
    def __init__(self, data):
        self.data = data
        self.across_clues = {}
        self.down_clues = {}
        self.clue_to_positions = {}
        self.solution_dict = {}
        self.clue_grid = None
        self.neighbors = {}
        self.coord_to_letter = {}
        self.null_squares = []
        self.clues = {}

    def initialize_solution_map(self):
        # first do across
        clues = self.data['clues']['across']
        answers = self.data['answers']['across']
        for i, clue in enumerate(clues):
            period_idx = clue.find('.')
            num, rest = clue[:period_idx], clue[period_idx+1:]
            self.solution_dict[f"{num}A"] = answers[i]

        # now do down
        clues = self.data['clues']['down']
        answers = self.data['answers']['down']
        for i, clue in enumerate(clues):
            period_idx = clue.find('.')
            num, rest = clue[:period_idx], clue[period_idx+1:]
            self.solution_dict[f"{num}D"] = answers[i]


    def initialize_clues(self):
        """
        Take in dictionary representing crossword and fill in dictionaries that hole clue codes (i.e. 1a/3d/18a/etc)
        and map to corresppnding clue.
        """
        for clue in self.data['clues']['across']:
            period_idx = clue.find('.')
            num, rest = clue[:period_idx], clue[period_idx+1:]
            self.across_clues[f"{num}A"] = rest

        for clue in self.data['clues']['down']:
            period_idx = clue.find('.')
            num, rest = clue[:period_idx], clue[period_idx+1:]
            self.down_clues[f"{num}D"] = rest

    def initialize_clue_positions_mapping(self, n):
        """
        Take clue dictionary from self.across_clues and self.down_clues in the form {'1A': clue, etc ...},
        build a dictionary that maps clue ID to coordinates in grid
        """
        # first do across
        for clue in self.across_clues:
            num = int(clue[:-1])
            answer_len = len(self.solution_dict[clue])
            start = list(self.data['gridnums']).index(num)
            row, col = start // n, start % n # convert from 1D array index to grid coord
            # this is across, so now that we have a start index, add corresponding coord to map
            coords = []
            for i in range(answer_len):
                coords.append((row, col + i))
            self.clue_to_positions[clue] = coords

        # now do down
        for clue in self.down_clues:
            num = int(clue[:-1])
            answer_len = len(self.solution_dict[clue])
            start = list(self.data['gridnums']).index(num)
            row, col = start // n, start % n # convert from 1D array index to grid coord
            # this is across, so now that we have a start index, add corresponding coord to map
            coords = []
            for i in range(answer_len):
                coords.append((row + i, col))
            self.clue_to_positions[clue] = coords

    def skipped_blanks(self, n):
        all_tup = []
        for i in range(n):
            for j in range(n):
                all_tup.append((i, j))

        all_included_tup = []
        for lst in self.clue_to_positions.values():
            all_included_tup += lst

        skip = set(all_tup) - set(all_included_tup)

        self.null_squares = skip


    def coord_to_letter_mapping(self):
        for clue in self.across_clues:
            coords = self.clue_to_positions[clue]
            answer = self.solution_dict[clue]
            for i, coord in enumerate(coords):
                self.coord_to_letter[coord] = answer[i]

    def initialize_all_clues(self):
        for key in self.across_clues:
            self.clues[key] = self.across_clues[key]
        for key in self.down_clues:
            self.clues[key] = self.down_clues[key]

    def initialize(self, n):
        self.initialize_clues()
        self.initialize_solution_map()
        self.initialize_clue_positions_mapping(n)
        self.skipped_blanks(n)
        self.coord_to_letter_mapping()
        self.initialize_all_clues()






In [103]:
import os
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Device config
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Path to fine-tuned T5 model directory inside final_code
MODEL_DIR = "fine_tuned_t5_crossword"

# Load tokenizer and model once
tokenizer = T5Tokenizer.from_pretrained(MODEL_DIR)
model = T5ForConditionalGeneration.from_pretrained(MODEL_DIR).to(device)
model.eval()


def generate_unique_completions(clue, length, num_completions=10, max_length=50):
    """
    Generate top candidate words using fine-tuned T5 model with beam search.
    """
    # Formulate prompt with task
    input_text = f"predict word from clue: {clue} $ length: {length}"
    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True).to(device)

    # Beam search generation
    outputs = model.generate(
        **inputs,
        max_length=max_length,
        num_beams=num_completions,
        num_return_sequences=num_completions,
        early_stopping=True
    )
    # Decode predictions
    completions = [tokenizer.decode(o, skip_special_tokens=True).strip() for o in outputs]
    # Ensure uniqueness preserving order
    unique = []
    for c in completions:
        if c not in unique:
            unique.append(c)
        if len(unique) >= num_completions:
            break
    #print(unique)
    return unique



# Gemini

In [96]:
import os
from google import genai
import time

def generate_completion(prompt, model_id="gemini-2.0-flash"):
    """Generate a single completion for the given prompt using Gemini 2.0 Flash."""
    client = genai.Client(api_key='AIzaSyDW4RyzxBTfJg9P8AEWMXzLGkfeQLDyFeU')
    response = client.models.generate_content(
        model=model_id,
        contents=prompt,
    )
    # Extract the first word from the response and format it
    completion = response.text.split()[0].strip().upper()
    return completion

def generate_unique_completions(prompt, length, num_completions=5):
    """Generate a list of unique completions matching the specified length."""
    unique_completions = []
    prompt_text= f'The crossword clue is {prompt}. The length of the answer is {length} characters. Generate the answer in all caps and with no spaces.'
    max_attempts=1
    attempts=0

    time.sleep(4)
    while len(unique_completions) < num_completions and attempts<max_attempts:
        attempts+=1
        completion = generate_completion(prompt_text)
        print(completion)
        print(length)

        unique_completions.append(completion)
    print(unique_completions)
    return unique_completions*5


# Example usage
clue = "Opposite of black"
length = 5
unique_words = generate_unique_completions(clue, length)
print(unique_words)

WHITE
5
['WHITE']
['WHITE', 'WHITE', 'WHITE', 'WHITE', 'WHITE']


In [114]:
import math
import numpy as np
import matplotlib.pyplot as plt
import os

def get_candidates(crossword):
    """
    Given a Crossword type object, create dictionary mapping clue to list of candidates
    """
    keys = list(crossword.across_clues.keys()) + list(crossword.down_clues.keys())
    clues = list(crossword.across_clues.values()) + list(crossword.down_clues.values())

    candidates = {key: [] for key in keys}

    # Generate candidates using T5 beam search
    for idx, key in enumerate(keys):
        clue = clues[idx]
        length = len(crossword.solution_dict[key])
        completions = generate_unique_completions(clue, length,
                                                  num_completions=5
                                                  )
        for comp in completions:
            candidates[key].append(comp.upper())
            #print(key, comp.upper())

    # we have a generated dictionary for candidates. Now modify candidates to be valid for grid entry

    def pad_or_truncate(word, length, pad_char='X'):
        if len(word) > length:
            return word[:length]
        elif len(word) < length:
            return word + pad_char * (length - len(word))
        else:
            return word
    def filter_nonalpha(word):
        s = ''
        for ch in word:
            if ch.isalpha():
                s += ch.upper()
        return s

    # make dictionary mapping clue to length of answer
    correct_len_dict = {}
    for key, value in crossword.solution_dict.items():
        correct_len_dict[key] = len(value)

    for key, value in candidates.items():
        mod_guesses = []
        correct_len = correct_len_dict[key]
        for guess in candidates[key]:
            guess = filter_nonalpha(guess)
            if len(guess) == correct_len:
                mod_guesses.append(guess)
            else:
                mod_guesses.append(pad_or_truncate(guess, correct_len, pad_char='X'))
        candidates[key] = mod_guesses

    return candidates


def get_confidence_ratings(candidates, crossword):
    """

    """
    confidence_ratings = {}

    keys = list(crossword.across_clues.keys()) + list(crossword.down_clues.keys())

    # GET RATINGS
    for key in keys:
        prompt = crossword.clues[key]
        candidate_answers = candidates.get(key, [])

        # Only call biencoder if there are candidate answers
        if candidate_answers:
            confidence_ratings[key] = biencoder(prompt, candidate_answers)

    return confidence_ratings

def convert_answer(cw, n):
    all_coord = []
    for i in range(n):
        for j in range(n):
            all_coord.append((i, j))

    truth_solution = {}
    for coord in all_coord:
        if coord in cw.null_squares:
            truth_solution[coord] = '$'
        else:
            for clue in cw.solution_dict:
                answer = cw.solution_dict[clue]
                squares = cw.clue_to_positions[clue]
                for i, sq in enumerate(squares):
                    truth_solution[sq] = answer[i]

    return truth_solution



def letter_accuracy(guess, real, cw):
        count = 0
        for coord in guess:
            if coord not in cw.null_squares:
                if guess[coord] == real[coord]:
                    count += 1
        return(count / len(guess))

def word_accuracy(solution, cw):
    words_correct = 0
    for clue in cw.clue_to_positions:
        coords = cw.clue_to_positions[clue]
        letters_in_word = 0
        answer = cw.solution_dict[clue]
        sol = {}
        for i, coord in enumerate(coords):
            sol[coord] = answer[i]
        for coord in sol:
            if sol[coord] == solution[coord]:
                letters_in_word += 1
        if letters_in_word == len(answer):
            words_correct += 1
    return words_correct / len(cw.solution_dict)

def extract_word_predictions(y_hat, clue_to_positions, clue_to_solution):
    words = []
    # construct dictionary words mapping clue to prediction
    for clue in clue_to_positions:
        word = ''
        coords = clue_to_positions[clue]
        for coord in coords:
            guessed_letter = y_hat[coord]
            word += guessed_letter
        words.append((clue_to_solution[clue], word))

    return words


def plot_grid(solution, crossword, n, title="Grid"):
    """
    Plot a crossword grid using matplotlib, with '#' for black squares and letters.
    """
    fig, ax = plt.subplots(figsize=(n/2, n/2))
    # Draw cells
    for i in range(n):
        for j in range(n):
            x, y = j, n - 1 - i  # invert y for plotting
            if (i, j) in crossword.null_squares:
                rect = plt.Rectangle((x, y), 1, 1, facecolor='black')
            else:
                rect = plt.Rectangle((x, y), 1, 1, edgecolor='black', facecolor='white')
                letter = solution.get((i, j), '')
                ax.text(x + 0.5, y + 0.5, letter, ha='center', va='center', fontsize=12)
            ax.add_patch(rect)

    # grid lines
    ax.set_xticks(np.arange(0, n+1, 1))
    ax.set_yticks(np.arange(0, n+1, 1))
    ax.grid(True, color='black', linewidth=1)

    # limits, aspect, and invert y for top-left origin
    ax.set_xlim(0, n)
    ax.set_ylim(0, n)
    ax.set_aspect('equal')
    #ax.invert_yaxis()

    ax.set_title(title)
    plt.tight_layout()
    return fig, ax


def solve(filepath, index=None):
    """
    Give a filepath for a JSON crossword, solve it
    """
    print("Reading in crossword JSON ...")
    data = read_json(filepath)
    # create Crossword type using dictionary data that we loaded from json file
    n = int(math.sqrt(len(data['gridnums'])))
    crossword = Crossword(data)

    crossword.initialize(n)

    # load candidates and get ratings
    print("Using fine-tuned model to generate candidates ...")
    candidates = get_candidates(crossword)
    print("Candidates generates! Now running biencoder ...")
    confidence_ratings = get_confidence_ratings(candidates, crossword)

    variables = {}
    factors = {}

    print("Creating loopy belief propagation data structures ...")
    for row in range(n):
        for col in range(n):
            position = (row, col)
            if position not in crossword.null_squares:
                variables[position] = VariableNode(position)

    for clue, positions in crossword.clue_to_positions.items():
        #print(clue)
        #print(positions)
        length = len(position)
        factors[clue] = FactorNode(clue, length, candidates[clue], confidence_ratings[clue], BIGRAMS_DICT)

        for position in positions:
            if position not in crossword.null_squares:
                factors[clue].add_neighbor(variables[position])
                variables[position].add_neighbor(factors[clue])

    # Create the crossword object and run belief propagation
    print("Running loopy belief propagation ...")
    crossword_solver = CrosswordSolvingGrid(variables, factors)
    crossword_solver.run_belief_propagation(num_iterations=25)
    solution = crossword_solver.get_solution()
    print("Solution generated! Calculating accuracies ...")
    # convert true solution into form of our guess to compare
    truth_solution = convert_answer(crossword, n)
    # print out the solution
    letter_acc = letter_accuracy(solution, truth_solution, crossword)
    word_acc = word_accuracy(solution, crossword)
    word_pred = extract_word_predictions(solution, crossword.clue_to_positions, crossword.solution_dict) # get dict of word predictions

    print(f"Letter Accuracy: {letter_acc}")

    print(f"Word Accuracy: {word_acc}")

    print(f"Solution: {solution}")

    # Print predicted grid
    print("Predicted Grid:")
    for i in range(n):
        row_letters = []
        for j in range(n):
            if (i, j) in crossword.null_squares:
                row_letters.append('#')
            else:
                row_letters.append(solution[(i, j)])
        print(' '.join(row_letters))


    # Ensure plots directory exists
    plots_dir = "plots"
    os.makedirs(plots_dir, exist_ok=True)

    # Generate & save predicted grid
    fig1, _ = plot_grid(solution, crossword, n, title="Predicted Grid")
    fname1 = f"predicted_{index}.png" if index is not None else "predicted.png"
    fig1.savefig(os.path.join(plots_dir, fname1))
    plt.close(fig1)

    return letter_acc, word_acc, solution, word_pred





In [98]:
import os
import pandas as pd
#from Solver import solve

print(os.getcwd())

if __name__ == "__main__":
    letter_acc, word_acc, solution, word_pred = solve("merged_puzzle3.json", index=-99)




/content
Reading in crossword JSON ...
Using fine-tuned model to generate candidates ...
RATPACK
7
['RATPACK']
1A RATPACK
1A RATPACK
1A RATPACK
1A RATPACK
1A RATPACK
GALAGA
6
['GALAGA']
8A GALAGA
8A GALAGA
8A GALAGA
8A GALAGA
8A GALAGA
TURKISH
8
['TURKISH']
14A TURKISH
14A TURKISH
14A TURKISH
14A TURKISH
14A TURKISH
YESYES
6
['YESYES']
16A YESYES
16A YESYES
16A YESYES
16A YESYES
16A YESYES
CANBERRA
8
['CANBERRA']
17A CANBERRA
17A CANBERRA
17A CANBERRA
17A CANBERRA
17A CANBERRA
ROUNDED
6
['ROUNDED']
18A ROUNDED
18A ROUNDED
18A ROUNDED
18A ROUNDED
18A ROUNDED
NUTS
4
['NUTS']
19A NUTS
19A NUTS
19A NUTS
19A NUTS
19A NUTS
OLGA
4
['OLGA']
20A OLGA
20A OLGA
20A OLGA
20A OLGA
20A OLGA
PLAGUE
5
['PLAGUE']
22A PLAGUE
22A PLAGUE
22A PLAGUE
22A PLAGUE
22A PLAGUE
AVIAN
5
['AVIAN']
23A AVIAN
23A AVIAN
23A AVIAN
23A AVIAN
23A AVIAN
WOOF
4
['WOOF']
25A WOOF
25A WOOF
25A WOOF
25A WOOF
25A WOOF
PENTA
4
['PENTA']
27A PENTA
27A PENTA
27A PENTA
27A PENTA
27A PENTA
OWL
3
['OWL']
28A OWL
28A OWL
28A OWL
28A 

In [115]:
import os
import pandas as pd
#from Solver import solve

print(os.getcwd())

if __name__ == "__main__":
    letter_acc, word_acc, solution, word_pred = solve("merged_puzzle3.json", index=-99)




/content
Reading in crossword JSON ...
Using fine-tuned model to generate candidates ...
Candidates generates! Now running biencoder ...
Creating loopy belief propagation data structures ...
Running loopy belief propagation ...
Solution generated! Calculating accuracies ...
Letter Accuracy: 0.11794871794871795
Word Accuracy: 0.014285714285714285
Solution: {(0, 0): 'A', (0, 1): 'Y', (0, 2): 'O', (0, 3): 'T', (0, 4): 'A', (0, 5): 'H', (0, 6): 'O', (0, 9): 'A', (0, 10): 'R', (0, 11): 'M', (0, 12): 'O', (0, 13): 'R', (0, 14): 'Y', (1, 0): 'A', (1, 1): 'E', (1, 2): 'N', (1, 3): 'E', (1, 4): 'V', (1, 5): 'A', (1, 6): 'U', (1, 7): 'P', (1, 9): 'Y', (1, 10): 'E', (1, 11): 'S', (1, 12): 'N', (1, 13): 'O', (1, 14): 'T', (2, 0): 'A', (2, 1): 'S', (2, 2): 'E', (2, 3): 'X', (2, 4): 'A', (2, 5): 'H', (2, 6): 'T', (2, 7): 'E', (2, 9): 'T', (2, 10): 'E', (2, 11): 'A', (2, 12): 'L', (2, 13): 'E', (2, 14): 'D', (3, 0): 'A', (3, 1): 'Y', (3, 2): 'D', (3, 3): 'A', (3, 5): 'A', (3, 6): 'S', (3, 7): 'R', (3