<a href="https://colab.research.google.com/github/kaledai069/Crossword-Generator/blob/master/Backtracking_Feasible_%26_Infeasible_case_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Important Updates

In [127]:
import copy
import random
import heapq
import time
import pandas as pd
import requests
import json
import os

from PIL import Image
from collections import deque
from queue import PriorityQueue
from pprint import pprint
from tqdm import tqdm

### Crossword Variable & Back-tracking with Arc-Consistency

In [117]:

class Variable():
    """A single word slot of the crossword grid.

    A slot is identified by the cell it starts at (`i` = row, `j` = column),
    the direction it runs in and the number of cells it covers. Two slots
    with the same four values compare equal and hash identically, so slots
    can be used as dictionary keys and set members.
    """

    ACROSS = "across"
    DOWN = "down"

    def __init__(self, i, j, direction, length):
        """Create a new variable with starting point, direction, and length."""
        self.i = i
        self.j = j
        self.direction = direction
        self.length = length
        # Cells covered by the slot, in reading order: the row index grows
        # for DOWN slots and the column index grows for ACROSS slots.
        self.cells = [
            (self.i + (k if self.direction == Variable.DOWN else 0),
             self.j + (k if self.direction == Variable.ACROSS else 0))
            for k in range(self.length)
        ]

    def __hash__(self):
        return hash((self.i, self.j, self.direction, self.length))

    def __eq__(self, other):
        # Comparing against an unrelated object (None, a string, ...) must
        # yield "not equal" instead of raising AttributeError.
        if not isinstance(other, Variable):
            return NotImplemented
        return (
            (self.i == other.i) and
            (self.j == other.j) and
            (self.direction == other.direction) and
            (self.length == other.length)
        )

    def __str__(self):
        return f"({self.i}, {self.j}) {self.direction} : {self.length}"

    def __repr__(self):
        direction = repr(self.direction)
        return f"Variable({self.i}, {self.j}, {direction}, {self.length})"

class Crossword():
    """Grid structure, vocabulary, word slots and their pairwise overlaps.

    Attributes:
        height, width: number of rows / columns of the grid.
        structure: 2D list of booleans; True marks a fillable cell.
        words: the vocabulary, upper-cased. A set when loaded from a file,
            a list (in the caller's order) when passed in directly.
        variables: set of `Variable` slots of length > 1 found in the grid.
        overlaps: dict mapping every ordered pair of distinct slots to
            None (no shared cell) or (i, j), meaning the first slot's i-th
            character shares a cell with the second slot's j-th character.
    """

    def __init__(self, grid, words_file, file_path = True):
        """Build the crossword from a grid and a vocabulary.

        Args:
            grid: list of rows; a cell holding the empty string '' is
                blocked, any other value marks a fillable cell. The width
                is taken from the first row.
            words_file: path of a text file with one word per line when
                `file_path` is true, otherwise an iterable of words.
            file_path: whether `words_file` is a path (default) or the
                words themselves.
        """
        self.height = len(grid) # the number of rows in the grid
        # An empty grid is a degenerate puzzle without slots, not an error.
        self.width = len(grid[0]) if self.height else 0 # the number of columns in the grid

        self.structure = [
            [grid[i][j] != '' for j in range(self.width)]
            for i in range(self.height)
        ]

        if not file_path:
            self.words = [word.upper() for word in words_file]
        else:
            with open(words_file) as f:
                lines = f.read().upper().splitlines()
            # A set removes duplicates (and makes any prior ordering
            # irrelevant). Blank lines and stray surrounding whitespace are
            # dropped so they can never end up inside the grid.
            self.words = {line.strip() for line in lines} - {''}

        # Determine variable set
        self.variables = set()

        for i in range(self.height):
            for j in range(self.width):
                if not self.structure[i][j]:
                    continue

                # Vertical words: the cell starts one when nothing fillable
                # lies directly above it.
                if i == 0 or not self.structure[i - 1][j]:
                    length = 1
                    while i + length < self.height and self.structure[i + length][j]:
                        length += 1
                    if length > 1:
                        self.variables.add(Variable(
                            i=i, j=j,
                            direction=Variable.DOWN,
                            length=length
                        ))

                # Horizontal words: same test against the cell to the left.
                if j == 0 or not self.structure[i][j - 1]:
                    length = 1
                    while j + length < self.width and self.structure[i][j + length]:
                        length += 1
                    if length > 1:
                        self.variables.add(Variable(
                            i=i, j=j,
                            direction=Variable.ACROSS,
                            length=length
                        ))

        # Compute overlaps for each word
        # For any pair of variables v1, v2, their overlap is either:
        #    None, if the two variables do not overlap; or
        #    (i, j), where v1's ith character overlaps v2's jth character
        self.overlaps = dict()
        # Adjacency is recorded alongside the overlaps so that neighbors()
        # does not have to rescan every variable on each call.
        self._neighbors = {var: set() for var in self.variables}
        for v1 in self.variables:
            position_in_v1 = {cell: pos for pos, cell in enumerate(v1.cells)}
            for v2 in self.variables:
                if v1 == v2:
                    continue
                shared = next(
                    (cell for cell in v2.cells if cell in position_in_v1),
                    None
                )
                if shared is None:
                    self.overlaps[v1, v2] = None
                else:
                    self.overlaps[v1, v2] = (
                        position_in_v1[shared],
                        v2.cells.index(shared)
                    )
                    self._neighbors[v1].add(v2)

    def neighbors(self, var):
        """Given a variable, return set of overlapping variables.

        A fresh set is returned on every call, so callers may modify it.
        Raises KeyError for a variable that is not part of this crossword.
        """
        return set(self._neighbors[var])

class CrosswordCreator():
    """Fill a crossword by AC-3 arc consistency followed by backtracking.

    Attributes:
        crossword: the puzzle being filled (provides `variables`, `words`,
            `overlaps`, `neighbors`, `width`, `height`, `structure`).
        domains: dict mapping each variable to the set of candidate words.
        ASSIGNMENT_COUNT: number of `assignment_complete` calls so far,
            i.e. the number of search nodes visited.
        states: every (partial) assignment visited, in visiting order.
    """

    def __init__(self, crossword):
        """
        Create new CSP crossword generator.
        """
        self.crossword = crossword
        self.ASSIGNMENT_COUNT = 0
        self.states = []
        # setting up the domains for each of the variables
        self.domains = {
            var: self.get_required_length_answers(var.length)
            for var in self.crossword.variables
        }

    # enforcing the node consistency here
    def get_required_length_answers(self, ans_length):
        """Return the set of vocabulary words that are `ans_length` long.

        The result is a set, so no ordering (and no shuffling) is involved.
        """
        return {word for word in self.crossword.words if len(word) == ans_length}

    def letter_grid(self, assignment):
        """
        Return 2D array representing a given assignment.

        Cells not covered by any assigned word hold None.
        """
        letters = [
            [None for _ in range(self.crossword.width)]
            for _ in range(self.crossword.height)
        ]
        for variable, word in assignment.items():
            direction = variable.direction
            for k in range(len(word)):
                i = variable.i + (k if direction == Variable.DOWN else 0)
                j = variable.j + (k if direction == Variable.ACROSS else 0)
                letters[i][j] = word[k]
        return letters

    def print(self, assignment):
        """
        Print crossword assignment to the terminal.

        Unfilled open cells are printed as a space, blocked cells as "██".
        """
        letters = self.letter_grid(assignment)
        for i in range(self.crossword.height):
            for j in range(self.crossword.width):
                if self.crossword.structure[i][j]:
                    print(letters[i][j] or " ", end="")
                else:
                    print("██", end="")
            print()

    def save(self, assignment, filename, font_path = "/content/Roboto-Regular.ttf"):
        """
        Save crossword assignment to an image file.

        Args:
            assignment: mapping from variables to words (may be partial).
            filename: path of the image to write.
            font_path: TrueType font used for the letters; the default is
                the location previously hard-coded for this notebook.
        """
        from PIL import Image, ImageDraw, ImageFont
        cell_size = 100
        cell_border = 2
        interior_size = cell_size - 2 * cell_border
        letters = self.letter_grid(assignment)

        # Create a blank canvas
        img = Image.new(
            "RGBA",
            (self.crossword.width * cell_size,
             self.crossword.height * cell_size),
            "black"
        )
        font = ImageFont.truetype(font_path, 80)
        draw = ImageDraw.Draw(img)

        for i in range(self.crossword.height):
            for j in range(self.crossword.width):

                rect = [
                    (j * cell_size + cell_border,
                     i * cell_size + cell_border),
                    ((j + 1) * cell_size - cell_border,
                     (i + 1) * cell_size - cell_border)
                ]
                if self.crossword.structure[i][j]:
                    draw.rectangle(rect, fill="white")
                    if letters[i][j]:
                        _, _, w, h = draw.textbbox((0, 0), letters[i][j], font=font)
                        draw.text(
                            (rect[0][0] + ((interior_size - w) / 2),
                             rect[0][1] + ((interior_size - h) / 2) - 10),
                            letters[i][j], fill="black", font=font
                        )
        img.save(filename)

    ### here starts the main solving category
    def solve(self):
        """
        Enforce node and arc consistency, and then solve the CSP.

        Returns a complete assignment, or None when the puzzle has no
        solution.
        """
        # Node consistency is already handled when the domains are built.
        if not self.ac3():
            # Some domain was emptied: no assignment can exist, so there is
            # nothing for the backtracking search to explore.
            return None
        return self.backtrack(dict())

    def enforce_node_consistency(self):
        """
        Update `self.domains` such that each variable is node-consistent.
        (Remove any values that are inconsistent with a variable's unary
         constraints; in this case, the length of the word.)
        """
        for variable in self.crossword.variables:
            self.domains[variable] = {
                word for word in self.domains[variable]
                if len(word) == variable.length
            }

    def revise(self, x, y):
        """
            Make variable `x` arc consistent with variable `y`.
            To do so, remove values from `self.domains[x]` for which there is no
            possible corresponding value for `y` in `self.domains[y]`.

            Return True if a revision was made to the domain of `x`; return
            False if no revision was made.
        """
        overlap = self.crossword.overlaps[x, y]
        if overlap is None:
            # Slots that share no cell do not constrain each other.
            return False
        # Letters that `y` can still place on the shared cell.
        y_chars = {word[overlap[1]] for word in self.domains[y]}
        size_before = len(self.domains[x])
        self.domains[x] = {word for word in self.domains[x] if word[overlap[0]] in y_chars}
        # A revision happened exactly when x lost at least one word; the
        # size of y's domain says nothing about that.
        return len(self.domains[x]) < size_before

    def ac3(self, arcs=None):
        """
        Update `self.domains` such that each variable is arc consistent.

        `arcs` is the initial collection of (x, y) arcs to process; when it
        is None, every arc between overlapping variables is used.

        Return False as soon as a domain becomes empty, True otherwise.
        """
        if arcs is None:
            arcs = deque([(v1, v2) for v1 in self.crossword.variables for v2 in self.crossword.neighbors(v1)])
        else:
            arcs = deque(arcs)

        while arcs:
            x, y = arcs.popleft()  # Efficient pop from the left
            if self.revise(x, y):
                if not self.domains[x]:
                    return False
                # x shrank, so every other neighbour has to be re-checked
                # against it.
                for z in self.crossword.neighbors(x) - {y}:
                    arcs.append((z, x))
        return True

    def assignment_complete(self, assignment):
        """
        Return True if `assignment` is complete (i.e., assigns a value to each
        crossword variable); return False otherwise.

        Side effects: increments `ASSIGNMENT_COUNT` and appends `assignment`
        to `states`, which records every search node visited.
        """
        self.ASSIGNMENT_COUNT += 1
        self.states.append(assignment)
        complete = True
        vars_in_assignment = set(var for var in assignment)
        # Checking if all vars in the crossword has been assigned
        if vars_in_assignment != self.crossword.variables:
            complete = False
        for var in assignment:
            # making sure no var is empty
            assert isinstance(assignment[var], str)
            if not assignment[var]:
                complete = False
        return complete

    def consistent(self, assignment):
        """
          Return True if `assignment` is consistent (i.e., words fit in crossword
          puzzle without conflicting characters); return False otherwise.

          A word may be used only once and must match its slot's length.
        """
        values = set()
        for var, word in assignment.items():
            if word in values or len(word) != var.length:
                return False
            values.add(word)
            for neighbor in self.crossword.neighbors(var):
                overlap = self.crossword.overlaps[var, neighbor]
                if neighbor in assignment and assignment[var][overlap[0]] != assignment[neighbor][overlap[1]]:
                    return False
        return True

    def order_domain_values(self, var, assignment):
        """
        Return a list of values in the domain of `var`, in order by
        the number of values they rule out for neighboring variables.
        The first value in the list should be the one that rules out
        the fewest values among the neighbors of `var`.

        Ties are broken alphabetically so the order is deterministic.
        """
        penalties = dict.fromkeys(self.domains[var], 0)
        for neighbor in self.crossword.neighbors(var):
            if neighbor in assignment:
                continue
            own_idx, other_idx = self.crossword.overlaps[var, neighbor]
            # Count the neighbour's candidates per letter on the shared
            # cell once, instead of comparing every pair of words.
            letter_counts = {}
            for candidate in self.domains[neighbor]:
                letter = candidate[other_idx]
                letter_counts[letter] = letter_counts.get(letter, 0) + 1
            neighbor_size = len(self.domains[neighbor])
            for value in penalties:
                # Every neighbour word with a different letter is ruled out.
                penalties[value] += neighbor_size - letter_counts.get(value[own_idx], 0)

        return sorted(penalties, key=lambda value: (penalties[value], value))

    def select_unassigned_variable(self, assignment):
        """
        Ordering:
          Return an unassigned variable not already part of `assignment`.
          Choose the variable with the minimum number of remaining values - MRV
          in its domain. If there is a tie, choose the variable with the highest
          degree. If there is a tie, any of the tied variables are acceptable
          return values.

          Returns None when every variable is already assigned.
        """
        unassigned = [
            var for var in self.crossword.variables
            if var not in assignment
        ]
        if not unassigned:
            return None
        smallest = min(len(self.domains[var]) for var in unassigned)
        tied = [var for var in unassigned if len(self.domains[var]) == smallest]
        if len(tied) == 1:
            return tied[0]
        # The degree is only computed for the variables that are tied.
        return max(tied, key=lambda var: len(self.crossword.neighbors(var)))

    def backtrack(self, assignment):
        """
        Using Backtracking Search, take as input a partial assignment for the
        crossword and return a complete assignment if possible to do so.

        `assignment` is a mapping from variables (keys) to words (values).

        If no assignment is possible, return None.
        """
        if self.assignment_complete(assignment):
            return assignment  # base case

        var = self.select_unassigned_variable(assignment)

        # Values are tried in the domain set's own iteration order;
        # order_domain_values(var, assignment) is available when a
        # least-constraining-value ordering is wanted instead.
        for value in self.domains[var]:
            new_assignment = assignment.copy()  # or dict(assignment)
            new_assignment[var] = value
            if self.consistent(new_assignment):
                result = self.backtrack(new_assignment)
                if result is not None:
                    return result
        return None


In [18]:
# Fill a completely open 4x4 grid from the full answer list and time the run.
word_list_path = '/content/all_answers.txt'

# 4x4 grid: every cell holds a non-empty marker, so every cell is fillable
grid = [['A'] * 4 for _ in range(4)]

start_t = time.time()
crossword = Crossword(grid, words_file = word_list_path)
creator = CrosswordCreator(crossword = crossword)
assignment = creator.solve()
end_t = time.time()

elapsed = end_t - start_t
print("Total time taken: ", elapsed)

Total time taken:  7.575305223464966


In [19]:
# solve() returns None when the grid cannot be filled; rendering None would
# crash inside letter_grid, so only save an actual solution.
if assignment is not None:
    creator.save(assignment, '/content/output_4x4_3.png')
else:
    print("No solution found; nothing to save.")

In [20]:
# Print every solved word that also appears in the raw answer list.
with open("/content/all_answers.txt") as f:  # closes the file handle
    words = f.read().splitlines()
known_words = set(words)  # set lookup instead of scanning the whole list per answer
for ans in assignment.values():
    if ans.lower() in known_words:
        print(ans)

GOLD
LOOK
AORA
ILRE
ELIM
EGAD
DKEY
MDAY


Let me test a hypothesis here: if the gold answers are included in the word list that forms the variables' domains, backtracking should find those answers easily.

<b>1. Get clues and their gold answers from NYT or a similar source.</b><br>
<b>2. Build a word dictionary of about 5000 words that includes those answers.</b><br>
<b>3. Test whether backtracking finds the answers.</b>

In [82]:
def get_grid_answers(dateStr, timeout = 30):
    """Fetch a puzzle from xwordinfo.com and return its answers and grid layout.

    Args:
        dateStr: puzzle date formatted as mm/dd/yyyy.
        timeout: seconds to wait for the server before giving up.

    Returns:
        A pair `(answer_list, grid_structure)`. `answer_list` holds the
        across answers followed by the down answers. `grid_structure` is a
        list of rows in which '' marks a blocked cell ('.' in the source
        data) and '#' marks a fillable cell.
        When the request fails, a message is printed and `(None, None)` is
        returned, so the result can always be unpacked into two names.
    """
    headers = {
        'Referer': 'https://www.xwordinfo.com/JSON/'
    }
    # mm/dd/yyyy
    url = 'https://www.xwordinfo.com/JSON/Data.ashx?date=' + dateStr

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Request failed: {exc}")
        return None, None

    if response.status_code != 200:
        print(f"Request failed with status code {response.status_code}.")
        return None, None

    jsonText = response.content.decode('utf-8')
    try:
        grid_data = json.loads(jsonText)
    except json.JSONDecodeError:
        # Fall back to the previous behaviour for single-quoted payloads.
        # Doing this replacement unconditionally would corrupt valid JSON
        # whose strings contain an apostrophe.
        grid_data = json.loads(jsonText.replace("'", '"'))

    answer_list = []
    answer_list.extend(grid_data['answers']['across'])
    answer_list.extend(grid_data['answers']['down'])

    # The flat cell list is stored row by row, so it has to be cut every
    # `cols` cells; using the row count here breaks non-square puzzles.
    size = grid_data['size']
    cols = size.get('cols', size['rows'])
    initial_grid = grid_data['grid']

    grid_structure = [
        ['' if element == '.' else '#' for element in initial_grid[start:start + cols]]
        for start in range(0, len(initial_grid), cols)
    ]
    return answer_list, grid_structure

<b>Let's prepare a custom answer list</b>

In [58]:
# Load the raw answer list into a frame with a length column for sampling.
with open("/content/all_answers.txt") as f:  # closes the file handle
    words = f.read().splitlines()
words_df = pd.DataFrame(words, columns = ['answer'])
words_df['answer_len'] = words_df['answer'].str.len()
words_df.head(5)

Unnamed: 0,answer,answer_len
0,taxpro,6
1,warm,4
2,decaf,5
3,mussel,6
4,aaamap,6


In [57]:
def get_answers_by_len(answers_df, answer_len, no_samples = 100):
    """Return up to `no_samples` randomly chosen answers of a given length.

    Args:
        answers_df: frame with an 'answer' column and an 'answer_len' column.
        answer_len: required answer length.
        no_samples: maximum number of answers to return.

    Returns:
        A list of answers in random order. The list is empty when no answer
        has the requested length (a group lookup would raise KeyError).
    """
    has_len = answers_df['answer_len'] == answer_len
    out_answers_list = answers_df.loc[has_len, 'answer'].tolist()
    random.shuffle(out_answers_list)
    return out_answers_list[:no_samples]

In [217]:
# Build a small vocabulary: random distractors of every needed length plus
# the puzzle's gold answers.
date_str = "1/14/2024"
fetched = get_grid_answers(date_str)
# A failed fetch must not break the unpacking; the check below handles it.
answer_list, grid = fetched if fetched is not None else (None, None)

if answer_list is not None:
    answer_len_ = {len(answer) for answer in answer_list}

    custom_answer_list = []
    for ans_len in answer_len_:
        custom_answer_list.extend(get_answers_by_len(words_df, ans_len, 85))

    print('Length of Custom Answer List before addition: ', len(custom_answer_list))

    custom_answer_list.extend(answer_list)
    print("Length of Custom Answer List after addition: ", len(custom_answer_list))

Length of Custom Answer List before addition:  850
Length of Custom Answer List after addition:  986


Let's now test the hypothesis

In [None]:
# Solve the fetched grid with the custom vocabulary (words passed directly,
# not as a file path) and report the wall-clock time.
start_t = time.time()
crossword = Crossword(grid, custom_answer_list, file_path = False)
creator = CrosswordCreator(crossword = crossword)
assignment = creator.solve()
end_t = time.time()

elapsed = end_t - start_t
print("Total time taken: ", elapsed)
print(type(assignment))

In [165]:
# How many of the puzzle's gold answers already exist in the raw answer list?
with open("/content/all_answers.txt") as f:  # closes the file handle
    words = f.read().splitlines()
print(len(answer_list))

known_words = set(words)  # set lookup instead of scanning the whole list per answer
counter = sum(1 for ans in answer_list if ans.lower() in known_words)
counter

66


66

In [133]:
# Render every search state visited by `creator` as one numbered frame.
# NOTE(review): the folder name says 4x4, so `creator` is presumably expected
# to be the 4x4 run here - confirm before running the cells top to bottom.
frames_dir = "/content/Images 4x4"
os.makedirs(frames_dir, exist_ok=True)  # saving fails if the folder is missing
for i, state in tqdm(enumerate(creator.states), total = len(creator.states), ncols = 70):
    creator.save(state, f"{frames_dir}/output_4x4_{i}.png")

11it [00:00, 38.46it/s]


In [102]:
# solve() returns None when the grid cannot be filled; rendering None would
# crash inside letter_grid, so only save an actual solution.
if assignment is not None:
    creator.save(assignment, '/content/output_21x21_1.png')
else:
    print("No solution found; nothing to save.")

<b>Let's make a GIF out of this</b>

In [135]:
def sort_by_last_digit(filename):
    """Sort key: the integer between the last underscore and the next dot.

    For example "output_4x4_10.png" maps to 10. Raises ValueError when that
    part of the name is not an integer.
    """
    tail = filename.rsplit('_', 1)[-1]
    number_text, _, _ = tail.partition('.')
    return int(number_text)

def create_gif(images_folder, output_gif_path):
    """Combine the numbered images of a folder into one looping GIF.

    Args:
        images_folder: folder holding the frames; files ending in .png,
            .jpg, .jpeg or .gif are used, ordered by the number that
            `sort_by_last_digit` extracts from their names (ties broken
            alphabetically).
        output_gif_path: path of the GIF to write.

    Raises:
        ValueError: if the folder contains no image file.
    """
    from contextlib import ExitStack

    image_files = [f for f in os.listdir(images_folder) if f.endswith(('.png', '.jpg', '.jpeg', '.gif'))]
    if not image_files:
        # Fail with a clear message instead of an IndexError further down.
        raise ValueError(f"No image files found in {images_folder!r}")
    image_files.sort(key = lambda name: (sort_by_last_digit(name), name))

    # Every frame stays open until the GIF has been written and is closed
    # afterwards, so no file handle is leaked.
    with ExitStack() as stack:
        images = []
        for image_file in tqdm(image_files, ncols = 50):
            image_path = os.path.join(images_folder, image_file)
            images.append(stack.enter_context(Image.open(image_path)))

        images[0].save(output_gif_path, save_all=True, append_images=images[1:], duration = 400, loop = 0)

# Turn the saved 4x4 search frames into an animation.
output_gif_path = "/content/output 4x4.gif"
images_folder = "/content/Images 4x4"

create_gif(images_folder = images_folder, output_gif_path = output_gif_path)

100%|███████████| 11/11 [00:00<00:00, 2634.01it/s]
