In [1]:
import scipy
from sklearn.neighbors import KDTree
from scipy.spatial import distance 
import numpy as np
from numpy.random import default_rng
import pandas as pd
from tqdm import tqdm
from collections import defaultdict
import random
import copy
import functools

In [2]:
from nltk.corpus import words
from nltk.corpus import wordnet 

ModuleNotFoundError: No module named 'nltk'

In [None]:
# Materialise the WordNet vocabulary as a list.
# NOTE: this rebinds `words`, hiding the `words` corpus imported from nltk.corpus.
words = [*wordnet.words()]

def is_word(word):
    """Return True when `word` is usable as a crossword entry.

    A usable entry is non-empty and made up of letters only. Hyphenated
    words, multi-word entries (spaces/underscores), abbreviations with dots,
    and anything containing digits or punctuation are rejected.

    `str.isalpha()` is used instead of a hand-written blacklist so that
    characters the blacklist forgot (for example '/', '+', '?', ':') and the
    empty string are rejected as well.
    """
    return word.isalpha()
print(len(words))

# Keep only the entries that pass is_word().
words = list(filter(is_word, words))
print(len(words))
# Fold everything to lower case and de-duplicate.
# (Open question from the author: maybe place names should be dropped instead.)
words = set(map(str.lower, words))
print(len(words))

In [None]:
# Index the vocabulary by word length: length -> list of words of that length.
# A defaultdict is used, so looking up an unseen length yields an empty list.
words_by_length = defaultdict(list)

for w in words:
    bucket = words_by_length[len(w)]
    bucket.append(w)

In [None]:
@functools.lru_cache(maxsize=None)
def words_from_constraint(length, position, letter):
    """Return the set of `length`-letter words with `letter` at index `position`.

    Results are memoized per (length, position, letter), so repeated calls
    return the same set object; callers should treat it as read-only.
    Candidates are read from the module-level `words_by_length` index.
    """
    candidates = words_by_length[length]
    return set(filter(lambda word: word[position] == letter, candidates))

def words_from_constraints(length, constraints):
    """Return the `length`-letter words that satisfy every constraint.

    `constraints` is a sequence of (position, letter) pairs. Each pair is
    looked up through words_from_constraint() and the per-pair sets are
    intersected, giving a new set.

    With no constraints, the `words_by_length[length]` bucket itself is
    returned (a list, not a copy), so callers must not mutate the result.
    """
    if constraints:
        per_constraint = (
            words_from_constraint(length, position, letter)
            for position, letter in constraints
        )
        return set.intersection(*per_constraint)

    return words_by_length[length]


# Quick check: five-letter words with 'a' at index 0 and 'b' at index 3.
words_from_constraints(5, [(0, 'a'), (3, 'b')])

In [None]:
# Enumerate every word slot in the grid.
# A slot is stored simply as the ordered list of (x, y) cells it covers
# (storing a start position plus a direction would also work).

size_x = 5  # Width
size_y = 5  # Height

# Cell coordinates:
# | 0,0 | 1,0 | ..
# | 0,1 | 1,1 | ..
#   ..    ..

# NOTE: this rebinds `words`; from here on it holds grid slots, not
# dictionary words.
# One vertical slot per column...
words = [[(i, j) for j in range(size_y)] for i in range(size_x)]
# ...followed by one horizontal slot per row.
words.extend([(i, j) for i in range(size_x)] for j in range(size_y))

# Randomise the order in which the slots are stored.
random.shuffle(words)

words

In [None]:
def nice_print(grid):
    """Print `grid` (a list of rows of single-character strings) as an ASCII table.

    Every row is framed with '|' separators and followed by a '+-+-...+'
    rule; one rule is printed above the first row and a blank line after
    the table.
    """
    def rule(cells):
        # Horizontal border sized to the given row.
        return '+-' * len(cells) + '+'

    print(rule(grid[0]))
    for cells in grid:
        print(f"|{'|'.join(cells)}|")
        print(rule(cells))

    print()

In [None]:
# Blank grid: size_y rows of size_x cells, with ' ' marking an unfilled cell.
# NOTE: Accessed with grid[y][x]
grid = [[' '] * size_x for _ in range(size_y)]

# Counts how many times dfs() is entered at each depth.
node_tracker = defaultdict(int)

def dfs(depth):
    """Depth-first search that fills the grid one slot at a time.

    Slot `words[depth]` is filled with every dictionary word compatible with
    the letters already on the grid, recursing into the next slot after each
    placement. Once every slot is filled, a deep copy of the grid is yielded.

    Reads and mutates the module-level `grid`, and increments
    `node_tracker[depth]` on every call. The cells written by a call are
    blanked again before it moves on, including when the generator is closed
    early, so an abandoned search does not leave letters on the shared grid.

    Args:
        depth: index into `words` of the slot to fill next.

    Yields:
        Completed grids (lists of rows), each an independent deep copy.
    """
    node_tracker[depth] += 1

    if depth >= len(words):
        yield copy.deepcopy(grid)
        return

    slot = words[depth]

    # Build constraints for this word.
    new_letters = []  # (index in word, x, y) for cells this slot still has to fill
    constraints = []  # (index in word, letter) for cells already fixed by crossing slots
    for ix, (i, j) in enumerate(slot):
        c = grid[j][i]
        if c == ' ':
            new_letters.append((ix, i, j))
        else:
            constraints.append((ix, c))

    # Find words. The length comes from the slot itself rather than from a
    # leaked loop variable, so an empty slot no longer raises.
    possible_words = words_from_constraints(len(slot), constraints)

    for w in possible_words:
        # Insert word into grid (only the cells that were blank).
        for ix, i, j in new_letters:
            grid[j][i] = w[ix]

        try:
            # Recurse
            yield from dfs(depth + 1)
        finally:
            # Remove word from grid, even if the consumer stopped iterating.
            for _, i, j in new_letters:
                grid[j][i] = ' '
      
# Print up to 100 solutions. zip() ends the loop quietly if the search runs
# out of solutions first, instead of next() raising StopIteration.
x = dfs(0)
for _, solution in zip(range(100), x):
    nice_print(solution)

In [None]:
# Show how many times dfs() was entered at each depth.
node_tracker  

In [None]:
# +-+-+-+-+-+
# |h|a|r|s|h|
# +-+-+-+-+-+
# |a|m|a|t|i|
# +-+-+-+-+-+
# |l|i|d|a|r|
# +-+-+-+-+-+
# |a|g|i|l|e|
# +-+-+-+-+-+
# |b|o|x|e|r|
# +-+-+-+-+-+