In [3]:
import numpy as np
import pandas as pd
from pathlib import Path
from collections import defaultdict
import copy

In [4]:
dict_path = Path(Path.cwd(),'repos/word_squares/top10k.txt')
words = Path(dict_path).open('r').read().split('\n')
words = [word.lower() for word in words]
print(f'{len(words)} | {words[:10]}')

9999 | ['you', 'i', 'to', 'the', 'and', 'that', 'of', 'me', 'in', 'this']


In [5]:
def add_word_to_tree(tree, word):
    if len(word) > 0:
        try:
            sub_tree = tree[word[0]]
        except:
            sub_tree = defaultdict(lambda: defaultdict())
        tree[word[0]] = add_word_to_tree(sub_tree, word[1:])
        return tree
    else:
        return defaultdict(lambda: defaultdict())


In [6]:
char_tree = defaultdict(lambda: defaultdict())

for word in words:
    char_tree = add_word_to_tree(char_tree, word)


In [7]:
char_tree['t']['h']['i']

defaultdict(None,
            {'s': defaultdict(<function __main__.add_word_to_tree.<locals>.<lambda>()>,
                         {}),
             'n': defaultdict(<function __main__.add_word_to_tree.<locals>.<lambda>()>,
                         {'k': defaultdict(None,
                                      {'i': defaultdict(<function __main__.add_word_to_tree.<locals>.<lambda>()>,
                                                   {'n': defaultdict(None,
                                                                {'g': defaultdict(<function __main__.add_word_to_tree.<locals>.<lambda>()>,
                                                                             {})})})}),
                          'g': defaultdict(None,
                                      {'y': defaultdict(<function __main__.add_word_to_tree.<locals>.<lambda>()>,
                                                   {})}),
                          'n': defaultdict(None,
                                      {'

# Make Word Squares

In [228]:
def get_loc_from_index(i, n):
    x = i % n
    y = int(i / n)
    return [x, y]

def get_index_from_loc(loc, n):
    x, y = loc
    return y * n + x

def get_possible_chars(sq, loc, char_tree):
    x, y = loc
    partial_word1 = sq[x, :y]
    partial_word2 = sq[:x, y]

    options1 = get_possible_chars_from_partial_word(partial_word1, char_tree)
    options2 = get_possible_chars_from_partial_word(partial_word2, char_tree)
    return  options1.intersection(options2)

def get_possible_chars_from_partial_word(partial_word, char_tree):
    t = copy.copy(char_tree)
    for char in partial_word:
        if char in t.keys():
            t = t[char]
        else:
            t = {}
    return set(t.keys())

def get_char_tree(n):
    char_tree = defaultdict(lambda: defaultdict())
    for word in words:
        if len(word) == n:
            char_tree = add_word_to_tree(char_tree, word)
    return char_tree


class Square():
    def __init__(self, sq):
        self.sq = sq
        self.symmetry_score = self.get_symmetry()
    
    def __eq__(self, other): 
        if np.all(self.sq == other.sq) or np.all(self.sq == other.sq.T): 
            return True
        else: 
            return False
    
    def __ge__(self, other):
        return (not __lt__(self, other) and not __eq__(self, other))
    
    def __lt__(self, other):
        return np.mean(self.sq < other.sq) > 0.5
    
    def __hash__(self):
        sq = self.sq if np.mean(self.sq > self.sq.T) > 0.5 else self.sq.T
        return hash(sq.tostring())
    
    def __str__(self):
        array_string = ''
        for line in self.sq:
            for char in line:
                array_string += f'{char} '
            array_string += f'\n'
        return array_string
    
    def get_symmetry(self):
        return np.mean(self.sq == self.sq.T)


def get_partial_squares(sq, i, char_tree):
    sq = copy.copy(sq)
    n = sq.shape[0]
    loc = get_loc_from_index(i, n)
    x, y = loc
    possible_chars = get_possible_chars(sq, loc, char_tree)

    sqs = []

    if len(possible_chars) == 0:
        return []

    for char in possible_chars:
        sq[x, y] = char

        if i < n**2 - 1:
            sqs.extend(get_partial_squares(sq, i + 1, char_tree))
        else:
            sqs.extend([Square(sq)])

    return sqs

def get_squares(n):

    char_tree = get_char_tree(n)
    
    sq = np.chararray((n, n))
    sq.fill('')
    sq = sq.astype('<U1')

    _start = time.time()

    sqs = get_partial_squares(sq, 0, char_tree)

    print("Took {1} seconds".format(count, 
            (time.time() - _start)))

    return sqs

def get_unique_sqs(sqs):
    return list(set(sqs))

def print_sqs(sqs):
    for sq in sqs:
        print(sq)

In [231]:
x = get_squares(5)
unique_sqs = get_unique_sqs(x)
unique_sqs.sort(key=lambda x: x.symmetry_score, reverse=False)
print(len(unique_sqs))

Took 43.79590630531311 seconds
573


In [233]:
print_sqs(unique_sqs[:2])

c r a n e 
h a v e n 
a v e r t 
s e r v e 
e n t e r 

l a t h e 
a g a i n 
p a i n t 
s i n g e 
e n t e r 



In [232]:
def get_loc_from_index(i, n):
    x = i % n
    y = int(i / n)
    return [x, y]

def get_index_from_loc(loc, n):
    x, y = loc
    return y * n + x

def get_possible_chars(sq, loc, char_tree):
    x, y = loc
    partial_word1 = sq[x, :y]
    partial_word2 = sq[:x, y]

    options1 = get_possible_chars_from_partial_word(partial_word1, char_tree)
    options2 = get_possible_chars_from_partial_word(partial_word2, char_tree)
    return  options1.intersection(options2)

def get_possible_chars_from_partial_word(partial_word, char_tree):
    t = copy.copy(char_tree)
    for char in partial_word:
        if char in t.keys():
            t = t[char]
        else:
            t = {}
    return set(t.keys())

def get_char_tree(n):
    char_tree = defaultdict(lambda: defaultdict())
    for word in words:
        if len(word) == n:
            char_tree = add_word_to_tree(char_tree, word)
    return char_tree


class Square():
    def __init__(self, sq):
        self.sq = sq
        self.symmetry_score = self.get_symmetry()
    
    def __eq__(self, other): 
        if np.all(self.sq == other.sq) or np.all(self.sq == other.sq.T): 
            return True
        else: 
            return False
    
    def __ge__(self, other):
        return (not __lt__(self, other) and not __eq__(self, other))
    
    def __lt__(self, other):
        return np.mean(self.sq < other.sq) > 0.5
    
    def __hash__(self):
        sq = self.sq if np.mean(self.sq > self.sq.T) > 0.5 else self.sq.T
        return hash(sq.tostring())
    
    def __str__(self):
        array_string = ''
        for line in self.sq:
            for char in line:
                array_string += f'{char} '
            array_string += f'\n'
        return array_string
    
    def get_symmetry(self):
        return np.mean(self.sq == self.sq.T)

def get_partial_squares(sq, i):
    sq = copy.copy(sq)
    n = sq.shape[0]
    loc = get_loc_from_index(i, n)
    x, y = loc
    possible_chars = get_possible_chars(sq, loc, char_tree)

    sqs = []

    if len(possible_chars) == 0:
        return []

    for char in possible_chars:
        sq[x, y] = char

        if i < n**2 - 1:
            sqs.extend(get_partial_squares(sq, i + 1))
        else:
            sqs.extend([Square(sq)])

    return sqs

def get_parallel_partial_squares(sq, i, done_list):
    sq = copy.copy(sq)
    n = sq.shape[0]
    loc = get_loc_from_index(i, n)
    x, y = loc
    possible_chars = get_possible_chars(sq, loc, char_tree)

    sqs = []

    if len(possible_chars) == 0:
        return []

    for char in possible_chars:
        sq[x, y] = char

        sqs = get_partial_squares(sq, i + 1)
        done_list.extend(sqs)


def get_squares_parallel(n):
    with Manager() as manager:

        done_list = manager.list() 

        global char_tree
        char_tree = get_char_tree(n)
        
        sq = np.chararray((n, n))
        sq.fill('')
        sq = sq.astype('<U1')

        loc = get_loc_from_index(0, n)
        x, y = loc

        possible_chars = get_possible_chars(sq, loc, char_tree)

        procs = []
        _start = time.time()

        for char in possible_chars:
            sq = copy.copy(sq)
            sq[x, y] = char
            i = 1

            # print(name)
            proc = Process(target=get_parallel_partial_squares, args=(sq, i, done_list))
            proc.daemon = True
            procs.append(proc)
            proc.start()
            sleep(0.01)     

        # complete the processes
        for proc in procs:
            proc.join()

        print("Took {1} seconds".format(count, 
            (time.time() - _start)))
    
        return list(done_list)


def get_unique_sqs(sqs):
    return list(set(sqs))

def print_sqs(sqs):
    for sq in sqs:
        print(sq)

In [234]:
x = get_squares_parallel(5)
unique_sqs = get_unique_sqs(x)
unique_sqs.sort(key=lambda x: x.symmetry_score, reverse=False)
print(len(unique_sqs))

Took 14.10971975326538 seconds
573


In [135]:
def get_char_tree(n):
    char_tree = defaultdict(lambda: defaultdict())
    for word in words:
        if len(word) == n:
            char_tree = add_word_to_tree(char_tree, word)
    return char_tree


def get_squares(n, processes = 6):
    with Manager() as manager:

        global char_tree
        char_tree = get_char_tree(n)
        
        sq = np.chararray((n, n)).astype('<U1')
        sq.fill('')

        work_queue = Queue()
        done_list = manager.list() 

        work_queue.put((0, sq))


        procs = []
        _start = time.time()

        for i in range(processes):
            # print(name)
            proc = Process(target=get_partial_squares, args=(work_queue, done_list))
            proc.daemon = True
            procs.append(proc)
            proc.start()
            sleep(0.01)     

            
        # complete the processes
        for proc in procs:
            proc.join()

        print("Took {1} seconds".format(count, 
            (time.time() - _start)))
    
        return list(done_list)


def get_partial_squares(work_queue, done_list):

    while not work_queue.empty():
        i, sq = work_queue.get()

        sq = copy.copy(sq)
        n = sq.shape[0]
        loc = get_loc_from_index(i, n)
        x, y = loc
        possible_chars = get_possible_chars(sq, loc, char_tree)

        sqs = []

        if len(possible_chars) == 0:
            continue

        for char in possible_chars:
            sq[x, y] = char

            if i < n**2 - 1:
                work_queue.put((i + 1, sq))
            else:
                done_list.extend([Square(sq)])
        
        sleep(0.000001)


In [137]:
sqs = get_squares(3)

KeyboardInterrupt: 

In [123]:
sqs

[<__main__.Square at 0x7f3da1680210>,
 <__main__.Square at 0x7f3da2d08310>,
 <__main__.Square at 0x7f3da16808d0>,
 <__main__.Square at 0x7f3da1680f90>,
 <__main__.Square at 0x7f3da1680dd0>,
 <__main__.Square at 0x7f3da1680a90>,
 <__main__.Square at 0x7f3da1680d10>,
 <__main__.Square at 0x7f3da16805d0>,
 <__main__.Square at 0x7f3da1680bd0>,
 <__main__.Square at 0x7f3da1680890>,
 <__main__.Square at 0x7f3da16801d0>,
 <__main__.Square at 0x7f3da16807d0>,
 <__main__.Square at 0x7f3da16800d0>,
 <__main__.Square at 0x7f3da2d08a90>,
 <__main__.Square at 0x7f3da1680d50>,
 <__main__.Square at 0x7f3da1680390>,
 <__main__.Square at 0x7f3da2d086d0>,
 <__main__.Square at 0x7f3da16804d0>,
 <__main__.Square at 0x7f3da1680710>,
 <__main__.Square at 0x7f3da1680cd0>,
 <__main__.Square at 0x7f3da1680990>,
 <__main__.Square at 0x7f3da318b190>,
 <__main__.Square at 0x7f3da318b210>,
 <__main__.Square at 0x7f3da318be10>,
 <__main__.Square at 0x7f3da318b810>,
 <__main__.Square at 0x7f3da318b8d0>,
 <__main__.S

In [124]:
unique_sqs = get_unique_sqs(x)
print(len(unique_sqs))
unique_sqs.sort(key=lambda x: x.symmetry_score, reverse=False)
print_sqs(unique_sqs)

25
i s 
n o 

a s 
n o 

a t 
n o 

a t 
s o 

i t 
s o 

i n 
s o 

i s 
t o 

a n 
s o 

i n 
t o 

a n 
t o 

a s 
t o 

i t 
n o 

o n 
n o 

i n 
n o 

i t 
t o 

u s 
s o 

n o 
o r 

t o 
o r 

d o 
o r 

a s 
s o 

i s 
s o 

a n 
n o 

g o 
o r 

s o 
o r 

a t 
t o 



In [20]:
from multiprocessing import Process, Queue, Manager
import time
import sys

def reader_proc(queue):
    ## Read from the queue; this will be spawned as a separate Process
    while True:
        msg = queue.get()         # Read from the queue and do nothing
        if (msg == 'DONE'):
            break

def writer(count, queue):
    ## Write to the queue
    for ii in range(0, count):
        queue.put(ii)             # Write 'count' numbers into the queue
    queue.put('DONE')


pqueue = Queue() # writer() writes to pqueue from _this_ process
for count in [10**4, 10**5]:             
    ### reader_proc() reads from pqueue as a separate process
    reader_p = Process(target=reader_proc, args=((pqueue),))
    reader_p.daemon = True
    reader_p.start()        # Launch reader_proc() as a separate python process

    _start = time.time()
    writer(count, pqueue)    # Send a lot of stuff to reader()
    reader_p.join()         # Wait for the reader to finish
    print("Sending {0} numbers to Queue() took {1} seconds".format(count, 
        (time.time() - _start)))

Sending 10000 numbers to Queue() took 0.0828714370727539 seconds
Sending 100000 numbers to Queue() took 0.7374887466430664 seconds


In [101]:
char_tree = defaultdict(lambda: defaultdict())

for word in words:
    if len(word) == n:
        char_tree = add_word_to_tree(char_tree, word)


In [102]:
for i, char in enumerate(list(char_tree.keys())[:n**2]):
    loc = get_loc_from_index(i, n)
    x, y = loc
    sq[x, y] = char
sq

array([['t', 'w', 'f', 'd'],
       ['h', 'c', 's', 'v'],
       ['j', 'g', 'm', 'l'],
       ['y', 'b', 'o', 'e']], dtype='<U1')

In [103]:
x = [['i','t','e','m'],
    ['t','i','m','e'],
    ['e','m','i','t'],
    ['m','e','t','a']]
sq = np.array(x)
sq

array([['i', 't', 'e', 'm'],
       ['t', 'i', 'm', 'e'],
       ['e', 'm', 'i', 't'],
       ['m', 'e', 't', 'a']], dtype='<U1')

In [104]:
loc = [2, 1]
get_possible_chars(sq, loc)

TypeError: get_possible_chars() missing 1 required positional argument: 'char_tree'

In [105]:

x, y = loc
partial_word1 = sq[x, :y]
partial_word2 = sq[:x, y]
print(sq[x,y], partial_word1, partial_word2)

m ['e'] ['t' 'i']


In [106]:
char_tree

in__.add_word_to_tree.<locals>.<lambda>()>,
                                                                {}),
                                                    'g': defaultdict(<function __main__.add_word_to_tree.<locals>.<lambda>()>,
                                                                {}),
                                                    'm': defaultdict(<function __main__.add_word_to_tree.<locals>.<lambda>()>,
                                                                {})}),
                                       'e': defaultdict(None,
                                                   {'a': defaultdict(<function __main__.add_word_to_tree.<locals>.<lambda>()>,
                                                                {})}),
                                       'o': defaultdict(None,
                                                   {'t': defaultdict(<function __main__.add_word_to_tree.<locals>.<lambda>()>,
                                            