In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
from collections import defaultdict
import copy

from multiprocessing import Process, Queue, Manager
import time
import sys
import re

import importlib.util
# Load the word_squares helper module directly from its file path (resolved
# against the current working directory) and bind the loaded module to `ws`.
spec = importlib.util.spec_from_file_location("word_squares", Path(Path.cwd(), "repos/word_squares/word_squares.py").as_posix())
ws = importlib.util.module_from_spec(spec)
spec.loader.exec_module(ws)

In [None]:
# Word list used by every search below. A smaller alternative list lives at
# 'repos/word_squares/top10k.txt'; swap the path here to use it instead.
dict_path = Path(Path.cwd(), 'repos/word_squares/12dict/American/2of12inf.txt')

# Read inside a context manager so the file handle is closed deterministically
# instead of being left to the garbage collector.
with dict_path.open('r') as dict_file:
    words = [word.lower() for word in dict_file.read().split('\n')]
print(f'{len(words)} | {words[:10]}')

In [None]:
# Histogram of word lengths: word_lengths[k] is the number of words with k characters.
word_lengths = defaultdict(lambda: 0)
for word in words:
    word_lengths[len(word)] += 1

# range() excludes its stop value, so add 1 to report the longest length too.
# .get() is used so that merely printing does not insert zero-count entries.
longest_length = max(word_lengths)
for i in range(longest_length + 1):
    print(f'{i}: {word_lengths.get(i, 0)}')


In [None]:
# Build a character tree from every word in `words`, using the
# add_word_to_tree helper of the `ws` module loaded above.
char_tree = defaultdict(lambda: defaultdict())

for word in words:
    char_tree = ws.add_word_to_tree(char_tree, word)


In [None]:
# Display the subtree stored under the key 't'.
char_tree['t']

In [None]:
# Display the first ten entries of the word list.
words[:10]

# Make Word Squares

In [None]:
def add_word_to_tree(tree, word):
    """Insert ``word`` into the nested character tree ``tree`` and return it.

    Each level of the tree maps one character to the subtree holding the
    possible continuations of that prefix.

    Args:
        tree: mapping from character to subtree; modified in place.
        word: the remaining characters to insert (may be empty).

    Returns:
        ``tree`` itself, with the path for ``word`` present.
    """
    if len(word) == 0:
        # Nothing left to insert: hand the node back untouched so that longer
        # words already stored below this prefix are preserved.
        return tree

    head, rest = word[0], word[1:]
    try:
        sub_tree = tree[head]
    except KeyError:
        # The node has no entry for this character yet (inner nodes are
        # defaultdicts without a default factory, so lookup raises KeyError).
        sub_tree = defaultdict(lambda: defaultdict())
    tree[head] = add_word_to_tree(sub_tree, rest)
    return tree


def get_loc_from_index(i, n):
    """Convert a flat cell index into an ``[x, y]`` location on an n-wide grid.

    Args:
        i: non-negative flat index.
        n: grid width.

    Returns:
        ``[x, y]`` with ``x = i % n`` and ``y = i // n``.
    """
    # Floor division keeps the arithmetic in integers; going through a float
    # (``int(i / n)``) silently loses precision for very large indices.
    return [i % n, int(i // n)]

def get_index_from_loc(loc, n):
    """Flatten an ``(x, y)`` location on an n-wide grid into ``y * n + x``."""
    first, second = loc
    return second * n + first

def get_possible_chars(sq, loc, char_tree):
    """Return the characters allowed at ``loc`` by both crossing partial words.

    The two partial words are the slices ``sq[x, :y]`` and ``sq[:x, y]``; a
    character is allowed only if the tree permits it after each of them.
    """
    x, y = loc
    candidates_a = get_possible_chars_from_partial_word(sq[x, :y], char_tree)
    candidates_b = get_possible_chars_from_partial_word(sq[:x, y], char_tree)
    return candidates_a & candidates_b

def get_possible_chars_from_partial_word(partial_word, char_tree):
    """Return the set of characters that can follow ``partial_word``.

    Args:
        partial_word: iterable of single characters forming a prefix.
        char_tree: nested mapping from character to subtree.

    Returns:
        The single-character keys of the node reached by walking
        ``partial_word`` through ``char_tree``; an empty set if the prefix is
        not in the tree.
    """
    # Walking the tree only reads it, so no copy of the mapping is needed.
    node = char_tree
    for char in partial_word:
        if char not in node:
            return set()
        node = node[char]
    # Only single-character keys are letters; this keeps bookkeeping entries
    # stored in the same mapping (e.g. an 'info' record) out of the candidates.
    return {key for key in node if isinstance(key, str) and len(key) == 1}

def get_char_tree(n):
    """Build a character tree from the words in ``words`` of length ``n``.

    Prints a one-line summary and stores an ``'info'`` record (word count and
    depth) under the top-level key ``'info'``, next to the first-letter keys.
    """
    matching = [word for word in words if len(word) == n]
    count = len(matching)

    char_tree = defaultdict(lambda: defaultdict())
    for word in matching:
        char_tree = add_word_to_tree(char_tree, word)

    print(f'Made char tree with depth {n} from {count} words')
    char_tree['info'] = {'num_words':count, 'depth':n}
    return char_tree


class Square():
    """A character grid that is treated as equal to its own transpose.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, sq):
        # Private copy, so later edits to the caller's array do not change this square.
        self.sq = copy.copy(sq)
        # Fraction of cells that equal their mirror cell across the main diagonal.
        self.symmetry_score = self.get_symmetry()

    def __eq__(self, other):
        """Two squares are equal if their grids match directly or transposed."""
        if not isinstance(other, Square):
            return NotImplemented
        # array_equal also returns False (instead of raising) on shape mismatch.
        return bool(np.array_equal(self.sq, other.sq)
                    or np.array_equal(self.sq, other.sq.T))

    def __ge__(self, other):
        """Greater-or-equal: equal, or not less-than."""
        if not isinstance(other, Square):
            return NotImplemented
        # Go through the operators; the dunder methods are not module-level names.
        return self == other or not self < other

    def __lt__(self, other):
        """Less-than when more than half of the cells compare smaller."""
        if not isinstance(other, Square):
            return NotImplemented
        return bool(np.mean(self.sq < other.sq) > 0.5)

    def __hash__(self):
        # Hash a canonical orientation (the larger of the two byte strings) so a
        # square and its transpose, which compare equal, also hash equal.
        # tobytes() replaces ndarray.tostring(), which newer NumPy no longer has.
        direct = self.sq.tobytes()
        flipped = self.sq.T.tobytes()
        return hash(direct if direct > flipped else flipped)

    def __str__(self):
        """Render one text line per grid row, each character followed by a space."""
        return ''.join(
            ''.join(f'{char} ' for char in line) + '\n'
            for line in self.sq
        )

    def get_symmetry(self):
        """Return the fraction of cells equal to their transposed counterpart."""
        return np.mean(self.sq == self.sq.T)

def get_partial_squares(sq, limit=None):
    """Recursively extend ``sq`` cell by cell and collect the resulting squares.

    The next cell to fill is derived from the number of non-empty cells in
    ``sq``. Candidate characters come from the module-level ``char_tree``.

    Args:
        sq: square character array; it is copied, not modified.
        limit: flat index of the last cell to fill. Defaults to the final cell
            (``n**2 - 1``), i.e. complete squares.

    Returns:
        A list of ``Square`` objects filled up to and including cell ``limit``;
        empty if no character fits.
    """
    sq = copy.copy(sq)
    n = sq.shape[0]
    i = np.sum(sq != '')

    if limit is None:
        limit = n**2 - 1

    x, y = get_loc_from_index(i, n)
    possible_chars = get_possible_chars(sq, [x, y], char_tree)

    sqs = []
    for char in possible_chars:
        # The same cell is overwritten on every iteration; deeper cells are only
        # ever filled on the copies made by the recursive calls.
        sq[x, y] = char

        if i < limit:
            # Pass the limit on, otherwise the recursion falls back to the
            # default and always runs to the final cell.
            sqs.extend(get_partial_squares(sq, limit=limit))
        else:
            sqs.append(Square(sq))

    return sqs

# def get_partial_squares(sq):
#     sq = copy.copy(sq)
#     i = np.sum(sq != '')
#     n = sq.shape[0]
#     loc = get_loc_from_index(i, n)
#     x, y = loc
#     possible_chars = get_possible_chars(sq, loc, char_tree)

#     sqs = []

#     if len(possible_chars) == 0:
#         return []

#     for char in possible_chars:
#         sq[x, y] = char

#         if i < n**2 - 1:
#             sqs.extend(get_partial_squares(sq))
#         else:
#             sqs.extend([Square(sq)])

#     return sqs

def get_squares(n):
    """Run the single-process search for n x n squares.

    Rebuilds the module-level ``char_tree`` for word length ``n``, searches
    from an empty grid, prints the elapsed time and returns
    ``(squares, char_tree)``.
    """
    global char_tree
    char_tree = get_char_tree(n)

    # Empty n x n grid of one-character strings.
    blank = np.chararray((n, n))
    blank.fill('')
    blank = blank.astype('<U1')

    _start = time.time()
    found = get_partial_squares(blank)
    elapsed = time.time() - _start

    print("Took {0} seconds".format(elapsed))

    return found, char_tree

def get_unique_sqs(sqs):
    """Return the distinct items of ``sqs`` as a list (via a set, so unordered)."""
    distinct = set(sqs)
    return list(distinct)

def print_sqs(sqs):
    """Print each item of ``sqs`` with its own ``print`` call."""
    for square in sqs:
        print(square)


def get_parallel_partial_squares(sq, done_list):
    """Worker: complete every square reachable from ``sq`` into ``done_list``.

    Fills the next empty cell with each allowed character (taken from the
    module-level ``char_tree``), lets ``get_partial_squares`` finish the grid,
    and extends ``done_list`` with the results. Returns ``[]`` if no character
    fits the next cell, otherwise ``done_list``.
    """
    sq = copy.copy(sq)
    n = sq.shape[0]
    i = np.sum(sq != '')

    x, y = get_loc_from_index(i, n)
    possible_chars = get_possible_chars(sq, [x, y], char_tree)

    if not possible_chars:
        return []

    for char in possible_chars:
        sq[x, y] = char
        done_list.extend(get_partial_squares(sq))

    return done_list

def get_squares_parallel(n, max_processes = 80):
    """Search for n x n squares with one worker process per starting square.

    Rebuilds the module-level ``char_tree`` for word length ``n``, generates
    the partial squares that have only the first cell filled, and hands each to
    a ``get_parallel_partial_squares`` worker. Results are gathered through a
    ``Manager`` list.

    Args:
        n: side length of the squares.
        max_processes: upper bound on the number of worker processes alive at
            the same time; values below 1 are treated as 1.

    Returns:
        ``(squares, char_tree)``.
    """
    global char_tree

    with Manager() as manager:

        done_list = manager.list()

        char_tree = get_char_tree(n)

        sq = np.chararray((n, n))
        sq.fill('')
        sq = sq.astype('<U1')

        # Depth at which the work is split across processes (0 = first cell).
        processes_level = 0
        partial_sqs = get_partial_squares(sq, limit=processes_level)

        batch_size = max(1, int(max_processes))
        _start = time.time()

        print(f'Starting {len(partial_sqs)} processes')
        # Run the workers in batches so that no more than `max_processes` of
        # them exist at once.
        for first in range(0, len(partial_sqs), batch_size):
            procs = []
            for partial in partial_sqs[first:first + batch_size]:
                proc = Process(target=get_parallel_partial_squares, args=(partial.sq, done_list))
                proc.daemon = True
                procs.append(proc)
                proc.start()

            # complete the processes of this batch
            for proc in procs:
                proc.join()

        print("Took {0} seconds".format((time.time() - _start)))

        # Copy out of the proxy before the manager shuts down.
        return list(done_list), char_tree

def sqs2txt(sqs):
    """Serialise ``sqs`` to text: an ``<index>:`` header line before each ``str(sq)``."""
    chunks = [f'\n{i}:\n' + str(sq) for i, sq in enumerate(sqs)]
    return ''.join(chunks)

def txt2sq(txt):
    """Parse the text form of one square back into a ``Square``.

    Each non-empty line is split on single spaces and its last token is
    dropped; lines that yield no tokens are skipped.
    """
    clean_rows = []
    for row in txt.split('\n'):
        if not row:
            continue
        cells = row.split(' ')[:-1]
        if cells:
            clean_rows.append(cells)

    return Square(np.array(clean_rows))
    
def get_path_from_vars(sqs, words, n, note=''):
    """Build the output path for a result file under the current working directory.

    The file name encodes ``len(words)``, ``n``, ``len(sqs)`` and, when
    ``note`` is non-empty, a ``_note:<note>`` suffix.
    """
    note = f'_note:{note}' if len(note) > 0 else note
    name = f'repos/word_squares/squares/dict-len={len(words)}_n={n}_uniq={len(sqs)}{note}.txt'
    return Path.cwd() / name

def save_sqs_from_path(sqs, path):
    """Write the text form of ``sqs`` to ``path``, replacing any existing file."""
    print(f'Writting {len(sqs)} squares to: {path}')
    with path.open("w") as text_file:
        text_file.write(sqs2txt(sqs))

def load_sqs_from_file(path):
    """Read a file written by ``save_sqs_from_path`` back into ``Square`` objects.

    Args:
        path: a ``pathlib.Path`` to the text file.

    Returns:
        The list of parsed squares, in file order.
    """
    with path.open("r") as text_file:
        sqs_string = text_file.read()

    # Every square is preceded by an "<index>:" header line. Match the whole
    # index (\d+), not a single character, so that headers with two or more
    # digits are consumed completely and leave no stray digits in the chunks.
    chunks = re.split(r'\n*\d+:\n', sqs_string)
    loaded_sqs = [txt2sq(chunk) for chunk in chunks if len(chunk) > 0]
    print(f'Loaded {len(loaded_sqs)} Squares from: {path}')
    return loaded_sqs

def save_with_load_check(sqs_to_write, words, n, note=''):
    """Save ``sqs_to_write``, reload the file and assert the round trip is lossless."""
    target = get_path_from_vars(sqs_to_write, words, n, note=note)
    save_sqs_from_path(sqs_to_write, target)
    round_tripped = load_sqs_from_file(target)

    assert round_tripped == sqs_to_write
    print('Load check succesful!')

# Smoke test of the save/load round trip. This cell only defines helpers, so on
# a fresh kernel `unique_sqs` and `n` do not exist yet; skip the check until a
# search cell has produced them rather than failing with a NameError.
if 'unique_sqs' in globals() and 'n' in globals():
    save_with_load_check(unique_sqs[0:100], words, n, note='test')


In [None]:
# Sequential search. get_squares returns (squares, char_tree), so unpack both
# here; get_unique_sqs returns only the de-duplicated list.
n = 3
x, char_tree = get_squares(n)
print(f'Total squares found {len(x)}')
unique_sqs = get_unique_sqs(x)
unique_sqs.sort(key=lambda sq: sq.symmetry_score, reverse=False)
print(f'Total unique squares {len(unique_sqs)}')


In [None]:
# Show the first ten unique squares (the list was sorted by ascending symmetry score).
print_sqs(unique_sqs[:10])

In [None]:
# Save the sequential results and verify they load back equal.
# Uses the module-level `n`, which must already be set when this cell runs.
save_with_load_check(unique_sqs, words, n, note='seq')

# First level parallel

In [None]:
# Parallel search for 5x5 squares, followed by de-duplication and a summary.
n = 5
x, char_tree = get_squares_parallel(n)
print(f'Total squares found {len(x)}')

# Ascending symmetry score; sorted() is stable, like list.sort().
unique_sqs = sorted(get_unique_sqs(x), key=lambda sq: sq.symmetry_score)
print(f'Total unique squares {len(unique_sqs)}')

# num_words ** depth / 2, read from the 'info' record stored in the tree.
total_combinations = char_tree['info']['num_words']**char_tree['info']['depth'] / 2
print(f'Total Combinations: {total_combinations :e} | Percent of Total: {100 * len(unique_sqs) / total_combinations :e}')

In [None]:
# Show the first ten unique squares from the parallel run.
print_sqs(unique_sqs[:10])

In [None]:
# Save the parallel results and verify they load back equal.
save_with_load_check(unique_sqs, words, n, note='parallel')

In [None]:
# Collect every 19-character entry of the word list and display it.
temp_words = [word for word in words if len(word) == 19]

temp_words

In [None]:
# NOTE(review): `all_words` is not defined anywhere in the visible notebook —
# presumably a saved copy of the full word list from an earlier session.
# Confirm where it comes from; as written this fails on a fresh kernel.
words = all_words

# All Parallel

In [None]:
def get_char_tree(n):
    """Build a character tree from the words in ``words`` of length ``n``.

    This definition replaces the earlier ``get_char_tree`` in this notebook; it
    neither prints a summary nor stores an ``'info'`` record in the tree.
    """
    char_tree = defaultdict(lambda: defaultdict())
    for word in (w for w in words if len(w) == n):
        char_tree = add_word_to_tree(char_tree, word)
    return char_tree


def get_squares(n, processes = 6):
    """Search for n x n squares with a pool of workers sharing one work queue.

    Rebuilds the module-level ``char_tree`` for word length ``n``, seeds the
    queue with an empty grid and starts ``processes`` workers running
    ``get_partial_squares``. Finished squares are collected in a ``Manager``
    list.

    Args:
        n: side length of the squares.
        processes: number of worker processes to start.

    Returns:
        The list of completed squares.

    NOTE(review): workers stop as soon as they see an empty queue, so
    termination depends on timing — confirm results against the sequential
    search.
    """
    global char_tree

    with Manager() as manager:

        char_tree = get_char_tree(n)

        sq = np.chararray((n, n)).astype('<U1')
        sq.fill('')

        work_queue = Queue()
        done_list = manager.list()

        work_queue.put((0, sq))

        procs = []
        _start = time.time()

        for _ in range(processes):
            proc = Process(target=get_partial_squares, args=(work_queue, done_list))
            proc.daemon = True
            procs.append(proc)
            proc.start()
            # Stagger start-up; `sleep` itself is not imported, only `time` is.
            time.sleep(0.01)

        # complete the processes
        for proc in procs:
            proc.join()

        # Only the elapsed time is reported; no `count` exists in this scope.
        print("Took {0} seconds".format(time.time() - _start))

        # Copy out of the proxy before the manager shuts down.
        return list(done_list)


def get_partial_squares(work_queue, done_list):
    """Worker loop: extend queued partial squares by one cell at a time.

    Takes ``(i, sq)`` items from ``work_queue``, where ``i`` is the flat index
    of the next cell to fill. For every character the module-level
    ``char_tree`` allows there, the extended grid is either queued again (more
    cells remain) or appended to ``done_list`` as a finished ``Square``.

    Args:
        work_queue: queue of ``(index, grid)`` work items.
        done_list: shared list receiving completed squares.

    NOTE(review): the loop ends when ``work_queue.empty()`` is true, which can
    happen while other workers are still producing items — confirm this is
    acceptable.
    """
    while not work_queue.empty():
        i, sq = work_queue.get()

        sq = copy.copy(sq)
        n = sq.shape[0]
        x, y = get_loc_from_index(i, n)
        possible_chars = get_possible_chars(sq, [x, y], char_tree)

        if len(possible_chars) == 0:
            continue

        for char in possible_chars:
            sq[x, y] = char

            if i < n**2 - 1:
                # Queue a snapshot: `sq` is overwritten on the next iteration and
                # the queue may serialise the item only after put() has returned.
                work_queue.put((i + 1, copy.copy(sq)))
            else:
                done_list.append(Square(sq))

        # Brief yield between work items; `sleep` itself is not imported.
        time.sleep(0.000001)

In [None]:
# Run the queue-based search for 3x3 squares.
sqs = get_squares(3)

In [None]:
# Display the raw result of the search above.
sqs

In [None]:
# De-duplicate the squares produced by the queue-based search (`sqs`), not the
# stale `x` left over from an earlier section, then list them by ascending
# symmetry score.
unique_sqs = get_unique_sqs(sqs)
print(len(unique_sqs))
unique_sqs.sort(key=lambda sq: sq.symmetry_score, reverse=False)
print_sqs(unique_sqs)

In [None]:

def reader_proc(queue):
    """Drain ``queue`` and discard every message until ``'DONE'`` arrives.

    Intended to be run as the target of a separate Process.
    """
    # iter(callable, sentinel) keeps calling queue.get() until it returns 'DONE'.
    for _ in iter(queue.get, 'DONE'):
        pass

def writer(count, queue):
    """Put the integers ``0 .. count - 1`` on ``queue``, then the ``'DONE'`` marker."""
    for value in range(count):
        queue.put(value)
    queue.put('DONE')


# Time how long it takes to push `count` integers through a multiprocessing Queue.
pqueue = Queue()  # writer() fills pqueue from this (the notebook's) process
for count in [10**4, 10**5]:
    # A fresh reader process drains pqueue until it sees the 'DONE' marker.
    reader_p = Process(target=reader_proc, args=(pqueue,))
    reader_p.daemon = True
    reader_p.start()

    _start = time.time()
    writer(count, pqueue)   # send everything, ending with 'DONE'
    reader_p.join()         # wait until the reader has consumed it all
    print("Sending {0} numbers to Queue() took {1} seconds".format(count, time.time() - _start))

In [None]:
# Rebuild the module-level character tree from the words whose length equals
# the current module-level `n`.
char_tree = defaultdict(lambda: defaultdict())

for word in words:
    if len(word) == n:
        char_tree = add_word_to_tree(char_tree, word)


In [None]:
# Demo: lay the first n**2 top-level keys of the tree out on an n x n grid.
# Start from a fresh empty grid so the cell does not depend on an `sq` left
# over from an earlier session.
sq = np.full((n, n), '', dtype='<U1')
for i, char in enumerate(list(char_tree.keys())[:n**2]):
    loc = get_loc_from_index(i, n)
    x, y = loc
    sq[x, y] = char
sq

In [None]:
# Hand-built 4x4 example grid; the last two cells of the final row are empty strings.
x = [['i','t','e','m'],
    ['t','i','m','e'],
    ['e','m','i','t'],
    ['m','e','','']]
sq = np.array(x)
sq

In [None]:
# Count the non-empty cells of the grid.
np.sum(sq != '')

In [None]:
# Candidate characters for one cell of the example grid. get_possible_chars
# requires the character tree as its third argument.
loc = [2, 1]
get_possible_chars(sq, loc, char_tree)

In [None]:

# Show the cell at `loc` together with the two slices that get_possible_chars
# reads for that location.
x, y = loc
partial_word1 = sq[x, :y]
partial_word2 = sq[:x, y]
print(sq[x,y], partial_word1, partial_word2)

In [None]:
# Display the current character tree.
char_tree