# Mike Babb
# babb.mike@outlook.com
# Find anagrams
## Part 4: Query the anagram database

In [1]:
# standard libraries
import os

In [2]:
# external libraries

In [3]:
# custom libraries
from part_00_process_functions import query_db
import _run_constants as rc

### define focal word

In [4]:
# the word whose parent, exact, and child anagrams are looked up below
focal_word = "achiever"

### load the list of words

In [5]:
# read the lcase, word_id, and word_group_id columns of the word_groups table
sql = 'select lcase, word_id, word_group_id from word_groups;'
word_df = query_db(
    sql=sql,
    db_path=rc.db_path,
    db_name=rc.db_name,
)

...query execution took: 0.87 seconds...


In [6]:
# build a lookup table keyed by the 'lcase' value of each row:
#   lcase -> (word_id, word_group_id)
# a plain dict gives constant-time lookups by word
word_id_dict = dict(
    zip(
        word_df['lcase'],
        zip(word_df['word_id'], word_df['word_group_id']),
    )
)


In [7]:
# look up the word id and word group id of the focal word.
# a focal word that is not a key of the lookup table still raises KeyError,
# but with a message that says what was being looked up and where.
try:
    focal_word_id, focal_word_group_id = word_id_dict[focal_word]
except KeyError:
    raise KeyError(
        f'focal word {focal_word!r} was not found in word_id_dict'
    ) from None

### load from/parent word group id pairs

In [8]:
# parent/from query: select the from_word_group_id of every anagram_groups
# row whose to_word_group_id equals the single bound parameter
sql = (
    'select from_word_group_id '
    'from anagram_groups '
    'where to_word_group_id = (?);'
)

In [9]:
# run the parent/from query with the focal word's group id as the parameter
pw_df = query_db(
    sql=sql,
    db_path=rc.db_path,
    db_name=rc.db_name,
    params=(focal_word_group_id,),
)

...query execution took: 0.01 seconds...


In [10]:
# parent word dataframe: the 'lcase' column of the word_df rows whose
# word_group_id appears among the from_word_group_id values in pw_df
in_parent_group = word_df['word_group_id'].isin(pw_df['from_word_group_id'])
parent_word_df = word_df.loc[in_parent_group, ['lcase']]

### load to/child word group id pairs

In [11]:
# child/to query: select the to_word_group_id of every anagram_groups
# row whose from_word_group_id equals the single bound parameter
sql = (
    'select to_word_group_id '
    'from anagram_groups '
    'where from_word_group_id = (?);'
)

In [12]:
# run the child/to query with the focal word's group id as the parameter
cw_df = query_db(
    sql=sql,
    db_path=rc.db_path,
    db_name=rc.db_name,
    params=(focal_word_group_id,),
)

...query execution took: 0.04 seconds...


In [13]:
# child word dataframe: the 'lcase' column of the word_df rows whose
# word_group_id appears among the to_word_group_id values in cw_df
in_child_group = word_df['word_group_id'].isin(cw_df['to_word_group_id'])
child_word_df = word_df.loc[in_child_group, ['lcase']]

### determine exact anagrams

In [14]:
# collect the parent and child words into sets so that the exact anagrams
# can be found with set operations
pw_set = set(parent_word_df['lcase'])
cw_set = set(child_word_df['lcase'])

In [15]:
# words present in both the parent set and the child set
ew_set = pw_set & cw_set

In [16]:
# drop the intersection from the parent/from word set
# (the child/to word set gets the same treatment separately)
pw_set = pw_set - ew_set

In [17]:
# drop the intersection from the child/to word set
cw_set = cw_set - ew_set

In [18]:
# the focal word should not be listed among its own exact anagrams;
# discard() removes it when present and does nothing otherwise
ew_set.discard(focal_word)

In [19]:
## Save the list of anagrams to disk

In [20]:
# the output file is named after the focal word
out_file_name = f"{focal_word}.txt"

In [21]:
# output file path and name: the file name joined onto the configured
# word output directory
ofpn = os.path.join(
    rc.word_output_file_path,
    out_file_name,
)

In [22]:
# sort the sets and, in so doing, create lists
pw_list, ew_list, cw_list = (
    sorted(word_set) for word_set in (pw_set, ew_set, cw_set)
)

In [23]:
# write the from, exact, and to anagram pairs to a text file:
# three sections, each a header line followed by one word per line,
# with a blank line between sections.
# the encoding is stated explicitly so the output does not depend on the
# platform's default locale encoding; newline='' writes '\n' untranslated.
with open(ofpn, 'w', newline='', encoding='utf-8') as out_file:
    # from anagrams
    out_file.write(f'Parent/From words for: {focal_word}\n')
    out_file.writelines(pw + '\n' for pw in pw_list)

    # exact anagrams
    out_file.write('\n')
    out_file.write(f'Exact anagrams for: {focal_word}\n')
    out_file.writelines(ew + '\n' for ew in ew_list)

    # to anagrams
    out_file.write('\n')
    out_file.write(f'Child/To words for: {focal_word}\n')
    # single-character child words are wrapped in '**' markers
    out_file.writelines(
        '**' + cw + '** \n' if len(cw) == 1 else cw + '\n'
        for cw in cw_list
    )