# Mike Babb
# babbm@uw.edu
# Find anagrams
## Part 2: Generate and store the anagrams v2.0

In [1]:
# standard libraries - installed by default
import collections
import datetime
import pickle
import sqlite3
import string
import os
import timeit

In [2]:
# external libraries - not installed by default
import numpy as np
import pandas as pd

In [3]:
from part_00_file_db_utils import load_pickle, build_db_conn
from part_00_process_functions import *

### set input and output paths

In [4]:
# base file path
base_file_path = '/project/finding_anagrams'

In [5]:
# input path
in_file_path = 'data'
in_file_path = os.path.join(base_file_path, in_file_path)

In [6]:
# output db path and name
db_path = 'db'
db_path = os.path.join(base_file_path, db_path)

In [7]:
# create the output database directory (and any missing parents).
# exist_ok=True makes this a no-op when the directory is already there and avoids
# the race between a separate os.path.exists() check and the creation itself.
os.makedirs(db_path, exist_ok=True)

In [8]:
db_name = 'words.db'

### process control flags

In [9]:
# Use numpy to perform matrix operations and determine from/to and exact anagram relationships
# option 1 - work with the full char_matrix
# option 2 - create submatrices by word length
# option 3 - create submatrices by word length and letter
# option 4 - create submatrices by word length and least common two letters

matrix_extraction_option = 4

# max number of letters to slice to use for the generation of sub-matrices for
# option 4. More letters means more sub-matrices
# 3 seems to be the sweet spot
n_subset_letters = 3

# set write_data to true to store the generated list of anagrams
write_data = True

# set to None to include all letters
# test with a subset of letters by setting the letter_subset_list to ['q', 'x'] or 
# a different set of letters
letter_subset_list = ['x']
#letter_subset_list = None

In [10]:
# start a timer to record the entire operation
total_time_start = datetime.datetime.now()

### load input data

In [11]:
# load the word_df, the words from Part 1
input_file_name = 'word_df.csv'
# build the file path
ipn = os.path.join(in_file_path, input_file_name)

# specify the datatypes of the columns using a dictionary.
# 'NA' and 'NULL' are not reserved words in Python, but by default pandas.read_csv()
# interprets those strings (among others) as missing values. They are also
# words in our list of words, so the text columns are read explicitly as str.
dtype_dict = {'word': str,
              'lcase': str,
              'n_chars': int,
              'first_letter': str,
              'word_id': int,
              'word_group_id': int,
              'letter_group': str,
              'letter_group_ranked': str}

# read in the tab-delimited file and be careful of the NA and NULL values:
# keep_default_na=False switches off the default missing-value strings and
# na_values='!!' makes '!!' the only string that is parsed as missing.
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html
word_df = pd.read_csv(filepath_or_buffer = ipn, sep = '\t', header = 0,
                          dtype=dtype_dict, na_values = '!!', keep_default_na=False)   

In [12]:
# extract the column of word ids as a numpy array
word_id_list = word_df['word_id'].to_numpy(dtype = np.int32)    

In [13]:
# count the words that start with each letter, expose the letter as a regular
# column next to its 'word_count', and order the rows from the least common
# to the most common first letter
agg_word_df = (word_df.groupby('first_letter')
                      .size()
                      .reset_index(name = 'word_count')
                      .sort_values(by = 'word_count'))

In [14]:
# extract the letters sorted by word frequency
sorted_first_letters = agg_word_df['first_letter'].tolist()

In [15]:
# load the letter dictionary from part 1
in_file_name = 'letter_dict.pkl'
letter_dict = load_pickle(in_file_path = in_file_path, in_file_name=in_file_name)

In [16]:
# load the word dictionary from part 1
in_file_name = 'word_dict.pkl'
word_dict = load_pickle(in_file_path = in_file_path, in_file_name=in_file_name)

In [17]:
# load the char matrix from part 1
in_file_name = 'char_matrix.npy'
ipn = os.path.join(in_file_path, in_file_name)
char_matrix = np.load(file = ipn)

### create the word group df: wg_df

In [18]:
# drop duplicates based on the word group. 
# by default, this will only keep the first record and it will drop all others
wg_df = word_df.drop_duplicates(subset = ['word_group_id']).copy()

In [19]:
wg_df = wg_df.sort_values(by = 'word_id')

In [20]:
# unique word groups
wg_df.shape

(215842, 8)

In [21]:
# get the word group ids, one per row of wg_df
word_group_id_list = wg_df['word_group_id'].to_numpy()
# and the associated word_id
# NOTE: this rebinds word_id_list. The int32 array holding the word_id of every
# row of word_df that was extracted earlier is replaced by one word_id per word group.
word_id_list = wg_df['word_id'].to_numpy()

In [22]:
# trim the char matrix by word id
# and not the word_group id
wchar_matrix = char_matrix[word_id_list, :]

In [23]:
# i don't use these objects, but i can't delete them?
# build a word_id to word_group_id dictionary
word_id_wg_id_dict = dict()
# and a word_group_id to word_id dictionary
wg_id_word_id_dict = dict()

for word_id, wg_id in zip(wg_df['word_id'], wg_df['word_group_id']):
    word_id_wg_id_dict[word_id] = wg_id
    wg_id_word_id_dict[wg_id] = word_id

In [24]:
wg_df.head()

Unnamed: 0,word,lcase,n_chars,first_letter,word_id,word_group_id,letter_group,letter_group_ranked
0,A,a,1,a,0,0,a,a
1,aa,aa,2,a,1,1,a,a
2,aal,aal,3,a,2,2,al,la
3,aalii,aalii,5,a,3,3,ail,lai
4,aam,aam,3,a,4,4,am,ma


In [25]:
wg_df['letter_selector'] = wg_df['letter_group_ranked'].str[:n_subset_letters]

In [26]:
# the dictionary intended to hold the sub-matrices
# NOTE(review): it is created empty here and nothing in this cell adds entries to it
n_char_matrix_dict = {}

# the distinct word lengths, in ascending order
word_length_list = sorted(wg_df['n_chars'].unique().tolist())

# python dictionaries work by storing the hash values of objects
# Anything that can be hashed can be a dictionary key. 
# The idea here is to compute the hash of the sub-matrix key once per word group
# and store it, so that the integer can be used as the lookup key later.
# The key that is hashed below is the letter selector string on its own
# (not a tuple).
# NOTE(review): hash() of a str is only stable within one interpreter session
# unless PYTHONHASHSEED is fixed, so these values must not be persisted.

# word_group_id -> hash of that word group's letter selector
wg_id_n_char_matrix_dict = {}
wg_df['wg_id_n_char_matrix_key'] = wg_df['letter_selector']
wg_df['wg_id_n_char_matrix_key_hash'] = wg_df['wg_id_n_char_matrix_key'].map(hash)
# curr_word_id holds a word_group_id here, despite its name
for curr_word_id, curr_key_hash in zip(wg_df['word_group_id'], wg_df['wg_id_n_char_matrix_key_hash']):
    wg_id_n_char_matrix_dict[curr_word_id] = curr_key_hash


In [27]:
# okay, now we need to populate three sets of dictionaries:
# word length
# letter selector
# the intersection of the two

In [28]:
# Build the list of distinct (word length, letter selector) combinations.
# Each combination identifies one group of candidate words, so the ids and
# sub-matrix for a combination only need to be computed once.

# performing selections on a dataframe is slow,
# especially so since we are comparing characters, which is why the
# combinations are enumerated up front and the results stored in dictionaries.

# the letter selector is the first n_subset_letters characters of
# letter_group_ranked (this recomputes the column with the same expression
# used when it was first created)
wg_df['letter_selector'] = wg_df['letter_group_ranked'].str[:n_subset_letters]
# one row per distinct combination of word length and letter selector
nc_ls_df = wg_df[['n_chars', 'letter_selector']].drop_duplicates()

# report the number of letters used and the number of combinations found
print(n_subset_letters)
print('...creating', nc_ls_df.shape[0], 'sets of ids')

3
...creating 16101 sets of ids


In [29]:
nc_ls_df.head()

Unnamed: 0,n_chars,letter_selector
0,1,a
1,2,a
2,3,la
3,5,lai
4,3,ma


In [30]:
# Enumerate every (n_chars, letter_selector) combination in nc_ls_df once and
# build four caches of (id array, sub-matrix, id set) tuples:
#   n_char_dict                 - keyed by word length
#   single_letter_matrix_dict   - keyed by the first letter of the letter selector
#   letter_selector_matrix_dict - keyed by the full letter selector
#   nc_ls_matrix_dict           - keyed by the (word length, letter selector) tuple
# NOTE(review): wchar_matrix is indexed with word_group_id values throughout, which
# presumes a word_group_id equals the row position of that group in wchar_matrix -
# confirm against Part 1.
# NOTE: the names assigned inside the loop stay bound to the values from the last
# iteration, and the cells that follow inspect some of them (e.g. ls_wg_id_list).
loop_count = 0
s_time = datetime.datetime.now()
# n char dictionaries
n_char_dict = {}

# single letter matrix dict
single_letter_matrix_dict = {}

# letter selector dictionary
letter_selector_matrix_dict = {}

# the intersection of the two
nc_ls_matrix_dict = {}

# a list to hold the (n_chars, letter_selector) tuples, in nc_ls_df row order
nc_ls_tuple_list = []

# enumerate these combinations only once
# reduce the number of times we have to compute sets of ids

for nc, ls in zip(nc_ls_df['n_chars'], nc_ls_df['letter_selector']):
    
    # build the number of characters splits
    if nc not in n_char_dict:    
    
        # word groups that are at least nc characters long
        nc_wg_id_list = wg_df.loc[(wg_df['n_chars']>=nc) , 'word_group_id'].to_numpy()
        nc_wg_id_set = set(nc_wg_id_list)

        # subset the wchar_matrix to get the sub matrix
        nc_sub_wchar_matrix = wchar_matrix[nc_wg_id_list, ]
        
        n_char_dict[nc] = (nc_wg_id_list, nc_sub_wchar_matrix, nc_wg_id_set)
    else:
        # already computed for this word length, reuse it
        nc_wg_id_list, nc_sub_wchar_matrix, nc_wg_id_set = n_char_dict[nc]
        

    # the first letter of the letter selector
    ll = ls[0]

    # check to see if sub-matrix with the first letter has already been created
    if ll not in single_letter_matrix_dict:            
    
        # the submatrix has not been created, let's do it.
        # letter_dict gives the column of wchar_matrix for a letter
        column_selector = [letter_dict[ll]]                
        outcome = wchar_matrix[:, column_selector] > 0   
        # rows where the letter count is positive
        # (outcome is already boolean, so the second '> 0' does not change it)
        outcome_indices = np.all(outcome > 0, axis = 1)

        # these indices match with the word_id_list, extract the subset        
        single_letter_word_group_id_list = word_group_id_list[outcome_indices]
        single_letter_word_group_id_set = set(single_letter_word_group_id_list)

        # subset the wchar_matrix to get the sub matrix
        single_letter_sub_wchar_matrix = wchar_matrix[single_letter_word_group_id_list, ]            

        single_letter_matrix_dict[ll] = (single_letter_word_group_id_list, single_letter_sub_wchar_matrix, single_letter_word_group_id_set)
    else:
        # query the sub matrices split by individual letter to then get the smaller partitions
        single_letter_word_group_id_list, single_letter_sub_wchar_matrix, single_letter_word_group_id_set = single_letter_matrix_dict[ll]
        

    if ls not in letter_selector_matrix_dict:
        
        # build a column selector, one column per letter of the letter selector
        column_selector = [letter_dict[curr_letter] for curr_letter in ls]        

        # get the indices of the single_letter_sub_wchar_matrix that feature the n least common letters
        outcome = single_letter_sub_wchar_matrix[:, column_selector] > 0        
        # keep the rows where every selected letter has a positive count
        outcome_indices = np.all(outcome > 0, axis = 1)
        
        # these are now the ids
        ls_wg_id_list = single_letter_word_group_id_list[outcome_indices]    
        ls_wg_id_set = set(ls_wg_id_list)
                
        # subset the wchar_matrix to get the sub matrix - this contains the three least common letters for a group of words
        ls_wchar_matrix = wchar_matrix[ls_wg_id_list, ]        
        letter_selector_matrix_dict[ls] = (ls_wg_id_list, ls_wchar_matrix, ls_wg_id_set)
    else:
        # this is the submatrix by letter selector
        ls_wg_id_list, ls_wchar_matrix, ls_wg_id_set = letter_selector_matrix_dict[ls]

    # now, compute the intersection of the two
    nc_ls_tuple = (nc, ls)
    # hashing the tuple ahead of time is currently disabled
    #nc_ls_tuple_hash = hash(nc_ls_tuple)    
    
    # perform the intersection: word groups that are long enough AND contain
    # every letter of the letter selector
    # this is incredibly slow.
    nc_ls_wg_id_set = nc_wg_id_set.intersection(ls_wg_id_set)
    #nc_ls_wg_id_list = np.array(object = list(nc_ls_wg_id_set), dtype = int)
    # the ids come out in set iteration order, which is not guaranteed to be sorted
    nc_ls_wg_id_list = np.fromiter(iter = nc_ls_wg_id_set, dtype = int)
    # now, get the rows
    nc_ls_wchar_matrix = wchar_matrix[nc_ls_wg_id_list, ]
    nc_ls_matrix_dict[nc_ls_tuple] = (nc_ls_wg_id_list, nc_ls_wchar_matrix, nc_ls_wg_id_set)

    nc_ls_tuple_list.append(nc_ls_tuple)

    # progress indicator, one line per 1,000 combinations
    loop_count += 1
    if loop_count % 1000 == 0:
        print(loop_count)

# report the elapsed time in seconds
e_time = datetime.datetime.now()
p_time = e_time - s_time
print(p_time.total_seconds())

1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
52.432989


In [31]:
nc_ls_df['nc_ls_tuple'] = nc_ls_tuple_list

In [32]:
nc_ls_df.head()

Unnamed: 0,n_chars,letter_selector,nc_ls_tuple
0,1,a,"(1, a)"
1,2,a,"(2, a)"
2,3,la,"(3, la)"
3,5,lai,"(5, lai)"
4,3,ma,"(3, ma)"


In [33]:
# join this back to the wg_df
wg_df = pd.merge(left = wg_df, right = nc_ls_df)

In [34]:
len(ls_wg_id_list)

21

In [35]:
len(ls_wg_id_set)

21

In [36]:
len(nc_ls_wg_id_set)

1

In [37]:
# let's examine what we've created

In [38]:
# demonstrate the look up with the word 'quiet'

In [39]:
wg_id = word_df.loc[word_df['lcase'] == 'quiet', 'word_group_id'].iloc[0]
letter_selector = wg_df.loc[wg_df['word_group_id'] == wg_id, 'letter_selector'].iloc[0]
nc_ls_tuple = wg_df.loc[wg_df['word_group_id'] == wg_id, 'nc_ls_tuple'].iloc[0]

In [40]:
output = get_values(wg_id = wg_id, 
                    wchar_matrix = wchar_matrix,
                   word_group_id_list = word_group_id_list)

In [41]:
# how many parent/from words were found for the word 'quiet'?
len(output)

979

In [42]:
# this is an array of from words to the word 'quiet'
output

array([[  1084, 151389],
       [  1630, 151389],
       [  1631, 151389],
       ...,
       [208656, 151389],
       [209665, 151389],
       [212473, 151389]])

In [43]:
# and those words are...
word_list = word_df.loc[word_df['word_group_id'].isin(output[:, 0]), 'lcase'].tolist()

In [44]:
len(word_list)

999

In [45]:
# we've tested with one word, let's time many evaluations to get a sense of how quickly 
# the current matrix_extraction_option executes
# use the timeit() function to evaluate how long, on average, a single matrix operation
# takes to complete
code_snippet = """get_values(wg_id = wg_id, 
                    wchar_matrix = wchar_matrix,
                   word_group_id_list = word_group_id_list)"""

In [46]:
n_trials = 1000
total_time = timeit.timeit(code_snippet,
              number=n_trials, globals=globals())

In [47]:
# average number of seconds per trial
print(total_time, total_time / n_trials)

51.15199629997369 0.05115199629997369


In [48]:
output = get_values_better(wg_id = wg_id,
                      letter_selector = letter_selector,
                      letter_selector_matrix_dict = letter_selector_matrix_dict)

In [49]:
len(output)

979

In [50]:
# use the timeit() function to evaluate how long, on average, a single matrix operation
# takes to complete
code_snippet = """get_values_better(wg_id = wg_id,
                      letter_selector = letter_selector,
                      letter_selector_matrix_dict = letter_selector_matrix_dict)"""

In [51]:
n_trials = 1000
total_time = timeit.timeit(code_snippet,
              number=n_trials, globals=globals())

In [52]:
# average number of seconds per trial
print(total_time, total_time / n_trials)

0.46148850000463426 0.00046148850000463424


In [53]:
output = get_values_even_better(wg_id = wg_id,
                           nc_ls_tuple = nc_ls_tuple,                           
                           nc_ls_matrix_dict=nc_ls_matrix_dict)


In [54]:
len(output)

979

In [55]:
# use the timeit() function to evaluate how long, on average, a single matrix operation
# takes to complete
code_snippet = """get_values_even_better(wg_id = wg_id,
                           nc_ls_tuple = nc_ls_tuple,                           
                           nc_ls_matrix_dict=nc_ls_matrix_dict)"""

In [56]:
n_trials = 1000
total_time = timeit.timeit(code_snippet,
              number=n_trials, globals=globals())

In [57]:
# average number of seconds per trial
print(total_time, total_time / n_trials)

0.4884099999908358 0.0004884099999908358


In [None]:
# here is where I left off: 2023/12/30

### estimate total number of from/to word pairs

In [None]:
# how many anagrams are there?
# let's estimate the number of anagrams by assuming that the number of
# parent/from words is a function of word length. 
# let's sample 10 words of each word length, compute the number of from/parent anagrams
# for each word in the sample, compute the min, mean, and max, and apply those values
# to the numbers of words by length and multiply accordingly
# this will give us a very generous upper bound of anagram pairs

In [64]:
def estimate_total_pairs(word_df:pd.DataFrame, wg_df:pd.DataFrame,
                         nc_ls_matrix_dict:dict, n_samples:int = 10,
                         random_state = None) -> int:
    """
    Estimate the number of from/to pairs by sampling word groups of each length.

    For every distinct value of word_df['n_chars'], n_samples word group ids of
    that length are drawn with replacement from wg_df (repeated draws collapse to
    a single evaluation). get_values_even_better() is called for each sampled
    word group and the number of rows it returns is recorded. The per-length
    max and mean of those counts are multiplied by the number of words of that
    length in word_df, and the estimate is the midpoint between the summed
    mean-based total and the summed max-based total.

    Parameters
    ----------
    word_df : pd.DataFrame
        Must contain the column 'n_chars'.
    wg_df : pd.DataFrame
        Must contain the columns 'n_chars', 'word_group_id' and 'nc_ls_tuple'.
    nc_ls_matrix_dict : dict
        Passed through unchanged to get_values_even_better().
    n_samples : int, default 10
        Number of draws per word length. Must be at least 1.
    random_state : optional
        Seed (or anything accepted by numpy.random.default_rng) used to make
        the sample reproducible. When None, numpy's global random state is
        used, as before.

    Returns
    -------
    int
        The estimated number of from/to pairs, rounded to the nearest integer.
        0 when nothing could be sampled.

    Raises
    ------
    ValueError
        If n_samples is less than 1.
    """

    if n_samples < 1:
        raise ValueError('n_samples must be at least 1')

    # None keeps the historical behavior of drawing from numpy's global state
    if random_state is None:
        rng = np.random
    else:
        rng = np.random.default_rng(random_state)

    # list of the number of characters per word
    n_char_list = sorted(word_df['n_chars'].unique().tolist())

    # enumerate and sample
    output_list = []
    for n_char in n_char_list:
        # this will get all word groups that are n_char in length.
        curr_id_list = wg_df.loc[wg_df['n_chars'] == n_char, 'word_group_id'].to_numpy()

        # nothing to sample from; choice() raises on an empty array
        if curr_id_list.size == 0:
            continue

        # sample with replacement, n_samples words per length of word
        sample_id_list = rng.choice(a = curr_id_list, size = n_samples, replace = True)
        sample_df = wg_df.loc[wg_df['word_group_id'].isin(sample_id_list),
                              ['word_group_id', 'nc_ls_tuple']]

        for s_wg_id, s_nc_ls_tuple in zip(sample_df['word_group_id'], sample_df['nc_ls_tuple']):

            # get the values
            output = get_values_even_better(wg_id = s_wg_id,
                                            nc_ls_tuple = s_nc_ls_tuple,
                                            nc_ls_matrix_dict = nc_ls_matrix_dict)

            # one row per from/parent word found
            output_list.append([n_char, output.shape[0]])

    if output_list:
        # make a dataframe from the possibilities
        pos_df = pd.DataFrame(data = output_list, columns = ['n_chars', 'n_from_words'])

        # minimum, max, and mean number of from words, by word length
        agg_pos_df = pos_df.groupby('n_chars')['n_from_words'].agg(
            min_n_from_words = 'min',
            max_n_from_words = 'max',
            mean_n_from_words = 'mean')

        # let's aggregate by number of letters per word, and then join
        n_word_length_df = word_df.groupby('n_chars').size().to_frame(name = 'n_words')

        n_pos_df = pd.merge(left = n_word_length_df, right = agg_pos_df,
                            left_index = True, right_index = True)

        n_tot_max_anagrams = (n_pos_df['n_words'] * n_pos_df['max_n_from_words']).sum()
        n_tot_mean_anagrams = (n_pos_df['n_words'] * n_pos_df['mean_n_from_words']).sum()

        # set the upper bound of anagrams as the midway point
        # between the mean and the max of the estimated number of anagrams
        n_possible_anagrams = (n_tot_mean_anagrams + n_tot_max_anagrams) / 2

        # round and convert to integer
        n_possible_anagrams = int(np.round(n_possible_anagrams, 0))
    else:
        # no word length produced a sample
        n_possible_anagrams = 0

    # this number will be used to create an array that will hold the from/to pairs
    n_possible_anagrams_formatted = '{:,}'.format(n_possible_anagrams)
    print('...estimated number of from/to pair word pairs:', n_possible_anagrams_formatted )

    return n_possible_anagrams

In [65]:
n_possible_anagrams = estimate_total_pairs(word_df = word_df, wg_df = wg_df,
                         nc_ls_matrix_dict = nc_ls_matrix_dict)

...estimated number of from/to pair word pairs: 181,015,501


In [None]:
# re-run the estimate. estimate_total_pairs() takes word_df, wg_df and
# nc_ls_matrix_dict; passing n_char_matrix_dict / wg_id_n_char_matrix_dict
# raises a TypeError because they are not parameters of the function.
n_possible_anagrams = estimate_total_pairs(word_df = word_df, wg_df = wg_df,
                                           nc_ls_matrix_dict = nc_ls_matrix_dict)

### discover from/to word group id pairs

In [None]:
# Discover the from/to word group id pairs.
# For every focal (to/child) word group, fetch the candidate ids and candidate
# character matrix stored in nc_ls_matrix_dict under the word group's
# (n_chars, letter_selector) tuple, and keep the candidates that contain at least
# as many of every letter as the focal word.
# The pairs are written into a pre-allocated numpy array and the time spent on
# each focal word is recorded while the data are in memory, to minimize the
# number of trips through our data.
#
# The candidates are looked up in nc_ls_matrix_dict via the 'nc_ls_tuple' column
# of wg_df: n_char_matrix_dict is never populated, so a lookup keyed by the
# stored hash values cannot succeed.

# a list to hold the dataframes generated for each letter
proc_time_df_list = []

# subset the list of letters
if letter_subset_list:
    letters = letter_subset_list[:]
else:
    letters = sorted_first_letters

# the number of from/to pairs written to output_list so far
anagram_pair_count = 0

# use numpy to pre-allocate an array that will be updated while enumerating.
# this eliminates list.append() calls
# rows that are never written keep the fill value of -1
output_list = np.full(shape = (n_possible_anagrams, 2), fill_value = -1, dtype = int)

# the number of focal word groups examined
wg_count = 0

# column names of the per-letter processing time dataframes
proc_time_columns = ['word_group_id', 'n_seconds', 'n_from_word_groups', 'n_candidates']

for curr_letter in letters:
    # enumerate by each letter
    # this isn't absolutely necessary, we could just enumerate by word id,
    # but for testing and development, letters are a handy way to chunk up the data.

    # one (word_group_id, n_seconds, n_from_words, n_candidates) tuple per focal word
    proc_time_rows = []

    # the word groups that start with the focal letter
    curr_wg_df = wg_df.loc[wg_df['first_letter'] == curr_letter, :]

    # sort by n_chars and letter_selector so that consecutive focal words
    # tend to use the same sub-matrix
    curr_wg_df = curr_wg_df.sort_values(by = ['n_chars', 'letter_selector'])
    curr_word_group_id_list = curr_wg_df['word_group_id'].tolist()
    curr_nc_ls_tuple_list = curr_wg_df['nc_ls_tuple'].tolist()

    wg_count += len(curr_word_group_id_list)

    n_curr_words = '{:,}'.format(len(curr_wg_df))
    print('...finding parent anagrams for', n_curr_words, 'words that start with', curr_letter)

    # enumerate by word group id, working with integers is faster than words
    for word_group_id, curr_nc_ls_tuple in zip(curr_word_group_id_list, curr_nc_ls_tuple_list):
        # start timing to record processing for each word
        s_time = datetime.datetime.now()

        # get the possible candidate word_group_ids and char matrix.
        # each entry is (id array, sub-matrix, id set); the set is not needed here
        curr_word_id_index_list, curr_char_matrix = nc_ls_matrix_dict[curr_nc_ls_tuple][:2]

        # how many candidates?
        n_possible_words = len(curr_word_id_index_list)

        # subtract the focal word's letter counts from every row in the matrix
        # this produces a new matrix.
        focal_row_selector = curr_word_id_index_list == word_group_id
        outcome = curr_char_matrix - curr_char_matrix[focal_row_selector, ]
        del focal_row_selector

        # a candidate is a from/parent word when no column is negative, i.e. it has
        # at least as many of every letter as the focal word:
        # mite --> time and terminator --> time qualify
        # trait <> time does not, trait has no 'm' or 'e'
        outcome_indices = np.all(outcome >= 0, axis = 1)
        del outcome

        # extract the ids of the matching candidates
        outcome_word_id_list = curr_word_id_index_list[outcome_indices].tolist()
        del outcome_indices

        # number of parent words found. the focal word always matches itself,
        # so more than one match means at least one other word group was found
        n_from_words = len(outcome_word_id_list)

        if n_from_words > 1:

            new_anagram_pair_count = anagram_pair_count + n_from_words

            # n_possible_anagrams is only an estimate: grow the array (at least
            # doubling it) instead of failing when the estimate was too low
            n_allocated_rows = output_list.shape[0]
            if new_anagram_pair_count > n_allocated_rows:
                n_extra_rows = max(new_anagram_pair_count - n_allocated_rows, n_allocated_rows)
                extra_rows = np.full(shape = (n_extra_rows, 2), fill_value = -1,
                                     dtype = output_list.dtype)
                output_list = np.concatenate((output_list, extra_rows), axis = 0)
                del extra_rows

            # the from words
            output_list[anagram_pair_count:new_anagram_pair_count, 0] = outcome_word_id_list

            # the to word
            output_list[anagram_pair_count:new_anagram_pair_count, 1] = word_group_id

            # set the anagram pair count
            anagram_pair_count = new_anagram_pair_count

        del outcome_word_id_list

        # record the time for the word
        e_time = datetime.datetime.now()
        p_time = e_time - s_time
        p_time = p_time.total_seconds()

        proc_time_rows.append((word_group_id, p_time, n_from_words, n_possible_words))

    # create a dataframe from the recorded rows. naming the columns here keeps
    # this working for a letter that has no words at all
    proc_time_df = pd.DataFrame(data = proc_time_rows, columns = proc_time_columns)

    # display processing time for the current letter
    total_proc_time = round(proc_time_df['n_seconds'].sum(), 2)
    print('...finding parent anagrams for', curr_letter, 'words took', total_proc_time, 'seconds...')

    proc_time_df_list.append(proc_time_df)

### shape and store output data

In [None]:
# truncate the output array to only include indices with a from/to word pair
# the array was pre-filled with -1, so rows that were never written fail the
# '>= 0 in both columns' test and are dropped
output_indices = np.all(output_list >= 0, axis = 1)
output_list = output_list[output_indices, ]
# the boolean mask is no longer needed
del output_indices

In [None]:
# how many anagram pairs were found?
n_total_anagrams = len(output_list)
n_total_anagrams_formatted = '{:,}'.format(n_total_anagrams)
print('...total anagrams', n_total_anagrams_formatted)

In [None]:
## count the number of to words
# https://docs.python.org/3/library/collections.html#collections.Counter
# number of to words

### write anagram pairs to SQLite

In [None]:
# write the anagram pairs to the database
if write_data:
    store_anagram_pairs(output_list = output_list, db_path = db_path, db_name = db_name)    

### store number of from/to word pairs and time related to processing

In [None]:
# store the number of from/to word pairs and the processing times
# NOTE(review): unlike the store_anagram_pairs() call, this call is not guarded by
# write_data - confirm that it is meant to run when write_data is False
store_anagaram_processing_time(output_list=output_list, proc_time_df_list=proc_time_df_list, word_df=word_df,
                               wg_df = wg_df,
                               matrix_extraction_option = matrix_extraction_option, db_path=db_path,
                               db_name=db_name, total_time_start=total_time_start)