# Exp 05: Demonstrate Numba
Let's build upon the previous experiments by compiling the parent/child word-group comparison with Numba.


In [None]:
# standard libraries
from time import perf_counter_ns
import time

In [None]:
# external libraries
import numpy as np
import pandas as pd

# custom libraries
from _run_constants import *
from part_00_file_db_utils import *
from part_00_process_functions import *

In [None]:
# Unpack the seven objects returned by load_input_data; the database and
# input-file locations come from the run constants (rc).
(
    word_df,
    wg_df,
    letter_dict,
    char_matrix,
    word_group_id_list,
    word_id_list,
    wchar_matrix,
) = load_input_data(
    db_path=rc.DB_PATH,
    db_name=rc.DB_NAME,
    in_file_path=rc.IN_FILE_PATH,
)

In [None]:
# Build the letter-selector frame from the word groups using 3-character
# selectors, then pass the result through get_ls_index.
ls_df = get_ls_index(
    df=build_letter_selector_df(
        df=wg_df,
        ls_nchar=3,
        letter_selector_col_name='letter_selector',
        letter_selector_id_col_name='letter_selector_id',
    )
)

In [None]:
# Preview the first five rows of the letter-selector frame.
ls_df.head(n=5)

In [None]:
# Fetch the total number of possible anagrams from the database; this value
# is what the search cell uses to pre-size its output array.
n_possible_anagrams = load_possible_anagrams(
    db_path=rc.DB_PATH,
    db_name=rc.DB_NAME,
)

In [None]:
# Preview the first five rows of the word-group frame.
wg_df.head(n=5)

In [None]:
# Bring the letter selector columns from ls_df into wg_df. No join key is
# given, so pandas joins on whichever columns the two frames share
# (default inner join).
col_names = ['letter_selector', 'letter_selector_id']
wg_df = wg_df.merge(right=ls_df[col_names])

In [None]:
# Build the two lookup arrays consumed by the search loop:
#   ls_id_wg_id    - indexed there as [:, 0] (compared with the selector row
#                    id) and [:, 1] (word group ids)
#   ls_index_array - iterated row by row; each row is used as a column
#                    selector into wchar_matrix
(ls_id_wg_id, ls_index_array) = build_ls_index_arrays(
    wg_df=wg_df,
    ls_df=ls_df,
)

In [None]:
from numba import njit, int8, int32

In [None]:
# build a numba function
# An explicit (eager) signature was tried previously and is kept for reference:
#@njit(int32[:](int32[:], int8[:,:], int32))
@njit()
def build_output_wg_id_list(temp_wg_id_list:np.ndarray, ls_wchar_matrix:np.ndarray, temp_wg_id:int) -> np.ndarray:
    """Return the word group ids whose rows dominate the reference row.

    A row of ``ls_wchar_matrix`` is kept when every one of its entries is
    greater than or equal to the matching entry of row ``temp_wg_id``.

    Parameters
    ----------
    temp_wg_id_list : np.ndarray
        1-D array of word group ids, one per row of ``ls_wchar_matrix``.
    ls_wchar_matrix : np.ndarray
        2-D matrix; row ``i`` belongs to ``temp_wg_id_list[i]``.
    temp_wg_id : int
        Row position (not a word group id) of the reference row inside
        ``ls_wchar_matrix``.

    Returns
    -------
    np.ndarray
        The entries of ``temp_wg_id_list`` whose rows satisfy the test.
        The reference row always satisfies it, so its own id is included.
    """
    rows, cols = ls_wchar_matrix.shape
    reference_row = ls_wchar_matrix[temp_wg_id, :]

    # np.bool_ is used because the bare np.bool alias was removed in
    # NumPy 1.24; np.bool_ is valid on every NumPy version.
    keep_row = np.zeros(rows, dtype=np.bool_)

    # numba does not support np.all(..., axis=1), so the row-wise test is
    # written as explicit loops. Comparing elements directly (instead of
    # subtracting and testing >= 0) avoids allocating a temporary matrix and
    # cannot wrap around on narrow or unsigned integer dtypes.
    for i in range(rows):
        row_ok = True
        for j in range(cols):
            if ls_wchar_matrix[i, j] < reference_row[j]:
                row_ok = False
                break
        keep_row[i] = row_ok

    return temp_wg_id_list[keep_row]

In [None]:
# Main search: for every letter selector, find the parent/child word group
# pairs and write them into a pre-allocated (n_possible_anagrams, 2) array.
run_start_time = perf_counter_ns()

# rows never written keep the -1 sentinel and are dropped after the loop
output_list = np.full(
    shape=(n_possible_anagrams, 2),
    fill_value=-1,
    dtype=np.int32,
)
# one [ls_row_id, n_search_space, elapsed] entry per letter selector
output_time_list = []

# index of the next free row in output_list
anagram_pair_count = 0

for ls_row_id, ls_row in enumerate(ls_index_array):
    # progress indicator every 100 letter selectors
    if not ls_row_id % 100:
        print(ls_row_id)
    start_time = perf_counter_ns()

    # rows of wchar_matrix with a count of at least 1 in every column
    # picked out by this letter selector
    outcome_indices = (wchar_matrix[:, ls_row] >= 1).all(axis=1)

    # the sub-matrix to query, and the word group ids of its rows
    ls_wchar_matrix = wchar_matrix[outcome_indices, :]
    temp_wg_id_list = word_group_id_list[outcome_indices]

    # size of the search space for this selector, e.g. every word group
    # that features the letters 'buc'
    n_search_space = len(temp_wg_id_list)

    # the word groups assigned to this letter selector
    curr_wg_id_list = ls_id_wg_id[ls_id_wg_id[:, 0] == ls_row_id, 1]

    for i_curr_wg_id, curr_wg_id in enumerate(curr_wg_id_list):

        # position of the current word group inside the sub-matrix
        temp_wg_id = np.nonzero(temp_wg_id_list == curr_wg_id)[0][0]

        outcome_word_id_list = build_output_wg_id_list(
            temp_wg_id_list=temp_wg_id_list,
            ls_wchar_matrix=ls_wchar_matrix,
            temp_wg_id=temp_wg_id,
        )

        n_from_words = len(outcome_word_id_list)
        if not n_from_words:
            # nothing found for this word group
            continue

        outcome_word_id_list = format_output_list(
            outcome_word_id_list=outcome_word_id_list,
            wg_id=curr_wg_id,
        )

        # copy the new pairs into the next free rows, then advance the cursor
        new_anagram_pair_count = anagram_pair_count + n_from_words
        output_list[anagram_pair_count:new_anagram_pair_count] = outcome_word_id_list
        anagram_pair_count = new_anagram_pair_count

    # per-selector timing
    curr_time = calc_time(time_start=start_time, round_digits=8)
    output_time_list.append([ls_row_id, n_search_space, curr_time])

print('...time to find parent/child word relationships')
time_proc = calc_time(time_start=run_start_time, round_digits=4)
compute_elapsed_time(seconds=time_proc)

# keep only the rows in which both columns were filled (no -1 left)
print('...truncating output list...')
output_indices = (output_list >= 0).all(axis=1)
output_list = output_list[output_indices]
print(output_list.shape)
time_proc = calc_time(time_start=run_start_time, round_digits=4)
compute_elapsed_time(seconds=time_proc)

In [None]:
# Build the from-word and to-word counters from the truncated pair list
# (takes approximately 19 seconds).
(from_word_counter, to_word_counter) = build_counters(
    output_list=output_list,
)

In [None]:
# Spot check on entry 746: the number of from word groups should be 26.
# The from-count is printed first, the to-count on the following line.
print(from_word_counter[746], to_word_counter[746], sep='\n')

In [None]:
# using numba: 2 minutes, 29 seconds. 