# Lookup Table - Use

In [1]:
import scooby
import numpy as np

## Load data

In [2]:
# Read the three synthetic data sets and the "true" parameters behind them.
with open('lut-data.npz', 'br') as f:
    dat = np.load(f)
    # Pull every array out of the archive while the file is still open.
    all_data = tuple(dat[key] for key in ('data1', 'data2', 'data3'))
    all_params = tuple(dat[key] for key in ('true1', 'true2', 'true3'))

In [3]:
# Which of the loaded data sets to work on (index into all_data / all_params)
idat = 0

# Pick the data vector together with its "true" model parameters
data, (p, q, r) = all_data[idat], all_params[idat]

# 1. Full matrix computation

If memory allows, this is the easiest approach. It is not necessarily the fastest one: that depends on the problem size and has to be tested.

In [4]:
# Load the full lookup table together with the parameter axes stored with it
with open('lookuptable.npz', 'br') as f:
    dat = np.load(f)
    lookuptable, pp, qq, rr = (
        dat[key] for key in ('lookuptable', 'pp', 'qq', 'rr')
    )

print(f"Shape: (p, q, r, ndat): {lookuptable.shape}")

Shape: (p, q, r, ndat): (60, 50, 40, 5)


In [5]:
# Difference between every table entry and the data
# (data is broadcast along the trailing data-axis)
diff = lookuptable - data

# Sum of the squared differences over the data-axis, one value per (p, q, r)
summed_square_diff = np.einsum('pqrd,pqrd->pqr', diff, diff)

# Locate the smallest misfit and turn its flat position into (p, q, r) indices
flat_best = summed_square_diff.argmin()
ip, iq, ir = np.unravel_index(flat_best, summed_square_diff.shape)

# Compare the data with the best-matching table entry
print(f"DATA - Synthetic   : {data}")
print(f"     - Lookuptable : {lookuptable[ip, iq, ir, :]}", end="\n\n")

# Compare the "true" parameters with the ones of the best-matching entry
print(f"PARAMETERS - True  : {[p, q, r]}")
print(f"             Found : {[pp[ip], qq[iq], rr[ir]]}")

DATA - Synthetic   : [262.5 110.9  20.7 209.9 310.8]
     - Lookuptable : [262.49290903 110.90003459  20.69802836 209.91836735 310.77966102]

PARAMETERS - True  : [2.7, 1.1, 50.0]
             Found : [2.694915254237288, 1.1020408163265305, 50.0]


# 2. Using sorted arrays per data-point and a range

If one massive table is too big for memory, you can split it along the data axis.

**Note**: This approach still has to be extended to be fully functional for sequential runs on big matrices; currently, all tables are still loaded at once.

In [6]:
def _load_npz(path, *keys):
    """Return the arrays stored under `keys` in the .npz archive at `path`."""
    with open(path, 'br') as f:
        dat = np.load(f)
        # Read the arrays before the file handle is closed.
        return tuple(dat[key] for key in keys)


# One archive per data point: the sorted table values and the indices that
# belong to them. The parameter axes are taken from the first archive.
ilt0, lookuptable0s, pp, qq, rr = _load_npz(
    "lookuptable-sorted0.npz", 'indices', 'lookuptable', 'pp', 'qq', 'rr')
ilt1, lookuptable1s = _load_npz("lookuptable-sorted1.npz", 'indices', 'lookuptable')
ilt2, lookuptable2s = _load_npz("lookuptable-sorted2.npz", 'indices', 'lookuptable')
ilt3, lookuptable3s = _load_npz("lookuptable-sorted3.npz", 'indices', 'lookuptable')
ilt4, lookuptable4s = _load_npz("lookuptable-sorted4.npz", 'indices', 'lookuptable')

In [7]:
# Relative error that is considered "acceptable", given as a fraction of each
# data value (0.01 = 1 %).
# You should keep it as small as possible, to reduce the number of possible models.
rel_err = 0.01

def get_set(sorted_table, sorted_indices, data, rel_error):
    """Return the indices of all table entries that lie close to `data`.

    Parameters
    ----------
    sorted_table : array_like
        Table values in ascending order (required by `np.searchsorted`).
    sorted_indices : array_like
        Index belonging to each entry of `sorted_table`, in the same order.
    data : float
        Data value to look up.
    rel_error : float
        Acceptable relative error; the search window is
        ``data -/+ rel_error*abs(data)``.

    Returns
    -------
    set
        Entries of `sorted_indices` whose table value ``v`` satisfies
        ``data - abs_err <= v < data + abs_err``.
    """
    # Use the `rel_error` argument here, not a notebook-level variable, so the
    # function honours whatever tolerance the caller passes in.
    abs_err = rel_error*abs(data)

    # First and last position of the acceptable window in the sorted table
    imin = np.searchsorted(sorted_table, data - abs_err)
    imax = np.searchsorted(sorted_table, data + abs_err)

    # Return set of acceptable indices
    return set(sorted_indices[imin:imax])

# Per data point, the set of indices whose table value lies within the
# accepted error of the corresponding data value
r0, r1, r2, r3, r4 = (
    get_set(table, indices, value, rel_err)
    for table, indices, value in (
        (lookuptable0s, ilt0, data[0]),
        (lookuptable1s, ilt1, data[1]),
        (lookuptable2s, ilt2, data[2]),
        (lookuptable3s, ilt3, data[3]),
        (lookuptable4s, ilt4, data[4]),
    )
)

# Keep only the indices that are acceptable for all five data points
# (sets are fast)
overlap = r0.intersection(r1, r2, r3, r4)

# Without a single common index there is nothing to continue with.
if len(overlap) == 0:
    raise ValueError(f"No solution found, increase rel_err! (Current rel_err: {rel_err})")

# Translate the overlapping flat indices into (ip, iq, ir) triples.
# => THE SMALLER THIS NUMBER, THE FASTER THE NEXT CELL WILL BE
table_shape = (pp.size, qq.size, rr.size)
ind_possible = [np.unravel_index(o, table_shape) for o in overlap]
print(f"Number of acceptable models: {len(ind_possible)}")

Number of acceptable models: 17


In [8]:
# Sorted table and matching index array for each of the five data points
sorted_pairs = (
    (lookuptable0s, ilt0),
    (lookuptable1s, ilt1),
    (lookuptable2s, ilt2),
    (lookuptable3s, ilt3),
    (lookuptable4s, ilt4),
)

# For every candidate index, gather its value from each sorted table;
# the rows follow the iteration order of `overlap`
possible_models = np.array([
    np.concatenate([table[indices == o] for table, indices in sorted_pairs])
    for o in overlap
])

# Difference between each candidate and the data
diff = possible_models - data

# Sum of the squared differences over the data-axis, one value per candidate
summed_square_diff = np.einsum('ld,ld->l', diff, diff)

# The candidate with the smallest misfit, and its (p, q, r) indices
imin = summed_square_diff.argmin()
ip, iq, ir = ind_possible[imin]

# Compare the data with the best candidate
print(f"DATA - Synthetic   : {data}")
print(f"     - Lookuptable : {possible_models[imin, :]}", end="\n\n")

# Compare the "true" parameters with the ones of the best candidate
print(f"PARAMETERS - True  : {[p, q, r]}")
print(f"             Found : {[pp[ip], qq[iq], rr[ir]]}")

DATA - Synthetic   : [262.5 110.9  20.7 209.9 310.8]
     - Lookuptable : [262.49290903 110.90003459  20.69802836 209.91836735 310.77966102]

PARAMETERS - True  : [2.7, 1.1, 50.0]
             Found : [2.694915254237288, 1.1020408163265305, 50.0]


In [9]:
# Show the environment (OS, Python and package versions) this notebook ran in
scooby.Report()

0,1,2,3,4,5,6,7
Fri Apr 21 19:58:56 2023 CEST,Fri Apr 21 19:58:56 2023 CEST,Fri Apr 21 19:58:56 2023 CEST,Fri Apr 21 19:58:56 2023 CEST,Fri Apr 21 19:58:56 2023 CEST,Fri Apr 21 19:58:56 2023 CEST,Fri Apr 21 19:58:56 2023 CEST,Fri Apr 21 19:58:56 2023 CEST
OS,Linux,CPU(s),4,Machine,x86_64,Architecture,64bit
RAM,15.5 GiB,Environment,Jupyter,File system,ext4,,
"Python 3.10.9 | packaged by conda-forge | (main, Feb 2 2023, 20:20:04) [GCC 11.3.0]","Python 3.10.9 | packaged by conda-forge | (main, Feb 2 2023, 20:20:04) [GCC 11.3.0]","Python 3.10.9 | packaged by conda-forge | (main, Feb 2 2023, 20:20:04) [GCC 11.3.0]","Python 3.10.9 | packaged by conda-forge | (main, Feb 2 2023, 20:20:04) [GCC 11.3.0]","Python 3.10.9 | packaged by conda-forge | (main, Feb 2 2023, 20:20:04) [GCC 11.3.0]","Python 3.10.9 | packaged by conda-forge | (main, Feb 2 2023, 20:20:04) [GCC 11.3.0]","Python 3.10.9 | packaged by conda-forge | (main, Feb 2 2023, 20:20:04) [GCC 11.3.0]","Python 3.10.9 | packaged by conda-forge | (main, Feb 2 2023, 20:20:04) [GCC 11.3.0]"
numpy,1.23.5,scipy,1.10.0,IPython,8.9.0,matplotlib,3.6.3
scooby,0.7.1,,,,,,
Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications,Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications,Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications,Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications,Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications,Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications,Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications,Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications
