In [1]:
import numba as nb
import numpy as np

import _khash_ffi

from numba import cffi_support

# Register the compiled cffi module with Numba before any jitted function
# below references its functions.
cffi_support.register_module(_khash_ffi)

# Short module-level aliases for the int -> int khash functions exposed by
# the cffi library object.
khash_init, khash_get, khash_set, khash_destroy = (
    _khash_ffi.lib.khash_int2int_init,
    _khash_ffi.lib.khash_int2int_get,
    _khash_ffi.lib.khash_int2int_set,
    _khash_ffi.lib.khash_int2int_destroy,
)


In [28]:
@nb.jit
def test1(fid, values, fetch_ids):
    """Fetch values by id through a dense ``id -> position`` lookup table.

    Parameters
    ----------
    fid : 1-D integer array
        Sparse ids. ``values[k]`` is treated as the value belonging to
        ``fid[k]``. Ids are used directly as table indices, so they are
        assumed to be non-negative (TODO confirm against callers).
    values : 1-D array
        One value per entry of ``fid``.
    fetch_ids : 1-D integer array
        Ids to look up.

    Returns
    -------
    ndarray of float64, shaped like ``fetch_ids``
        ``values`` at the position of each requested id.

    Raises
    ------
    KeyError
        If an entry of ``fetch_ids`` does not occur in ``fid``.
    """
    # Dense table with one slot per possible id in [0, max(fid)].
    # Slot k holds the position of id k within `fid`, or -1 when k is absent.
    # Positions are taken straight from `fid`, so the mapping stays aligned
    # with `values` even when `fid` is unsorted.
    fid2fix = np.full(np.max(fid) + 1, -1, dtype=np.int64)
    for k in range(fid.shape[0]):
        fid2fix[fid[k]] = k

    # Now fetch a selection of values
    s = np.empty_like(fetch_ids, dtype=np.float64)
    for i in range(fetch_ids.shape[0]):
        key = fetch_ids[i]
        # Reject ids outside the table instead of indexing past its ends.
        if key < 0 or key >= fid2fix.shape[0]:
            raise KeyError("fetch_ids contains an id that is not present in fid")
        ii = fid2fix[key]
        # -1 marks an id inside the table range that never appeared in `fid`.
        if ii < 0:
            raise KeyError("fetch_ids contains an id that is not present in fid")
        s[i] = values[ii]

    return s

In [37]:
@nb.njit
def test2(fid, values, fetch_ids):
    """Fetch values by id through a khash ``id -> position`` hash map.

    Parameters
    ----------
    fid : 1-D integer array
        Sparse ids. ``values[k]`` is treated as the value belonging to
        ``fid[k]``.
    values : 1-D array
        One value per entry of ``fid``.
    fetch_ids : 1-D integer array
        Ids to look up.

    Returns
    -------
    ndarray of float64, shaped like ``fetch_ids``
        ``values`` at the position of each requested id.

    Raises
    ------
    KeyError
        If a lookup yields a negative position, i.e. the id was never
        inserted from ``fid``.
    """
    d = khash_init()

    # Map every id to its position in `fid`.
    for i in range(fid.shape[0]):
        khash_set(d, fid[i], i)

    s = np.empty_like(fetch_ids, dtype=np.float64)
    for j in range(fetch_ids.shape[0]):
        # Third argument: presumably the value returned when the key is
        # absent -- TODO confirm against the khash wrapper. Stored positions
        # are always >= 0, so a negative result cannot be a real hit.
        ii = khash_get(d, fetch_ids[j], -99)
        if ii < 0:
            # Free the table before bailing out so the error path does not
            # leak it, and never use the sentinel as an index into `values`.
            khash_destroy(d)
            raise KeyError("fetch_ids contains an id that is not present in fid")
        s[j] = values[ii]

    khash_destroy(d)
    return s

In [112]:
# Benchmark inputs: a small set of sparse ids drawn from a large id range.
SEED = 0          # fixed seed so a fresh kernel reproduces the same inputs
max_fid = 215000  # largest id; test1's dense table gets max_fid + 1 slots
n_fids = 130      # number of distinct ids that carry a value
n_fetch = 100     # number of lookups performed per call

np.random.seed(SEED)

# Draw n_fids - 1 distinct ids below max_fid and keep them sorted.
_fids = np.arange(max_fid)
np.random.shuffle(_fids)
fids = np.empty(n_fids, dtype=np.int64)
# np.arange(max_fid) excludes max_fid itself, so appending it last keeps
# `fids` strictly increasing and pins the id range to [0, max_fid].
fids[-1] = max_fid
fids[:-1] = np.sort(_fids[:n_fids - 1])

# One value per id, plus a with-replacement sample of ids to look up.
values = np.random.normal(size=(n_fids))
fetch_ids = np.random.choice(fids, size=(n_fetch,), replace=True)

In [113]:
# Result from the dense lookup-table implementation.
s1 = test1(fids, values, fetch_ids)

In [114]:
# Result from the khash-based implementation on the same inputs.
s2 = test2(fids, values, fetch_ids)

In [115]:
# Sanity check: both implementations must return the same values.
np.allclose(s1, s2)

True

In [116]:
# Time the dense lookup-table implementation.
%timeit s1 = test1(fids, values, fetch_ids)

10000 loops, best of 3: 86.4 µs per loop


In [117]:
# Time the khash-based implementation on the same inputs.
%timeit s2 = test2(fids, values, fetch_ids)

100000 loops, best of 3: 7.59 µs per loop
