Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Add support for cache and AOT use of hash() #3768

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,6 @@ exclude =
numba/tests/test_slices.py
numba/tests/test_mandelbrot.py
numba/tests/test_numpy_support.py
numba/tests/compile_with_pycc.py
numba/tests/test_ir.py
numba/tests/test_deprecations.py
numba/tests/test_looplifting.py
Expand Down Expand Up @@ -295,7 +294,6 @@ exclude =
numba/tests/test_cfunc.py
numba/tests/test_conversion.py
numba/tests/test_indexing.py
numba/tests/test_pycc.py
numba/tests/annotation_usecases.py
numba/tests/test_unicode_names.py
numba/tests/test_unicode_literals.py
Expand Down
6 changes: 5 additions & 1 deletion numba/targets/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,11 @@ def _load_global_helpers():
# This is Py_None's real C name
ll.add_symbol("_Py_NoneStruct", id(None))

# This is the _Py_HashSecret_t struct
import ctypes as ct
addr = ct.addressof(ct.c_void_p.in_dll(ct.pythonapi, "_Py_HashSecret"))
ll.add_symbol("_Py_HashSecret", addr)

# Add Numba C helper functions
for c_helpers in (_helperlib.c_helpers, _dynfunc.c_helpers):
for py_name, c_address in c_helpers.items():
Expand Down Expand Up @@ -1115,7 +1120,6 @@ def add_linking_libs(self, libs):
for lib in libs:
colib.add_linking_library(lib)


class _wrap_impl(object):
"""
A wrapper object to call an implementation function with some predefined
Expand Down
9 changes: 8 additions & 1 deletion numba/targets/cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@

import llvmlite.binding as ll
import llvmlite.llvmpy.core as lc
from llvmlite import ir as llvmir

from numba import _dynfunc, config
from numba.callwrapper import PyCallWrapper
from .base import BaseContext, PYOBJECT
from numba import utils, cgutils, types
from numba import utils, cgutils, types, errors
from numba.utils import cached_property
from numba.targets import callconv, codegen, externals, intrinsics, listobj, setobj
from .options import TargetOptions
Expand Down Expand Up @@ -185,6 +186,12 @@ def calc_array_sizeof(self, ndim):
aryty = types.Array(types.int32, ndim, 'A')
return self.get_abi_sizeof(self.get_value_type(aryty))

def get_address_of_hash_secret(self, builder):
    """
    Return the address of the ``_Py_HashSecret`` global as a 64-bit integer.

    A declaration named ``_Py_HashSecret`` is looked up in (or added to)
    the module that *builder* is emitting into, and a pointer to it is
    converted to an ``i64`` with ``ptrtoint``.

    NOTE(review): the symbol itself is presumably resolved when the module
    is linked/loaded -- confirm it is registered for every target using this.
    """
    name = '_Py_HashSecret'
    module = builder.module
    try:
        # Reuse an existing declaration: llvmlite refuses to create a
        # second global value with the same name in one module.
        secret = module.get_global(name)
    except KeyError:
        secret = llvmir.GlobalVariable(module, cgutils.voidptr_t, name)
    return builder.ptrtoint(secret, llvmir.IntType(64))


class ParallelOptions(object):
"""
Expand Down
40 changes: 31 additions & 9 deletions numba/targets/hashing.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,8 @@ def _siphash24(k0, k1, src, src_sz):
for i in range(4):
jmp = i * 8
mask = ~types.uint64(ohexefef << jmp)
t = (t & mask) | (types.uint64(grab_byte(src, boffset + i)) << jmp)
t = (t & mask) | (types.uint64(
grab_byte(src, boffset + i)) << jmp)
if src_sz >= 3:
jmp = (2 * 8)
mask = ~types.uint64(ohexefef << jmp)
Expand All @@ -539,12 +540,20 @@ def _siphash24(k0, k1, src, src_sz):
return t

elif _Py_hashfunc_name == 'fnv':
#TODO: Should this instead warn and switch to siphash24?
# TODO: Should this instead warn and switch to siphash24?
raise NotImplementedError("FNV hashing is not implemented")
else:
msg = "Unsupported hashing algorithm in use %s" % _Py_hashfunc_name
raise ValueError(msg)


@intrinsic
def get_address_of_hash_secret(typingctx):
    """
    Intrinsic taking no arguments and typed as returning ``uint64``.

    Lowering is delegated to the target context's
    ``get_address_of_hash_secret(builder)``.
    """
    def _lower(context, builder, signature, arguments):
        # The target context decides how the address is materialised.
        return context.get_address_of_hash_secret(builder)

    return types.uint64(), _lower

# This is a translation of CPython's _Py_HashBytes:
# https://github.com/python/cpython/blob/d1dd6be613381b996b9071443ef081de8e5f3aff/Python/pyhash.c#L145-L191

Expand All @@ -554,6 +563,9 @@ def _Py_HashBytes(val, _len):
if (_len == 0):
return process_return(0)

# need to get the hash secret, this is an LLVM global in the module
secret_addr = get_address_of_hash_secret()

if (_len < _Py_HASH_CUTOFF):
# TODO: this branch needs testing, needs a CPython setup for it!
# /* Optimize hashing of very small strings with inline DJBX33A. */
Expand All @@ -562,8 +574,11 @@ def _Py_HashBytes(val, _len):
_hash = ((_hash << 5) + _hash) + np.uint8(grab_byte(val, idx))

_hash ^= _len
_Py_HashSecret_djbx33a_suffix = grab_uint64_t(secret_addr, 2)
_hash ^= _Py_HashSecret_djbx33a_suffix
else:
_Py_HashSecret_siphash_k0 = grab_uint64_t(secret_addr, 0)
_Py_HashSecret_siphash_k1 = grab_uint64_t(secret_addr, 1)
tmp = _siphash24(types.uint64(_Py_HashSecret_siphash_k0),
types.uint64(_Py_HashSecret_siphash_k1),
val, _len)
Expand All @@ -581,13 +596,20 @@ def unicode_hash(val):
def impl(val):
kindwidth = _kind_to_byte_width(val._kind)
_len = len(val)
# NOTE This branching is disabled until a way of working out if the code
# is being loaded from the numba cache is written. The issue is
# basically that the current_hash gets baked in as a constant and even
# if it were possible to make it volatile then there's no knowing if
# the value stored in the native unicode struct hash slot is the hash
# computed from the current hashsecret.
#
# use the cache if possible
current_hash = val._hash
if current_hash != -1:
return current_hash
else:
# cannot write hash value to cache in the unicode struct due to
# pass by value on the struct making the struct member immutable
return _Py_HashBytes(val._data, kindwidth * _len)
#current_hash = val._hash
# if current_hash != -1:
# return current_hash
# else:
# cannot write hash value to cache in the unicode struct due to
# pass by value on the struct making the struct member immutable
return _Py_HashBytes(val._data, kindwidth * _len)

return impl
33 changes: 33 additions & 0 deletions numba/tests/compile_with_pycc.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,49 +15,64 @@
cc.use_nrt = False

# Note the first signature omits the return type


@cc.export('multf', (float32, float32))
@cc.export('multi', 'i4(i4, i4)')
def mult(a, b):
    """Multiply *a* by *b*; exported as both ``multf`` and ``multi``."""
    product = a * b
    return product

# Test imported C globals such as Py_None, PyExc_ZeroDivisionError


@cc.export('get_none', 'none()')
def get_none():
    """Return ``None``; exported as ``get_none`` with signature ``none()``."""
    return None


@cc.export('div', 'f8(f8, f8)')
def div(x, y):
    """Divide *x* by *y*; exported as ``div`` with signature ``f8(f8, f8)``."""
    quotient = x / y
    return quotient


_two = 2

# This one can't be compiled by the legacy API as it doesn't execute
# the script in a proper module.


@cc.export('square', 'i8(i8)')
def square(u):
    """Raise *u* to the power held in the module-level ``_two``."""
    squared = u ** _two
    return squared


# These ones need helperlib
cc_helperlib = CC('pycc_test_helperlib')
cc_helperlib.use_nrt = False


@cc_helperlib.export('power', 'i8(i8, i8)')
def power(u, v):
    """Raise *u* to the power *v*; exported as ``power``."""
    raised = u ** v
    return raised


@cc_helperlib.export('sqrt', 'c16(c16)')
def sqrt(u):
    """Return ``cmath.sqrt(u)``; exported with signature ``c16(c16)``."""
    root = cmath.sqrt(u)
    return root


@cc_helperlib.export('size', 'i8(f8[:])')
def size(arr):
    """Return the ``size`` attribute of *arr*; exported as ``size``."""
    n_items = arr.size
    return n_items

# Exercise linking to Numpy math functions


@cc_helperlib.export('np_sqrt', 'f8(f8)')
def np_sqrt(u):
    """Return ``np.sqrt(u)``; exported as ``np_sqrt``."""
    root = np.sqrt(u)
    return root


@cc_helperlib.export('spacing', 'f8(f8)')
def np_spacing(u):
    """Return ``np.spacing(u)``; exported under the name ``spacing``."""
    gap = np.spacing(u)
    return gap
Expand All @@ -70,14 +85,17 @@ def random_impl(seed):
np.random.seed(seed)
return np.random.random()


# These ones need NRT
cc_nrt = CC('pycc_test_nrt')


@cc_nrt.export('zero_scalar', 'f8(i4)')
def zero_scalar(n):
    """Allocate ``np.zeros(n)`` and return its last element."""
    return np.zeros(n)[-1]


if has_blas:
# This one also needs BLAS
@cc_nrt.export('vector_dot', 'f8(i4)')
Expand All @@ -86,19 +104,34 @@ def vector_dot(n):
return np.dot(a, a)

# This one needs an environment


@cc_nrt.export('zeros', 'f8[:](i4)')
def zeros(n):
    """Return ``np.zeros(n)``; exported with signature ``f8[:](i4)``."""
    out = np.zeros(n)
    return out

# requires list dtor, #issue3535


@cc_nrt.export('np_argsort', 'intp[:](float64[:])')
def np_argsort(arr):
    """Return ``np.argsort(arr)``; exported as ``np_argsort``."""
    order = np.argsort(arr)
    return order


cc_hash_unicode = CC('pycc_test_unicode_hash')

# requires update of hashsecret global symbol


@cc_hash_unicode.export('hash_unicode', 'i8()')
def hash_unicode():
    """Return ``hash("numba")``; exported with signature ``i8()``."""
    digest = hash("numba")
    return digest

#
# Legacy API
#


exportmany(['multf f4(f4,f4)', 'multi i4(i4,i4)'])(mult)
# Needs to link to helperlib to deal with complex arguments
# export('multc c16(c16,c16)')(mult)
Expand Down
57 changes: 57 additions & 0 deletions numba/tests/hash_cache_usecases.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# -*- coding: utf-8 -*-
"""
This file will be copied to a temporary directory in order to
exercise caching compiled Numba functions.

See test_hashing.py's TestHashInCache()
"""
from __future__ import division, print_function, absolute_import
import sys

import numpy as np
from numba import jit, utils
from numba.tests.support import TestCase


@jit(cache=True, nopython=True)
def simple_usecase(x):
    """Return ``hash(x)`` from a cached, nopython-compiled function."""
    digest = hash(x)
    return digest


class _TestModule(TestCase):
    """
    Tests for functionality of this module's function.
    Note this does not define any "test_*" method, instead check_module()
    should be called by hand.
    """

    def check_module(self, mod, assert_run_from_cache=False):
        """
        Check that ``mod.simple_usecase`` agrees with the builtin ``hash()``.

        Parameters
        ----------
        mod : module
            Module object providing ``simple_usecase``.
        assert_run_from_cache : bool
            When true, also assert that the number of cache hits recorded
            on ``mod.simple_usecase`` matches the number of signatures
            exercised here.
        """
        f = mod.simple_usecase
        ints = [np.uint8(123), np.int16(123), np.uint32(123), np.uint64(123)]
        floats = [np.float32(123), np.float64(123), np.complex64(123 + 456j),
                  np.complex128(123 + 456j)]
        tuples = [(1, 2, 3), (1.2, 3j, 4)]

        inputs = ints + floats + tuples
        # Each input above is expected to produce its own signature.
        nsignatures = len(inputs)

        if utils.IS_PY3:
            strings = ['numba', "眼", "🐍⚡"]
            inputs.extend(strings)
            # All the strings share one signature, so they add a single
            # entry and only when they are actually exercised.
            nsignatures += 1

        for i in inputs:
            # Call through `f` so that the function being exercised is the
            # same object whose cache statistics are checked below.
            self.assertPreciseEqual(f(i), hash(i))

        if assert_run_from_cache:
            self.assertEqual(sum(f.stats.cache_hits.values()), nsignatures)

    # For 2.x
    def runTest(self):
        raise NotImplementedError


def self_test(**kwargs):
    """
    Run ``_TestModule.check_module`` against this very module.

    Keyword arguments are forwarded unchanged to ``check_module``.
    """
    this_module = sys.modules[__name__]
    checker = _TestModule()
    checker.check_module(this_module, **kwargs)
1 change: 1 addition & 0 deletions numba/tests/test_dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -917,6 +917,7 @@ def pyfunc(A, i, j):

class BaseCacheTest(TestCase):
# This class is also used in test_cfunc.py.
# and test_hashing.py

# The source file that will be copied
usecases_file = None
Expand Down
Loading