Skip to content
Permalink
 
 
Cannot retrieve contributors at this time
719 lines (603 sloc) 25.5 KB
"""
Hash implementations for Numba types
"""
import math
import numpy as np
import sys
import ctypes
import warnings
from collections import namedtuple
import llvmlite.binding as ll
import llvmlite.llvmpy.core as lc
from llvmlite import ir
from numba.core.extending import (
overload, overload_method, intrinsic, register_jitable)
from numba.core import errors
from numba.core import types, utils
from numba.core.unsafe.bytes import grab_byte, grab_uint64_t
_py38_or_later = utils.PYVERSION >= (3, 8)
# This is Py_hash_t, which is a Py_ssize_t, which has sizeof(size_t):
# https://github.com/python/cpython/blob/d1dd6be613381b996b9071443ef081de8e5f3aff/Include/pyport.h#L91-L96 # noqa: E501
_hash_width = sys.hash_info.width
_Py_hash_t = getattr(types, 'int%s' % _hash_width)
_Py_uhash_t = getattr(types, 'uint%s' % _hash_width)
# Constants from CPython source, obtained by various means:
# https://github.com/python/cpython/blob/d1dd6be613381b996b9071443ef081de8e5f3aff/Include/pyhash.h # noqa: E501
_PyHASH_INF = sys.hash_info.inf
_PyHASH_NAN = sys.hash_info.nan
_PyHASH_MODULUS = _Py_uhash_t(sys.hash_info.modulus)
_PyHASH_BITS = 31 if types.intp.bitwidth == 32 else 61 # mersenne primes
_PyHASH_MULTIPLIER = 0xf4243 # 1000003UL
_PyHASH_IMAG = _PyHASH_MULTIPLIER
_PyLong_SHIFT = sys.int_info.bits_per_digit
_Py_HASH_CUTOFF = sys.hash_info.cutoff
_Py_hashfunc_name = sys.hash_info.algorithm
# hash(obj) is implemented by calling obj.__hash__()
@overload(hash)
def hash_overload(obj):
def impl(obj):
return obj.__hash__()
return impl
@register_jitable
def process_return(val):
asint = _Py_hash_t(val)
if (asint == int(-1)):
asint = int(-2)
return asint
# This is a translation of CPython's _Py_HashDouble:
# https://github.com/python/cpython/blob/d1dd6be613381b996b9071443ef081de8e5f3aff/Python/pyhash.c#L34-L129 # noqa: E501
@register_jitable(locals={'x': _Py_uhash_t,
'y': _Py_uhash_t,
'm': types.double,
'e': types.intc,
'sign': types.intc,
'_PyHASH_MODULUS': _Py_uhash_t,
'_PyHASH_BITS': types.intc})
def _Py_HashDouble(v):
if not np.isfinite(v):
if (np.isinf(v)):
if (v > 0):
return _PyHASH_INF
else:
return -_PyHASH_INF
else:
return _PyHASH_NAN
m, e = math.frexp(v)
sign = 1
if (m < 0):
sign = -1
m = -m
# process 28 bits at a time; this should work well both for binary
# and hexadecimal floating point.
x = 0
while (m):
x = ((x << 28) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - 28)
m *= 268435456.0 # /* 2**28 */
e -= 28
y = int(m) # /* pull out integer part */
m -= y
x += y
if x >= _PyHASH_MODULUS:
x -= _PyHASH_MODULUS
# /* adjust for the exponent; first reduce it modulo _PyHASH_BITS */
if e >= 0:
e = e % _PyHASH_BITS
else:
e = _PyHASH_BITS - 1 - ((-1 - e) % _PyHASH_BITS)
x = ((x << e) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - e)
x = x * sign
return process_return(x)
@intrinsic
def _fpext(tyctx, val):
def impl(cgctx, builder, signature, args):
val = args[0]
return builder.fpext(val, lc.Type.double())
sig = types.float64(types.float32)
return sig, impl
# This is a translation of CPython's long_hash, but restricted to the numerical
# domain reachable by int64/uint64 (i.e. no BigInt like support):
# https://github.com/python/cpython/blob/d1dd6be613381b996b9071443ef081de8e5f3aff/Objects/longobject.c#L2934-L2989 # noqa: E501
# obdigit is a uint32_t which is typedef'd to digit
# int32_t is typedef'd to sdigit
@register_jitable(locals={'x': _Py_uhash_t,
'p1': _Py_uhash_t,
'p2': _Py_uhash_t,
'p3': _Py_uhash_t,
'p4': _Py_uhash_t,
'_PyHASH_MODULUS': _Py_uhash_t,
'_PyHASH_BITS': types.int32,
'_PyLong_SHIFT': types.int32,})
def _long_impl(val):
# This function assumes val came from a long int repr with val being a
# uint64_t this means having to split the input into PyLong_SHIFT size
# chunks in an unsigned hash wide type, max numba can handle is a 64bit int
# mask to select low _PyLong_SHIFT bits
_tmp_shift = 32 - _PyLong_SHIFT
mask_shift = (~types.uint32(0x0)) >> _tmp_shift
# a 64bit wide max means Numba only needs 3 x 30 bit values max,
# or 5 x 15 bit values max on 32bit platforms
i = (64 // _PyLong_SHIFT) + 1
# alg as per hash_long
x = 0
p3 = (_PyHASH_BITS - _PyLong_SHIFT)
for idx in range(i - 1, -1, -1):
p1 = x << _PyLong_SHIFT
p2 = p1 & _PyHASH_MODULUS
p4 = x >> p3
x = p2 | p4
# the shift and mask splits out the `ob_digit` parts of a Long repr
x += types.uint32((val >> idx * _PyLong_SHIFT) & mask_shift)
if x >= _PyHASH_MODULUS:
x -= _PyHASH_MODULUS
return _Py_hash_t(x)
# This has no CPython equivalent, CPython uses long_hash.
@overload_method(types.Integer, '__hash__')
@overload_method(types.Boolean, '__hash__')
def int_hash(val):
_HASH_I64_MIN = -2 if sys.maxsize <= 2 ** 32 else -4
_SIGNED_MIN = types.int64(-0x8000000000000000)
# Find a suitable type to hold a "big" value, i.e. iinfo(ty).min/max
# this is to ensure e.g. int32.min is handled ok as it's abs() is its value
_BIG = types.int64 if getattr(val, 'signed', False) else types.uint64
# this is a bit involved due to the CPython repr of ints
def impl(val):
# If the magnitude is under PyHASH_MODULUS, just return the
# value val as the hash, couple of special cases if val == val:
# 1. it's 0, in which case return 0
# 2. it's signed int minimum value, return the value CPython computes
# but Numba cannot as there's no type wide enough to hold the shifts.
#
# If the magnitude is greater than PyHASH_MODULUS then... if the value
# is negative then negate it switch the sign on the hash once computed
# and use the standard wide unsigned hash implementation
val = _BIG(val)
mag = abs(val)
if mag < _PyHASH_MODULUS:
if val == 0:
ret = 0
elif val == _SIGNED_MIN: # e.g. int64 min, -0x8000000000000000
ret = _Py_hash_t(_HASH_I64_MIN)
else:
ret = _Py_hash_t(val)
else:
needs_negate = False
if val < 0:
val = -val
needs_negate = True
ret = _long_impl(val)
if needs_negate:
ret = -ret
return process_return(ret)
return impl
# This is a translation of CPython's float_hash:
# https://github.com/python/cpython/blob/d1dd6be613381b996b9071443ef081de8e5f3aff/Objects/floatobject.c#L528-L532 # noqa: E501
@overload_method(types.Float, '__hash__')
def float_hash(val):
if val.bitwidth == 64:
def impl(val):
hashed = _Py_HashDouble(val)
return hashed
else:
def impl(val):
# widen the 32bit float to 64bit
fpextended = np.float64(_fpext(val))
hashed = _Py_HashDouble(fpextended)
return hashed
return impl
# This is a translation of CPython's complex_hash:
# https://github.com/python/cpython/blob/d1dd6be613381b996b9071443ef081de8e5f3aff/Objects/complexobject.c#L408-L428 # noqa: E501
@overload_method(types.Complex, '__hash__')
def complex_hash(val):
def impl(val):
hashreal = hash(val.real)
hashimag = hash(val.imag)
# Note: if the imaginary part is 0, hashimag is 0 now,
# so the following returns hashreal unchanged. This is
# important because numbers of different types that
# compare equal must have the same hash value, so that
# hash(x + 0*j) must equal hash(x).
combined = hashreal + _PyHASH_IMAG * hashimag
return process_return(combined)
return impl
if _py38_or_later:
# Python 3.8 strengthened its hash alg for tuples.
# This is a translation of CPython's tuplehash for Python >=3.8
# https://github.com/python/cpython/blob/b738237d6792acba85b1f6e6c8993a812c7fd815/Objects/tupleobject.c#L338-L391 # noqa: E501
# These consts are needed for this alg variant, they are from:
# https://github.com/python/cpython/blob/b738237d6792acba85b1f6e6c8993a812c7fd815/Objects/tupleobject.c#L353-L363 # noqa: E501
if _Py_uhash_t.bitwidth // 8 > 4:
_PyHASH_XXPRIME_1 = _Py_uhash_t(11400714785074694791)
_PyHASH_XXPRIME_2 = _Py_uhash_t(14029467366897019727)
_PyHASH_XXPRIME_5 = _Py_uhash_t(2870177450012600261)
@register_jitable(locals={'x': types.uint64})
def _PyHASH_XXROTATE(x):
# Rotate left 31 bits
return ((x << types.uint64(31)) | (x >> types.uint64(33)))
else:
_PyHASH_XXPRIME_1 = _Py_uhash_t(2654435761)
_PyHASH_XXPRIME_2 = _Py_uhash_t(2246822519)
_PyHASH_XXPRIME_5 = _Py_uhash_t(374761393)
@register_jitable(locals={'x': types.uint64})
def _PyHASH_XXROTATE(x):
# Rotate left 13 bits
return ((x << types.uint64(13)) | (x >> types.uint64(19)))
# Python 3.7+ has literal_unroll, this means any homogeneous and
# heterogeneous tuples can use the same alg and just be unrolled.
from numba import literal_unroll
@register_jitable(locals={'acc': _Py_uhash_t, 'lane': _Py_uhash_t,
'_PyHASH_XXPRIME_5': _Py_uhash_t,
'_PyHASH_XXPRIME_1': _Py_uhash_t,
'tl': _Py_uhash_t})
def _tuple_hash(tup):
tl = len(tup)
acc = _PyHASH_XXPRIME_5
for x in literal_unroll(tup):
lane = hash(x)
if lane == _Py_uhash_t(-1):
return -1
acc += lane * _PyHASH_XXPRIME_2
acc = _PyHASH_XXROTATE(acc)
acc *= _PyHASH_XXPRIME_1
acc += tl ^ (_PyHASH_XXPRIME_5 ^ _Py_uhash_t(3527539))
if acc == _Py_uhash_t(-1):
return process_return(1546275796)
return process_return(acc)
else:
# This is a translation of CPython's tuplehash:
# https://github.com/python/cpython/blob/d1dd6be613381b996b9071443ef081de8e5f3aff/Objects/tupleobject.c#L347-L369 # noqa: E501
@register_jitable(locals={'x': _Py_uhash_t,
'y': _Py_hash_t,
'mult': _Py_uhash_t,
'l': _Py_hash_t, })
def _tuple_hash(tup):
tl = len(tup)
mult = _PyHASH_MULTIPLIER
x = _Py_uhash_t(0x345678)
# in C this is while(--l >= 0), i is indexing tup instead of *tup++
for i, l in enumerate(range(tl - 1, -1, -1)):
y = hash(tup[i])
xxory = (x ^ y)
x = xxory * mult
mult += _Py_hash_t((_Py_uhash_t(82520) + l + l))
x += _Py_uhash_t(97531)
return process_return(x)
# This is an obfuscated translation of CPython's tuplehash:
# https://github.com/python/cpython/blob/d1dd6be613381b996b9071443ef081de8e5f3aff/Objects/tupleobject.c#L347-L369 # noqa: E501
# The obfuscation occurs for a heterogeneous tuple as each tuple member needs
# a potentially different hash() function calling for it. This cannot be done at
# runtime as there's no way to iterate a heterogeneous tuple, so this is
# achieved by essentially unrolling the loop over the members and inserting a
# per-type hash function call for each member, and then simply computing the
# hash value in an inlined/rolling fashion.
@intrinsic
def _tuple_hash_resolve(tyctx, val):
def impl(cgctx, builder, signature, args):
typingctx = cgctx.typing_context
fnty = typingctx.resolve_value_type(hash)
tupty, = signature.args
tup, = args
lty = cgctx.get_value_type(signature.return_type)
x = ir.Constant(lty, 0x345678)
mult = ir.Constant(lty, _PyHASH_MULTIPLIER)
shift = ir.Constant(lty, 82520)
tl = len(tupty)
for i, packed in enumerate(zip(tupty.types, range(tl - 1, -1, -1))):
ty, l = packed
sig = fnty.get_call_type(tyctx, (ty,), {})
impl = cgctx.get_function(fnty, sig)
tuple_val = builder.extract_value(tup, i)
y = impl(builder, (tuple_val,))
xxory = builder.xor(x, y)
x = builder.mul(xxory, mult)
lconst = ir.Constant(lty, l)
mult = builder.add(mult, shift)
mult = builder.add(mult, lconst)
mult = builder.add(mult, lconst)
x = builder.add(x, ir.Constant(lty, 97531))
return x
sig = _Py_hash_t(val)
return sig, impl
@overload_method(types.BaseTuple, '__hash__')
def tuple_hash(val):
if _py38_or_later or isinstance(val, types.Sequence):
def impl(val):
return _tuple_hash(val)
return impl
else:
def impl(val):
hashed = _Py_hash_t(_tuple_hash_resolve(val))
return process_return(hashed)
return impl
# ------------------------------------------------------------------------------
# String/bytes hashing needs hashseed info, this is from:
# https://stackoverflow.com/a/41088757
# with thanks to Martijn Pieters
#
# Developer note:
# CPython makes use of an internal "hashsecret" which is essentially a struct
# containing some state that is set on CPython initialization and contains magic
# numbers used particularly in unicode/string hashing. This code binds to the
# Python runtime libraries in use by the current process and reads the
# "hashsecret" state so that it can be used by Numba. As this is done at runtime
# the behaviour and influence of the PYTHONHASHSEED environment variable is
# accommodated.
from ctypes import ( # noqa
c_size_t,
c_ubyte,
c_uint64,
pythonapi,
Structure,
Union,
) # noqa
class FNV(Structure):
_fields_ = [
('prefix', c_size_t),
('suffix', c_size_t)
]
class SIPHASH(Structure):
_fields_ = [
('k0', c_uint64),
('k1', c_uint64),
]
class DJBX33A(Structure):
_fields_ = [
('padding', c_ubyte * 16),
('suffix', c_size_t),
]
class EXPAT(Structure):
_fields_ = [
('padding', c_ubyte * 16),
('hashsalt', c_size_t),
]
class _Py_HashSecret_t(Union):
_fields_ = [
# ensure 24 bytes
('uc', c_ubyte * 24),
# two Py_hash_t for FNV
('fnv', FNV),
# two uint64 for SipHash24
('siphash', SIPHASH),
# a different (!) Py_hash_t for small string optimization
('djbx33a', DJBX33A),
('expat', EXPAT),
]
_hashsecret_entry = namedtuple('_hashsecret_entry', ['symbol', 'value'])
# Only a few members are needed at present
def _build_hashsecret():
"""Read hash secret from the Python process
Returns
-------
info : dict
- keys are "djbx33a_suffix", "siphash_k0", siphash_k1".
- values are the namedtuple[symbol:str, value:int]
"""
# Read hashsecret and inject it into the LLVM symbol map under the
# prefix `_numba_hashsecret_`.
pyhashsecret = _Py_HashSecret_t.in_dll(pythonapi, '_Py_HashSecret')
info = {}
def inject(name, val):
symbol_name = "_numba_hashsecret_{}".format(name)
val = ctypes.c_uint64(val)
addr = ctypes.addressof(val)
ll.add_symbol(symbol_name, addr)
info[name] = _hashsecret_entry(symbol=symbol_name, value=val)
inject('djbx33a_suffix', pyhashsecret.djbx33a.suffix)
inject('siphash_k0', pyhashsecret.siphash.k0)
inject('siphash_k1', pyhashsecret.siphash.k1)
return info
_hashsecret = _build_hashsecret()
# ------------------------------------------------------------------------------
if _Py_hashfunc_name in ('siphash24', 'fnv'):
# Check for use of the FNV hashing alg, warn users that it's not implemented
# and functionality relying of properties derived from hashing will be fine
# but hash values themselves are likely to be different.
if _Py_hashfunc_name == 'fnv':
msg = ("FNV hashing is not implemented in Numba. See PEP 456 "
"https://www.python.org/dev/peps/pep-0456/ "
"for rationale over not using FNV. Numba will continue to work, "
"but hashes for built in types will be computed using "
"siphash24. This will permit e.g. dictionaries to continue to "
"behave as expected, however anything relying on the value of "
"the hash opposed to hash as a derived property is likely to "
"not work as expected.")
warnings.warn(msg)
# This is a translation of CPython's siphash24 function:
# https://github.com/python/cpython/blob/d1dd6be613381b996b9071443ef081de8e5f3aff/Python/pyhash.c#L287-L413 # noqa: E501
# /* *********************************************************************
# <MIT License>
# Copyright (c) 2013 Marek Majkowski <marek@popcount.org>
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
# </MIT License>
# Original location:
# https://github.com/majek/csiphash/
# Solution inspired by code from:
# Samuel Neves (supercop/crypto_auth/siphash24/little)
#djb (supercop/crypto_auth/siphash24/little2)
# Jean-Philippe Aumasson (https://131002.net/siphash/siphash24.c)
# Modified for Python by Christian Heimes:
# - C89 / MSVC compatibility
# - _rotl64() on Windows
# - letoh64() fallback
# */
@register_jitable(locals={'x': types.uint64,
'b': types.uint64, })
def _ROTATE(x, b):
return types.uint64(((x) << (b)) | ((x) >> (types.uint64(64) - (b))))
@register_jitable(locals={'a': types.uint64,
'b': types.uint64,
'c': types.uint64,
'd': types.uint64,
's': types.uint64,
't': types.uint64, })
def _HALF_ROUND(a, b, c, d, s, t):
a += b
c += d
b = _ROTATE(b, s) ^ a
d = _ROTATE(d, t) ^ c
a = _ROTATE(a, 32)
return a, b, c, d
@register_jitable(locals={'v0': types.uint64,
'v1': types.uint64,
'v2': types.uint64,
'v3': types.uint64, })
def _DOUBLE_ROUND(v0, v1, v2, v3):
v0, v1, v2, v3 = _HALF_ROUND(v0, v1, v2, v3, 13, 16)
v2, v1, v0, v3 = _HALF_ROUND(v2, v1, v0, v3, 17, 21)
v0, v1, v2, v3 = _HALF_ROUND(v0, v1, v2, v3, 13, 16)
v2, v1, v0, v3 = _HALF_ROUND(v2, v1, v0, v3, 17, 21)
return v0, v1, v2, v3
@register_jitable(locals={'v0': types.uint64,
'v1': types.uint64,
'v2': types.uint64,
'v3': types.uint64,
'b': types.uint64,
'mi': types.uint64,
'tmp': types.Array(types.uint64, 1, 'C'),
't': types.uint64,
'mask': types.uint64,
'jmp': types.uint64,
'ohexefef': types.uint64})
def _siphash24(k0, k1, src, src_sz):
b = types.uint64(src_sz) << 56
v0 = k0 ^ types.uint64(0x736f6d6570736575)
v1 = k1 ^ types.uint64(0x646f72616e646f6d)
v2 = k0 ^ types.uint64(0x6c7967656e657261)
v3 = k1 ^ types.uint64(0x7465646279746573)
idx = 0
while (src_sz >= 8):
mi = grab_uint64_t(src, idx)
idx += 1
src_sz -= 8
v3 ^= mi
v0, v1, v2, v3 = _DOUBLE_ROUND(v0, v1, v2, v3)
v0 ^= mi
# this is the switch fallthrough:
# https://github.com/python/cpython/blob/d1dd6be613381b996b9071443ef081de8e5f3aff/Python/pyhash.c#L390-L400 # noqa: E501
t = types.uint64(0x0)
boffset = idx * 8
ohexefef = types.uint64(0xff)
if src_sz >= 7:
jmp = (6 * 8)
mask = ~types.uint64(ohexefef << jmp)
t = (t & mask) | (types.uint64(grab_byte(src, boffset + 6))
<< jmp)
if src_sz >= 6:
jmp = (5 * 8)
mask = ~types.uint64(ohexefef << jmp)
t = (t & mask) | (types.uint64(grab_byte(src, boffset + 5))
<< jmp)
if src_sz >= 5:
jmp = (4 * 8)
mask = ~types.uint64(ohexefef << jmp)
t = (t & mask) | (types.uint64(grab_byte(src, boffset + 4))
<< jmp)
if src_sz >= 4:
t &= types.uint64(0xffffffff00000000)
for i in range(4):
jmp = i * 8
mask = ~types.uint64(ohexefef << jmp)
t = (t & mask) | (types.uint64(grab_byte(src, boffset + i))
<< jmp)
if src_sz >= 3:
jmp = (2 * 8)
mask = ~types.uint64(ohexefef << jmp)
t = (t & mask) | (types.uint64(grab_byte(src, boffset + 2))
<< jmp)
if src_sz >= 2:
jmp = (1 * 8)
mask = ~types.uint64(ohexefef << jmp)
t = (t & mask) | (types.uint64(grab_byte(src, boffset + 1))
<< jmp)
if src_sz >= 1:
mask = ~(ohexefef)
t = (t & mask) | (types.uint64(grab_byte(src, boffset + 0)))
b |= t
v3 ^= b
v0, v1, v2, v3 = _DOUBLE_ROUND(v0, v1, v2, v3)
v0 ^= b
v2 ^= ohexefef
v0, v1, v2, v3 = _DOUBLE_ROUND(v0, v1, v2, v3)
v0, v1, v2, v3 = _DOUBLE_ROUND(v0, v1, v2, v3)
t = (v0 ^ v1) ^ (v2 ^ v3)
return t
else:
msg = "Unsupported hashing algorithm in use %s" % _Py_hashfunc_name
raise ValueError(msg)
@intrinsic
def _inject_hashsecret_read(tyctx, name):
"""Emit code to load the hashsecret.
"""
if not isinstance(name, types.StringLiteral):
raise errors.TypingError("requires literal string")
sym = _hashsecret[name.literal_value].symbol
resty = types.uint64
sig = resty(name)
def impl(cgctx, builder, sig, args):
mod = builder.module
try:
# Search for existing global
gv = mod.get_global(sym)
except KeyError:
# Inject the symbol if not already exist.
gv = ir.GlobalVariable(mod, ir.IntType(64), name=sym)
v = builder.load(gv)
return v
return sig, impl
def _load_hashsecret(name):
return _hashsecret[name].value
@overload(_load_hashsecret)
def _impl_load_hashsecret(name):
def imp(name):
return _inject_hashsecret_read(name)
return imp
# This is a translation of CPythons's _Py_HashBytes:
# https://github.com/python/cpython/blob/d1dd6be613381b996b9071443ef081de8e5f3aff/Python/pyhash.c#L145-L191 # noqa: E501
@register_jitable(locals={'_hash': _Py_uhash_t})
def _Py_HashBytes(val, _len):
if (_len == 0):
return process_return(0)
if (_len < _Py_HASH_CUTOFF):
# TODO: this branch needs testing, needs a CPython setup for it!
# /* Optimize hashing of very small strings with inline DJBX33A. */
_hash = _Py_uhash_t(5381) # /* DJBX33A starts with 5381 */
for idx in range(_len):
_hash = ((_hash << 5) + _hash) + np.uint8(grab_byte(val, idx))
_hash ^= _len
_hash ^= _load_hashsecret('djbx33a_suffix')
else:
tmp = _siphash24(types.uint64(_load_hashsecret('siphash_k0')),
types.uint64(_load_hashsecret('siphash_k1')),
val, _len)
_hash = process_return(tmp)
return process_return(_hash)
# This is an approximate translation of CPython's unicode_hash:
# https://github.com/python/cpython/blob/d1dd6be613381b996b9071443ef081de8e5f3aff/Objects/unicodeobject.c#L11635-L11663 # noqa: E501
@overload_method(types.UnicodeType, '__hash__')
def unicode_hash(val):
from numba.cpython.unicode import _kind_to_byte_width
def impl(val):
kindwidth = _kind_to_byte_width(val._kind)
_len = len(val)
# use the cache if possible
current_hash = val._hash
if current_hash != -1:
return current_hash
else:
# cannot write hash value to cache in the unicode struct due to
# pass by value on the struct making the struct member immutable
return _Py_HashBytes(val._data, kindwidth * _len)
return impl
You can’t perform that action at this time.