Skip to content

Commit

Permalink
Improve hash ring value conversion
Browse files Browse the repository at this point in the history
The current method for computing the point on the hash ring converts
the MD5 digest into a 4-byte integer (and assumes big-endian). The
digest is actually a 16-byte value, so we are eliminating much of
the value, thus increasing the possibility of hash collisions.

This patch does two things:

  * Removes the endianness assumption
  * Uses the full value of the digest to create a long integer,
    which has unlimited precision in Python.

Change-Id: I554ec46f506cb8cdfd5878c11a905a3acfe92db0
  • Loading branch information
David Shrewsbury committed Oct 20, 2014
1 parent e6ac648 commit e2c1ebc
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 4 deletions.
14 changes: 10 additions & 4 deletions ironic/common/hash_ring.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

import bisect
import hashlib
import struct
import threading

from oslo.config import cfg
Expand Down Expand Up @@ -91,16 +90,23 @@ def __init__(self, hosts, replicas=None):
key_hash = hashlib.md5(key)
for p in range(2 ** CONF.hash_partition_exponent):
key_hash.update(key)
hashed_key = struct.unpack_from('>I', key_hash.digest())[0]
hashed_key = self._hash2int(key_hash)
self._host_hashes[hashed_key] = host
# Gather the (possibly colliding) resulting hashes into a bisectable
# list.
self._partitions = sorted(self._host_hashes.keys())

def _hash2int(self, key_hash):
"""Convert the given hash's digest to a numerical value for the ring.
:returns: An integer equivalent value of the digest.
"""
return int(key_hash.hexdigest(), 16)

def _get_partition(self, data):
try:
hashed_key = struct.unpack_from(
'>I', hashlib.md5(data).digest())[0]
key_hash = hashlib.md5(data)
hashed_key = self._hash2int(key_hash)
position = bisect.bisect(self._partitions, hashed_key)
return position if position < len(self._partitions) else 0
except TypeError:
Expand Down
17 changes: 17 additions & 0 deletions ironic/tests/test_hash_ring.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
# License for the specific language governing permissions and limitations
# under the License.

import hashlib

import mock
from oslo.config import cfg
from testtools import matchers

Expand All @@ -33,6 +36,20 @@ class HashRingTestCase(base.TestCase):
# fake -> foo, bar, baz
# fake-again -> bar, baz, foo

@mock.patch.object(hashlib, 'md5')
def test__hash2int_returns_int(self, mock_md5):
CONF.set_override('hash_partition_exponent', 0)
r1 = 32 * 'a'
r2 = 32 * 'b'
mock_md5.return_value.hexdigest.side_effect = [r1, r2]

hosts = ['foo', 'bar']
replicas = 1
ring = hash_ring.HashRing(hosts, replicas=replicas)

self.assertIn(int(r1, 16), ring._host_hashes)
self.assertIn(int(r2, 16), ring._host_hashes)

def test_create_ring(self):
hosts = ['foo', 'bar']
replicas = 2
Expand Down

0 comments on commit e2c1ebc

Please sign in to comment.