Skip to content
This repository has been archived by the owner on Aug 26, 2022. It is now read-only.

Commit

Permalink
[bug 630718] Hash string filter values down to ints. Recreate your ta…
Browse files Browse the repository at this point in the history
…bles.

Not factoring up search app's implementation of crc32() since the notifications app is going to break off anyway.
  • Loading branch information
erikrose committed Feb 2, 2011
1 parent dcc38c3 commit 82e7ca9
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 8 deletions.
10 changes: 6 additions & 4 deletions apps/notifications/events.py
Expand Up @@ -9,7 +9,7 @@
from celery.decorators import task

from notifications.models import Watch, WatchFilter, EmailUser, multi_raw
from notifications.utils import merge
from notifications.utils import merge, hash_to_unsigned


def _unique_by_email(users_and_watches):
Expand Down Expand Up @@ -161,7 +161,7 @@ def filter_conditions():
join_params.append(k)
wheres.append('(f{n}.value=%s '
'OR f{n}.value IS NULL)'.format(n=n))
where_params.append(v)
where_params.append(hash_to_unsigned(v))
return joins, wheres, join_params + where_params

# Apply watchfilter constraints:
Expand Down Expand Up @@ -246,7 +246,8 @@ def _watches_belonging_to_user(cls, user_or_email, object_id=None,

# Apply 1-to-many filters:
for k, v in filters.iteritems():
watches = watches.filter(filters__name=k, filters__value=v)
watches = watches.filter(filters__name=k,
filters__value=hash_to_unsigned(v))

return watches

Expand Down Expand Up @@ -312,7 +313,8 @@ def notify(cls, user_or_email_, object_id=None, **filters):
**create_kwargs)
for k, v in filters.iteritems():
# TODO: Auto-hash v into an int if it isn't one?
WatchFilter.objects.create(watch=watch, name=k, value=v)
WatchFilter.objects.create(watch=watch, name=k,
value=hash_to_unsigned(v))
return watch

@classmethod
Expand Down
7 changes: 4 additions & 3 deletions apps/notifications/models.py
Expand Up @@ -112,9 +112,10 @@ class WatchFilter(ModelBase):
watch = models.ForeignKey(Watch, related_name='filters')
name = models.CharField(max_length=20)

# Either ints or hashes of enumerated strings. All we can't represent
# easily with this schema is arbitrary (open-vocab) strings.
value = models.IntegerField()
# Either an int or the hash of an item in a reasonably small set, which is
# indicated by the name field. See comments by hash_to_unsigned() for more
# on what is reasonably small.
value = models.PositiveIntegerField()

class Meta(object):
# There's no particular reason we couldn't allow multiple values for
Expand Down
14 changes: 14 additions & 0 deletions apps/notifications/tests/test_events.py
@@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
import mock
from nose.tools import eq_

Expand Down Expand Up @@ -261,6 +262,19 @@ def test_exact_matching(self):
assert not FilteredContentTypeEvent.is_notifying('hi@there.com',
color=3)

def test_hashing(self):
"""Strings should be hashed to ints, but ints should be left alone.
Unicode strings should also work.
"""
FilteredEvent.notify('red@x.com', color='red', flavor=6)
FilteredEvent.notify('blue@x.com', color=u'blüe', flavor=7)
assert FilteredEvent.is_notifying('red@x.com', color='red', flavor=6)
assert FilteredEvent.is_notifying('blue@x.com', color=u'blüe', flavor=7)
assert not FilteredEvent.is_notifying('blue@x.com', color='red',
flavor=7)


class CascadingDeleteTests(ModelsTestCase):
"""Cascading deletes on object_id + content_type."""
Expand Down
21 changes: 21 additions & 0 deletions apps/notifications/tests/test_models.py
@@ -1,3 +1,24 @@
from nose.tools import eq_

from notifications.models import WatchFilter
from notifications.tests import watch_filter
from sumo.tests import TestCase


# TODO: write a test to ensure that event types don't collide
# case-insensitive-ly
# E.g. No duplicates in this list: [et.lower() for et in EVENT_TYPES]


class WatchFilterTests(TestCase):
"""Tests for WatchFilter"""

def test_value_range(self):
"""Assert 0 and 2**32-1 both fit in the value field.
That's the range of our hash function.
"""
MAX_INT = 2 ** 32 - 1
watch_filter(name='maxint', value=MAX_INT).save()
eq_(MAX_INT, WatchFilter.objects.get(name='maxint').value)
28 changes: 28 additions & 0 deletions apps/notifications/utils.py
@@ -1,3 +1,6 @@
from zlib import crc32


class peekable(object):
"""Wrapper for an iterator to allow 1-item lookahead"""
# Lowercase to blend in with itertools. The fact that it's a class is an
Expand Down Expand Up @@ -53,3 +56,28 @@ def merge(*iterables, **kwargs):
_, p = min_or_max((key(p.peek()), p) for p in peekables)
yield p.next()
peekables = [p for p in peekables if p]


def hash_to_unsigned(data):
"""If data is a string, return an unsigned 4-byte int hash of it. If data
is already an int that fits those parameters, return it verbatim.
If data is an int outside that range, behavior is undefined at the moment.
We rely on the PositiveIntegerField on WatchFilter to scream if the int is
too long for the field.
"""
if isinstance(data, basestring):
# Return a CRC32 value identical across Python versions and platforms
# by stripping the sign bit as on
# http://docs.python.org/library/zlib.html. Though CRC32 is not a good
# general-purpose hash function, it has no collisions on a dictionary
# of 38,470 English words, which should be fine for the small sets that
# watch filters are designed to enumerate. As a bonus, it is fast and
# available as a built-in function in some DBs. If your set of filter
# values is very large or has different CRC32 distribution properties
# than English words, you might want to do your own hashing in your
# Event subclass and pass ints when specifying filter values.
return crc32(data.encode('utf-8')) & 0xffffffff
else:
return int(data)
2 changes: 1 addition & 1 deletion migrations/78-new-notifications.sql
Expand Up @@ -21,7 +21,7 @@ CREATE TABLE `notifications_watchfilter` (
`id` integer NOT NULL AUTO_INCREMENT,
`watch_id` integer NOT NULL,
`name` binary(20) NOT NULL,
`value` integer NOT NULL,
`value` integer UNSIGNED NOT NULL,
PRIMARY KEY (`id`),
UNIQUE (`name`, `watch_id`),
KEY `notifications_watchfilter_6e1bd094` (`watch_id`),
Expand Down

0 comments on commit 82e7ca9

Please sign in to comment.