Skip to content

Commit

Permalink
[728422] split search view into sphinx and es
Browse files Browse the repository at this point in the history
* splits the search view into separate Sphinx and ES views
* removes Sphinx things from the ES view
* removes object_ids code from the ES view replacing it with ComposedList
* adds some TODO items to look at later
  • Loading branch information
willkg committed Mar 7, 2012
1 parent 2f02af7 commit 7f753ff
Show file tree
Hide file tree
Showing 4 changed files with 579 additions and 5 deletions.
74 changes: 73 additions & 1 deletion apps/search/tests/test__utils.py
@@ -1,6 +1,7 @@
from nose.tools import eq_

from search.utils import crc32
from search.utils import crc32, ComposedList
from sumo.tests import TestCase


def test_crc32_ascii():
Expand All @@ -16,3 +17,74 @@ def test_crc32_fr():
def test_crc32_ja():
    """crc32 handles Japanese text; the expected integer is MySQL's CRC32."""
    expected = 696255294
    eq_(expected, crc32(u'\u6709\u52b9'))


class TestComposedList(TestCase):
    """Checks length, indexing and slicing of ComposedList.

    The ComposedList docstring is the reference for expected behavior.
    """

    def test_no_counts(self):
        """A ComposedList with no groups behaves like an empty list."""
        empty = ComposedList()

        # Nothing has been registered, so there is nothing to count.
        eq_(len(empty), 0)

        # Every index is out of range and must raise IndexError.
        self.assertRaises(IndexError, lambda: empty[0])

        # An out-of-range slice is simply empty.
        eq_(empty[0:1], [])

    def test_one(self):
        """A single group of one item."""
        single = ComposedList()
        single.set_count('test', 1)

        eq_(len(single), 1)
        eq_(single[0], ('test', 0))
        eq_(single[0:1], [('test', (0, 1))])

        # A slice entirely past the end yields nothing.
        eq_(single[5:6], [])

    def test_two(self):
        """Two equally sized groups."""
        pair = ComposedList()
        pair.set_count('test1', 5)
        pair.set_count('test2', 5)

        eq_(len(pair), 10)
        eq_(pair[0], ('test1', 0))
        eq_(pair[2], ('test1', 2))

        # Overall index 5 is the first item (index 0) of test2.
        eq_(pair[5], ('test2', 0))

        # Overall index 6 is the second item (index 1) of test2.
        eq_(pair[6], ('test2', 1))

        # Slices that begin and end inside one group.
        eq_(pair[0:3], [('test1', (0, 3))])
        eq_(pair[7:9], [('test2', (2, 4))])

        # A slice that crosses the boundary between the groups.
        eq_(pair[3:9], [('test1', (3, 5)), ('test2', (0, 4))])

        # A slice entirely past the end yields nothing.
        eq_(pair[20:25], [])

    def test_three(self):
        """Three groups of different sizes."""
        triple = ComposedList()
        triple.set_count('test1', 5)
        triple.set_count('test2', 1)
        triple.set_count('test3', 2)

        eq_(len(triple), 8)

        # A slice covering exactly the whole list.
        eq_(triple[0:8],
            [('test1', (0, 5)),
             ('test2', (0, 1)),
             ('test3', (0, 2))])

        # A stop beyond the end still returns everything.
        eq_(triple[0:10],
            [('test1', (0, 5)),
             ('test2', (0, 1)),
             ('test3', (0, 2))])

        # A slice that touches all three groups.
        eq_(triple[4:7],
            [('test1', (4, 5)),
             ('test2', (0, 1)),
             ('test3', (0, 1))])
122 changes: 122 additions & 0 deletions apps/search/utils.py
Expand Up @@ -61,3 +61,125 @@ def locale_or_default(locale):
if locale not in LOCALES:
locale = settings.LANGUAGE_CODE
return locale


class ComposedList(object):
    """Takes counts and pretends they're sublists of a big list

    This helps in the case where you know the lengths of the sublists,
    need to treat them all as a big list, but don't want to actually
    have to generate the lists.

    With ComposedList, you do pagination and other things
    including slice the list and get the bounds of the sublists you
    need allowing you to generate just those tiny bits rather than the
    whole thing.

    Handles "length", "index", and "slicing" as if they were
    operations on the complete list.

    **length**

        Length of the ComposedList is the sum of the counts of the
        sublists.

    **index**

        Returns a tuple (kind, index) for the index if the
        ComposedList were one big list of (kind, index) tuples.
        Negative indexes count from the end, as with a real list.

        Raises IndexError if the index is outside the list.

    **slice**

        Returns a list of (kind, (start, stop)) tuples for the kinds
        that are in the slice bounds. The start and stop are not
        indexes--they're slice start and stop, so it's start up to but
        not including stop. Groups that contribute no items to the
        slice are left out. Open-ended and negative bounds behave as
        they do for a real list. Slice steps are not supported.

        For example::

            >>> cl = ComposedList()
            >>> # group a has 5 items indexed 0 through 4
            ...
            >>> cl.set_count('a', 5)
            >>> # group b has 2 items indexed 0 and 1
            ...
            >>> cl.set_count('b', 2)
            >>> cl[1:7]
            [('a', (1, 5)), ('b', (0, 2))]

        This is the same if this were a real list:

            >>> reallist = [('a', 0), ('a', 1), ('a', 2), ('a', 3),
            ...             ('a', 4), ('b', 0), ('b', 1)]
            >>> reallist[1:7]
            [('a', 1), ('a', 2), ('a', 3), ('a', 4), ('b', 0), ('b', 1)]

    """
    def __init__(self):
        # Ordered (kind, count) pairs; the order defines the layout of
        # the pretend list.
        self.counts = []

    def set_count(self, kind, count):
        """Adds a (kind, count) to the counts

        >>> cl = ComposedList()
        >>> cl.set_count('wiki', 10)

        :arg kind: str. e.g. 'wiki'
        :arg count: int. e.g. 40

        .. Note::

           The order you call set_count() is important. If you have
           three groups of things, you need to call set_count() in the
           order you want those things returned in a slice.

           Each call appends a new group; calling it again with the
           same kind does not replace the earlier count.

        """
        self.counts.append((kind, count))

    def __repr__(self):
        return repr(self.counts)

    def __len__(self):
        """Returns the total length of the composed list"""
        return sum(count for _, count in self.counts)

    def __getitem__(self, key):
        """Returns the 'index' or 'slice' of this composed list

        :arg key: an integer index or a slice without a step

        :returns: (kind, index) for an integer key; a list of
            (kind, (start, stop)) tuples for a slice key

        :raises IndexError: if an integer key is outside the list
        :raises TypeError: if key is neither an integer nor a slice
        :raises ValueError: if a slice has a step other than 1

        """
        if isinstance(key, slice):
            return self._get_slice(key)

        # Use the __index__ protocol rather than isinstance(key, int)
        # so every integer-like type is accepted and everything else
        # fails loudly instead of silently returning None.
        try:
            index = key.__index__()
        except AttributeError:
            raise TypeError(
                'ComposedList indices must be integers or slices, not %s'
                % type(key).__name__)
        return self._get_index(index)

    def _get_slice(self, key):
        """Returns the per-group bounds covered by the slice ``key``"""
        if key.step not in (None, 1):
            # A step cannot be expressed as per-group (start, stop)
            # bounds, so refuse instead of returning wrong bounds.
            raise ValueError('ComposedList does not support slice steps.')

        # slice.indices() resolves None and negative bounds and clamps
        # both ends to the list length, exactly as a real list would.
        start, stop, _ = key.indices(len(self))

        bounds = []
        offset = 0  # position of the current group in the big list
        for kind, count in self.counts:
            if offset >= stop:
                break
            lower = max(start - offset, 0)
            upper = min(stop - offset, count)
            # Skip groups that fall before the start or are empty so
            # callers never get a zero-length range to fetch.
            if lower < upper:
                bounds.append((kind, (lower, upper)))
            offset += count
        return bounds

    def _get_index(self, index):
        """Returns the (kind, index) tuple at position ``index``"""
        total = len(self)
        if index < 0:
            # Negative indexes count back from the end.
            index += total
        if not 0 <= index < total:
            raise IndexError('Index exceeded list length.')

        for kind, count in self.counts:
            if index < count:
                return (kind, index)
            index -= count

        # Only reachable if the counts do not add up (e.g. a negative
        # count was registered); treat it as out of range.
        raise IndexError('Index exceeded list length.')

0 comments on commit 7f753ff

Please sign in to comment.