Permalink
Browse files

DBShardManagerTests previously provided test coverage for basic query execution logic. Move this to query_test.py, leaving behind just smoke tests on the ShardManager.
  • Loading branch information...
1 parent 73c41ae commit d920de3f1ec77f3b5ed148f381026d43567502b3 @natduca committed Mar 19, 2012
Showing with 202 additions and 195 deletions.
  1. +8 −0 src/db_index_shard_test.py
  2. +7 −2 src/db_indexer.py
  3. +2 −5 src/db_shard_manager.py
  4. +29 −79 src/db_shard_manager_test.py
  5. +73 −67 src/query.py
  6. +83 −42 src/query_test.py
@@ -15,6 +15,14 @@
import unittest
import re
def _assertSetEquals(self, ref, src):
    """Assert that ref and src contain the same elements, ignoring order
    and duplicates.

    self: the test case (or any object providing assertEqual(a, b, msg)).
    ref:  iterable of expected items.
    src:  iterable of actual items.

    On mismatch the failure message lists the items found on only one side,
    so a failing comparison of two large sets is still readable.
    """
    ref_set = set(ref)
    src_set = set(src)
    if ref_set == src_set:
        return

    ref_only = ref_set.difference(src_set)
    src_only = src_set.difference(ref_set)
    # Sort by repr so the message is deterministic even for mixed item types.
    msg = "sets differ; only in ref: %r; only in src: %r" % (
        sorted(ref_only, key=repr), sorted(src_only, key=repr))
    # assertEqual rather than the deprecated assertEquals alias.
    self.assertEqual(ref_set, src_set, msg)
+
class DBIndexShardTest(unittest.TestCase):
def test_filters(self):
m = db_index_shard.DBIndexShard({})
View
@@ -17,8 +17,13 @@
import json
class MockIndexer(object):
    """Minimal indexer stand-in that exposes only files_by_basename.

    files_by_basename maps a file's basename to the list of full paths
    that share that basename.
    """

    def __init__(self, files=None, files_by_basename=None):
        """Build the basename mapping.

        files:             optional iterable of file paths; each one is
                           appended under its os.path.basename.
        files_by_basename: optional pre-built mapping of basename -> list
                           of paths used as the starting content.

        The defaults are None instead of [] / {}: a mutable default is
        created once and shared by every call, so indexers built without an
        explicit mapping used to accumulate each other's files. The supplied
        mapping (and its lists) is copied so the caller's data is never
        modified.
        """
        self.files_by_basename = {}
        if files_by_basename:
            for basename, paths in files_by_basename.items():
                self.files_by_basename[basename] = list(paths)

        for f in files or ():
            bn = os.path.basename(f)
            self.files_by_basename.setdefault(bn, []).append(f)
class DBIndexer(object):
def __init__(self, dirs, dir_cache):
View
@@ -38,7 +38,7 @@ class DBShardManager(object):
The DBShardManager takes a complete list of basenames in the database and manages the sharding
of those basenames using the multiprocessing module.
"""
- def __init__(self, indexer, threaded = True):
+ def __init__(self, indexer):
self.files = []
self.files_by_lower_basename = dict()
for basename,files_with_basename in indexer.files_by_basename.items():
@@ -49,10 +49,7 @@ def __init__(self, indexer, threaded = True):
self.files_by_lower_basename[lower_basename] = files_with_basename
self.files.extend(files_with_basename)
- if threaded:
- N = min(multiprocessing.cpu_count(), 4) # test for scaling beyond 4
- else:
- N = 1
+ N = min(multiprocessing.cpu_count(), 4) # test for scaling beyond 4
chunks = self._make_chunks(list(indexer.files_by_basename.items()), N)
@@ -20,86 +20,35 @@
from query import Query
from query_cache import QueryCache
-
-FILES_BY_BASENAME = None
-
-class DBShardManagerWithSearchMethod(db_shard_manager.DBShardManager):
- # TODO: remove this method once the db_shard_manager_tests stop testing using queries
- def search(self, *args, **kwargs):
- import query_cache
- if not hasattr(self, 'query_cache'):
- self.query_cache = query_cache.QueryCache()
- query = Query.from_kargs(args, kwargs)
- return query.execute(self, self.query_cache)
-
-class DBShardManagerTestBase(object):
- def setUp(self):
- mock_indexer = db_indexer.MockIndexer('test_data/cr_files_by_basename_five_percent.json')
- self.index = DBShardManagerWithSearchMethod(mock_indexer,threaded=self.threaded)
-
- def tearDown(self):
- self.index.close()
-
- def test_case_sensitive_query(self):
- self.assertTrue('~/chrome/src/third_party/tlslite/tlslite/integration/ClientHelper.py' in self.index.search('ClientHelper').filenames)
-
- def test_wordstart_query(self):
- self.assertTrue('~/chrome/src/content/browser/renderer_host/render_widget_host_gtk.cc' in self.index.search('rwh').filenames)
- self.assertTrue('~/chrome/src/content/browser/renderer_host/render_widget_host_gtk.cc' in self.index.search('rwhg').filenames)
-
- def test_wordstart_query2(self):
- self.assertTrue('~/chrome/src/third_party/WebKit/Source/WebCore/css/MediaFeatureNames.cpp' in self.index.search('mfn').filenames)
- self.assertTrue('~/chrome/src/third_party/WebKit/Source/WebCore/css/MediaFeatureNames.cpp' in self.index.search('MFN').filenames)
-
- def test_case_insensitive_query(self):
- self.assertTrue("~/ndbg/quickopen/src/db_proxy_test.py" in self.index.search('db_proxy_test').filenames)
- self.assertTrue("~/ndbg/quickopen/test_data/something/something_file.txt" in self.index.search('something_file.txt').filenames)
-
- def test_case_query_with_extension(self):
- self.assertTrue("~/ndbg/quickopen/src/db_proxy_test.py" in self.index.search('db_proxy_test.py').filenames)
- self.assertTrue('~/chrome/src/third_party/tlslite/tlslite/integration/ClientHelper.py' in self.index.search('ClientHelper.py').filenames)
-
- def _assertSetEquals(self, ref,src):
- src_set = set(src)
- ref_set = set(ref)
- if set(ref) != set(src):
- src_only = src_set.difference(ref_set)
- ref_only = ref_set.difference(src_set)
- self.assertEquals(ref_set,src_set)
-
- def test_dir_query(self):
- src = self.index.search('src/').filenames
- ref = [u'~/chrome/src/third_party/sqlite/src/src/analyze.c', u'~/chrome/src/third_party/WebKit/Source/WebKit/chromium/src/VideoFrameChromiumImpl.h', u'~/chrome/src/third_party/WebKit/Source/WebKit/chromium/src/WorkerFileWriterCallbacksBridge.cpp', u'~/chrome/src/third_party/WebKit/Source/WebKit/chromium/src/WebFontImpl.cpp', u'~/chrome/src/third_party/sqlite/src/src/test_journal.c', u'~/chrome/src/third_party/sqlite/src/src/btreeInt.h', u'~/chrome/src/third_party/WebKit/Source/WebKit/chromium/src/StorageNamespaceProxy.h', u'~/chrome/src/third_party/WebKit/Tools/iExploder/iexploder-1.7.2/src/config.yaml', u'~/chrome/src/third_party/libxml/src/nanohttp.c', u'~/chrome/src/sandbox/src/sandbox.cc', u'~/chrome/src/third_party/WebKit/Source/WebKit/chromium/src/WebFrameImpl.cpp', u'~/chrome/src/third_party/harfbuzz/src/harfbuzz-hangul.c', u'~/chrome/src/third_party/sqlite/src/src/loadext.c', u'~/chrome/src/third_party/sqlite/src/src/test_intarray.h', u'~/chrome/src/third_party/WebKit/Source/WebKit/chromium/src/ChromiumOSRandomSource.cpp', u'~/chrome/src/third_party/WebKit/Source/WebKit/chromium/src/IDBDatabaseCallbacksProxy.h', u'~/ndbg/quickopen/src/open_dialog_base.py', u'~/chrome/src/third_party/WebKit/Source/ThirdParty/gtest/src/gtest-filepath.cc', u'~/chrome/src/third_party/sqlite/src/mkopcodec.awk', u'~/chrome/src/third_party/sqlite/src/src/fkey.c', u'~/chrome/src/third_party/harfbuzz/src/harfbuzz-shape.h', u'~/chrome/src/third_party/libxml/src/testapi.c', u'~/chrome/src/third_party/sqlite/src/config.guess', u'~/chrome/src/third_party/libxml/src/config.guess', u'~/chrome/src/third_party/libxml/src/check-relaxng-test-suite.py', u'~/chrome/src/third_party/WebKit/Source/WebKit/chromium/src/WebEntities.cpp', u'~/chrome/src/third_party/harfbuzz-ng/src/hb-unicode-private.h', u'~/chrome/src/third_party/WebKit/Source/WebKit/chromium/src/WebIDBDatabaseError.cpp', u'~/chrome/src/third_party/sqlite/src/src/test_tclvar.c', u'~/chrome/src/third_party/libxml/src/configure.in', 
u'~/chrome/src/third_party/WebKit/Source/ThirdParty/gyp/test/include_dirs/src/includes.c', u'~/chrome/src/third_party/WebKit/Source/WebKit/chromium/src/ExternalPopupMenu.cpp', u'~/chrome/src/sandbox/src/sandbox.vcproj', u'~/ndbg/quickopen/src/db_test.pyc', u'~/chrome/src/third_party/WebKit/Source/ThirdParty/gyp/test/builddir/src/func3.c', u'~/chrome/src/third_party/WebKit/Source/WebKit/chromium/src/WebFontDescription.cpp', u'~/chrome/src/sandbox/src/nt_internals.h', u'~/chrome/src/sandbox/src/sid.h', u'~/chrome/src/tools/symsrc/COPYING-pefile', u'~/chrome/src/third_party/harfbuzz-ng/src/hb-common.h', u'~/chrome/src/third_party/sqlite/src/src/pager.h', u'~/chrome/src/AUTHORS', u'~/chrome/src/third_party/libxml/src/AUTHORS', u'~/chrome/src/third_party/tcmalloc/vendor/src/stacktrace_x86_64-inl.h', u'~/chrome/src/third_party/tcmalloc/chromium/src/stacktrace_x86_64-inl.h', u'~/chrome/src/sandbox/src/sandbox_utils.h', u'~/chrome/src/sandbox/src/named_pipe_dispatcher.h', u'~/chrome/src/third_party/WebKit/Source/ThirdParty/gtest/src/gtest-port.cc', u'~/chrome/src/third_party/harfbuzz/src/harfbuzz.c', u'~/chrome/src/third_party/WebKit/Source/WebKit/chromium/src/WebDeviceOrientation.cpp', u'~/chrome/src/third_party/sqlite/src/src/pcache1.c', u'~/chrome/src/third_party/WebKit/Source/WebKit/chromium/src/WebIDBKey.cpp', u'~/chrome/src/third_party/WebKit/Source/WebKit/chromium/src/VideoFrameChromiumImpl.cpp', u'~/ndbg/quickopen/src/dyn_object.pyc', u'~/chrome/src/third_party/sqlite/src/src/test_backup.c', u'~/chrome/src/third_party/tcmalloc/vendor/src/stacktrace_win32-inl.h', u'~/chrome/src/third_party/tcmalloc/chromium/src/stacktrace_win32-inl.h', u'~/ndbg/quickopen/src/db_proxy_test.py']
- self._assertSetEquals(set(ref),set(src))
-
- def test_emtpy_dir_query(self):
- self.assertEquals([], self.index.search('/').filenames)
-
- def test_two_dir_query(self):
- self.assertTrue("~/ndbg/quickopen/src/db_proxy_test.py" in self.index.search('quickopen/src/').filenames)
-
- def test_dir_and_name_query(self):
- self.assertTrue("~/ndbg/quickopen/src/db_proxy_test.py" in self.index.search('src/db_proxy_test.py').filenames)
-
-class DBShardManagerTestMT(unittest.TestCase, DBShardManagerTestBase):
- def setUp(self,*args,**kwargs):
- self.threaded = True
- unittest.TestCase.setUp(self,*args,**kwargs)
- DBShardManagerTestBase.setUp(self)
-
- def tearDown(self):
- DBShardManagerTestBase.tearDown(self)
-
-class DBShardManagerTestST(unittest.TestCase, DBShardManagerTestBase):
+class DBShardManagerTest(unittest.TestCase):
def setUp(self,*args,**kwargs):
- unittest.TestCase.setUp(self,*args,**kwargs)
- self.threaded = False
- DBShardManagerTestBase.setUp(self)
+ self.files = [
+ "a/b/csdf.txt",
+ "a/b/ghijkl.txt",
+ "a/dsfsfd.txt",
+ "k/dsfsfd.txt",
+ "k/sdf.txt",
+ ]
+ mock_indexer = db_indexer.MockIndexer(self.files)
+ self.shard_manager = db_shard_manager.DBShardManager(mock_indexer)
+
+ def test_props(self):
+ self.assertEquals(set(self.files),set(self.shard_manager.files))
+ self.assertEquals(set(["a/dsfsfd.txt", "k/dsfsfd.txt"]),
+ set(self.shard_manager.files_by_lower_basename["dsfsfd.txt"]))
+
+ def test_smoketest_basename_search(self):
+ """
+ Does a very simple smoketest on the shard search.
+ """
+ res, truncated = self.shard_manager.search_basenames("sdf", 100)
+ self.assertEquals(set(["csdf.txt", "sdf.txt"]), set(res))
+ self.assertFalse(truncated)
def test_chunker(self):
def validate(num_items,nchunks):
start_list = [(i,True) for i in range(num_items)]
- chunks = self.index._make_chunks(start_list,nchunks)
+ chunks = self.shard_manager._make_chunks(start_list,nchunks)
found_indices = set()
for chunk in chunks:
for i,j in chunk.items():
@@ -112,12 +61,13 @@ def validate(num_items,nchunks):
validate(10,3)
def tearDown(self):
- DBShardManagerTestBase.tearDown(self)
+ self.shard_manager.close()
class DBShardManagerPerfTest():
def __init__(self, testfile):
    """Create the shard manager used by the perf run.

    testfile: path to a JSON file. Its decoded content is passed to
              MockIndexer as files_by_basename (presumably a mapping of
              basename -> list of paths; confirm against the test data).
    """
    # Imported locally because this file's module-level imports are not
    # visible here; harmless if json is already imported at the top.
    import json

    # The path parameter is `testfile`; the previous code read an undefined
    # name `filename`. `with` also makes sure the file handle is closed.
    with open(testfile) as f:
        files_by_basename = json.load(f)
    mock_indexer = db_indexer.MockIndexer(files_by_basename = files_by_basename)
    self.shard_manager = db_shard_manager.DBShardManager(mock_indexer)
def test_matcher_perf(self,max_hits):
print "%15s %s" % ("query", "time")
@@ -152,7 +102,7 @@ def test_matcher_perf(self,max_hits):
]
for q in PERF_QUERIES:
start = time.time()
- self.index.search(q,max_hits)
+ self.shard_manager.search(q,max_hits)
elapsed = time.time() - start
print '%15s %.3f' % (q ,elapsed)
View
@@ -36,6 +36,49 @@ def _is_exact_match(query_text, hit):
return True
return False
def _apply_global_rank_adjustment(base_result):
    """Return a new QueryResult holding base_result's hits in final order.

    base_result.hits() yields (filename, rank) pairs. They are ordered by:
      1. rank, highest first;
      2. basename of the filename, ascending;
      3. full filename, ascending.
    The truncated flag of base_result is carried over unchanged.
    """
    def sort_key(hit):
        filename, rank = hit
        # Negating the rank gives descending order within an ascending sort.
        # NOTE(review): assumes ranks are numeric -- confirm against
        # BasenameRanker.rank_query.
        return (-rank, os.path.basename(filename), filename)

    # A key function replaces the old cmp-style comparator: same ordering,
    # and it does not depend on the cmp builtin or on list.sort accepting a
    # comparison function.
    hits = sorted(base_result.hits(), key=sort_key)
    return QueryResult(hits, base_result.truncated)
+
def _filter_result_for_exact_matches(query_text, base_result):
    """
    Builds a fresh QueryResult that keeps only the hits of base_result whose
    filename passes _is_exact_match against query_text. The truncated flag is
    copied from base_result; the relative order of the kept hits is unchanged.
    """
    filtered = QueryResult()
    filtered.truncated = base_result.truncated

    for filename, rank in base_result.hits():
        if not _is_exact_match(query_text, filename):
            continue
        # filenames and ranks are parallel lists; append to both together.
        filtered.filenames.append(filename)
        filtered.ranks.append(rank)
    return filtered
+
def _is_dirmatch(lower_dirpart_query, filename):
    """
    Tells whether filename's directory satisfies the directory part of a query.

    lower_dirpart_query: the directory portion of the query, already
                         lowercased by the caller; '' matches every file.
    filename:            path whose os.path.dirname is tested.

    Returns True when the lowercased directory name ends with
    lower_dirpart_query, False otherwise.
    """
    if lower_dirpart_query != '':
        lowered_dir = os.path.dirname(filename).lower()
        return lowered_dir.endswith(lower_dirpart_query)
    return True
+
class Query(object):
"""Encapsulates all the options to Quickopen search system."""
@@ -93,91 +136,54 @@ def execute(self, shard_manager, query_cache):
else:
base_results = self.execute_nocache(shard_manager, query_cache)
- ranked_results = self.apply_global_rank_adjustment(base_results)
+ ranked_results = _apply_global_rank_adjustment(base_results)
ranked_and_truncated_results = ranked_results.get_copy_with_max_hits(self.max_hits)
query_cache.searches[qkey] = ranked_and_truncated_results
res = ranked_and_truncated_results
if self.exact_match:
- return self.filter_result_for_exact_matches(res)
-
- return res
-
- def filter_result_for_exact_matches(self, base_result):
- """
- Returns a new QueryResult object containing only filenames that exactly
- match the provided query.
- """
- res = QueryResult()
- res.truncated = base_result.truncated
+ return _filter_result_for_exact_matches(self.text, res)
- for hit,rank in base_result.hits():
- if _is_exact_match(self.text, hit):
- res.filenames.append(hit)
- res.ranks.append(rank)
return res
- def apply_global_rank_adjustment(self, base_result):
- def hit_cmp(x,y):
- # compare on the rank
- i = -cmp(x[1],y[1])
- if i != 0:
- return i
- # if the ranks agree, compare on the filename,
- # first by basename, then by fullname
- x_base = os.path.basename(x[0])
- y_base = os.path.basename(y[0])
- j = cmp(x_base, y_base)
- if j != 0:
- return j
- return cmp(x[0], y[0])
-
- hits = list(base_result.hits())
- hits.sort(hit_cmp)
- return QueryResult(hits, base_result.truncated)
-
def execute_nocache(self, shard_manager, query_cache):
- self.text = self.text
+ # What we'll actually return
+ truncated = False
+
slashIdx = self.text.rfind('/')
if slashIdx != -1:
- dirpart = self.text[:slashIdx]
+ dirpart_query = self.text[:slashIdx]
basename_query = self.text[slashIdx+1:]
else:
- dirpart = None
+ dirpart_query = ''
basename_query = self.text
+ lower_dirpart_query = dirpart_query.lower()
- truncated = False
-
+ # Get the files
+ files = []
if len(basename_query):
basename_hits, truncated = shard_manager.search_basenames(basename_query, self.max_hits)
-
- # rank the results
- trace_begin("rank_results")
- hits = []
- basename_ranker = BasenameRanker()
for hit in basename_hits:
- files = shard_manager.files_by_lower_basename[hit]
- for f in files:
- basename = os.path.basename(f)
- rank = basename_ranker.rank_query(basename_query, basename)
- hits.append((f,rank))
- trace_end("rank_results")
-
+ hit_files = shard_manager.files_by_lower_basename[hit]
+ for f in hit_files:
+ if _is_dirmatch(lower_dirpart_query, f):
+ files.append(f)
else:
- if len(dirpart):
- hits = []
- hits.extend([(f, 1) for f in shard_manager.files])
- else:
- hits = []
-
- if dirpart:
- reshits = []
- lower_dirpart = dirpart.lower()
- for hit in hits:
- dirname = os.path.dirname(hit[0])
- lower_dirname = dirname.lower()
- if lower_dirname.endswith(lower_dirpart):
- reshits.append(hit)
- hits = reshits
+ for f in shard_manager.files:
+ if _is_dirmatch(lower_dirpart_query, f):
+ files.append(f)
+ if len(files) > self.max_hits:
+ break
+
+ # Rank the results
+ trace_begin("rank_results")
+ hits = []
+ basename_ranker = BasenameRanker()
+ for f in files:
+ basename = os.path.basename(f)
+ rank = basename_ranker.rank_query(basename_query, basename)
+ hits.append((f,rank))
+ trace_end("rank_results")
+
return QueryResult(hits=hits, truncated=truncated)
Oops, something went wrong.

0 comments on commit d920de3

Please sign in to comment.