Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Loading…

Fixes bug 838897 - Fixed the versions filter in search with elasticsearch. #1199

Merged
merged 1 commit into from

4 participants

@AdrianGaudebert

This PR adds integration tests for elasticsearch. They are disabled by default though, so we can pass jenkins' build. If you want to run them locally, you need an elasticsearch instance working on port 9200, and to run RUN_ES_INTEGRATION_TESTS=1 make test.

@ErikRose r?

socorro/unittest/external/elasticsearch/test_search.py
@@ -148,3 +152,239 @@ def test_get_counts(self):
self.assertTrue("is_linux" in res[0])
self.assertFalse("is_linux" in res[1])
+
+
+_run_integration_tests = os.environ.get('RUN_ES_INTEGRATION_TESTS', False)
+if _run_integration_tests in ('false', 'False', 'no', '0'):
+ _run_integration_tests = False
+
+if not _run_integration_tests:
+ import logging
+ logging.warning("Skipping elasticsearch integration tests")
+
+else:
+
+ @attr(integration='elasticsearch') # for nosetests
+ class FunctionnalElasticsearchSearch(unittest.TestCase):
@peterbe Owner
peterbe added a note

Functional

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
socorro/unittest/external/elasticsearch/test_search.py
((6 lines not shown))
+_run_integration_tests = os.environ.get('RUN_ES_INTEGRATION_TESTS', False)
+if _run_integration_tests in ('false', 'False', 'no', '0'):
+ _run_integration_tests = False
+
+if not _run_integration_tests:
+ import logging
+ logging.warning("Skipping elasticsearch integration tests")
+
+else:
+
+ @attr(integration='elasticsearch') # for nosetests
+ class FunctionnalElasticsearchSearch(unittest.TestCase):
+
+ def __init__(self, *args, **kwargs):
+ super(self.__class__, self).__init__(*args, **kwargs)
+
@peterbe Owner
peterbe added a note

Why is this done in an __init__? I've never seen that before. I mean, why is it not in an setUp().
If there is a good reason why, document it with a comment.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
socorro/unittest/external/elasticsearch/test_search.py
((57 lines not shown))
+
+ config_manager = ConfigurationManager(
+ [required_config],
+ app_name='testapp',
+ app_version='1.0',
+ app_description='app description',
+ values_source_list=[{
+ 'logger': mock_logging,
+ 'elasticsearch_urls': 'http://localhost:9200',
+ 'elasticsearch_index': 'socorro_integration_test',
+ }]
+ )
+
+ return config_manager
+
+ def setUp(self):
@peterbe Owner
peterbe added a note

Move this function up. All unit test classes should look like this:

class CLassname(TestCase):
    def setUp():
        ...
    def tearDown():
        ...
    def other_none_tests_or_private_methods():
       ...

    def test_first_test():

That way it's easy to navigate from the class definition to the setUp and don't miss that the class has one.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
@peterbe
Owner

Slightly hard to understand some of the logic about the renaming of things but the Python stuff looks right.

r+ with a nit on the use of __init__ in the test

@AdrianGaudebert

@peterbe I added a new commit with my attempt at making a decorator to skip the test. Can you please tell me what you think?

Note that I need to rebase before merging this.

socorro/unittest/external/elasticsearch/test_search.py
((14 lines not shown))
from socorro.external.elasticsearch.search import Search
+from socorro.lib import util, datetimeutil
+
+
+_run_integration_tests = os.environ.get('RUN_ES_INTEGRATION_TESTS', False)
+if _run_integration_tests in ('false', 'False', 'no', '0'):
+ _run_integration_tests = False
+
+
+def skip_if(skip):
+ """Decorator, skip a test if `skip` is true. """
+ def decorator(fn):
@peterbe Owner
peterbe added a note

Here, you might want to add @functools.wraps on the decorator function.

@AdrianGaudebert Owner

I tried to add it, but it adds a bunch of overhead for quite nothing here. Is that really necessary?

@peterbe Owner
peterbe added a note

I'm so used to it now that I had forgotten what good it does. So I looked it up. Check this out:

from functools import wraps
def my_decorator(f):
    @wraps(f)
    def wrapper(*args, **kwds):
       return f(*args, **kwds)
    return wrapper

@my_decorator
def example():
    """Docstring"""
    print 'Called example function'


example()
#Called example function

print example.__name__
#'example'
print example.__doc__
#'Docstring'


def without_functools_decorator(f):
    def wrapper(*args, **kwds):
       return f(*args, **kwds)
    return wrapper

@without_functools_decorator
def example():
    """Docstring"""
    print 'Called example function'

example()
#Called example function

print example.__name__
#'wrapper'
print example.__doc__
#None

Basically, what functools.wraps gives is that it makes the function name and docstring stay intact. I think that's quite useful. Especially around a test where code (e.g. nose) might need to report on a test class's name and stuff.

@erikrose Owner
erikrose added a note

Yep, wraps is a good idea. It's a one-liner and saves potential confusion later.

@AdrianGaudebert Owner

From my tests, wraps is not needed in this case. The reason why is because I don't return the result of a call to fn, but I return fn itself.

def skip_if(skip):
    def decorator(fn):
        '''decorator doc'''
        if skip:
            raise Exception('Nope, Chuck Testa')
        else:
            return fn
    return decorator

@skip_if(False)
def myfunc():
    '''myfunc doc'''
    print 'Is that a bear in my garden? '

print myfunc.__name__
# myfunc
print myfunc.__doc__
# myfunc doc

I think the main difference comes from the fact that here we decorate a class and not a function. We need to return the class object and not "call" it so nose can call all the tests itself. Does that make sense?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
socorro/unittest/external/elasticsearch/test_search.py
((14 lines not shown))
from socorro.external.elasticsearch.search import Search
+from socorro.lib import util, datetimeutil
+
+
+_run_integration_tests = os.environ.get('RUN_ES_INTEGRATION_TESTS', False)
+if _run_integration_tests in ('false', 'False', 'no', '0'):
+ _run_integration_tests = False
+
+
+def skip_if(skip):
+ """Decorator, skip a test if `skip` is true. """
+ def decorator(fn):
+ if skip:
+ import logging
+ logging.warning("Skipping elasticsearch integration tests")
@peterbe Owner
peterbe added a note

Hardcoding the function name in this string will make it hard to move this awesome decorator out to other places where it might be needed.
Can you do something like 'Skipping %s tests' % fn.__name__ or someting like that?

@erikrose Owner
erikrose added a note

Let's not do this; nose already has lots of ways of reporting which tests are skipped. They'll even work, if you use wraps. ;-)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
socorro/unittest/external/elasticsearch/test_search.py
((14 lines not shown))
from socorro.external.elasticsearch.search import Search
+from socorro.lib import util, datetimeutil
+
+
+_run_integration_tests = os.environ.get('RUN_ES_INTEGRATION_TESTS', False)
+if _run_integration_tests in ('false', 'False', 'no', '0'):
+ _run_integration_tests = False
+
+
+def skip_if(skip):
+ """Decorator, skip a test if `skip` is true. """
+ def decorator(fn):
+ if skip:
+ import logging
@peterbe Owner
peterbe added a note

If you're going to import SkipTest at the top of the file, you should also put logging there.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
@erikrose
Owner

Apologies for not getting to this yesterday! I will make it my first priority for today; I have a long ride to Corvallis. :-)

@AdrianGaudebert

@erikrose thanks!

@peterbe I added a commit to address your last comments.

@rhelmer rhelmer commented on the diff
socorro/external/elasticsearch/base.py
@@ -218,32 +218,38 @@ def build_query_from_params(params, config):
and_filter = []
key = ":".join((v["product"], v["version"]))
+ if key in versions_info:
+ channel = versions_info[key]["release_channel"].lower()
@rhelmer Owner
rhelmer added a note

you are checking that key is in versions_info, but "release_channel" could still throw a KeyError (in theory)

Also, what happens later if channel is None?

I think it'd be nicer to write it this way anyway, if you always expect the key to be there (looks like you do):

try:
    channel = versions_info[key]['release_channel'].lower()
except KeyError:
    # handle it somehow

And if you can't handle it don't bother with try and just let it bubble up and crash.

@erikrose Owner
erikrose added a note

IMO the right thing to do will depend on what the right behavior is on the off chance that the "release_channel" key isn't there. If that key should always be there, we should blow up.

It looks like Adrian handles the channel-is-None case in the elif on line 247.

@AdrianGaudebert Owner

The versions_info data comes from another service, and it is sure that if key exists, it will always contain a release_channel.

release_channel being None is an expected case where we simply don't filter by release channel.

@erikrose Owner
erikrose added a note

In that case, I think we're all good here.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
@erikrose erikrose commented on the diff
socorro/external/elasticsearch/base.py
@@ -218,32 +218,38 @@ def build_query_from_params(params, config):
@erikrose Owner
erikrose added a note

If you made this a class method rather than a static one, you wouldn't have to keep saying "ElasticSearchBase.whatever" all the time below. You could keep it to simply "cls.whatever", and it wouldn't need to be changed if you ever renamed the class. Also, shorter. :-)

Also also, it would work as expected if you ever decided to subclass it.

Obviously, that doesn't block this merge, but it would be great to do separately.

@erikrose Owner
erikrose added a note

Also, I'd love to see the whole # Generating the filters for versions section broken off as a separate method. This one is getting pretty long, and there's a lot of state flying around in there. A casual look suggests it'd be pretty easy.

@AdrianGaudebert Owner

This entire code is going to be refactored soon as part of the new search UI work. I know it's ugly (it was, after all, some of the very first Python I wrote), but I wouldn't worry about it now, so long as it works.

@erikrose Owner
erikrose added a note

Fair 'nuf.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
@erikrose erikrose commented on the diff
config/middleware.ini-dist
@@ -288,7 +288,7 @@
# name: channels
# doc: List of release channels, excluding the `release` one.
# converter: string_to_list
- #channels='Beta, Aurora, Nightly, beta, aurora, nightly'
+ #channels='beta, aurora, nightly'
@erikrose Owner
erikrose added a note

I was a little surprised that "release" wasn't in here. If we don't already have a strong convention established, maybe consider "non_release_channels" if that's the semantic.

@AdrianGaudebert Owner

Changing this would be outside of the scope of this bug, and could have unexpected side effects. I want to keep this as simple as possible, so I filed bug 868344.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
socorro/unittest/external/elasticsearch/test_search.py
((18 lines not shown))
+ # insert data into elasticsearch
+ default_crash_report = {
+ 'uuid': '0',
+ 'signature': 'js::break_your_browser',
+ 'date_processed': datetimeutil.date_to_string(yesterday),
+ 'product': 'WaterWolf',
+ 'version': '1.0',
+ 'release_channel': 'release',
+ 'os_name': 'Linux',
+ 'build': '1234567890',
+ 'reason': 'MOZALLOC_WENT_WRONG',
+ 'hangid': None,
+ 'process_type': 'browser',
+ }
+
+ crash_report = default_crash_report.copy()
@erikrose Owner
erikrose added a note

Shorter and with no local assignments:

self.storage.save_processed(dict(default_crash_report, uuid='1', product='EarthRaccoon'))

That'll save a few lines for each of these. You could also make it into a loop, of course, but I understand if you want to keep the test dead-simple.

@AdrianGaudebert Owner

Nice, when I wrote this code I tried to find something as short and simple as that, but couldn't. Thanks Erik!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
socorro/unittest/external/elasticsearch/test_search.py
((129 lines not shown))
+ config_manager = ConfigurationManager(
+ [required_config],
+ app_name='testapp',
+ app_version='1.0',
+ app_description='app description',
+ values_source_list=[{
+ 'logger': mock_logging,
+ 'elasticsearch_urls': 'http://localhost:9200',
+ 'elasticsearch_index': 'socorro_integration_test',
+ }]
+ )
+
+ return config_manager
+
+ @mock.patch('socorro.external.elasticsearch.search.Util')
+ def test_search(self, mock_psql_util):
@erikrose Owner
erikrose added a note

It looks like this tests several independent searches. Let's make them separate tests so we know they're independent and so we can still get partial info when one of them fails.

@AdrianGaudebert Owner

Doing this will make the test slower, as it will run the indexing and deletion of data for every single test. I don't think it will bring much information too, when a test fails it shows what line failed. And we have a lot of those tests running several calls to the tested function in the code base already.

If you say it's not a problem to make the tests longer, and it's better to have smaller, more specific tests, I won't argue against. I just want to discuss it first. :)

@peterbe Owner
peterbe added a note

I half and half agree. Erik is right but there's a trade-off. This is after all an integration test and if you have many independent paths that the code should test, then it shouldn't be an integration test at all so then the solution would be to get out of here and do them as unit tests.

My practice is to write unit tests (a la Erik's suggestion) for "every possible" thing that can happen and make each test do just one thing and the test name should reflect that accordingly. Unit tests should also cover when things go badly (e.g. you pass in date = not-a-date). But integration tests can be sloppy and just do the very basic stuff. Basically, the ideal stuff with no tricky parameters.

Perhaps take Erik's advice and go a slightly different direction and consider deleting a bunch of tests rather than keeping them all but split up.

End of the day, your call Adrian.

@peterbe Owner
peterbe added a note

As per our discussion, perhaps rename this to test_search_single_filter

@erikrose Owner
erikrose added a note

If you structure the test class properly, you can have the best of both worlds: index the data on setup_class, and delete it on teardown_class.

@AdrianGaudebert Owner

No can do, we're using Python 2.6 and those _class methods are only available in Python 2.7+, sadly. Peter and I have decided that we would include unittest2 into Socorro if we haven't moved to 2.7 by the end of this year though.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
socorro/unittest/external/elasticsearch/test_search.py
((74 lines not shown))
+
+ crash_report = default_crash_report.copy()
+ crash_report.update({
+ 'uuid': '7',
+ 'hangid': '12',
+ })
+ self.storage.save_processed(crash_report)
+
+ crash_report = default_crash_report.copy()
+ crash_report.update({
+ 'uuid': '8',
+ 'process_type': 'plugin',
+ })
+ self.storage.save_processed(crash_report)
+
+ self.storage.es.refresh()
@erikrose Owner
erikrose added a note

Good! I was looking for that! :-D

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
@erikrose erikrose commented on the diff
socorro/unittest/external/elasticsearch/test_search.py
((15 lines not shown))
from socorro.external.elasticsearch.search import Search
+from socorro.lib import util, datetimeutil
+
+
+_run_integration_tests = os.environ.get('RUN_ES_INTEGRATION_TESTS', False)
@erikrose Owner
erikrose added a note

I see you used nose's attr plugin down below for something else. Why not use it for this as well?

@AdrianGaudebert Owner

If I'm correct, you need to specify in the nosetests command that you want to not run those tests. Here, we want them to not be ran by default. That is also to be a temporary thing, until I configured jenkins to be able to use elasticsearch during the build process (but I need this code in master before doing that).

@erikrose Owner
erikrose added a note

Right you are. I was hoping socorro was a Python package and we could do the setup.cfg trick (http://stackoverflow.com/questions/14674713/nose-how-to-skip-tests-by-default), but it's not.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
@erikrose erikrose commented on the diff
socorro/external/elasticsearch/base.py
((43 lines not shown))
# this version is a release
and_filter.append({
"not":
ElasticSearchBase.build_terms_query(
- "ReleaseChannel",
+ "release_channel",
config.channels)
@erikrose Owner
erikrose added a note

Yep, this doesn't read accurately. I'd definitely like to see config.channels renamed to config.non_release_channels.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
@erikrose erikrose commented on the diff
socorro/external/elasticsearch/search.py
@@ -136,15 +136,15 @@ def search_for_signatures(self, params, es_result, query):
params["result_offset"], maxsize,
self.config.platforms)
- results = {
- "total": signature_count,
- "hits": []
- }
+ hits = [signatures[x] for x in range(params["result_offset"], maxsize)]
@erikrose Owner
erikrose added a note

xrange

@AdrianGaudebert Owner

Hah, now I want to summon @peterbe who, if I remember correctly, says there's no real need to use xrange over range. Or perhaps here it does make sense?

@peterbe Owner
peterbe added a note

Haha! xrange does one item at a time which is more efficient, but when it's a list of less than 100 elements it makes absolutely no difference.

@erikrose Owner
erikrose added a note

Yes, it's not going to make any noticeable difference here. It's just a good habit to get into. Basically, there's no reason not to do it. Not a blocker.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
@erikrose erikrose commented on the diff
socorro/external/elasticsearch/search.py
((14 lines not shown))
- return results
+ return {
@erikrose Owner
erikrose added a note

I like your refactoring of this hits-gathering section very much: it's so clear now what it returns.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
@erikrose
Owner

That's all I have to say for now. Go ahead and take a look at that stuff, and once you've revised I'll actually get it running locally and verify it by hand. Cheers! Sorry it took me so long to get to it!

@AdrianGaudebert

@erikrose @peterbe Are we good here?

@peterbe peterbe commented on the diff
socorro/unittest/external/elasticsearch/test_search.py
@@ -148,3 +171,208 @@ def test_get_counts(self):
self.assertTrue("is_linux" in res[0])
self.assertFalse("is_linux" in res[1])
+
+
+@attr(integration='elasticsearch') # for nosetests
+@skip_if(not _run_integration_tests)
+class FunctionalElasticsearchSearch(unittest.TestCase):
@peterbe Owner
peterbe added a note

It's not crucial, but can you call it "Integration" around socorro. Not "functional". The two words are very similar.

@peterbe Owner
peterbe added a note

ignore this

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
@erikrose
Owner

Sorry, I apparently don't get emailed for @ mentions. Hrrm.

socorro/unittest/external/elasticsearch/test_search.py
((5 lines not shown))
+
+@attr(integration='elasticsearch') # for nosetests
+@skip_if(not _run_integration_tests)
+class FunctionalElasticsearchSearch(unittest.TestCase):
+ """Test search with an elasticsearch database containing fake data. """
+
+ def setUp(self):
+ with self.get_config_manager().context() as config:
+ self.storage = crashstorage.ElasticSearchCrashStorage(config)
+
+ now = datetimeutil.utc_now()
+ yesterday = now - datetime.timedelta(days=1)
+
+ # insert data into elasticsearch
+ default_crash_report = {
+ 'uuid': '0',
@peterbe Owner
peterbe added a note

remove this line

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
@erikrose
Owner

r+. Let's get this merged, as it's been a long time, but let's not forget to clean up the jagged edges. :-)

@AdrianGaudebert

@peterbe I updated the tests following our discussion from yesterday: there are now two tests, one for "single" filters and one for "combined" filters. I also added a few missing filters (signature, date) and some comments. One thing is missing though, the test for the complex version / release channel filtering. That one is harder to write, and I lost some time on fixing a stupid issue. I'd like to move on with this, so if you don't mind, I'll add that test with my next elasticsearch-related pull request (I made a note on my todo list to not forget it! )

I'll rebase and merge tomorrow if you guys are happy with my last two commits. Thanks for your reviews @erikrose and @peterbe!

@peterbe peterbe commented on the diff
socorro/unittest/external/elasticsearch/test_search.py
((160 lines not shown))
+ res['hits'][1]['signature'],
+ 'my_bad'
+ )
+ self.assertEqual(res['hits'][0]['is_linux'], 8)
+ self.assertEqual(res['hits'][0]['is_windows'], 1)
+ self.assertEqual(res['hits'][0]['is_mac'], 0)
+
+ # test product
+ params = {
+ 'products': 'EarthRaccoon'
+ }
+ res = api.get(**params)
+
+ self.assertEqual(res['total'], 1)
+ self.assertEqual(res['hits'][0]['count'], 1)
+
@peterbe Owner
peterbe added a note

Do you want to do params = {'products': 'BLA BLA BLA'} and expect nothing?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
@peterbe
Owner

I'm happy to see this landed. I think you addressed the points we talked about on the phone.
...perhaps without the search where you expect nothing.

In fact, since searching with params={} does, rather strangely, return something then perhaps it's important to try to search with some values you don't expect.

@peterbe
Owner

Another thought; what should happen if you do params = {'never': 'heardof'}? In this example, never is not a known "index". Should it perhaps raise an error. It could potentially prevent some really silly accidental bugs like params = {'prducts': "Firefox"}

@peterbe peterbe merged commit 7ba71fa into from
@AdrianGaudebert AdrianGaudebert deleted the branch
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on May 9, 2013
  1. @AdrianGaudebert
This page is out of date. Refresh to see the latest.
View
4 config/middleware.ini-dist
@@ -288,7 +288,7 @@
# name: channels
# doc: List of release channels, excluding the `release` one.
# converter: string_to_list
- #channels='Beta, Aurora, Nightly, beta, aurora, nightly'
+ #channels='beta, aurora, nightly'
@erikrose Owner
erikrose added a note

I was a little surprised that "release" wasn't in here. If we don't already have a strong convention established, maybe consider "non_release_channels" if that's the semantic.

@AdrianGaudebert Owner

Changing this would be outside of the scope of this bug, and could have unexpected side effects. I want to keep this as simple as possible, so I filed bug 868344.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
# name: elasticSearchHostname
# doc: String containing the URI of the Elastic Search instance.
@@ -308,7 +308,7 @@
# name: restricted_channels
# doc: List of release channels to restrict based on build ids.
# converter: string_to_list
- #restricted_channels='Beta, beta'
+ #restricted_channels='beta'
# name: searchMaxNumberOfDistinctSignatures
# doc: Integer containing the maximum allowed number of distinct signatures the system should retrieve. Used mainly for performances in ElasticSearch
View
4 scripts/config/webapiconfig.py.dist
@@ -245,8 +245,8 @@ platforms.default = (
# Release Channels
channels = cm.Option()
channels.doc = 'List of release channels, excluding the `release` one.'
-channels.default = ['Beta', 'Aurora', 'Nightly', 'beta', 'aurora', 'nightly']
+channels.default = ['beta', 'aurora', 'nightly']
restricted_channels = cm.Option()
restricted_channels.doc = 'List of release channels to restrict based on build ids.'
-restricted_channels.default = ['Beta', 'beta']
+restricted_channels.default = ['beta']
View
36 socorro/external/elasticsearch/base.py
@@ -218,32 +218,38 @@ def build_query_from_params(params, config):
and_filter = []
key = ":".join((v["product"], v["version"]))
+ if key in versions_info:
+ channel = versions_info[key]["release_channel"].lower()
@rhelmer Owner
rhelmer added a note

you are checking that key is in versions_info, but "release_channel" could still throw a KeyError (in theory)

Also, what happens later if channel is None?

I think it'd be nicer to write it this way anyway, if you always expect the key to be there (looks like you do):

try:
    channel = versions_info[key]['release_channel'].lower()
except KeyError:
    # handle it somehow

And if you can't handle it don't bother with try and just let it bubble up and crash.

@erikrose Owner
erikrose added a note

IMO the right thing to do will depend on what the right behavior is on the off chance that the "release_channel" key isn't there. If that key should always be there, we should blow up.

It looks like Adrian handles the channel-is-None case in the elif on line 247.

@AdrianGaudebert Owner

The versions_info data comes from another service, and it is sure that if key exists, it will always contain a release_channel.

release_channel being None is an expected case where we simply don't filter by release channel.

@erikrose Owner
erikrose added a note

In that case, I think we're all good here.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
+ else:
+ channel = None
- if (key in versions_info and
- versions_info[key]["release_channel"] in
- config.restricted_channels):
- # this version is a beta
+ if channel in config.channels:
+ # this version is not a release
# first use the major version instead
v["version"] = versions_info[key]["major_version"]
- # then make sure it's a beta
+ # then make sure it's in the right release channel
and_filter.append(
ElasticSearchBase.build_terms_query(
- "ReleaseChannel",
- config.restricted_channels
+ "release_channel",
+ channel
)
)
- # last use the right build id
- and_filter.append(
+
+ if channel in config.restricted_channels:
+ # if it's a beta, verify the build id
+ and_filter.append(
ElasticSearchBase.build_terms_query(
- "build", versions_info[key]["build_id"]))
+ "build",
+ versions_info[key]["build_id"]
+ )
+ )
- elif (key in versions_info and
- versions_info[key]["release_channel"]):
+ elif channel:
# this version is a release
and_filter.append({
"not":
ElasticSearchBase.build_terms_query(
- "ReleaseChannel",
+ "release_channel",
config.channels)
@erikrose Owner
erikrose added a note

Yep, this doesn't read accurately. I'd definitely like to see config.channels renamed to config.non_release_channels.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
})
@@ -252,7 +258,9 @@ def build_query_from_params(params, config):
and_filter.append(ElasticSearchBase.build_terms_query(
"version", v["version"].lower()))
or_filter.append({"and": and_filter})
- filters["and"].append({"or": or_filter})
+
+ if or_filter:
+ filters["and"].append({"or": or_filter})
if len(queries) > 1:
query = {
View
14 socorro/external/elasticsearch/search.py
@@ -136,15 +136,15 @@ def search_for_signatures(self, params, es_result, query):
params["result_offset"], maxsize,
self.config.platforms)
- results = {
- "total": signature_count,
- "hits": []
- }
+ hits = [signatures[x] for x in range(params["result_offset"], maxsize)]
@erikrose Owner
erikrose added a note

xrange

@AdrianGaudebert Owner

Hah, now I want to summon @peterbe who, if I remember correctly, says there's no real need to use xrange over range. Or perhaps here it does make sense?

@peterbe Owner
peterbe added a note

Haha! xrange does one item at a time which is more efficient, but when it's a list of less than 100 elements it makes absolutely no difference.

@erikrose Owner
erikrose added a note

Yes, it's not going to make any noticeable difference here. It's just a good habit to get into. Basically, there's no reason not to do it. Not a blocker.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
- for i in range(params["result_offset"], maxsize):
- results["hits"].append(signatures[i])
+ # sort results by count *and* signatures
+ hits.sort(key=lambda x: (-x['count'], x['signature'].lower()))
- return results
+ return {
@erikrose Owner
erikrose added a note

I like your refactoring of this hits-gathering section very much: it's so clear now what it returns.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
+ "total": signature_count,
+ "hits": hits
+ }
@staticmethod
def get_signatures_facet(size):
View
4 socorro/middleware/middleware_app.py
@@ -210,13 +210,13 @@ class MiddlewareApp(App):
)
required_config.webapi.add_option(
'channels',
- default=['Beta', 'Aurora', 'Nightly', 'beta', 'aurora', 'nightly'],
+ default=['beta', 'aurora', 'nightly'],
doc='List of release channels, excluding the `release` one.',
from_string_converter=string_to_list
)
required_config.webapi.add_option(
'restricted_channels',
- default=['Beta', 'beta'],
+ default=['beta'],
doc='List of release channels to restrict based on build ids.',
from_string_converter=string_to_list
)
View
342 socorro/unittest/external/elasticsearch/test_search.py
@@ -2,12 +2,35 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+import datetime
+import mock
+import os
import unittest
+from nose.plugins.attrib import attr
+from nose.plugins.skip import SkipTest
+from configman import ConfigurationManager, Namespace
+
+from socorro.external.elasticsearch import crashstorage
from socorro.external.elasticsearch.search import Search
+from socorro.lib import util, datetimeutil
+
+
+_run_integration_tests = os.environ.get('RUN_ES_INTEGRATION_TESTS', False)
@erikrose Owner
erikrose added a note

I see you used nose's attr plugin down below for something else. Why not use it for this as well?

@AdrianGaudebert Owner

If I'm correct, you need to specify in the nosetests command that you want to not run those tests. Here, we want them to not be ran by default. That is also to be a temporary thing, until I configured jenkins to be able to use elasticsearch during the build process (but I need this code in master before doing that).

@erikrose Owner
erikrose added a note

Right you are. I was hoping socorro was a Python package and we could do the setup.cfg trick (http://stackoverflow.com/questions/14674713/nose-how-to-skip-tests-by-default), but it's not.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
+if _run_integration_tests in ('false', 'False', 'no', '0'):
+ _run_integration_tests = False
+
-import socorro.lib.util as util
-import socorro.unittest.testlib.util as testutil
+def skip_if(skip):
+ """Decorator, skip a test if `skip` is true. """
+ def decorator(fn):
+ if skip:
+ def skip_test():
+ raise SkipTest
+ return skip_test
+ else:
+ return fn
+ return decorator
#==============================================================================
@@ -148,3 +171,318 @@ def test_get_counts(self):
self.assertTrue("is_linux" in res[0])
self.assertFalse("is_linux" in res[1])
+
+
+@attr(integration='elasticsearch') # for nosetests
+@skip_if(not _run_integration_tests)
+class FunctionalElasticsearchSearch(unittest.TestCase):
@peterbe Owner
peterbe added a note

It's not crucial, but can you call it "Integration" around socorro. Not "functional". The two words are very similar.

@peterbe Owner
peterbe added a note

ignore this

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
+ """Test search with an elasticsearch database containing fake data. """
+
+ def setUp(self):
+ with self.get_config_manager().context() as config:
+ self.storage = crashstorage.ElasticSearchCrashStorage(config)
+
+ # clear the indices cache so the index is created on every test
+ self.storage.indices_cache = set()
+
+ now = datetimeutil.utc_now()
+
+ yesterday = now - datetime.timedelta(days=1)
+ yesterday = datetimeutil.date_to_string(yesterday)
+
+ last_month = now - datetime.timedelta(weeks=4)
+ last_month = datetimeutil.date_to_string(last_month)
+
+ # insert data into elasticsearch
+ default_crash_report = {
+ 'uuid': 100,
+ 'signature': 'js::break_your_browser',
+ 'date_processed': yesterday,
+ 'product': 'WaterWolf',
+ 'version': '1.0',
+ 'release_channel': 'release',
+ 'os_name': 'Linux',
+ 'build': '1234567890',
+ 'reason': 'MOZALLOC_WENT_WRONG',
+ 'hangid': None,
+ 'process_type': None,
+ }
+
+ self.storage.save_processed(default_crash_report)
+
+ self.storage.save_processed(
+ dict(default_crash_report, uuid=1, product='EarthRaccoon')
+ )
+
+ self.storage.save_processed(
+ dict(default_crash_report, uuid=2, version='2.0')
+ )
+
+ self.storage.save_processed(
+ dict(default_crash_report, uuid=3, release_channel='aurora')
+ )
+
+ self.storage.save_processed(
+ dict(default_crash_report, uuid=4, os_name='Windows NT')
+ )
+
+ self.storage.save_processed(
+ dict(default_crash_report, uuid=5, build='0987654321')
+ )
+
+ self.storage.save_processed(
+ dict(default_crash_report, uuid=6, reason='VERY_BAD_EXCEPTION')
+ )
+
+ self.storage.save_processed(
+ dict(default_crash_report, uuid=7, hangid='12')
+ )
+
+ self.storage.save_processed(
+ dict(default_crash_report, uuid=8, process_type='plugin')
+ )
+
+ self.storage.save_processed(
+ dict(default_crash_report, uuid=9, signature='my_bad')
+ )
+
+ self.storage.save_processed(
+ dict(
+ default_crash_report,
+ uuid=10,
+ date_processed=last_month,
+ signature='my_little_signature',
+ )
+ )
+
+ # As indexing is asynchronous, we need to force elasticsearch to
+ # make the newly created content searchable before we run the tests
+ self.storage.es.refresh()
+
+ def tearDown(self):
+ # clear the test index
+ with self.get_config_manager().context() as config:
+ self.storage.es.delete_index(config.webapi.elasticsearch_index)
+
+ def get_config_manager(self):
+ mock_logging = mock.Mock()
+
+ required_config = \
+ crashstorage.ElasticSearchCrashStorage.required_config
+ required_config.add_option('logger', default=mock_logging)
+
+ webapi = Namespace()
+ webapi.elasticSearchHostname = 'localhost'
+ webapi.elasticSearchPort = 9200
+ webapi.elasticsearch_index = 'socorro_integration_test'
+ webapi.elasticsearch_doctype = 'crash_reports'
+ webapi.searchMaxNumberOfDistinctSignatures = 1000
+ webapi.timeout = 2
+ webapi.platforms = (
+ {
+ "id": "windows",
+ "name": "Windows NT"
+ },
+ {
+ "id": "linux",
+ "name": "Linux"
+ },
+ {
+ "id": "mac",
+ "name": "Mac OS X"
+ }
+ )
+ webapi.channels = ['beta', 'aurora', 'nightly']
+ webapi.restricted_channels = ['beta']
+
+ required_config.webapi = webapi
+
+ config_manager = ConfigurationManager(
+ [required_config],
+ app_name='testapp',
+ app_version='1.0',
+ app_description='app description',
+ values_source_list=[{
+ 'logger': mock_logging,
+ 'elasticsearch_urls': 'http://localhost:9200',
+ 'elasticsearch_index': 'socorro_integration_test',
+ }]
+ )
+
+ return config_manager
+
+ @mock.patch('socorro.external.elasticsearch.search.Util')
+ def test_search_single_filters(self, mock_psql_util):
+ # verify results show expected numbers
+ with self.get_config_manager().context() as config:
+ api = Search(config=config)
+
+ # test no filter, get all results
+ params = {}
+ res = api.get()
+
+ self.assertEqual(res['total'], 2)
+ self.assertEqual(
+ res['hits'][0]['signature'],
+ 'js::break_your_browser'
+ )
+ self.assertEqual(
+ res['hits'][1]['signature'],
+ 'my_bad'
+ )
+ self.assertEqual(res['hits'][0]['is_linux'], 8)
+ self.assertEqual(res['hits'][0]['is_windows'], 1)
+ self.assertEqual(res['hits'][0]['is_mac'], 0)
+
+ # test product
+ params = {
+ 'products': 'EarthRaccoon'
+ }
+ res = api.get(**params)
+
+ self.assertEqual(res['total'], 1)
+ self.assertEqual(res['hits'][0]['count'], 1)
+
@peterbe Owner
peterbe added a note

Do you want to do params = {'products': 'BLA BLA BLA'} and expect nothing?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
+ # test version
+ params = {
+ 'versions': 'WaterWolf:2.0'
+ }
+ res = api.get(**params)
+
+ self.assertEqual(res['total'], 1)
+ self.assertEqual(res['hits'][0]['count'], 1)
+
+ # test release_channel
+ params = {
+ 'release_channels': 'aurora'
+ }
+ res = api.get(**params)
+
+ self.assertEqual(res['total'], 1)
+ self.assertEqual(res['hits'][0]['count'], 1)
+
+ # test os_name
+ params = {
+ 'os': 'Windows'
+ }
+ res = api.get(**params)
+
+ self.assertEqual(res['total'], 1)
+ self.assertEqual(res['hits'][0]['count'], 1)
+
+ # test build
+ params = {
+ 'build_ids': '0987654321'
+ }
+ res = api.get(**params)
+
+ self.assertEqual(res['total'], 1)
+ self.assertEqual(res['hits'][0]['count'], 1)
+
+ # test reason
+ params = {
+ 'reasons': 'VERY_BAD_EXCEPTION'
+ }
+ res = api.get(**params)
+
+ self.assertEqual(res['total'], 1)
+ self.assertEqual(res['hits'][0]['count'], 1)
+
+ # test hangid
+ params = {
+ 'report_type': 'hang'
+ }
+ res = api.get(**params)
+
+ self.assertEqual(res['total'], 1)
+ self.assertEqual(res['hits'][0]['count'], 1)
+
+ # test process_type
+ params = {
+ 'report_process': 'plugin'
+ }
+ res = api.get(**params)
+
+ self.assertEqual(res['total'], 1)
+ self.assertEqual(res['hits'][0]['count'], 1)
+
+ # test signature
+ params = {
+ 'terms': 'my_bad'
+ }
+ res = api.get(**params)
+
+ self.assertEqual(res['total'], 1)
+ self.assertEqual(res['hits'][0]['count'], 1)
+
+ @mock.patch('socorro.external.elasticsearch.search.Util')
+ def test_search_combined_filters(self, mock_psql_util):
+ with self.get_config_manager().context() as config:
+ api = Search(config=config)
+
+ # get the first, default crash report
+ params = {
+ 'terms': 'js::break_your_browser',
+ 'search_mode': 'is_exactly',
+ 'products': 'WaterWolf',
+ 'versions': 'WaterWolf:1.0',
+ 'release_channels': 'release',
+ 'os': 'Linux',
+ 'build_ids': '1234567890',
+ 'reasons': 'MOZALLOC_WENT_WRONG',
+ 'report_type': 'crash',
+ 'report_process': 'browser',
+ }
+ res = api.get(**params)
+
+ self.assertEqual(res['total'], 1)
+ self.assertEqual(
+ res['hits'][0]['signature'],
+ 'js::break_your_browser'
+ )
+ self.assertEqual(res['hits'][0]['is_linux'], 1)
+ self.assertEqual(res['hits'][0]['is_windows'], 0)
+ self.assertEqual(res['hits'][0]['is_mac'], 0)
+
+ # get the crash report from last month
+ now = datetimeutil.utc_now()
+
+ three_weeks_ago = now - datetime.timedelta(weeks=3)
+ three_weeks_ago = datetimeutil.date_to_string(three_weeks_ago)
+
+ five_weeks_ago = now - datetime.timedelta(weeks=5)
+ five_weeks_ago = datetimeutil.date_to_string(five_weeks_ago)
+
+ params = {
+ 'from_date': five_weeks_ago,
+ 'to_date': three_weeks_ago,
+ }
+ res = api.get(**params)
+
+ self.assertEqual(res['total'], 1)
+ self.assertEqual(
+ res['hits'][0]['signature'],
+ 'my_little_signature'
+ )
+ self.assertEqual(res['hits'][0]['is_linux'], 1)
+ self.assertEqual(res['hits'][0]['is_windows'], 0)
+ self.assertEqual(res['hits'][0]['is_mac'], 0)
+
+ @mock.patch('socorro.external.elasticsearch.search.Util')
+ def test_search_no_results(self, mock_psql_util):
+ with self.get_config_manager().context() as config:
+ api = Search(config=config)
+
+ # unexisting signature
+ params = {
+ 'terms': 'callMeMaybe()',
+ }
+ res = api.get(**params)
+ self.assertEqual(res['total'], 0)
+
+ # unexisting product
+ params = {
+ 'products': 'WindBear',
+ }
+ res = api.get(**params)
+ self.assertEqual(res['total'], 0)
Something went wrong with that request. Please try again.