Skip to content
This repository has been archived by the owner on Dec 7, 2022. It is now read-only.

Commit

Permalink
Added search support for a Solr backend
Browse files Browse the repository at this point in the history
re #142
  • Loading branch information
mhrivnak committed Feb 3, 2015
1 parent e96c97a commit 4c19668
Show file tree
Hide file tree
Showing 10 changed files with 273 additions and 4 deletions.
41 changes: 41 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,13 @@ Example:
Search
------

Only one of the following search backends should be configured. If multiple
backends are configured, crane will attempt to use the first one whose configuration
it finds, and the discovery order is not guaranteed to be consistent.

GSA
~~~

The API supporting ``docker search`` can be enabled by configuring a Google
Search Appliance for use by crane. In crane's configuration file, a section
``[gsa]`` must exist with key ``url``. The URL will be used in a GET request,
Expand All @@ -87,6 +94,40 @@ The XML returned by the GSA must contain values for ``portal_name`` and
``portal_short_description``, which will be turned into the name and
description returned by crane's search API.

Solr
~~~~

The API supporting ``docker search`` can be enabled by configuring a Solr
deployment for use by crane. In crane's configuration file, a section
``[solr]`` must exist with key ``url``. The URL will be used in a GET request,
and it must contain the string ``{0}`` as a placeholder where the search string
will be inserted.

Example:

::

[solr]
url: https://path/to/my/search?x={0}

.. warning:: crane does not currently verify the SSL certificate of the Solr service

The JSON returned by the request must contain at least the following data
structure. Any additional keys and values will be ignored.

::

{
"response": {
"docs": [
{
"allTitle": "pulp/worker",
"ir_description": "A short description to display in the terminal"
}
]
}
}


Deployment
----------
Expand Down
9 changes: 9 additions & 0 deletions crane/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

# google search appliance settings
SECTION_GSA = 'gsa'
SECTION_SOLR = 'solr'
KEY_URL = 'url'


Expand Down Expand Up @@ -100,6 +101,14 @@ def read_config(app, parser):
with supress(NoOptionError):
section[key] = parser.get(SECTION_GSA, key)

# "solr" section settings
with supress(NoSectionError):
section = app.config.setdefault(SECTION_SOLR, {})

for key in (KEY_URL,):
with supress(NoOptionError):
section[key] = parser.get(SECTION_SOLR, key)


@contextmanager
def supress(*exceptions):
Expand Down
3 changes: 3 additions & 0 deletions crane/data/default_config.conf
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,6 @@ endpoint:

[gsa]
url:

[solr]
url:
19 changes: 16 additions & 3 deletions crane/search/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
import logging

from .. import config
from .base import SearchBackend
from .gsa import GSA
from .solr import Solr


_logger = logging.getLogger(__name__)


# default to a backend that will always return 404
Expand All @@ -17,10 +23,17 @@ def load_config(app):
"""
global backend

url = app.config.get(config.SECTION_GSA, {}).get(config.KEY_URL)
if url:
backend = GSA(url)
gsa_url = app.config.get(config.SECTION_GSA, {}).get(config.KEY_URL)
if gsa_url:
backend = GSA(gsa_url)
_logger.info('using GSA search backend')
return
solr_url = app.config.get(config.SECTION_SOLR, {}).get(config.KEY_URL)
if solr_url:
backend = Solr(solr_url)
_logger.info('using solr search backend')
return

# reset to default if the config previously had one configured, but changed.
_logger.info('no search backend configured')
backend = SearchBackend()
66 changes: 66 additions & 0 deletions crane/search/solr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import httplib
import itertools
import json
import logging
import urllib

from .. import exceptions
from .base import HTTPBackend, SearchResult


_logger = logging.getLogger(__name__)


class Solr(HTTPBackend):
    """
    Search backend that queries a Solr deployment and adapts its JSON
    response into crane search results.
    """

    def __init__(self, url_template):
        """
        :param url_template: PEP3101 string that is a URL that will accept a
                             single argument to its .format() method, which
                             is the url-encoded search string.
        :type  url_template: str
        """
        self.url_template = url_template

    def search(self, query):
        """
        Searches a Solr search backend based on a given query parameter.

        :param query: a string representing the search input from a user that
                      should be passed through to the solr backend
        :type  query: basestring

        :return:    a collection of search results as a generator of
                    SearchResult instances. These results have been filtered
                    to exclude any repositories that are not being served by
                    this deployment of this app.
        :rtype:     generator
        """
        # quote the user-supplied term so it is safe to embed in a URL
        quoted_query = urllib.quote(query)
        url = self.url_template.format(quoted_query)
        # lazy %-style args: the string is only formatted if DEBUG is enabled
        _logger.debug('searching with URL: %s', url)

        body = self._get_data(url)

        results = self._parse(body)
        # _filter_result and _format_result are provided by HTTPBackend
        filtered_results = itertools.ifilter(self._filter_result, results)
        return itertools.imap(self._format_result, filtered_results)

    def _parse(self, body):
        """
        Processes the raw response body into search results.

        :param body:    body from the web response object
        :type  body:    str

        :return:    generator of SearchResult instances
        :rtype:     generator

        :raises exceptions.HTTPError: with status 502 (bad gateway) if the
                body is not valid JSON or lacks the expected structure
        """
        try:
            data = json.loads(body)
            for item in data['response']['docs']:
                yield SearchResult(item['allTitle'], item['ir_description'])
        # ValueError: invalid JSON; KeyError: missing expected keys;
        # TypeError: a level of the structure has the wrong type
        except (ValueError, KeyError, TypeError):
            # logger.exception logs at ERROR level and includes the traceback,
            # so a separate _logger.error call would be redundant
            _logger.exception('could not parse response body')
            raise exceptions.HTTPError(httplib.BAD_GATEWAY,
                                       'error communicating with backend search service')
2 changes: 2 additions & 0 deletions tests/data/gsa/crane.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[gsa]
url: http://foo/bar
2 changes: 2 additions & 0 deletions tests/data/solr/crane.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[solr]
url: http://foo/bar
14 changes: 13 additions & 1 deletion tests/search/test_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import mock

from crane import config, search
from crane.search import SearchBackend, GSA
from crane.search import SearchBackend, GSA, Solr


class TestLoadConfig(unittest2.TestCase):
Expand All @@ -31,3 +31,15 @@ def test_gsa(self):

self.assertIsInstance(search.backend, GSA)
self.assertEqual(search.backend.url, fake_url)

def test_solr(self):
    """configuring a solr URL should select the Solr search backend"""
    fake_url = 'http://pulpproject.org/search'
    mock_app = mock.MagicMock()
    mock_app.config = {config.SECTION_SOLR: {config.KEY_URL: fake_url}}

    search.load_config(mock_app)

    self.assertIsInstance(search.backend, Solr)
    self.assertEqual(search.backend.url_template, fake_url)
98 changes: 98 additions & 0 deletions tests/search/test_solr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import httplib
import json

import mock
import unittest2

from crane import exceptions
from crane.search import Solr
from crane.search.base import SearchResult


class BaseSolrTest(unittest2.TestCase):
    """Common fixture: a Solr backend wired to a fake search URL."""

    def setUp(self):
        super(BaseSolrTest, self).setUp()
        # PEP3101 template; {0} is replaced with the url-quoted query string
        url_template = 'http://pulpproject.org/search?q={0}'
        self.url = url_template
        self.solr = Solr(url_template)


class TestInit(BaseSolrTest):
    def test_stores_data(self):
        """the constructor should store the URL template verbatim"""
        self.assertEqual(self.url, self.solr.url_template)


class TestSearch(BaseSolrTest):
    @mock.patch('crane.search.solr.Solr._get_data')
    def test_quotes_query(self, mock_get_data):
        """the search term must be url-quoted before insertion into the URL"""
        self.solr.search('hi mom')

        mock_get_data.assert_called_once_with(self.url.format('hi%20mom'))

    @mock.patch('crane.search.solr.Solr._filter_result', spec_set=True, return_value=True)
    @mock.patch('crane.search.solr.Solr._get_data', spec_set=True)
    @mock.patch('crane.search.solr.Solr._parse')
    def test_workflow_filter_true(self, mock_parse, mock_get_data, mock_filter):
        """results passing the filter should be formatted and returned"""
        mock_parse.return_value = [SearchResult('rhel', 'Red Hat Enterprise Linux')]

        ret = self.solr.search('foo')

        mock_get_data.assert_called_once_with('http://pulpproject.org/search?q=foo')
        self.assertDictEqual(list(ret)[0], {
            'name': 'rhel',
            'description': 'Red Hat Enterprise Linux',
            'star_count': 5,
            'is_trusted': True,
            'is_official': True,
        })

    @mock.patch('crane.search.solr.Solr._filter_result', spec_set=True, return_value=False)
    @mock.patch('crane.search.solr.Solr._get_data', spec_set=True)
    @mock.patch('crane.search.solr.Solr._parse')
    def test_workflow_filter_false(self, mock_parse, mock_get_data, mock_filter):
        """results rejected by the filter must not appear in the output"""
        # NOTE: this method was previously also named test_workflow_filter_true,
        # which shadowed the first test and prevented this case from running
        mock_parse.return_value = [SearchResult('rhel', 'Red Hat Enterprise Linux')]

        ret = self.solr.search('foo')

        mock_get_data.assert_called_once_with('http://pulpproject.org/search?q=foo')
        self.assertEqual(len(list(ret)), 0)


class TestParse(BaseSolrTest):
    def test_normal(self):
        """a well-formed body yields one SearchResult per doc entry"""
        results = list(self.solr._parse(json.dumps(fake_body)))

        self.assertEqual(len(results), 1)

        # assertIsInstance is consistent with test_init.py and gives a
        # clearer failure message than assertTrue(isinstance(...))
        self.assertIsInstance(results[0], SearchResult)
        self.assertEqual(results[0].name, 'foo/bar')
        self.assertEqual(results[0].description, 'marketing speak yada yada')

    def test_json_exception(self):
        """
        a body that is not valid JSON should surface as a 502 HTTPError
        """
        with self.assertRaises(exceptions.HTTPError) as assertion:
            list(self.solr._parse('this is not valid json'))

        self.assertEqual(assertion.exception.status_code, httplib.BAD_GATEWAY)

    def test_attribute_exception(self):
        """
        valid JSON that lacks the expected keys should surface as a 502 HTTPError
        """
        with self.assertRaises(exceptions.HTTPError) as assertion:
            list(self.solr._parse(json.dumps({})))

        self.assertEqual(assertion.exception.status_code, httplib.BAD_GATEWAY)


# minimal fake Solr response payload; mirrors the structure crane requires
# ("response" -> "docs" -> allTitle / ir_description)
fake_body = {
    'response': {
        'docs': [
            {
                'allTitle': 'foo/bar',
                'ir_description': 'marketing speak yada yada',
            }
        ]
    }
}
23 changes: 23 additions & 0 deletions tests/test_config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import unittest

import mock
Expand All @@ -6,6 +7,12 @@
import demo_data


basepath = os.path.dirname(__file__)

gsa_config_path = os.path.join(basepath, 'data/gsa/crane.conf')
solr_config_path = os.path.join(basepath, 'data/solr/crane.conf')


class TestLoad(unittest.TestCase):
def setUp(self):
self.app = mock.MagicMock()
Expand All @@ -25,6 +32,22 @@ def test_defaults(self):
self.assertEqual(self.app.config.get(config.KEY_DATA_POLLING_INTERVAL), 60)
configured_gsa_url = self.app.config.get(config.SECTION_GSA, {}).get(config.KEY_URL)
self.assertEqual(configured_gsa_url, '')
configured_solr_url = self.app.config.get(config.SECTION_SOLR, {}).get(config.KEY_URL)
self.assertEqual(configured_solr_url, '')

@mock.patch('os.environ.get', new={config.CONFIG_ENV_NAME: solr_config_path}.get,
            spec_set=True)
def test_solr_url(self):
    """loading the solr test config file should populate the [solr] url setting"""
    config.load(self.app)

    solr_section = self.app.config.get(config.SECTION_SOLR, {})
    self.assertEqual(solr_section.get(config.KEY_URL), 'http://foo/bar')

@mock.patch('os.environ.get', new={config.CONFIG_ENV_NAME: gsa_config_path}.get,
            spec_set=True)
def test_gsa_url(self):
    """loading the gsa test config file should populate the [gsa] url setting"""
    config.load(self.app)

    gsa_section = self.app.config.get(config.SECTION_GSA, {})
    self.assertEqual(gsa_section.get(config.KEY_URL), 'http://foo/bar')

@mock.patch('pkg_resources.resource_stream', side_effect=IOError, spec_set=True)
def test_defaults_not_found(self, mock_resource_stream):
Expand Down

0 comments on commit 4c19668

Please sign in to comment.