Skip to content

Commit

Permalink
Fixes bug 1013321 - Admin UI for the master list of Super Search Fields. r=peterbe
Browse files Browse the repository at this point in the history
  • Loading branch information
adngdb committed Jun 13, 2014
1 parent 2c0e7a7 commit 7960713
Show file tree
Hide file tree
Showing 33 changed files with 1,958 additions and 71 deletions.
File renamed without changes.
100 changes: 100 additions & 0 deletions scripts/setup_supersearch_app.py
@@ -0,0 +1,100 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""Index supersearch fields data into elasticsearch.
This script creates a first set of data to be used by Super Search as the list
of fields it exposes to users, as well as to generate the elasticsearch
mapping for processed and raw crashes.
"""

import json
import os

from configman import Namespace
from configman.converters import class_converter

from socorro.app import generic_app


class SetupSuperSearchApp(generic_app.App):
    """Index supersearch fields data into elasticsearch. """

    app_name = 'setup-supersearch'
    app_version = '1.0'
    app_description = __doc__

    required_config = Namespace()

    # Path to the JSON file holding the initial set of supersearch fields.
    required_config.add_option(
        'supersearch_fields_file',
        default=os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            'data',
            'supersearch_fields.json'
        ),
    )

    required_config.namespace('elasticsearch')
    required_config.elasticsearch.add_option(
        'elasticsearch_class',
        default='socorro.external.elasticsearch.connection_context.'
        'ConnectionContext',
        from_string_converter=class_converter,
    )
    required_config.elasticsearch.add_option(
        'index_creator_class',
        default='socorro.external.elasticsearch.crashstorage.'
        'ElasticSearchCrashStorage',
        from_string_converter=class_converter,
    )

    def main(self):
        """Create the 'socorro' index, bulk-index the supersearch fields
        data into it, then verify that every field was indexed and log an
        error listing any missing fields.
        """
        # Create the socorro index in elasticsearch.
        index_creator = self.config.elasticsearch.index_creator_class(
            self.config.elasticsearch
        )
        index_creator.create_index('socorro', None)

        # Load the initial data set. The context manager guarantees the
        # file handle is closed even if the JSON parsing fails.
        with open(self.config.supersearch_fields_file, 'r') as data_file:
            all_fields = json.load(data_file)

        # Index the data, one document per field, keyed by field name.
        es_connection = index_creator.es
        es_connection.bulk_index(
            index='socorro',
            doc_type='supersearch_fields',
            docs=all_fields.values(),
            id_field='name',
        )

        # Verify data was correctly inserted.
        es_connection.refresh()
        total_indexed = es_connection.count(
            '*',
            index='socorro',
            doc_type='supersearch_fields',
        )['count']
        total_expected = len(all_fields)

        if total_expected != total_indexed:
            # Fetch the ids that actually made it into the index so we can
            # report exactly which fields are missing.
            indexed_fields = es_connection.search(
                '*',
                index='socorro',
                doc_type='supersearch_fields',
                size=total_indexed,
            )
            indexed_fields = [
                x['_id'] for x in indexed_fields['hits']['hits']
            ]

            self.config.logger.error(
                'The SuperSearch fields data was not correctly indexed, '
                '%s fields are missing from the database. Missing fields: %s',
                total_expected - total_indexed,
                list(set(all_fields.keys()) - set(indexed_fields))
            )


if __name__ == '__main__':
    # Let generic_app handle configuration parsing and app execution.
    generic_app.main(SetupSuperSearchApp)
6 changes: 6 additions & 0 deletions socorro/external/elasticsearch/connection_context.py
Expand Up @@ -22,6 +22,12 @@ class ConnectionContext(RequiredConfig):
doc='the time in seconds before a query to elasticsearch fails',
reference_value_from='resource.elasticsearch',
)
required_config.add_option(
'elasticsearch_default_index',
default='socorro',
doc='the default index used to store data',
reference_value_from='resource.elasticsearch',
)
required_config.add_option(
'elasticsearch_index',
default='socorro%Y%W',
Expand Down
201 changes: 185 additions & 16 deletions socorro/external/elasticsearch/supersearch.py
Expand Up @@ -2,16 +2,22 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import json
import re
import os

from elasticutils import F, S
from pyelasticsearch.exceptions import ElasticHttpNotFoundError

from socorro.external import BadArgumentError
from pyelasticsearch.exceptions import (
ElasticHttpError,
ElasticHttpNotFoundError,
)

from socorro.external import (
BadArgumentError,
InsertionError,
MissingArgumentError,
ResourceNotFound,
)
from socorro.external.elasticsearch.base import ElasticSearchBase
from socorro.lib import datetimeutil
from socorro.lib import datetimeutil, external_common
from socorro.lib.search_common import SearchBase


Expand All @@ -34,8 +40,16 @@ def process_filter_missing(self, key, value, action):

class SuperSearch(SearchBase, ElasticSearchBase):

# Defining some filters for the field service that need to be considered
# as lists.
filters = [
('form_field_choices', None, ['list', 'str']),
('permissions_needed', None, ['list', 'str']),
]

def __init__(self, *args, **kwargs):
config = kwargs.get('config')
ElasticSearchBase.__init__(self, config=config)

self.all_fields = self.get_fields()

Expand All @@ -48,13 +62,18 @@ def __init__(self, *args, **kwargs):
# init is mandatory.
# See http://freshfoo.com/blog/object__init__takes_no_parameters
SearchBase.__init__(self, config=config, fields=self.all_fields)
ElasticSearchBase.__init__(self, config=config)

def get_connection(self):
    """Build and return an elasticutils search object bound to our
    elasticsearch cluster, using the configured URLs and query timeout.
    """
    config = self.config
    connection = SuperS().es(
        urls=config.elasticsearch_urls,
        timeout=config.elasticsearch_timeout,
    )
    return connection

def get(self, **kwargs):
"""Return a list of results and facets based on parameters.
The list of accepted parameters (with types and default values) is in
socorro.lib.search_common.SearchBase
the database and can be accessed with the supersearch_fields service.
"""
# Filter parameters and raise potential errors.
params = self.get_parameters(**kwargs)
Expand All @@ -63,10 +82,7 @@ def get(self, **kwargs):
indexes = self.get_indexes(params['date'])

# Create and configure the search object.
search = SuperS().es(
urls=self.config.elasticsearch_urls,
timeout=self.config.elasticsearch_timeout,
)
search = self.get_connection()
search = search.indexes(*indexes)
search = search.doctypes(self.config.elasticsearch_doctype)

Expand Down Expand Up @@ -304,8 +320,161 @@ def format_field_names(self, hit):
return new_hit

def get_fields(self):
file_path = os.path.join(
os.path.dirname(os.path.realpath(__file__)),
'supersearch_fields.json'
""" Return all the fields from our database, as a dict where field
names are the keys.
No parameters are accepted.
"""
# Create and configure the search object.
search = self.get_connection()
search = search.indexes(
self.config.elasticsearch_default_index
)
search = search.doctypes('supersearch_fields')

count = search.count() # Total number of results.
search = search[:count]

# Get all fields from the database.
return dict((r['name'], r) for r in search.values_dict())

def create_field(self, **kwargs):
    """Create a new field in the database, to be used by supersearch and
    all elasticsearch related services.

    Mandatory parameters: name, in_database_name.
    Raises MissingArgumentError when a mandatory parameter is empty,
    InsertionError when a field with that name already exists, and
    re-raises any other elasticsearch error. Returns True on success.
    """
    # Accepted parameters for external_common.parse_arguments, as
    # (name, default value, type) tuples.
    filters = [
        ('name', None, 'str'),
        ('data_validation_type', 'enum', 'str'),
        ('default_value', None, 'str'),
        ('description', None, 'str'),
        ('form_field_type', 'MultipleValueField', 'str'),
        ('form_field_choices', None, ['list', 'str']),
        ('has_full_version', False, 'bool'),
        ('in_database_name', None, 'str'),
        ('is_exposed', False, 'bool'),
        ('is_returned', False, 'bool'),
        ('is_mandatory', False, 'bool'),
        ('query_type', 'enum', 'str'),
        ('namespace', None, 'str'),
        ('permissions_needed', None, ['list', 'str']),
        ('storage_mapping', None, 'json'),
    ]
    params = external_common.parse_arguments(filters, kwargs)

    mandatory_params = ('name', 'in_database_name')
    for param in mandatory_params:
        if not params[param]:
            raise MissingArgumentError(param)

    es_connection = self.get_connection().get_es()

    try:
        # overwrite_existing=False makes elasticsearch answer with a 409
        # conflict instead of silently replacing an existing document.
        es_connection.index(
            index=self.config.elasticsearch_default_index,
            doc_type='supersearch_fields',
            doc=params,
            id=params['name'],
            overwrite_existing=False,
            refresh=True,
        )
    except ElasticHttpError, e:
        if e.status_code == 409:
            # This field exists in the database, it thus cannot be created!
            raise InsertionError(
                'The field "%s" already exists in the database, '
                'impossible to create it. ' % params['name']
            )

        # Else this is an unexpected error and we want to know about it.
        raise

    return True

def update_field(self, **kwargs):
    """Update an existing field in the database.

    If the field does not exist yet, a ResourceNotFound error is raised.
    If you want to update only some keys, just do not pass the ones you
    don't want to change.

    Mandatory parameter: name.
    Raises MissingArgumentError when `name` is empty, ResourceNotFound
    when no field with that name exists. Returns True on success.
    """
    # Accepted parameters for external_common.parse_arguments. Defaults
    # are all None because unpassed keys are stripped out below.
    filters = [
        ('name', None, 'str'),
        ('data_validation_type', None, 'str'),
        ('default_value', None, 'str'),
        ('description', None, 'str'),
        ('form_field_type', None, 'str'),
        ('form_field_choices', None, ['list', 'str']),
        ('has_full_version', None, 'bool'),
        ('in_database_name', None, 'str'),
        ('is_exposed', None, 'bool'),
        ('is_returned', None, 'bool'),
        ('is_mandatory', None, 'bool'),
        ('query_type', None, 'str'),
        ('namespace', None, 'str'),
        ('permissions_needed', None, ['list', 'str']),
        ('storage_mapping', None, 'json'),
    ]
    params = external_common.parse_arguments(filters, kwargs)

    if not params['name']:
        raise MissingArgumentError('name')

    # Remove all the parameters that were not explicitly passed, so the
    # partial update below only touches keys the caller actually sent.
    for key in params.keys():
        if key not in kwargs:
            del params[key]

    es_connection = self.get_connection().get_es()

    # First verify that the field does exist.
    try:
        es_connection.get(
            index=self.config.elasticsearch_default_index,
            doc_type='supersearch_fields',
            id=params['name'],
        )
    except ElasticHttpNotFoundError:
        # This field does not exist yet, it thus cannot be updated!
        raise ResourceNotFound(
            'The field "%s" does not exist in the database, it needs to '
            'be created before it can be updated. ' % params['name']
        )

    # Then update the new field in the database. Note that pyelasticsearch
    # takes care of merging the new document into the old one, so missing
    # values won't be changed.
    es_connection.update(
        index=self.config.elasticsearch_default_index,
        doc_type='supersearch_fields',
        doc=params,
        id=params['name'],
        refresh=True,
    )

    return True

def delete_field(self, **kwargs):
    """Remove a field from the database.

    Removing a field means that it won't be indexed in elasticsearch
    anymore, nor will it be exposed or accessible via supersearch. It
    doesn't delete the data from crash reports though, so it would be
    possible to re-create the field and reindex some indices to get that
    data back.

    Mandatory parameter: name.
    Raises MissingArgumentError when `name` is empty, ResourceNotFound
    when no field with that name exists.
    """
    filters = [
        ('name', None, 'str'),
    ]
    params = external_common.parse_arguments(filters, kwargs)

    if not params['name']:
        raise MissingArgumentError('name')

    es_connection = self.get_connection().get_es()
    try:
        es_connection.delete(
            index=self.config.elasticsearch_default_index,
            doc_type='supersearch_fields',
            id=params['name'],
            refresh=True,
        )
    except ElasticHttpNotFoundError:
        # Stay consistent with update_field: surface a service-level
        # error instead of leaking the raw pyelasticsearch exception
        # when the field is missing.
        raise ResourceNotFound(
            'The field "%s" does not exist in the database, '
            'impossible to delete it. ' % params['name']
        )
return json.loads(open(file_path, 'r').read())
8 changes: 8 additions & 0 deletions socorro/lib/external_common.py
Expand Up @@ -6,6 +6,8 @@
Common functions for external modules.
"""

import json

from datetime import datetime, timedelta, date
from socorro.lib.util import DotDict

Expand Down Expand Up @@ -117,4 +119,10 @@ def check_type(param, datatype):
except ValueError:
param = None

elif datatype == "json" and isinstance(param, basestring):
try:
param = json.loads(param)
except ValueError:
param = None

return param
3 changes: 3 additions & 0 deletions socorro/lib/search_common.py
Expand Up @@ -7,6 +7,7 @@
"""

import datetime
import json

import socorro.lib.external_common as extern
from socorro.external import BadArgumentError, MissingArgumentError
Expand Down Expand Up @@ -328,6 +329,8 @@ def convert_to_type(value, data_type):
value = datetimeutil.string_to_datetime(value)
elif data_type == 'date' and not isinstance(value, datetime.date):
value = datetimeutil.string_to_datetime(value).date()
elif data_type == 'json' and isinstance(value, basestring):
value = json.loads(value)
return value


Expand Down

0 comments on commit 7960713

Please sign in to comment.