Skip to content

Commit

Permalink
Fixes bug 1013321 - Admin UI for the master list of Super Search Fields. r=peterbe
Browse files Browse the repository at this point in the history
  • Loading branch information
adngdb committed Jun 13, 2014
1 parent 2c0e7a7 commit 7960713
Show file tree
Hide file tree
Showing 33 changed files with 1,958 additions and 71 deletions.
File renamed without changes.
100 changes: 100 additions & 0 deletions scripts/setup_supersearch_app.py
@@ -0,0 +1,100 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""Index supersearch fields data into elasticsearch.
This script creates a first set of data to be used by Super Search as the list
of fields it exposes to users, as well as to generate the elasticsearch
mapping for processed and raw crashes.
"""

import json
import os

from configman import Namespace
from configman.converters import class_converter

from socorro.app import generic_app


class SetupSuperSearchApp(generic_app.App):
    """Index supersearch fields data into elasticsearch. """

    app_name = 'setup-supersearch'
    app_version = '1.0'
    app_description = __doc__

    required_config = Namespace()

    # Path to the JSON file holding the initial set of supersearch fields.
    required_config.add_option(
        'supersearch_fields_file',
        default=os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            'data',
            'supersearch_fields.json'
        ),
    )

    required_config.namespace('elasticsearch')
    required_config.elasticsearch.add_option(
        'elasticsearch_class',
        default='socorro.external.elasticsearch.connection_context.'
        'ConnectionContext',
        from_string_converter=class_converter,
    )
    required_config.elasticsearch.add_option(
        'index_creator_class',
        default='socorro.external.elasticsearch.crashstorage.'
        'ElasticSearchCrashStorage',
        from_string_converter=class_converter,
    )

    def main(self):
        """Create the 'socorro' index, bulk-index the supersearch fields
        data into it, then verify that every field was indexed and log an
        error listing any missing fields.
        """
        # Create the socorro index in elasticsearch.
        index_creator = self.config.elasticsearch.index_creator_class(
            self.config.elasticsearch
        )
        index_creator.create_index('socorro', None)

        # Load the initial data set. The context manager guarantees the
        # file handle is closed even if the JSON parsing fails.
        with open(self.config.supersearch_fields_file, 'r') as data_file:
            all_fields = json.load(data_file)

        # Index the data, one document per field, keyed by field name.
        es_connection = index_creator.es
        es_connection.bulk_index(
            index='socorro',
            doc_type='supersearch_fields',
            docs=all_fields.values(),
            id_field='name',
        )

        # Verify data was correctly inserted.
        es_connection.refresh()
        total_indexed = es_connection.count(
            '*',
            index='socorro',
            doc_type='supersearch_fields',
        )['count']
        total_expected = len(all_fields)

        if total_expected != total_indexed:
            # Fetch the ids that actually made it into the index so we can
            # report exactly which fields are missing.
            indexed_fields = es_connection.search(
                '*',
                index='socorro',
                doc_type='supersearch_fields',
                size=total_indexed,
            )
            indexed_fields = [
                x['_id'] for x in indexed_fields['hits']['hits']
            ]

            self.config.logger.error(
                'The SuperSearch fields data was not correctly indexed, '
                '%s fields are missing from the database. Missing fields: %s',
                total_expected - total_indexed,
                list(set(all_fields.keys()) - set(indexed_fields))
            )


if __name__ == '__main__':
    # Let generic_app handle configuration parsing and app execution.
    generic_app.main(SetupSuperSearchApp)
6 changes: 6 additions & 0 deletions socorro/external/elasticsearch/connection_context.py
Expand Up @@ -22,6 +22,12 @@ class ConnectionContext(RequiredConfig):
doc='the time in seconds before a query to elasticsearch fails',
reference_value_from='resource.elasticsearch',
)
required_config.add_option(
'elasticsearch_default_index',
default='socorro',
doc='the default index used to store data',
reference_value_from='resource.elasticsearch',
)
required_config.add_option(
'elasticsearch_index',
default='socorro%Y%W',
Expand Down
201 changes: 185 additions & 16 deletions socorro/external/elasticsearch/supersearch.py
Expand Up @@ -2,16 +2,22 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import json
import re
import os

from elasticutils import F, S
from pyelasticsearch.exceptions import ElasticHttpNotFoundError

from socorro.external import BadArgumentError
from pyelasticsearch.exceptions import (
ElasticHttpError,
ElasticHttpNotFoundError,
)

from socorro.external import (
BadArgumentError,
InsertionError,
MissingArgumentError,
ResourceNotFound,
)
from socorro.external.elasticsearch.base import ElasticSearchBase
from socorro.lib import datetimeutil
from socorro.lib import datetimeutil, external_common
from socorro.lib.search_common import SearchBase


Expand All @@ -34,8 +40,16 @@ def process_filter_missing(self, key, value, action):

class SuperSearch(SearchBase, ElasticSearchBase):

# Defining some filters for the field service that need to be considered
# as lists.
filters = [
('form_field_choices', None, ['list', 'str']),
('permissions_needed', None, ['list', 'str']),
]

def __init__(self, *args, **kwargs):
config = kwargs.get('config')
ElasticSearchBase.__init__(self, config=config)

self.all_fields = self.get_fields()

Expand All @@ -48,13 +62,18 @@ def __init__(self, *args, **kwargs):
# init is mandatory.
# See http://freshfoo.com/blog/object__init__takes_no_parameters
SearchBase.__init__(self, config=config, fields=self.all_fields)
ElasticSearchBase.__init__(self, config=config)

def get_connection(self):
    """Build and return an elasticutils search object bound to our
    elasticsearch cluster, using the configured URLs and query timeout.
    """
    config = self.config
    connection = SuperS().es(
        urls=config.elasticsearch_urls,
        timeout=config.elasticsearch_timeout,
    )
    return connection

def get(self, **kwargs):
"""Return a list of results and facets based on parameters.
The list of accepted parameters (with types and default values) is in
socorro.lib.search_common.SearchBase
the database and can be accessed with the supersearch_fields service.
"""
# Filter parameters and raise potential errors.
params = self.get_parameters(**kwargs)
Expand All @@ -63,10 +82,7 @@ def get(self, **kwargs):
indexes = self.get_indexes(params['date'])

# Create and configure the search object.
search = SuperS().es(
urls=self.config.elasticsearch_urls,
timeout=self.config.elasticsearch_timeout,
)
search = self.get_connection()
search = search.indexes(*indexes)
search = search.doctypes(self.config.elasticsearch_doctype)

Expand Down Expand Up @@ -304,8 +320,161 @@ def format_field_names(self, hit):
return new_hit

def get_fields(self):
file_path = os.path.join(
os.path.dirname(os.path.realpath(__file__)),
'supersearch_fields.json'
""" Return all the fields from our database, as a dict where field
names are the keys.
No parameters are accepted.
"""
# Create and configure the search object.
search = self.get_connection()
search = search.indexes(
self.config.elasticsearch_default_index
)
search = search.doctypes('supersearch_fields')

count = search.count() # Total number of results.
search = search[:count]

# Get all fields from the database.
return dict((r['name'], r) for r in search.values_dict())

def create_field(self, **kwargs):
    """Create a new field in the database, to be used by supersearch and
    all elasticsearch related services.

    Mandatory parameters: name, in_database_name.
    Raises MissingArgumentError when a mandatory parameter is empty,
    InsertionError when a field with that name already exists, and
    re-raises any other elasticsearch error. Returns True on success.
    """
    # Accepted parameters for external_common.parse_arguments, as
    # (name, default value, type) tuples.
    filters = [
        ('name', None, 'str'),
        ('data_validation_type', 'enum', 'str'),
        ('default_value', None, 'str'),
        ('description', None, 'str'),
        ('form_field_type', 'MultipleValueField', 'str'),
        ('form_field_choices', None, ['list', 'str']),
        ('has_full_version', False, 'bool'),
        ('in_database_name', None, 'str'),
        ('is_exposed', False, 'bool'),
        ('is_returned', False, 'bool'),
        ('is_mandatory', False, 'bool'),
        ('query_type', 'enum', 'str'),
        ('namespace', None, 'str'),
        ('permissions_needed', None, ['list', 'str']),
        ('storage_mapping', None, 'json'),
    ]
    params = external_common.parse_arguments(filters, kwargs)

    mandatory_params = ('name', 'in_database_name')
    for param in mandatory_params:
        if not params[param]:
            raise MissingArgumentError(param)

    es_connection = self.get_connection().get_es()

    try:
        # overwrite_existing=False makes elasticsearch answer with a 409
        # conflict instead of silently replacing an existing document.
        es_connection.index(
            index=self.config.elasticsearch_default_index,
            doc_type='supersearch_fields',
            doc=params,
            id=params['name'],
            overwrite_existing=False,
            refresh=True,
        )
    except ElasticHttpError, e:
        if e.status_code == 409:
            # This field exists in the database, it thus cannot be created!
            raise InsertionError(
                'The field "%s" already exists in the database, '
                'impossible to create it. ' % params['name']
            )

        # Else this is an unexpected error and we want to know about it.
        raise

    return True

def update_field(self, **kwargs):
    """Update an existing field in the database.

    If the field does not exist yet, a ResourceNotFound error is raised.
    If you want to update only some keys, just do not pass the ones you
    don't want to change.

    Mandatory parameter: name.
    Raises MissingArgumentError when `name` is empty, ResourceNotFound
    when no field with that name exists. Returns True on success.
    """
    # Accepted parameters for external_common.parse_arguments. Defaults
    # are all None because unpassed keys are stripped out below.
    filters = [
        ('name', None, 'str'),
        ('data_validation_type', None, 'str'),
        ('default_value', None, 'str'),
        ('description', None, 'str'),
        ('form_field_type', None, 'str'),
        ('form_field_choices', None, ['list', 'str']),
        ('has_full_version', None, 'bool'),
        ('in_database_name', None, 'str'),
        ('is_exposed', None, 'bool'),
        ('is_returned', None, 'bool'),
        ('is_mandatory', None, 'bool'),
        ('query_type', None, 'str'),
        ('namespace', None, 'str'),
        ('permissions_needed', None, ['list', 'str']),
        ('storage_mapping', None, 'json'),
    ]
    params = external_common.parse_arguments(filters, kwargs)

    if not params['name']:
        raise MissingArgumentError('name')

    # Remove all the parameters that were not explicitly passed, so the
    # partial update below only touches keys the caller actually sent.
    for key in params.keys():
        if key not in kwargs:
            del params[key]

    es_connection = self.get_connection().get_es()

    # First verify that the field does exist.
    try:
        es_connection.get(
            index=self.config.elasticsearch_default_index,
            doc_type='supersearch_fields',
            id=params['name'],
        )
    except ElasticHttpNotFoundError:
        # This field does not exist yet, it thus cannot be updated!
        raise ResourceNotFound(
            'The field "%s" does not exist in the database, it needs to '
            'be created before it can be updated. ' % params['name']
        )

    # Then update the new field in the database. Note that pyelasticsearch
    # takes care of merging the new document into the old one, so missing
    # values won't be changed.
    es_connection.update(
        index=self.config.elasticsearch_default_index,
        doc_type='supersearch_fields',
        doc=params,
        id=params['name'],
        refresh=True,
    )

    return True

def delete_field(self, **kwargs):
    """Remove a field from the database.

    Removing a field means that it won't be indexed in elasticsearch
    anymore, nor will it be exposed or accessible via supersearch. It
    doesn't delete the data from crash reports though, so it would be
    possible to re-create the field and reindex some indices to get that
    data back.

    Mandatory parameter: name.
    Raises MissingArgumentError when `name` is empty, ResourceNotFound
    when no field with that name exists.
    """
    filters = [
        ('name', None, 'str'),
    ]
    params = external_common.parse_arguments(filters, kwargs)

    if not params['name']:
        raise MissingArgumentError('name')

    es_connection = self.get_connection().get_es()
    try:
        es_connection.delete(
            index=self.config.elasticsearch_default_index,
            doc_type='supersearch_fields',
            id=params['name'],
            refresh=True,
        )
    except ElasticHttpNotFoundError:
        # Stay consistent with update_field: surface a service-level
        # error instead of leaking the raw pyelasticsearch exception
        # when the field is missing.
        raise ResourceNotFound(
            'The field "%s" does not exist in the database, '
            'impossible to delete it. ' % params['name']
        )
return json.loads(open(file_path, 'r').read())
8 changes: 8 additions & 0 deletions socorro/lib/external_common.py
Expand Up @@ -6,6 +6,8 @@
Common functions for external modules.
"""

import json

from datetime import datetime, timedelta, date
from socorro.lib.util import DotDict

Expand Down Expand Up @@ -117,4 +119,10 @@ def check_type(param, datatype):
except ValueError:
param = None

elif datatype == "json" and isinstance(param, basestring):
try:
param = json.loads(param)
except ValueError:
param = None

return param
3 changes: 3 additions & 0 deletions socorro/lib/search_common.py
Expand Up @@ -7,6 +7,7 @@
"""

import datetime
import json

import socorro.lib.external_common as extern
from socorro.external import BadArgumentError, MissingArgumentError
Expand Down Expand Up @@ -328,6 +329,8 @@ def convert_to_type(value, data_type):
value = datetimeutil.string_to_datetime(value)
elif data_type == 'date' and not isinstance(value, datetime.date):
value = datetimeutil.string_to_datetime(value).date()
elif data_type == 'json' and isinstance(value, basestring):
value = json.loads(value)
return value


Expand Down

0 comments on commit 7960713

Please sign in to comment.