Skip to content

Commit

Permalink
Merge pull request #1078 from okfn/1078-package_show-performance
Browse files Browse the repository at this point in the history
package_show could be faster
  • Loading branch information
kindly committed Aug 23, 2013
2 parents 682e6bc + e9dc42c commit a14b54a
Show file tree
Hide file tree
Showing 8 changed files with 77 additions and 23 deletions.
1 change: 1 addition & 0 deletions ckan/config/solr/schema-2.0.xml
Expand Up @@ -129,6 +129,7 @@
<field name="title_string" type="string" indexed="true" stored="false" />

<field name="data_dict" type="string" indexed="false" stored="true" />
<field name="validated_data_dict" type="string" indexed="false" stored="true" />

<field name="_version_" type="string" indexed="true" stored="true"/>

Expand Down
4 changes: 0 additions & 4 deletions ckan/lib/dictization/model_dictize.py
Expand Up @@ -300,10 +300,6 @@ def package_dictize(pkg, context):
result_dict['metadata_created'] = pkg.metadata_created.isoformat() \
if pkg.metadata_created else None

if context.get('for_view'):
for item in plugins.PluginImplementations( plugins.IPackageController):
result_dict = item.before_view(result_dict)

return result_dict

def _get_members(context, group, member_type):
Expand Down
9 changes: 9 additions & 0 deletions ckan/lib/navl/dictization_functions.py
@@ -1,6 +1,7 @@
import copy
import formencode as fe
import inspect
import json
from pylons import config

from ckan.common import _
Expand Down Expand Up @@ -402,3 +403,11 @@ def unflatten(data):
unflattened[key] = [unflattened[key][s] for s in sorted(unflattened[key])]

return unflattened


class MissingNullEncoder(json.JSONEncoder):
    '''JSON encoder that serialises ``Missing`` objects as ``null``.

    Any other object the stock encoder cannot handle is passed on to
    ``json.JSONEncoder.default`` unchanged.
    '''

    def default(self, obj):
        '''Return a JSON-serialisable stand-in for ``obj``.

        ``Missing`` instances are mapped to ``None`` (emitted as JSON
        ``null``); everything else is delegated to the base class.
        '''
        if not isinstance(obj, Missing):
            # Not one of ours: let the base encoder decide what to do.
            return json.JSONEncoder.default(self, obj)
        return None
15 changes: 7 additions & 8 deletions ckan/lib/search/__init__.py
Expand Up @@ -123,7 +123,8 @@ def notify(self, entity, operation):
dispatch_by_operation(
entity.__class__.__name__,
logic.get_action('package_show')(
{'model': model, 'ignore_auth': True, 'validate': False},
{'model': model, 'ignore_auth': True, 'validate': False,
'use_cache': False},
{'id': entity.id}),
operation
)
Expand All @@ -147,18 +148,18 @@ def rebuild(package_id=None, only_missing=False, force=False, refresh=False, def
log.info("Rebuilding search index...")

package_index = index_for(model.Package)
context = {'model': model, 'ignore_auth': True, 'validate': False,
'use_cache': False}

if package_id:
pkg_dict = logic.get_action('package_show')(
{'model': model, 'ignore_auth': True, 'validate': False},
pkg_dict = logic.get_action('package_show')(context,
{'id': package_id})
log.info('Indexing just package %r...', pkg_dict['name'])
package_index.remove_dict(pkg_dict)
package_index.insert_dict(pkg_dict)
elif package_ids:
for package_id in package_ids:
pkg_dict = logic.get_action('package_show')(
{'model': model, 'ignore_auth': True, 'validate': False},
pkg_dict = logic.get_action('package_show')(context,
{'id': package_id})
log.info('Indexing just package %r...', pkg_dict['name'])
package_index.update_dict(pkg_dict, True)
Expand All @@ -185,9 +186,7 @@ def rebuild(package_id=None, only_missing=False, force=False, refresh=False, def
for pkg_id in package_ids:
try:
package_index.update_dict(
logic.get_action('package_show')(
{'model': model, 'ignore_auth': True,
'validate': False},
logic.get_action('package_show')(context,
{'id': pkg_id}
),
defer_commit
Expand Down
15 changes: 15 additions & 0 deletions ckan/lib/search/index.py
Expand Up @@ -18,9 +18,13 @@
from ckan.plugins import (PluginImplementations,
IPackageController)
import ckan.logic as logic
import ckan.lib.plugins as lib_plugins
import ckan.lib.navl.dictization_functions

log = logging.getLogger(__name__)

_validate = ckan.lib.navl.dictization_functions.validate

TYPE_FIELD = "entity_type"
PACKAGE_TYPE = "package"
KEY_CHARS = string.digits + string.letters + "_-"
Expand Down Expand Up @@ -102,8 +106,19 @@ def update_dict(self, pkg_dict, defer_commit=False):
def index_package(self, pkg_dict, defer_commit=False):
if pkg_dict is None:
return

pkg_dict['data_dict'] = json.dumps(pkg_dict)

if config.get('ckan.cache_validated_datasets', True):
package_plugin = lib_plugins.lookup_package_plugin(
pkg_dict.get('type'))

schema = package_plugin.show_package_schema()
validated_pkg_dict, errors = _validate(pkg_dict, schema, {
'model': model, 'session': model.Session})
pkg_dict['validated_data_dict'] = json.dumps(validated_pkg_dict,
cls=ckan.lib.navl.dictization_functions.MissingNullEncoder)

# add to string field for sorting
title = pkg_dict.get('title')
if title:
Expand Down
5 changes: 3 additions & 2 deletions ckan/lib/search/query.py
Expand Up @@ -279,11 +279,12 @@ def get_index(self,reference):
data = json.loads(solr_response)

if data['response']['numFound'] == 0:
raise SearchError('Dataset not found in the search index: %s' % reference)
raise SearchError('Dataset not found in the search index: %s' % reference)
else:
return data['response']['docs'][0]
except Exception, e:
log.exception(e)
if not isinstance(e, SearchError):
log.exception(e)
raise SearchError(e)
finally:
conn.close()
Expand Down
50 changes: 41 additions & 9 deletions ckan/logic/action/get.py
Expand Up @@ -4,6 +4,7 @@
import logging
import json
import datetime
import socket

from pylons import config
import sqlalchemy
Expand Down Expand Up @@ -768,7 +769,37 @@ def package_show(context, data_dict):

_check_access('package_show', context, data_dict)

package_dict = model_dictize.package_dictize(pkg, context)
package_dict = None
use_cache = (context.get('use_cache', True)
and not 'revision_id' in context
and not 'revision_date' in context)
if use_cache:
try:
search_result = search.show(name_or_id)
except (search.SearchError, socket.error):
pass
else:
use_validated_cache = 'schema' not in context
if use_validated_cache and 'validated_data_dict' in search_result:
package_dict = json.loads(search_result['validated_data_dict'])
package_dict_validated = True
else:
package_dict = json.loads(search_result['data_dict'])
package_dict_validated = False
metadata_modified = pkg.metadata_modified.isoformat()
search_metadata_modified = search_result['metadata_modified']
# solr stores less precise datetimes,
# truncate to 22 characters to get a good enough match
if metadata_modified[:22] != search_metadata_modified[:22]:
package_dict = None

if not package_dict:
package_dict = model_dictize.package_dictize(pkg, context)
package_dict_validated = False

if context.get('for_view'):
for item in plugins.PluginImplementations(plugins.IPackageController):
package_dict = item.before_view(package_dict)

for item in plugins.PluginImplementations(plugins.IPackageController):
item.read(pkg)
Expand All @@ -777,14 +808,15 @@ def package_show(context, data_dict):
for item in plugins.PluginImplementations(plugins.IResourceController):
resource_dict = item.before_show(resource_dict)

package_plugin = lib_plugins.lookup_package_plugin(package_dict['type'])
if 'schema' in context:
schema = context['schema']
else:
schema = package_plugin.show_package_schema()

if schema and context.get('validate', True):
package_dict, errors = _validate(package_dict, schema, context=context)
if not package_dict_validated:
package_plugin = lib_plugins.lookup_package_plugin(package_dict['type'])
if 'schema' in context:
schema = context['schema']
else:
schema = package_plugin.show_package_schema()
if schema and context.get('validate', True):
package_dict, errors = _validate(package_dict, schema,
context=context)

for item in plugins.PluginImplementations(plugins.IPackageController):
item.after_show(context, package_dict)
Expand Down
1 change: 1 addition & 0 deletions ckanext/multilingual/solr/schema.xml
Expand Up @@ -436,6 +436,7 @@
<field name="title_pl" type="text_pl" indexed="true" stored="true"/>

<field name="data_dict" type="string" indexed="false" stored="true" />
<field name="validated_data_dict" type="string" indexed="false" stored="true" />

<field name="_version_" type="string" indexed="true" stored="true"/>

Expand Down

0 comments on commit a14b54a

Please sign in to comment.