Fix cross-dataset search export. Closes #848.

commit f2df2520049bc4b9fd079342bccfa97f7dcd5aa2 (1 parent: ad279e7)
Christopher Groskopf (onyxfish) authored
client/static/js/models/datasets.js (8 changes)
@@ -494,10 +494,12 @@ PANDA.collections.Datasets = Backbone.Collection.extend({
494 494 *
495 495 * NB: Uses the cross-dataset export url, resulting in a ZIP file.
496 496 */
497   - data = {};
  497 + data = {
  498 + export: true
  499 + };
498 500
499 501 if (query) {
500   - data['q'] = query;
  502 + data.q = query;
501 503 }
502 504
503 505 if (since != "all") {
@@ -509,7 +511,7 @@ PANDA.collections.Datasets = Backbone.Collection.extend({
509 511 }
510 512
511 513 Redd.ajax({
512   - url: PANDA.API + "/data/export/",
  514 + url: PANDA.API + "/data/",
513 515 dataType: 'json',
514 516 data: data,
515 517 success: _.bind(function(response) {
docs/api.rst (7 changes)
@@ -852,3 +852,10 @@ The response is a ``meta`` object with paging information and an ``objects`` arr
852 852
853 853 When using this endpoint the ``limit`` and ``offset`` parameters refer to the Datasets (that is, the **groups**) returned. If you wish to paginate the result sets within each group you can use ``group_limit`` and ``group_offset``, however, this is rarely useful behavior.
854 854
  855 +Exporting global search results
  856 +===============================
  857 +
  858 +You may export any set of search results to a ZIP file by passing ``export=true`` in the query string. The ``limit``, ``offset``, ``group_limit`` and ``group_offset`` parameters are ignored when exporting.
  859 +
  860 +This is an asynchronous operation that returns success or failure based on whether the export task was queued. Use the Task API to check the task's status or the Export API to retrieve the results.
  861 +
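For reference, the export flow documented above can be exercised end-to-end roughly as follows. This is a minimal sketch in Python using the ``requests`` library against a local PANDA instance; the host, the credential parameters and the Task API path are illustrative assumptions, not part of this commit.

    import requests

    BASE = "http://localhost:8000"   # assumed local PANDA host
    API = BASE + "/api/1.0"
    AUTH = {"email": "panda@example.com", "api_key": "YOUR_API_KEY"}   # assumed credentials

    # Queue an export of all cross-dataset results for "joseph". This hits the
    # normal search endpoint; export=true switches it into export mode and the
    # limit/offset/group_limit/group_offset parameters are ignored.
    queued = requests.get(API + "/data/", params=dict(q="joseph", export="true", **AUTH))
    print(queued.json())             # expected: "Export queued."

    # The export runs asynchronously. Poll the Task API (path assumed here) to
    # see when the ExportSearchTask finishes, then download the resulting ZIP
    # via the Export API.
    tasks = requests.get(API + "/task/", params=dict(limit=1, **AUTH)).json()
    latest = tasks["objects"][0]
    print(latest["task_name"], latest["status"])

Folding export into the regular ``/data/`` search endpoint, rather than keeping the separate ``/data/export/`` route, lets the JavaScript client reuse its existing query-building code and simply add ``export: true``, which is all the datasets.js change above does.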
panda/api/data.py (197 changes)
@@ -3,7 +3,6 @@
3 3 import re
4 4
5 5 from django.conf import settings
6   -from django.conf.urls.defaults import url
7 6 from django.core.urlresolvers import get_script_prefix, resolve, reverse
8 7 from django.utils import simplejson as json
9 8 from tastypie import fields, http
@@ -12,7 +11,6 @@
12 11 from tastypie.exceptions import BadRequest, NotFound, ImmediateHttpResponse
13 12 from tastypie.utils import dict_strip_unicode_keys
14 13 from tastypie.utils.mime import build_content_type
15   -from tastypie.utils.urls import trailing_slash
16 14 from tastypie.validation import Validation
17 15
18 16 from panda import solr
@@ -91,12 +89,6 @@ class Meta:
91 89
92 90 object_class = SolrObject
93 91
94   - def override_urls(self):
95   - """
96   - Add urls for export.
97   - """
98   - url(r'^export%s' % trailing_slash(), self.wrap_view('search_export'), name='api_data_search_export'),
99   -
100 92 def dehydrate_data(self, bundle):
101 93 """
102 94 Convert csv data into a proper array for JSON serialization
@@ -413,6 +405,7 @@ def search_all_data(self, request, **kwargs):
413 405 offset = int(request.GET.get('offset', 0))
414 406 group_limit = int(request.GET.get('group_limit', settings.PANDA_DEFAULT_SEARCH_ROWS_PER_GROUP))
415 407 group_offset = int(request.GET.get('group_offset', 0))
  408 + export = bool(request.GET.get('export', False))
416 409
417 410 if category:
418 411 if category != 'uncategorized':
@@ -426,119 +419,95 @@ def search_all_data(self, request, **kwargs):
426 419 if since:
427 420 query = 'last_modified:[' + since + 'Z TO *] AND (%s)' % query
428 421
429   - response = solr.query_grouped(
430   - settings.SOLR_DATA_CORE,
431   - query,
432   - 'dataset_slug',
433   - offset=offset,
434   - limit=limit,
435   - group_limit=group_limit,
436   - group_offset=group_offset
437   - )
438   - groups = response['grouped']['dataset_slug']['groups']
439   -
440   - page = PandaPaginator(
441   - request.GET,
442   - groups,
443   - resource_uri=request.path_info,
444   - count=response['grouped']['dataset_slug']['ngroups']
445   - ).page()
446   -
447   - datasets = []
448   -
449   - for group in groups:
450   - dataset_slug = group['groupValue']
451   - results = group['doclist']
452   -
453   - try:
454   - dataset = Dataset.objects.get(slug=dataset_slug)
455   - # In the event that stale data exists in Solr, skip this dataset,
456   - # request the invalid data be purged and return the other results.
457   - # Pagination may be wrong, but this is the most functional solution. (#793)
458   - except Dataset.DoesNotExist:
459   - PurgeDataTask.apply_async(args=[dataset_slug])
460   - solr.delete(settings.SOLR_DATASETS_CORE, 'slug:%s' % dataset_slug)
461   -
462   - page['meta']['total_count'] -= 1
463   -
464   - continue
465   -
466   - dataset_resource = DatasetResource()
467   - dataset_bundle = dataset_resource.build_bundle(obj=dataset, request=request)
468   - dataset_bundle = dataset_resource.full_dehydrate(dataset_bundle)
469   - dataset_bundle = dataset_resource.simplify_bundle(dataset_bundle)
  422 + if export:
  423 + task_type = ExportSearchTask
470 424
471   - objects = [SolrObject(obj) for obj in results['docs']]
472   -
473   - dataset_search_url = reverse('api_dataset_data_list', kwargs={ 'api_name': self._meta.api_name, 'dataset_resource_name': 'dataset', 'resource_name': 'data', 'dataset_slug': dataset.slug })
  425 + task = TaskStatus.objects.create(
  426 + task_name=task_type.name,
  427 + task_description='Export search results for "%s".' % query,
  428 + creator=request.user
  429 + )
474 430
475   - data_page = PandaPaginator(
476   - { 'limit': str(group_limit), 'offset': str(group_offset), 'q': query },
477   - objects,
478   - resource_uri=dataset_search_url,
479   - count=results['numFound']
  431 + task_type.apply_async(
  432 + args=[query, task.id],
  433 + kwargs={},
  434 + task_id=task.id
  435 + )
  436 + else:
  437 + response = solr.query_grouped(
  438 + settings.SOLR_DATA_CORE,
  439 + query,
  440 + 'dataset_slug',
  441 + offset=offset,
  442 + limit=limit,
  443 + group_limit=group_limit,
  444 + group_offset=group_offset
  445 + )
  446 + groups = response['grouped']['dataset_slug']['groups']
  447 +
  448 + page = PandaPaginator(
  449 + request.GET,
  450 + groups,
  451 + resource_uri=request.path_info,
  452 + count=response['grouped']['dataset_slug']['ngroups']
480 453 ).page()
481 454
482   - dataset_bundle.data.update(data_page)
483   - dataset_bundle.data['objects'] = []
484   -
485   - for obj in objects:
486   - data_bundle = self.build_bundle(obj=obj, request=request)
487   - data_bundle = self.full_dehydrate(data_bundle)
488   - dataset_bundle.data['objects'].append(data_bundle)
489   -
490   - datasets.append(dataset_bundle.data)
491   -
492   - page['objects'] = datasets
493   -
494   - # Log query
495   - SearchLog.objects.create(user=request.user, dataset=None, query=query)
496   -
497   - self.log_throttled_access(request)
498   -
499   - return self.create_response(request, page)
500   -
501   - def search_export(self, request, **kwargs):
502   - """
503   - Export the results of a Solr query.
504   - """
505   - self.method_check(request, allowed=['get'])
506   - self.is_authenticated(request)
507   - self.throttle_check(request)
508   -
509   - query = request.GET.get('q', '')
510   - category = request.GET.get('category', '')
511   - since = request.GET.get('since', None)
512   -
513   - if category:
514   - if category != 'uncategorized':
515   - category = Category.objects.get(slug=category)
516   - dataset_slugs = category.datasets.values_list('slug', flat=True)
517   - else:
518   - dataset_slugs = Dataset.objects.filter(categories=None).values_list('slug', flat=True)
519   -
520   - query += ' dataset_slug:(%s)' % ' '.join(dataset_slugs)
521   -
522   - if since:
523   - query = 'last_modified:[' + since + 'Z TO *] AND (%s)' % query
524   -
525   - task_type = ExportSearchTask
526   -
527   - task = TaskStatus.objects.create(
528   - task_name=task_type.name,
529   - task_description='Export search results for "%s".' % query,
530   - creator=request.user
531   - )
532   -
533   - task_type.apply_async(
534   - args=[query, task.id],
535   - kwargs={},
536   - task_id=task.id
537   - )
  455 + datasets = []
  456 +
  457 + for group in groups:
  458 + dataset_slug = group['groupValue']
  459 + results = group['doclist']
  460 +
  461 + try:
  462 + dataset = Dataset.objects.get(slug=dataset_slug)
  463 + # In the event that stale data exists in Solr, skip this dataset,
  464 + # request the invalid data be purged and return the other results.
  465 + # Pagination may be wrong, but this is the most functional solution. (#793)
  466 + except Dataset.DoesNotExist:
  467 + PurgeDataTask.apply_async(args=[dataset_slug])
  468 + solr.delete(settings.SOLR_DATASETS_CORE, 'slug:%s' % dataset_slug)
  469 +
  470 + page['meta']['total_count'] -= 1
  471 +
  472 + continue
  473 +
  474 + dataset_resource = DatasetResource()
  475 + dataset_bundle = dataset_resource.build_bundle(obj=dataset, request=request)
  476 + dataset_bundle = dataset_resource.full_dehydrate(dataset_bundle)
  477 + dataset_bundle = dataset_resource.simplify_bundle(dataset_bundle)
  478 +
  479 + objects = [SolrObject(obj) for obj in results['docs']]
  480 +
  481 + dataset_search_url = reverse('api_dataset_data_list', kwargs={ 'api_name': self._meta.api_name, 'dataset_resource_name': 'dataset', 'resource_name': 'data', 'dataset_slug': dataset.slug })
  482 +
  483 + data_page = PandaPaginator(
  484 + { 'limit': str(group_limit), 'offset': str(group_offset), 'q': query },
  485 + objects,
  486 + resource_uri=dataset_search_url,
  487 + count=results['numFound']
  488 + ).page()
  489 +
  490 + dataset_bundle.data.update(data_page)
  491 + dataset_bundle.data['objects'] = []
  492 +
  493 + for obj in objects:
  494 + data_bundle = self.build_bundle(obj=obj, request=request)
  495 + data_bundle = self.full_dehydrate(data_bundle)
  496 + dataset_bundle.data['objects'].append(data_bundle)
  497 +
  498 + datasets.append(dataset_bundle.data)
  499 +
  500 + page['objects'] = datasets
  501 +
  502 + # Log query
  503 + SearchLog.objects.create(user=request.user, dataset=None, query=query)
538 504
539 505 self.log_throttled_access(request)
540 506
541   - return self.create_response(request, 'Export queued.')
  507 + if export:
  508 + return self.create_response(request, 'Export queued.')
  509 + else:
  510 + return self.create_response(request, page)
542 511
543 512 def search_dataset_data(self, request, **kwargs):
544 513 """
panda/tasks/export_search.py (2 changes)
@@ -199,6 +199,8 @@ def send_notifications(self, query, task_status, retval, einfo):
199 199 creation_date=task_status.start,
200 200 dataset=None)
201 201
  202 + extra_context['related_export'] = export
  203 +
202 204 url = '#export/%i' % export.id
203 205
204 206 template_prefix = 'export_search_complete'
panda/tests/test_api_data.py (3 changes)
@@ -569,7 +569,8 @@ def test_search_unauthorized(self):
569 569 def test_export_data(self):
570 570 self.dataset.import_data(self.user, self.upload, 0)
571 571
572   - response = self.client.get('/api/1.0/data/export/?q=joseph', **self.auth_headers)
  572 + response = self.client.get('/api/1.0/data/?q=joseph&export=true', **self.auth_headers)
573 573
574 574 self.assertEqual(response.status_code, 200)
  575 + self.assertEqual(response.content, '"Export queued."')
575 576
