Archive dataset feature and pin several dependencies (#2172)
* Archive dataset feature
* Pin faulty deps
* order deps
* pin some potentially troublesome deps
abulte committed Jun 4, 2019
1 parent 79d8ac4 commit 28fe147
Showing 14 changed files with 243 additions and 15 deletions.
13 changes: 13 additions & 0 deletions docs/adapting-settings.md
@@ -125,6 +125,19 @@ Enables the search autocomplete on the frontend if set to `True`, disables it otherwise

The search autocomplete debounce delay on the frontend, in milliseconds.

### ARCHIVE_COMMENT_USER_ID

**default**: `None`

The id of an existing user who will post a comment when a dataset is archived.

### ARCHIVE_COMMENT_TITLE

**default**: `_('This dataset has been archived')`

The title of the comment optionally posted when a dataset is archived.
NB: the content of the comment is located in `udata/templates/comments/dataset_archived.txt`.
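
For example, in a local settings override (a minimal sketch; the user id below is a placeholder and must reference an existing user):

    ARCHIVE_COMMENT_USER_ID = '5cf1a1b2c3d4e5f6a7b8c9d0'  # placeholder: id of an existing user
    ARCHIVE_COMMENT_TITLE = 'This dataset has been archived'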

## URLs validation

### URLS_ALLOW_PRIVATE
3 changes: 3 additions & 0 deletions less/front/dataset.less
@@ -0,0 +1,3 @@
.content.archived {
opacity: 0.6;
}
4 changes: 4 additions & 0 deletions less/site.less
@@ -242,6 +242,10 @@ small.deleted {
.square-stamp(#ff0000);
}

small.archived {
.square-stamp(#eea236);
}


.aside-map {
height: 220px;
24 changes: 16 additions & 8 deletions requirements/install.pip
@@ -1,13 +1,17 @@
awesome-slugify==1.6.5
authlib==0.10
awesome-slugify==1.6.5
Babel==2.6.0
bcrypt==3.1.6
bleach==3.1.0
blinker==1.4
celery==4.1.1
celerybeat-mongo==0.1.0
chardet==3.0.4
click==6.7
CommonMark==0.8.1
elasticsearch==2.4.1
cryptography==2.7
elasticsearch-dsl==2.2.0
elasticsearch==2.4.1
factory-boy==2.11.1
Faker==1.0.5
Flask-BabelEx==0.9.3
@@ -25,31 +29,35 @@ Flask-Sitemap==0.3.0
Flask-Themes2==0.1.4
Flask-WTF==0.14.2
Flask==1.0.2
future==0.17.1
geojson==2.4.1
html2text==2018.1.9
itsdangerous==1.1.0
Jinja2==2.10.1
jsonschema==3.0.1
kombu==4.6.0
lxml==4.3.3
mongoengine==0.16.3
msgpack-python==0.4.8
netaddr==0.7.19
bcrypt==3.1.6
pillow==6.0.0
pydenticon==0.3.1
pyliblzma==0.5.3
pymongo==3.7.2
python-dateutil==2.8.0
pytz==2019.1
PyYAML==5.1
rdflib==4.2.2
rdflib-jsonld==0.4.0
rdflib==4.2.2
redis==2.10.6 # this can be safely upgraded back to 3.1.0 as soon as celery 4.2.2 is released
requests==2.21.0
simplejson==3.16.0
StringDist==1.0.9
tlds
unicodecsv==0.14.1
ujson==1.35
unicodecsv==0.14.1
voluptuous==0.10.5
werkzeug==0.14.1
wtforms-json==0.3.3
wtforms==2.2.1
xmltodict==0.12.0
geojson==2.4.1
pymongo==3.7.2
Jinja2==2.10.1
38 changes: 38 additions & 0 deletions udata/core/dataset/actions.py
@@ -0,0 +1,38 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import logging
from datetime import datetime

from flask import current_app

from udata import theme, i18n
from udata.models import Discussion, Message

log = logging.getLogger(__name__)


def archive(dataset, comment=False):
"""Archive a dataset"""
if dataset.archived:
log.warning('Dataset %s already archived, bumping date', dataset)
dataset.archived = datetime.now()
dataset.save()

if comment:
log.info('Posting comment for dataset %s...', dataset)
lang = current_app.config['DEFAULT_LANGUAGE']
title = current_app.config['ARCHIVE_COMMENT_TITLE']
user_id = current_app.config['ARCHIVE_COMMENT_USER_ID']
if user_id:
with i18n.language(lang):
msg = theme.render('comments/dataset_archived.txt')
message = Message(content=msg, posted_by=user_id)
discussion = Discussion(
user=user_id, discussion=[message], subject=dataset,
title=title)
discussion.save()
else:
log.warning('ARCHIVE_COMMENT_USER_ID not set, skipping comment')

log.info('Archived dataset %s', dataset)
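
A minimal usage sketch for this helper (assumes an active Flask application context; the dataset id is a placeholder):

    from udata.core.dataset.actions import archive
    from udata.models import Dataset

    dataset = Dataset.objects.get(id='5cf1a1b2c3d4e5f6a7b8c9d0')  # placeholder id
    # Stamps the archived date; posts the comment only if ARCHIVE_COMMENT_USER_ID is set
    archive(dataset, comment=True)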
51 changes: 49 additions & 2 deletions udata/core/dataset/commands.py
@@ -6,9 +6,12 @@
import logging
import requests

from udata.commands import cli, success
from udata.models import License, DEFAULT_LICENSE
from bson import ObjectId

from udata.commands import cli, success, exit_with_error
from udata.models import License, DEFAULT_LICENSE, Dataset
from .tasks import send_frequency_reminder
from . import actions

log = logging.getLogger(__name__)

@@ -71,3 +74,47 @@ def frequency_reminder():
to remind them they have outdated datasets on the website.
"""
send_frequency_reminder()


@cli.group('dataset')
def grp():
'''Dataset related operations'''
pass


@grp.command()
@click.argument('dataset_id')
@click.option('-c', '--comment', is_flag=True, help='Post a comment when archiving')
def archive_one(dataset_id, comment):
"""Archive one dataset"""
try:
dataset = Dataset.objects.get(id=dataset_id)
except Dataset.DoesNotExist:
exit_with_error('Cannot find a dataset with id %s' % dataset_id)
else:
actions.archive(dataset, comment)


@grp.command()
@click.argument('filepath')
@click.option('-c', '--comment', is_flag=True, help='Post a comment when archiving')
def archive(filepath, comment):
"""Archive multiple datasets from a list in a file (one id per line)"""
count = 0
errors = 0
log.info('Archiving datasets...')
with open(filepath) as inputfile:
for line in inputfile.readlines():
line = line.rstrip()
if not line:
continue
try:
dataset = Dataset.objects.get(id=ObjectId(line))
except Exception as e: # noqa (Never stop on failure)
log.error('Unable to archive dataset %s: %s', line, e)
errors += 1
continue
else:
actions.archive(dataset, comment)
count += 1
log.info('Archived %s datasets, %s failed', count, errors)
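
Assuming the standard `udata` console entry point and click 6.7's default command naming (which keeps the underscore in `archive_one`), invocation looks like this; the id and file name are placeholders:

    udata dataset archive_one 5cf1a1b2c3d4e5f6a7b8c9d0 --comment
    udata dataset archive ids_to_archive.txt -c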
13 changes: 10 additions & 3 deletions udata/core/dataset/models.py
@@ -191,12 +191,14 @@ def default(cls):

class DatasetQuerySet(db.OwnedQuerySet):
def visible(self):
return self(private__ne=True, resources__0__exists=True, deleted=None)
return self(private__ne=True, resources__0__exists=True,
deleted=None, archived=None)

def hidden(self):
return self(db.Q(private=True) |
db.Q(resources__0__exists=False) |
db.Q(deleted__ne=None))
db.Q(deleted__ne=None) |
db.Q(archived__ne=None))


class Checksum(db.EmbeddedDocument):
@@ -379,6 +381,7 @@ class Dataset(WithMetrics, BadgeMixin, db.Owned, db.Document):
featured = db.BooleanField(required=True, default=False)

deleted = db.DateTimeField()
archived = db.DateTimeField()

def __unicode__(self):
return self.title or ''
@@ -405,6 +408,7 @@ def __unicode__(self):
before_delete = signal('Dataset.before_delete')
after_delete = signal('Dataset.after_delete')
on_delete = signal('Dataset.on_delete')
on_archive = signal('Dataset.on_archive')
on_resource_added = signal('Dataset.on_resource_added')

verbose_name = _('dataset')
@@ -424,6 +428,8 @@ def post_save(cls, sender, document, **kwargs):
cls.on_update.send(document)
if document.deleted:
cls.on_delete.send(document)
if document.archived:
cls.on_archive.send(document)
if kwargs.get('resource_added'):
cls.on_resource_added.send(document,
resource_id=kwargs['resource_added'])
@@ -444,7 +450,8 @@ def is_visible(self):

@property
def is_hidden(self):
return len(self.resources) == 0 or self.private or self.deleted
return (len(self.resources) == 0 or self.private or self.deleted
or self.archived)

@property
def full_title(self):
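
Downstream code can subscribe to the new `on_archive` signal through blinker's connect API; a minimal sketch with a hypothetical receiver name:

    import logging

    from udata.models import Dataset

    log = logging.getLogger(__name__)

    @Dataset.on_archive.connect
    def on_dataset_archived(dataset, **kwargs):
        # Fired from post_save whenever a saved dataset has an archived date
        log.info('Dataset %s was archived', dataset.id)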
2 changes: 1 addition & 1 deletion udata/core/dataset/search.py
@@ -172,7 +172,7 @@ class Meta:

@classmethod
def is_indexable(cls, dataset):
return (dataset.deleted is None and
return (dataset.deleted is None and dataset.archived is None and
len(dataset.resources) > 0 and
not dataset.private)

7 changes: 7 additions & 0 deletions udata/settings.py
@@ -328,9 +328,16 @@ class Defaults(object):
'organization': None,
}

# Autocomplete parameters
#########################
SEARCH_AUTOCOMPLETE_ENABLED = True
SEARCH_AUTOCOMPLETE_DEBOUNCE = 200 # in ms

# Archive parameters
####################
ARCHIVE_COMMENT_USER_ID = None
ARCHIVE_COMMENT_TITLE = _('This dataset has been archived')


class Testing(object):
'''Sane values for testing. Should be applied as override'''
6 changes: 6 additions & 0 deletions udata/templates/comments/dataset_archived.txt
@@ -0,0 +1,6 @@
{% trans %}This dataset has been archived by our administration team.

An archived dataset is considered out of date by a combination of factors.
It does not appear in the search results and has a special layout warning the site's users.

If you think this is a mistake, you can contact the site administrator.{% endtrans %}
15 changes: 14 additions & 1 deletion udata/templates/dataset/display.html
@@ -36,7 +36,14 @@
{% cache cache_duration, 'dataset-content', dataset.id|string, g.lang_code, current_user.slug or 'anonymous' %}
<!-- Placeholder for non-routable modals -->
<div v-el:modal></div>
<section class="content {% if not dataset.organization.public_service %}non{% endif %}certified">

{% if dataset.archived %}
<div class="container-fluid alert alert-warning">
<div class="container" role="alert">{{ _('This dataset has been archived.') }}</div>
</div>
{% endif %}

<section class="content {% if not dataset.organization.public_service %}non{% endif %}certified {% if dataset.archived %}archived{% endif %}">
<div class="container dataset-container">
<div class="row">

@@ -84,6 +91,12 @@ <h2>{{ dataset.title }}
popover-title="{{ _('Deleted') }}" popover-trigger="hover" popover-placement="top"
>{{ _('Deleted') }}</small>
{% endif %}
{% if dataset.archived %}
<small class="archived"
v-popover.literal="{{ _('This dataset has been archived.') }}"
popover-title="{{ _('Archived') }}" popover-trigger="hover" popover-placement="top"
>{{ _('Archived') }}</small>
{% endif %}
</p>
</div>
</div>
22 changes: 22 additions & 0 deletions udata/tests/api/test_datasets_api.py
@@ -1061,6 +1061,28 @@ def test_dataset_allowed_resources_extensions(self):
self.assertEqual(response.json, extensions)


class DatasetArchivedAPITest(APITestCase):
modules = ['core.dataset']

def test_dataset_api_search_archived(self):
'''It should search datasets from the API, excluding archived ones'''
with self.autoindex():
VisibleDatasetFactory(archived=None)
dataset = VisibleDatasetFactory(archived=datetime.now())

response = self.get(url_for('api.datasets', q=''))
self.assert200(response)
self.assertEqual(len(response.json['data']), 1)
self.assertNotIn(str(dataset.id),
[r['id'] for r in response.json['data']])

def test_dataset_api_get_archived(self):
'''It should fetch an archived dataset from the API and return 200'''
dataset = VisibleDatasetFactory(archived=datetime.now())
response = self.get(url_for('api.dataset', dataset=dataset))
self.assert200(response)


class CommunityResourceAPITest(APITestCase):
modules = ['core.dataset', 'core.user', 'core.organization']

27 changes: 27 additions & 0 deletions udata/tests/dataset/test_dataset_actions.py
@@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import pytest

from udata.core.dataset.actions import archive
from udata.core.dataset.factories import VisibleDatasetFactory
from udata.core.discussions.models import Discussion
from udata.core.user.factories import UserFactory


@pytest.mark.usefixtures('clean_db')
class DatasetActionsTest:

def test_dataset_archive(self, app):
user = UserFactory()
app.config['ARCHIVE_COMMENT_USER_ID'] = user.id

dataset = VisibleDatasetFactory()

archive(dataset, comment=True)

dataset.reload()
assert dataset.archived is not None
discussions = Discussion.objects.filter(subject=dataset)
assert len(discussions) == 1
assert 'archived' in discussions[0].discussion[0].content
