Archive dataset feature and pin several dependencies (#2172)
* Archive dataset feature
* Pin faulty deps
* order deps
* pin some potentially troublesome deps
abulte committed Jun 4, 2019
1 parent 79d8ac4 commit 28fe147
Showing 14 changed files with 243 additions and 15 deletions.
13 changes: 13 additions & 0 deletions docs/adapting-settings.md
@@ -125,6 +125,19 @@ Enables the search autocomplete on the frontend if set to `True`, disables it otherwise

The search autocomplete debounce delay on the frontend, in milliseconds.

### ARCHIVE_COMMENT_USER_ID

**default**: `None`

The id of an existing user who will post a comment when a dataset is archived.

### ARCHIVE_COMMENT_TITLE

**default**: `_('This dataset has been archived')`

The title of the comment optionally posted when a dataset is archived.
NB: the content of the comment is located in `udata/templates/comments/dataset_archived.txt`.
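
For example, in a local settings override (a minimal sketch; the user id below is a placeholder and must reference an existing user):

    ARCHIVE_COMMENT_USER_ID = '5cf1a1b2c3d4e5f6a7b8c9d0'  # placeholder: id of an existing user
    ARCHIVE_COMMENT_TITLE = 'This dataset has been archived'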

## URLs validation

### URLS_ALLOW_PRIVATE
3 changes: 3 additions & 0 deletions less/front/dataset.less
@@ -0,0 +1,3 @@
.content.archived {
opacity: 0.6;
}
4 changes: 4 additions & 0 deletions less/site.less
@@ -242,6 +242,10 @@ small.deleted {
.square-stamp(#ff0000);
}

small.archived {
.square-stamp(#eea236);
}


.aside-map {
height: 220px;
24 changes: 16 additions & 8 deletions requirements/install.pip
@@ -1,13 +1,17 @@
awesome-slugify==1.6.5
authlib==0.10
awesome-slugify==1.6.5
Babel==2.6.0
bcrypt==3.1.6
bleach==3.1.0
blinker==1.4
celery==4.1.1
celerybeat-mongo==0.1.0
chardet==3.0.4
click==6.7
CommonMark==0.8.1
elasticsearch==2.4.1
cryptography==2.7
elasticsearch-dsl==2.2.0
elasticsearch==2.4.1
factory-boy==2.11.1
Faker==1.0.5
Flask-BabelEx==0.9.3
@@ -25,31 +29,35 @@ Flask-Sitemap==0.3.0
Flask-Themes2==0.1.4
Flask-WTF==0.14.2
Flask==1.0.2
future==0.17.1
geojson==2.4.1
html2text==2018.1.9
itsdangerous==1.1.0
Jinja2==2.10.1
jsonschema==3.0.1
kombu==4.6.0
lxml==4.3.3
mongoengine==0.16.3
msgpack-python==0.4.8
netaddr==0.7.19
bcrypt==3.1.6
pillow==6.0.0
pydenticon==0.3.1
pyliblzma==0.5.3
pymongo==3.7.2
python-dateutil==2.8.0
pytz==2019.1
PyYAML==5.1
rdflib==4.2.2
rdflib-jsonld==0.4.0
rdflib==4.2.2
redis==2.10.6 # this can be safely upgraded back to 3.1.0 as soon as celery 4.2.2 is released
requests==2.21.0
simplejson==3.16.0
StringDist==1.0.9
tlds
unicodecsv==0.14.1
ujson==1.35
unicodecsv==0.14.1
voluptuous==0.10.5
werkzeug==0.14.1
wtforms-json==0.3.3
wtforms==2.2.1
xmltodict==0.12.0
geojson==2.4.1
pymongo==3.7.2
Jinja2==2.10.1
38 changes: 38 additions & 0 deletions udata/core/dataset/actions.py
@@ -0,0 +1,38 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import logging
from datetime import datetime

from flask import current_app

from udata import theme, i18n
from udata.models import Discussion, Message

log = logging.getLogger(__name__)


def archive(dataset, comment=False):
"""Archive a dataset"""
if dataset.archived:
log.warning('Dataset %s already archived, bumping date', dataset)
dataset.archived = datetime.now()
dataset.save()

if comment:
log.info('Posting comment for dataset %s...', dataset)
lang = current_app.config['DEFAULT_LANGUAGE']
title = current_app.config['ARCHIVE_COMMENT_TITLE']
user_id = current_app.config['ARCHIVE_COMMENT_USER_ID']
if user_id:
with i18n.language(lang):
msg = theme.render('comments/dataset_archived.txt')
message = Message(content=msg, posted_by=user_id)
discussion = Discussion(
user=user_id, discussion=[message], subject=dataset,
title=title)
discussion.save()
else:
log.warning('ARCHIVE_COMMENT_USER_ID not set, skipping comment')

log.info('Archived dataset %s', dataset)
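
A minimal usage sketch for this helper (assumes an active Flask application context; the dataset id is a placeholder):

    from udata.core.dataset.actions import archive
    from udata.models import Dataset

    dataset = Dataset.objects.get(id='5cf1a1b2c3d4e5f6a7b8c9d0')  # placeholder id
    # Stamps the archived date; posts the comment only if ARCHIVE_COMMENT_USER_ID is set
    archive(dataset, comment=True)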
51 changes: 49 additions & 2 deletions udata/core/dataset/commands.py
@@ -6,9 +6,12 @@
import logging
import requests

from udata.commands import cli, success
from udata.models import License, DEFAULT_LICENSE
from bson import ObjectId

from udata.commands import cli, success, exit_with_error
from udata.models import License, DEFAULT_LICENSE, Dataset
from .tasks import send_frequency_reminder
from . import actions

log = logging.getLogger(__name__)

@@ -71,3 +74,47 @@ def frequency_reminder():
to remind them they have outdated datasets on the website.
"""
send_frequency_reminder()


@cli.group('dataset')
def grp():
'''Dataset related operations'''
pass


@grp.command()
@click.argument('dataset_id')
@click.option('-c', '--comment', is_flag=True, help='Post a comment when archiving')
def archive_one(dataset_id, comment):
"""Archive one dataset"""
try:
dataset = Dataset.objects.get(id=dataset_id)
except Dataset.DoesNotExist:
exit_with_error('Cannot find a dataset with id %s' % dataset_id)
else:
actions.archive(dataset, comment)


@grp.command()
@click.argument('filepath')
@click.option('-c', '--comment', is_flag=True, help='Post a comment when archiving')
def archive(filepath, comment):
"""Archive multiple datasets from a list in a file (one id per line)"""
count = 0
errors = 0
log.info('Archiving datasets...')
with open(filepath) as inputfile:
for line in inputfile.readlines():
line = line.rstrip()
if not line:
continue
try:
dataset = Dataset.objects.get(id=ObjectId(line))
except Exception as e: # noqa (Never stop on failure)
log.error('Unable to archive dataset %s: %s', line, e)
errors += 1
continue
else:
actions.archive(dataset, comment)
count += 1
log.info('Archived %s datasets, %s failed', count, errors)
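
Assuming the standard `udata` console entry point and click 6.7's default command naming (which keeps the underscore in `archive_one`), invocation looks like this; the id and file name are placeholders:

    udata dataset archive_one 5cf1a1b2c3d4e5f6a7b8c9d0 --comment
    udata dataset archive ids_to_archive.txt -c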
13 changes: 10 additions & 3 deletions udata/core/dataset/models.py
@@ -191,12 +191,14 @@ def default(cls):

class DatasetQuerySet(db.OwnedQuerySet):
def visible(self):
return self(private__ne=True, resources__0__exists=True, deleted=None)
return self(private__ne=True, resources__0__exists=True,
deleted=None, archived=None)

def hidden(self):
return self(db.Q(private=True) |
db.Q(resources__0__exists=False) |
db.Q(deleted__ne=None))
db.Q(deleted__ne=None) |
db.Q(archived__ne=None))


class Checksum(db.EmbeddedDocument):
@@ -379,6 +381,7 @@ class Dataset(WithMetrics, BadgeMixin, db.Owned, db.Document):
featured = db.BooleanField(required=True, default=False)

deleted = db.DateTimeField()
archived = db.DateTimeField()

def __unicode__(self):
return self.title or ''
@@ -405,6 +408,7 @@ def __unicode__(self):
before_delete = signal('Dataset.before_delete')
after_delete = signal('Dataset.after_delete')
on_delete = signal('Dataset.on_delete')
on_archive = signal('Dataset.on_archive')
on_resource_added = signal('Dataset.on_resource_added')

verbose_name = _('dataset')
@@ -424,6 +428,8 @@ def post_save(cls, sender, document, **kwargs):
cls.on_update.send(document)
if document.deleted:
cls.on_delete.send(document)
if document.archived:
cls.on_archive.send(document)
if kwargs.get('resource_added'):
cls.on_resource_added.send(document,
resource_id=kwargs['resource_added'])
@@ -444,7 +450,8 @@ def is_visible(self):

@property
def is_hidden(self):
return len(self.resources) == 0 or self.private or self.deleted
return (len(self.resources) == 0 or self.private or self.deleted
or self.archived)

@property
def full_title(self):
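
Downstream code can subscribe to the new `on_archive` signal through blinker's connect API; a minimal sketch with a hypothetical receiver name:

    import logging

    from udata.models import Dataset

    log = logging.getLogger(__name__)

    @Dataset.on_archive.connect
    def on_dataset_archived(dataset, **kwargs):
        # Fired from post_save whenever a saved dataset has an archived date
        log.info('Dataset %s was archived', dataset.id)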
2 changes: 1 addition & 1 deletion udata/core/dataset/search.py
@@ -172,7 +172,7 @@ class Meta:

@classmethod
def is_indexable(cls, dataset):
return (dataset.deleted is None and
return (dataset.deleted is None and dataset.archived is None and
len(dataset.resources) > 0 and
not dataset.private)

7 changes: 7 additions & 0 deletions udata/settings.py
@@ -328,9 +328,16 @@ class Defaults(object):
'organization': None,
}

# Autocomplete parameters
#########################
SEARCH_AUTOCOMPLETE_ENABLED = True
SEARCH_AUTOCOMPLETE_DEBOUNCE = 200 # in ms

# Archive parameters
####################
ARCHIVE_COMMENT_USER_ID = None
ARCHIVE_COMMENT_TITLE = _('This dataset has been archived')


class Testing(object):
'''Sane values for testing. Should be applied as override'''
6 changes: 6 additions & 0 deletions udata/templates/comments/dataset_archived.txt
@@ -0,0 +1,6 @@
{% trans %}This dataset has been archived by our administration team.

An archived dataset is considered out of date by a combination of factors.
It does not appear in the search results and has a special layout warning the site's users.

If you think this is a mistake, you can contact the site administrator.{% endtrans %}
15 changes: 14 additions & 1 deletion udata/templates/dataset/display.html
@@ -36,7 +36,14 @@
{% cache cache_duration, 'dataset-content', dataset.id|string, g.lang_code, current_user.slug or 'anonymous' %}
<!-- Placeholder for non-routable modals -->
<div v-el:modal></div>
<section class="content {% if not dataset.organization.public_service %}non{% endif %}certified">

{% if dataset.archived %}
<div class="container-fluid alert alert-warning">
<div class="container" role="alert">{{ _('This dataset has been archived.') }}</div>
</div>
{% endif %}

<section class="content {% if not dataset.organization.public_service %}non{% endif %}certified {% if dataset.archived %}archived{% endif %}">
<div class="container dataset-container">
<div class="row">

@@ -84,6 +91,12 @@ <h2>{{ dataset.title }}
popover-title="{{ _('Deleted') }}" popover-trigger="hover" popover-placement="top"
>{{ _('Deleted') }}</small>
{% endif %}
{% if dataset.archived %}
<small class="archived"
v-popover.literal="{{ _('This dataset has been archived.') }}"
popover-title="{{ _('Archived') }}" popover-trigger="hover" popover-placement="top"
>{{ _('Archived') }}</small>
{% endif %}
</p>
</div>
</div>
22 changes: 22 additions & 0 deletions udata/tests/api/test_datasets_api.py
@@ -1061,6 +1061,28 @@ def test_dataset_allowed_resources_extensions(self):
self.assertEqual(response.json, extensions)


class DatasetArchivedAPITest(APITestCase):
modules = ['core.dataset']

def test_dataset_api_search_archived(self):
'''It should search datasets from the API, excluding archived ones'''
with self.autoindex():
VisibleDatasetFactory(archived=None)
dataset = VisibleDatasetFactory(archived=datetime.now())

response = self.get(url_for('api.datasets', q=''))
self.assert200(response)
self.assertEqual(len(response.json['data']), 1)
self.assertNotIn(str(dataset.id),
[r['id'] for r in response.json['data']])

def test_dataset_api_get_archived(self):
'''It should fetch an archived dataset from the API and return 200'''
dataset = VisibleDatasetFactory(archived=datetime.now())
response = self.get(url_for('api.dataset', dataset=dataset))
self.assert200(response)


class CommunityResourceAPITest(APITestCase):
modules = ['core.dataset', 'core.user', 'core.organization']

27 changes: 27 additions & 0 deletions udata/tests/dataset/test_dataset_actions.py
@@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import pytest

from udata.core.dataset.actions import archive
from udata.core.dataset.factories import VisibleDatasetFactory
from udata.core.discussions.models import Discussion
from udata.core.user.factories import UserFactory


@pytest.mark.usefixtures('clean_db')
class DatasetActionsTest:

def test_dataset_archive(self, app):
user = UserFactory()
app.config['ARCHIVE_COMMENT_USER_ID'] = user.id

dataset = VisibleDatasetFactory()

archive(dataset, comment=True)

dataset.reload()
assert dataset.archived is not None
discussions = Discussion.objects.filter(subject=dataset)
assert len(discussions) == 1
assert 'archived' in discussions[0].discussion[0].content
