Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Archive dataset feature #2172

Merged
merged 16 commits into from Jun 4, 2019
@@ -125,6 +125,19 @@ Enables the search autocomplete on frontend if set to `True`, disables otherwise

The search autocomplete debounce delay on frontend, in milliseconds.

### ARCHIVE_COMMENT_USER_ID

**default**: `None`

The id of an existing user which will post a comment when a dataset is archived.

### ARCHIVE_COMMENT_TITLE

**default**: `_('This dataset has been archived')`

The title of the comment optionally posted when a dataset is archived.
NB: the content of the comment is located in `udata/templates/comments/dataset_archived.txt`.

## URLs validation

### URLS_ALLOW_PRIVATE
@@ -0,0 +1,3 @@
.content.archived {
opacity: 0.6;
}
@@ -242,6 +242,10 @@ small.deleted {
.square-stamp(#ff0000);
}

small.archived {
.square-stamp(#eea236);
}


.aside-map {
height: 220px;
@@ -1,13 +1,17 @@
awesome-slugify==1.6.5
authlib==0.10
awesome-slugify==1.6.5
Babel==2.6.0
bcrypt==3.1.6
bleach==3.1.0
blinker==1.4
celery==4.1.1
celerybeat-mongo==0.1.0
chardet==3.0.4
click==6.7
CommonMark==0.8.1
elasticsearch==2.4.1
cryptography==2.7
elasticsearch-dsl==2.2.0
elasticsearch==2.4.1
factory-boy==2.11.1
Faker==1.0.5
Flask-BabelEx==0.9.3
@@ -25,31 +29,35 @@ Flask-Sitemap==0.3.0
Flask-Themes2==0.1.4
Flask-WTF==0.14.2
Flask==1.0.2
future==0.17.1
geojson==2.4.1
html2text==2018.1.9
itsdangerous==1.1.0
Jinja2==2.10.1
jsonschema==3.0.1
kombu==4.6.0
lxml==4.3.3
mongoengine==0.16.3
msgpack-python==0.4.8
netaddr==0.7.19
bcrypt==3.1.6
pillow==6.0.0
pydenticon==0.3.1
pyliblzma==0.5.3
pymongo==3.7.2
python-dateutil==2.8.0
pytz==2019.1
PyYAML==5.1
rdflib==4.2.2
rdflib-jsonld==0.4.0
rdflib==4.2.2
redis==2.10.6 # this can be safely upgraded back to 3.1.0 as soon as celery 4.2.2 is released
requests==2.21.0
simplejson==3.16.0
StringDist==1.0.9
tlds
unicodecsv==0.14.1
ujson==1.35
unicodecsv==0.14.1
voluptuous==0.10.5
werkzeug==0.14.1
wtforms-json==0.3.3
wtforms==2.2.1
xmltodict==0.12.0
geojson==2.4.1
pymongo==3.7.2
Jinja2==2.10.1
@@ -0,0 +1,38 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import logging
from datetime import datetime

from flask import current_app

from udata import theme, i18n
from udata.models import Discussion, Message

log = logging.getLogger(__name__)


def archive(dataset, comment=False):
    """Archive a dataset.

    Sets the dataset's ``archived`` timestamp to now and saves it.
    When ``comment`` is truthy and ``ARCHIVE_COMMENT_USER_ID`` is
    configured, a discussion is opened on the dataset with a rendered
    explanation message, localized to ``DEFAULT_LANGUAGE``.
    """
    if dataset.archived:
        # Already archived: proceed anyway, just refresh the archive date.
        log.warning('Dataset %s already archived, bumping date', dataset)
    dataset.archived = datetime.now()
    dataset.save()

    if comment:
        log.info('Posting comment for dataset %s...', dataset)
        lang = current_app.config['DEFAULT_LANGUAGE']
        title = current_app.config['ARCHIVE_COMMENT_TITLE']
        user_id = current_app.config['ARCHIVE_COMMENT_USER_ID']
        if not user_id:
            # No configured commenter: archive silently.
            log.warning('ARCHIVE_COMMENT_USER_ID not set, skipping comment')
        else:
            # Render the canned explanation in the site's default language.
            with i18n.language(lang):
                content = theme.render('comments/dataset_archived.txt')
            Discussion(
                user=user_id,
                discussion=[Message(content=content, posted_by=user_id)],
                subject=dataset,
                title=title,
            ).save()

    log.info('Archived dataset %s', dataset)
@@ -6,9 +6,12 @@
import logging
import requests

from udata.commands import cli, success
from udata.models import License, DEFAULT_LICENSE
from bson import ObjectId

from udata.commands import cli, success, exit_with_error
from udata.models import License, DEFAULT_LICENSE, Dataset
from .tasks import send_frequency_reminder
from . import actions

log = logging.getLogger(__name__)

@@ -71,3 +74,47 @@ def frequency_reminder():
to remind them they have outdated datasets on the website.
"""
send_frequency_reminder()


# Click command group gathering the dataset-related CLI subcommands
# (e.g. `archive`, `archive-one`) under `udata dataset ...`.
@cli.group('dataset')
def grp():
    '''Dataset related operations'''
    pass


@grp.command()
@click.argument('dataset_id')
@click.option('-c', '--comment', is_flag=True, help='Post a comment when archiving')
def archive_one(dataset_id, comment):
    """Archive one dataset"""
    # Look the dataset up first; abort with a CLI error if the id is unknown.
    try:
        target = Dataset.objects.get(id=dataset_id)
    except Dataset.DoesNotExist:
        exit_with_error('Cannot find a dataset with id %s' % dataset_id)
    else:
        # Only runs when the lookup succeeded.
        actions.archive(target, comment)


@grp.command()
@click.argument('filepath')
@click.option('-c', '--comment', is_flag=True, help='Post a comment when archiving')
def archive(filepath, comment):
    """Archive multiple datasets from a list in a file (one id per line)"""
    count = 0
    errors = 0
    log.info('Archiving datasets...')
    with open(filepath) as inputfile:
        # Stream line by line instead of readlines(): avoids loading the
        # whole file into memory for large dataset lists.
        for line in inputfile:
            # strip() (not rstrip()) also tolerates accidental leading
            # whitespace around the id.
            line = line.strip()
            if not line:
                continue  # skip blank lines
            try:
                dataset = Dataset.objects.get(id=ObjectId(line))
            except Exception as e:  # noqa (Never stop on failure)
                # Deliberately broad: a bad id or missing dataset must not
                # abort the whole batch.
                log.error('Unable to archive dataset %s: %s', line, e)
                errors += 1
                continue
            actions.archive(dataset, comment)
            count += 1
    log.info('Archived %s datasets, %s failed', count, errors)
@@ -191,12 +191,14 @@ def default(cls):

class DatasetQuerySet(db.OwnedQuerySet):
    def visible(self):
        """Datasets that are publicly visible.

        A dataset is visible when it is not private, has at least one
        resource, and is neither deleted nor archived.
        """
        return self(private__ne=True, resources__0__exists=True,
                    deleted=None, archived=None)

    def hidden(self):
        """Datasets hidden from public listings.

        Complement of `visible()`: private, resource-less, deleted or
        archived datasets.
        """
        return self(db.Q(private=True) |
                    db.Q(resources__0__exists=False) |
                    db.Q(deleted__ne=None) |
                    db.Q(archived__ne=None))


class Checksum(db.EmbeddedDocument):
@@ -379,6 +381,7 @@ class Dataset(WithMetrics, BadgeMixin, db.Owned, db.Document):
featured = db.BooleanField(required=True, default=False)

deleted = db.DateTimeField()
archived = db.DateTimeField()

def __unicode__(self):
    """Text representation: the dataset title, or '' when unset."""
    title = self.title
    return title if title else ''
@@ -405,6 +408,7 @@ def __unicode__(self):
before_delete = signal('Dataset.before_delete')
after_delete = signal('Dataset.after_delete')
on_delete = signal('Dataset.on_delete')
on_archive = signal('Dataset.on_archive')
on_resource_added = signal('Dataset.on_resource_added')

verbose_name = _('dataset')
@@ -424,6 +428,8 @@ def post_save(cls, sender, document, **kwargs):
cls.on_update.send(document)
if document.deleted:
cls.on_delete.send(document)
if document.archived:
cls.on_archive.send(document)
if kwargs.get('resource_added'):
cls.on_resource_added.send(document,
resource_id=kwargs['resource_added'])
@@ -444,7 +450,8 @@ def is_visible(self):

@property
def is_hidden(self):
    """Whether this dataset is hidden from public listings.

    Hidden when it has no resources, or is private, deleted or archived
    (mirrors `DatasetQuerySet.hidden`).
    """
    return (len(self.resources) == 0 or self.private or self.deleted
            or self.archived)

@property
def full_title(self):
@@ -172,7 +172,7 @@ class Meta:

@classmethod
def is_indexable(cls, dataset):
    """Whether a dataset should be indexed for search.

    Indexable only when it is not deleted, not archived, public, and
    has at least one resource.
    """
    return (dataset.deleted is None and dataset.archived is None and
            len(dataset.resources) > 0 and
            not dataset.private)

@@ -328,9 +328,16 @@ class Defaults(object):
'organization': None,
}

# Autocomplete parameters
#########################
SEARCH_AUTOCOMPLETE_ENABLED = True
SEARCH_AUTOCOMPLETE_DEBOUNCE = 200 # in ms

# Archive parameters
####################
ARCHIVE_COMMENT_USER_ID = None
ARCHIVE_COMMENT_TITLE = _('This dataset has been archived')


class Testing(object):
'''Sane values for testing. Should be applied as override'''
@@ -0,0 +1,6 @@
{% trans %}This dataset has been archived by our administration team.

An archived dataset is considered out of date by a combination of factors.
It does not appear in the search results and has a special layout warning the site's users.

If you think this is a mistake, you can contact the site administrator.{% endtrans %}
@@ -36,7 +36,14 @@
{% cache cache_duration, 'dataset-content', dataset.id|string, g.lang_code, current_user.slug or 'anonymous' %}
<!-- Placeholder for non-routable modals -->
<div v-el:modal></div>
<section class="content {% if not dataset.organization.public_service %}non{% endif %}certified">

{% if dataset.archived %}
<div class="container-fluid alert alert-warning">
<div class="container" role="alert">{{ _('This dataset has been archived.') }}</div>
</div>
{% endif %}

<section class="content {% if not dataset.organization.public_service %}non{% endif %}certified {% if dataset.archived %}archived{% endif %}">
<div class="container dataset-container">
<div class="row">

@@ -84,6 +91,12 @@ <h2>{{ dataset.title }}
popover-title="{{ _('Deleted') }}" popover-trigger="hover" popover-placement="top"
>{{ _('Deleted') }}</small>
{% endif %}
{% if dataset.archived %}
<small class="archived"
v-popover.literal="{{ _('This dataset has been archived.') }}"
popover-title="{{ _('Archived') }}" popover-trigger="hover" popover-placement="top"
>{{ _('Archived') }}</small>
{% endif %}
</p>
</div>
</div>
@@ -1061,6 +1061,28 @@ def test_dataset_allowed_resources_extensions(self):
self.assertEqual(response.json, extensions)


class DatasetArchivedAPITest(APITestCase):
    modules = ['core.dataset']

    def test_dataset_api_search_archived(self):
        '''It should search datasets from the API, excluding archived ones'''
        # One visible dataset and one archived one: only the former
        # should come back from the search endpoint.
        with self.autoindex():
            VisibleDatasetFactory(archived=None)
            archived = VisibleDatasetFactory(archived=datetime.now())

        response = self.get(url_for('api.datasets', q=''))
        self.assert200(response)
        returned_ids = [row['id'] for row in response.json['data']]
        self.assertEqual(len(returned_ids), 1)
        self.assertNotIn(str(archived.id), returned_ids)

    def test_dataset_api_get_archived(self):
        '''It should fetch an archived dataset from the API and return 200'''
        # Archiving hides a dataset from search but direct access still works.
        archived = VisibleDatasetFactory(archived=datetime.now())
        response = self.get(url_for('api.dataset', dataset=archived))
        self.assert200(response)


class CommunityResourceAPITest(APITestCase):
modules = ['core.dataset', 'core.user', 'core.organization']

@@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import pytest

from udata.core.dataset.actions import archive
from udata.core.dataset.factories import VisibleDatasetFactory
from udata.core.discussions.models import Discussion
from udata.core.user.factories import UserFactory


@pytest.mark.usefixtures('clean_db')
class DatasetActionsTest:

    def test_dataset_archive(self, app):
        # Configure a commenting user so archive() posts a discussion.
        poster = UserFactory()
        app.config['ARCHIVE_COMMENT_USER_ID'] = poster.id

        dataset = VisibleDatasetFactory()
        archive(dataset, comment=True)
        dataset.reload()

        # The archive date must be set and exactly one discussion opened,
        # whose first message mentions the archiving.
        assert dataset.archived is not None
        discussions = Discussion.objects.filter(subject=dataset)
        assert len(discussions) == 1
        assert 'archived' in discussions[0].discussion[0].content
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.