Expose dataset's created_at_internal date #2862

Merged (18 commits) on Jul 27, 2023
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -8,6 +8,7 @@

- Improve DCAT harvest of mime type [#2857](https://github.com/opendatateam/udata/pull/2857)
- Don't crash on files not found when purging resources [#2858](https://github.com/opendatateam/udata/pull/2858)
- Expose dataset's `*_internal` dates in a nested `internal` field in API marshalling [#2862](https://github.com/opendatateam/udata/pull/2862)
- Improve DCAT catalog exposed [#2860](https://github.com/opendatateam/udata/pull/2860)
- Use the resource's extra `analysis:last-modified-at` in the `last_modified` property [#2863](https://github.com/opendatateam/udata/pull/2863)
- Add optional harvest validation form [#2864](https://github.com/opendatateam/udata/pull/2864)
22 changes: 20 additions & 2 deletions udata/core/dataset/api_fields.py
@@ -73,6 +73,13 @@
'label': fields.String(description='The frequency display name')
})

resource_internal_fields = api.model('ResourceInternals', {
'created_at_internal': fields.ISODateTime(
description='The resource\'s internal creation date on the site', required=True),
'last_modified_internal': fields.ISODateTime(
description='The resource\'s internal last modification date', required=True),
})

resource_fields = api.model('Resource', {
'id': fields.String(description='The resource unique ID', readonly=True),
'title': fields.String(description='The resource title', required=True),
@@ -112,6 +119,8 @@
'new page)',
readonly=True),
'schema': fields.Raw(description='Reference to the associated schema', readonly=True),
'internal': fields.Nested(
resource_internal_fields, readonly=True, description='Site-internal and object-specific data'),
})
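
As a sanity check on the shape this adds to the payload, here is a minimal standalone sketch (not part of the PR) of the marshalling; flask-restx's stock `DateTime(dt_format='iso8601')` stands in for udata's `ISODateTime`, and all values are invented:

```python
import json
from datetime import datetime
from flask_restx import Model, fields, marshal

# Stand-ins for the udata models above.
internals = Model('ResourceInternals', {
    'created_at_internal': fields.DateTime(dt_format='iso8601', required=True),
    'last_modified_internal': fields.DateTime(dt_format='iso8601', required=True),
})
resource = Model('Resource', {
    'title': fields.String,
    'internal': fields.Nested(internals, readonly=True),
})

now = datetime(2023, 7, 27, 9, 15)
payload = marshal({'title': 'a file',
                   'internal': {'created_at_internal': now,
                                'last_modified_internal': now}}, resource)
print(json.dumps(payload, indent=2))
# {
#   "title": "a file",
#   "internal": {
#     "created_at_internal": "2023-07-27T09:15:00",
#     "last_modified_internal": "2023-07-27T09:15:00"
#   }
# }
```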

upload_fields = api.inherit('UploadedResource', resource_fields, {
@@ -162,9 +171,16 @@
'id', 'title', 'acronym', 'slug', 'description', 'created_at', 'last_modified', 'deleted',
'private', 'tags', 'badges', 'resources', 'frequency', 'frequency_date', 'extras', 'harvest',
'metrics', 'organization', 'owner', 'temporal_coverage', 'spatial', 'license',
'uri', 'page', 'last_update', 'archived', 'quality'
'uri', 'page', 'last_update', 'archived', 'quality', 'internal'
))

dataset_internal_fields = api.model('DatasetInternals', {
'created_at_internal': fields.ISODateTime(
description='The dataset\'s internal creation date on the site', required=True),
'last_modified_internal': fields.ISODateTime(
description='The dataset\'s internal last modification date', required=True),
})

dataset_fields = api.model('Dataset', {
'id': fields.String(description='The dataset identifier', readonly=True),
'title': fields.String(description='The dataset title', required=True),
@@ -174,7 +190,7 @@
'description': fields.Markdown(
description='The dataset description in markdown', required=True),
'created_at': fields.ISODateTime(
description='The dataset creation date', required=True),
description='The dataset creation date, computed from the harvested creation date if any, otherwise the site\'s internal creation date', required=True),
'last_modified': fields.ISODateTime(
description='The dataset last modification date', required=True),
'deleted': fields.ISODateTime(description='The deletion date if deleted'),
@@ -228,6 +244,8 @@
'quality': fields.Raw(description='The dataset quality', readonly=True),
'last_update': fields.ISODateTime(
description='The resources last modification date', required=True),
'internal': fields.Nested(
dataset_internal_fields, readonly=True, description='Site-internal and object-specific data'),
}, mask=DEFAULT_MASK)

dataset_page_fields = api.model('DatasetPage', fields.pager(dataset_fields),
10 changes: 8 additions & 2 deletions udata/core/dataset/apiv2.py
@@ -16,7 +16,9 @@
user_ref_fields,
checksum_fields,
dataset_harvest_fields,
resource_harvest_fields
dataset_internal_fields,
resource_harvest_fields,
resource_internal_fields
)
from udata.core.spatial.api_fields import geojson
from .models import (
@@ -33,7 +35,7 @@
'id', 'title', 'acronym', 'slug', 'description', 'created_at', 'last_modified', 'deleted',
'private', 'tags', 'badges', 'resources', 'community_resources', 'frequency', 'frequency_date',
'extras', 'metrics', 'organization', 'owner', 'temporal_coverage', 'spatial', 'license',
'uri', 'page', 'last_update', 'archived', 'quality', 'harvest'
'uri', 'page', 'last_update', 'archived', 'quality', 'harvest', 'internal'
))

log = logging.getLogger(__name__)
@@ -129,6 +131,8 @@
'quality': fields.Raw(description='The dataset quality', readonly=True),
'last_update': fields.ISODateTime(
description='The resources last modification date', required=True),
'internal': fields.Nested(
dataset_internal_fields, readonly=True, description='Site-internal and object-specific data')
}, mask=DEFAULT_MASK_APIV2)


@@ -164,6 +168,8 @@
apiv2.inherit('Checksum', checksum_fields)
apiv2.inherit('HarvestDatasetMetadata', dataset_harvest_fields)
apiv2.inherit('HarvestResourceMetadata', resource_harvest_fields)
apiv2.inherit('DatasetInternals', dataset_internal_fields)
apiv2.inherit('ResourceInternals', resource_internal_fields)
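
A hedged aside on the `inherit` calls just above: re-declaring a shared model on a second `Api` instance registers it there, presumably so the `$ref` entries in the v2 swagger.json resolve. A rough standalone sketch of the mechanism (names assumed, not udata's actual wiring):

```python
from flask_restx import Api, Model, fields

# Shared model, defined once alongside the v1 API.
dataset_internal_fields = Model('DatasetInternals', {
    'created_at_internal': fields.DateTime(dt_format='iso8601'),
    'last_modified_internal': fields.DateTime(dt_format='iso8601'),
})

apiv2 = Api()  # hypothetical second Api instance serving /api/2/
# Register the shared model on apiv2 so its swagger documentation can reference it.
apiv2.inherit('DatasetInternals', dataset_internal_fields)
```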


@ns.route('/search/', endpoint='dataset_search')
14 changes: 14 additions & 0 deletions udata/core/dataset/models.py
@@ -294,6 +294,13 @@ class ResourceMixin(object):
last_modified_internal = db.DateTimeField(default=datetime.utcnow, required=True)
deleted = db.DateTimeField()

@property
def internal(self):
return {
'created_at_internal': self.created_at_internal,
'last_modified_internal': self.last_modified_internal
}
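
The marshaller looks fields up by attribute name, so exposing `internal` as a read-only property is all the `fields.Nested` entry above needs. A self-contained illustration (class and dates invented):

```python
from datetime import datetime

class FakeResource:  # stand-in, not udata's ResourceMixin
    created_at_internal = datetime(2023, 7, 27, 9, 0)
    last_modified_internal = datetime(2023, 7, 27, 9, 5)

    @property
    def internal(self):
        # Same idea as above: bundle the two raw dates under a single key.
        return {
            'created_at_internal': self.created_at_internal,
            'last_modified_internal': self.last_modified_internal,
        }

# Attribute access is what flask-restx's marshaller does under the hood.
assert getattr(FakeResource(), 'internal')['created_at_internal'] == datetime(2023, 7, 27, 9, 0)
```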

@property
def created_at(self):
return self.harvest.created_at if self.harvest and self.harvest.created_at else self.created_at_internal
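
This pre-existing fallback is what the reworded `created_at` description above documents: the harvested creation date wins when present, otherwise the internal one. A standalone sketch of the logic (fixture names invented):

```python
from datetime import datetime
from types import SimpleNamespace

def created_at(res):
    # Mirrors the property above.
    return res.harvest.created_at if res.harvest and res.harvest.created_at else res.created_at_internal

harvested = SimpleNamespace(
    harvest=SimpleNamespace(created_at=datetime(2020, 1, 1)),
    created_at_internal=datetime(2023, 7, 27))
local = SimpleNamespace(harvest=None, created_at_internal=datetime(2023, 7, 27))

assert created_at(harvested) == datetime(2020, 1, 1)   # harvested date wins
assert created_at(local) == datetime(2023, 7, 27)      # falls back to internal
```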
@@ -827,6 +834,13 @@ def json_ld(self):

return result

@property
def internal(self):
return {
'created_at_internal': self.created_at_internal,
'last_modified_internal': self.last_modified_internal
}

@property
def views_count(self):
return self.metrics.get('views', 0)
10 changes: 9 additions & 1 deletion udata/tests/api/test_datasets_api.py
@@ -229,12 +229,20 @@ def test_dataset_api_get(self):
'''It should fetch a dataset from the API'''
resources = [ResourceFactory() for _ in range(2)]
dataset = DatasetFactory(resources=resources)

response = self.get(url_for('api.dataset', dataset=dataset))
self.assert200(response)
data = json.loads(response.data)
self.assertEqual(len(data['resources']), len(resources))
self.assertTrue('quality' in data)
self.assertTrue('internal' in data)
# Reload the dataset from MongoDB so its dates are truncated to MongoDB's millisecond precision.
dataset.reload()
self.assertEqual(data['internal']['created_at_internal'], fields.ISODateTime().format(dataset.created_at_internal))
self.assertEqual(data['internal']['last_modified_internal'], fields.ISODateTime().format(dataset.last_modified_internal))

self.assertTrue('internal' in data['resources'][0])
self.assertEqual(data['resources'][0]['internal']['created_at_internal'], fields.ISODateTime().format(dataset.resources[0].created_at_internal))
self.assertEqual(data['resources'][0]['internal']['last_modified_internal'], fields.ISODateTime().format(dataset.resources[0].last_modified_internal))
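
End to end, the change should surface like this (hedged sketch; the instance URL and dataset id are placeholders, not from the PR):

```python
import requests

resp = requests.get('https://www.data.gouv.fr/api/1/datasets/some-dataset-id/')
data = resp.json()
print(data['internal'])                  # {'created_at_internal': '...', 'last_modified_internal': '...'}
print(data['resources'][0]['internal'])  # each resource carries the same nested object
```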

def test_dataset_api_get_deleted(self):
'''It should not fetch a deleted dataset from the API and raise 410'''
Expand Down