Skip to content

Commit

Permalink
Merge pull request #837 from jdesboeufs/ods-ignore-inspire
Browse files (browse the repository at this point in the history)
Ignore INSPIRE datasets in the ODS harvester (temporary)
  • Loading branch information
jdesboeufs committed Mar 31, 2017
2 parents e3d0602 + 305bcc2 commit 04b4a9b
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 5 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Expand Up @@ -10,6 +10,8 @@
[#824](https://github.com/opendatateam/udata/issues/824)
- Restore the broken URL check feature
[#840](https://github.com/opendatateam/udata/issues/840)
- Temporarily ignore INSPIRE datasets in the ODS harvester
[#837](https://github.com/opendatateam/udata/pull/837)

## 1.0.5 (2017-03-27)

Expand Down
2 changes: 1 addition & 1 deletion udata/harvest/backends/base.py
Expand Up @@ -142,7 +142,7 @@ def process_item(self, item):
item.dataset = dataset
item.status = 'done'
except HarvestSkipException as e:
log.info("SKipped item %s : %s" % (item.remote_id, str(e)))
log.info("Skipped item %s : %s" % (item.remote_id, str(e)))
item.status = 'skipped'
item.errors.append(HarvestError(message=str(e)))
except Exception as e:
Expand Down
13 changes: 11 additions & 2 deletions udata/harvest/backends/ods.py
Expand Up @@ -63,8 +63,11 @@ def should_fetch():
return count < max_value

while should_fetch():
response = self.get(self.api_url,
params={"start": count, "rows": 50})
response = self.get(self.api_url, params={
"start": count,
"rows": 50,
"interopmetas": 'true',
})
response.raise_for_status()
data = response.json()
nhits = data["nhits"]
Expand All @@ -76,11 +79,17 @@ def process(self, item):
ods_dataset = item.kwargs["dataset"]
dataset_id = ods_dataset["datasetid"]
ods_metadata = ods_dataset["metas"]
ods_interopmetas = ods_dataset.get('interop_metas', {})

if not ods_dataset.get('has_records'):
msg = 'Dataset {datasetid} has no record'.format(**ods_dataset)
raise HarvestSkipException(msg)

# TODO: This behavior should be enabled with an option
if 'inspire' in ods_interopmetas:
msg = 'Dataset {datasetid} has INSPIRE metadata'.format(**ods_dataset)
raise HarvestSkipException(msg)

dataset = self.get_dataset(item.remote_id)

dataset.title = ods_metadata['title']
Expand Down
38 changes: 38 additions & 0 deletions udata/harvest/tests/search-ods.json
Expand Up @@ -48,6 +48,44 @@
"features": [],
"attachments": [],
"fields": []
}, {
"datasetid": "test-d",
"metas": {
"domain": "etalab-sandbox",
"language": "en",
"title": "test-d",
"records_count": 1,
"modified": "2015-04-10T09:39:13+00:00",
"visibility": "domain"
},
"interop_metas": {
"inspire": {
"hierarchy_level": "dataset",
"extend_bounding_box_westbound_longitude": "0.1568",
"contact_individual_name": "ACME",
"extend_bounding_box_southbound_latitude": "46.4944",
"contact_email": "tnt@acme.com",
"extend_bounding_box_northbound_latitude": "46.6848",
"extend_bounding_box_eastbound_longitude": "0.5084",
"file_identifier": "{73E--sfA386709}"
}
},
"has_records": true,
"features": ["geo"],
"attachments": [],
"fields": [{
"label": "test",
"type": "text",
"name": "test"
}, {
"label": "geo",
"type": "geo_point_2d",
"name": "geo"
}, {
"label": "value",
"type": "text",
"name": "value"
}]
}, {
"datasetid": "test-a",
"metas": {
Expand Down
7 changes: 5 additions & 2 deletions udata/harvest/tests/test_ods_harvester.py
Expand Up @@ -47,13 +47,13 @@ def test_simple(self):

self.assertEqual(
httpretty.last_request().querystring,
{'start': ['0'], 'rows': ['50']}
{'start': ['0'], 'rows': ['50'], 'interopmetas': ['true']}
)

source.reload()

job = source.get_last_job()
self.assertEqual(len(job.items), 3)
self.assertEqual(len(job.items), 4)
self.assertEqual(job.status, 'done')

datasets = {d.extras["harvest:remote_id"]: d for d in Dataset.objects}
Expand Down Expand Up @@ -136,3 +136,6 @@ def test_simple(self):

# test-c has no data
self.assertNotIn('test-c', datasets)

# test-d is INSPIRE
self.assertNotIn('test-d', datasets)

0 comments on commit 04b4a9b

Please sign in to comment.