Skip to content

Commit

Permalink
feat: Map more fields from SDK json export
Browse files Browse the repository at this point in the history
  • Loading branch information
bellisk committed May 7, 2024
1 parent a862a76 commit c99a27e
Showing 1 changed file with 18 additions and 2 deletions.
20 changes: 18 additions & 2 deletions ckanext/stadtzhharvest/sdk_harvester.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import requests
from ckan.lib.munge import munge_tag, munge_title_to_name
from ckan.logic import ValidationError
from requests.exceptions import HTTPError, JSONDecodeError

from ckanext.harvest.harvesters import HarvesterBase
Expand Down Expand Up @@ -69,6 +70,7 @@ def gather_stage(self, harvest_job):
for dataset in datasets:
dataset_name = munge_title_to_name(dataset["title"]).strip("-")
log.debug(f"Gathering dataset {dataset_name}")
dataset["name"] = dataset_name
package_dict = self._map_metadata(dataset)

obj = HarvestObject(
Expand Down Expand Up @@ -99,9 +101,16 @@ def import_stage(self, harvest_object):

package_dict = json.loads(harvest_object.content)

# todo: map resources here, make sure they are in the right order on the dataset

try:
# todo: Return 'unchanged' if the package has not changed
return stadtzhharvest_create_package(package_dict, harvest_object)
except ValidationError as e:
self._save_object_error(
f"Validation error on creating package {harvest_object.guid}: {e}",
harvest_object,
)
except Exception as e:
log.exception(e)
self._save_object_error(
Expand Down Expand Up @@ -134,15 +143,16 @@ def _map_metadata(self, dataset):
"dateFirstPublished": dataset.get("dateFirstPublished", ""),
"dateLastUpdated": dataset.get("dateLastUpdated", ""),
"updateInterval": dataset.get("updateInterval", ""),
"legalInformation": dataset.get("legalInformation", []),
"legalInformation": self._get_legal_information(dataset),
"timeRange": dataset.get("timeRange", ""),
"sszBemerkungen": dataset.get("sszBemerkungen", ""),
"dataQuality": dataset.get("dataQuality", ""),
"sszFields": self._get_attributes(dataset),
}

# todo: not in the JSON export: license_id
# todo: for legalInformation we get a list, but this should be a string
# todo: for updateInterval we get a string like "008" - we need to know how to
# map this
# todo: we need a link to the location of the actual data

stadtzhharvest_find_or_create_organization(package_dict)
Expand Down Expand Up @@ -184,3 +194,9 @@ def _get_attributes(self, dataset):
return attributes

return attributes

def _get_legal_information(self, dataset):
# todo: for legalInformation we get a list - we need to know how to map this
legal_information = dataset.get("legalInformation", [])
if len(legal_information) > 0:
return legal_information[0].get("code", "")

0 comments on commit c99a27e

Please sign in to comment.