diff --git a/ckanext/pdeu/plugin.py b/ckanext/pdeu/plugin.py
index 68fa997..fe4bdd7 100644
--- a/ckanext/pdeu/plugin.py
+++ b/ckanext/pdeu/plugin.py
@@ -6,6 +6,10 @@
 import countries
 
 
+class UnexpectedDateFormat(Exception):
+    """Raised for a date that is neither Data Publica nor Solr formatted."""
+
+
 class PDEUCustomizations(plugins.SingletonPlugin):
     plugins.implements(plugins.IRoutes)
 
@@ -17,14 +21,36 @@
     def before_index(self, dataset_dict):
         # Change the Data Publica harvester's '2010-07-19T13:36:00'-formatted
         # date strings into SOLR-compatible '1995-12-31T23:59:59Z' ones.
+        # Missing or empty dates are skipped and dates already in SOLR format
+        # are kept as they are; any other value raises UnexpectedDateFormat.
-        regex = ('^(?P<year>\d\d\d\d)-(?P<month>\d\d)-(?P<day>\d\d)'
-                 'T(?P<hours>\d\d):(?P<minutes>\d\d):(?P<seconds>\d\d)$')
+        publica_format = (
+            r'^(?P<year>\d\d\d\d)-(?P<month>\d\d)-(?P<day>\d\d)'
+            r'T(?P<hours>\d\d):(?P<minutes>\d\d):(?P<seconds>\d\d)$')
+
+        solr_format = (
+            r'^(?P<year>\d\d\d\d)-(?P<month>\d\d)-(?P<day>\d\d)'
+            r'T(?P<hours>\d\d):(?P<minutes>\d\d):(?P<seconds>\d\d)Z$')
+
         new_format = '{year}-{month}-{day}T{hours}:{minutes}:{seconds}Z'
+
         for date_key in ('deposit_date', 'update_date'):
-            old_date_str = dataset_dict.get(date_key)
-            if old_date_str:
-                match = re.match(regex, old_date_str)
-                dataset_dict[date_key] = new_format.format(**match.groupdict())
+            date_str = dataset_dict.get(date_key)
+            if not date_str:
+                continue
+
+            match = re.match(publica_format, date_str)
+            if match:
+                dataset_dict[date_key] = new_format.format(**match.groupdict())
+            elif re.match(solr_format, date_str):
+                # TODO: dates already appeared solrfied, if this is not
+                # needed in ckan2.0 we might be able to remove this
+                # code entirely
+                continue
+            else:
+                raise UnexpectedDateFormat("{0} is not in Data Publica or"
+                        " Solr Format for package {1}".format(date_key,
+                            dataset_dict.get('id')))
+
         return dataset_dict
 
     def update_config(self, config):