Skip to content

Commit

Permalink
fix plugin/before_index if date is already in solr format
Browse files Browse the repository at this point in the history
If a date was already in Solr format rather than the Data Publica format,
the search index rebuild failed.
  • Loading branch information
joetsoi committed Mar 29, 2013
1 parent 8d51140 commit c1d7456
Showing 1 changed file with 24 additions and 6 deletions.
30 changes: 24 additions & 6 deletions ckanext/pdeu/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import countries

class UnexpectedDateFormat(Exception):
    """Raised when a dataset date is in neither Data Publica nor Solr format."""

class PDEUCustomizations(plugins.SingletonPlugin):
plugins.implements(plugins.IRoutes)
Expand All @@ -17,14 +18,31 @@ def before_index(self, dataset_dict):

# Change the Data Publica harvester's '2010-07-19T13:36:00'-formatted
# date strings into SOLR-compatible '1995-12-31T23:59:59Z' ones.
regex = ('^(?P<year>\d\d\d\d)-(?P<month>\d\d)-(?P<day>\d\d)'
'T(?P<hours>\d\d):(?P<minutes>\d\d):(?P<seconds>\d\d)$')
publica_format = ('^(?P<year>\d\d\d\d)-(?P<month>\d\d)-(?P<day>\d\d)'
'T(?P<hours>\d\d):(?P<minutes>\d\d):(?P<seconds>\d\d)$')

solr_format = ('^(?P<year>\d\d\d\d)-(?P<month>\d\d)-(?P<day>\d\d)'
'T(?P<hours>\d\d):(?P<minutes>\d\d):(?P<seconds>\d\d)Z$')

new_format = '{year}-{month}-{day}T{hours}:{minutes}:{seconds}Z'

for date_key in ('deposit_date', 'update_date'):
old_date_str = dataset_dict.get(date_key)
if old_date_str:
match = re.match(regex, old_date_str)
dataset_dict[date_key] = new_format.format(**match.groupdict())
if date_key in dataset_dict.keys():
match = re.match(publica_format, dataset_dict[date_key])
import ipdb; ipdb.set_trace()
if match:
solrfied_date = new_format.format(**match.groupdict())
dataset_dict[date_key] = solrfied_date
elif re.match(solr_format, dataset_dict[date_key]):
# TODO: some dates already appear to be in Solr format; if this
# handling is not needed in CKAN 2.0 we might be able to remove
# this code entirely
continue
else:
raise UnexpectedDateFormat("{0} is not in Data Publica or"
" Solr Format for package {1}".format(date_key,
dataset_dict['id']))

return dataset_dict

def update_config(self, config):
Expand Down

0 comments on commit c1d7456

Please sign in to comment.