Skip to content

Commit

Permalink
fall back on inferred identifier if we really can't interpret the identifier on the HTML page
Browse files Browse the repository at this point in the history
  • Loading branch information
staffanm committed Aug 7, 2018
1 parent d8aa8c9 commit 8e2c536
Showing 1 changed file with 11 additions and 2 deletions.
13 changes: 11 additions & 2 deletions ferenda/sources/legal/se/regeringen.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,8 +336,17 @@ def sanitize_metadata(self, a, basefile):
if k in a:
a[k] = util.normalize_space(a[k])
# trim identifier
a["dcterms:identifier"] = self.sanitize_identifier(
a["dcterms:identifier"].replace("ID-nummer: ", ""))
try:
# The identifier displayed on the HTML page is not always
# correct -- it might be missing digits (e.g. "SOU 207:111"
# instead of "SOU 2017:111"). Try to sanitize it, but if
# that fails, infer it from our basefile instead.
a["dcterms:identifier"] = self.sanitize_identifier(
a["dcterms:identifier"].replace("ID-nummer: ", ""))
except ValueError as e:
inferred_identifier = str(self.infer_identifier(basefile))
self.log.error("%s: Irregular identifier %s, using inferred identifier %s instead" % (basefile, a["dcterms:identifier"], inferred_identifier))
a["dcterms:identifier"] = inferred_identifier
# save for later
self._identifier = a["dcterms:identifier"]
# it's rare, but in some cases a document can be published by
Expand Down

0 comments on commit 8e2c536

Please sign in to comment.