Skip to content

Commit

Permalink
py2 compat, better sanitize_identifier for offtryck
Browse files Browse the repository at this point in the history
  • Loading branch information
staffanm committed Jun 13, 2017
1 parent 75430f5 commit 99d0e61
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 2 deletions.
8 changes: 7 additions & 1 deletion ferenda/pdfreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -677,7 +677,10 @@ def _parse_xml_make_textbox(self, element, nextelement, after_footnote, lastbox,
del attribs['font']
return self._textdecoder(Textbox(textelements, **attribs), self.fontspec)

ws_trans = str.maketrans("\n\t\xa0", " ")
import string
ws_trans = {ord("\n"): " ",
ord("\t"): " ",
ord("\xa0"): " "}

def _parse_xml_make_textelement(self, element, **origkwargs):
# the complication is that a hierarchical sequence of tags
Expand Down Expand Up @@ -716,6 +719,9 @@ def cleantag(kwargs):

def normspace(txt):
# like util.normalize_space, but preserves a single leading/trailing space
if not isinstance(txt, str): # under py2, element.text can
# sometimes be a bytestring?
txt = txt.decode()
txt = txt.translate(self.ws_trans)
startspace = " " if txt.startswith(" ") else ""
endspace = " " if txt.endswith(" ") and len(txt) > 1 else ""
Expand Down
2 changes: 1 addition & 1 deletion ferenda/sources/legal/se/direktiv.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ def metadata_from_basefile(self, basefile):
return a

def infer_identifier(self, basefile):
return "Dir %s" % basefile
return "Dir. %s" % basefile

def postprocess_doc(self, doc):
next_is_title = False
Expand Down
10 changes: 10 additions & 0 deletions ferenda/sources/legal/se/offtryck.py
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,16 @@ def sanitize_identifier(self, identifier):
self.SO: "%s %s:%s"}
try:
parts = re.split("[\.:/ ]+", identifier.strip())
id_template = pattern[self.document_type]
# do we have enough parts for our template?
if len(parts) == id_template.count("%s") - 1:
# we're probably missing the first part (eg "Prop",
# "Ds") and so what we have is a basefile-like
# thing. Reconstruct the first part.
parts.insert(0, re.split("[\.:/ ]+", self.infer_identifier(identifier))[0])
# make sure the initial char is capitalized (this is
# preferred to .capitalize() for strings that should be
# all-caps, eg "SOU")
parts[0] = parts[0][0].upper() + parts[0][1:]
return pattern[self.document_type] % tuple(parts)
except:
Expand Down

0 comments on commit 99d0e61

Please sign in to comment.