Skip to content

Commit

Permalink
make sure the metadataonly parse option is respected for all offtryck…
Browse files Browse the repository at this point in the history
… derived sources
  • Loading branch information
staffanm committed Jun 18, 2017
1 parent c2c0218 commit a4695f4
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 3 deletions.
5 changes: 3 additions & 2 deletions ferenda/sources/legal/se/offtryck.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,8 +332,9 @@ def parse_body(self, fp, basefile):
if options == "skip":
raise DocumentSkippedError("%s: Skipped because of options.py" % basefile,
dummyfile=self.store.parsed_path(basefile))
- # elif options == "metadataonly":
- # do something smart
+ elif options == "metadataonly":
+ return Preformatted("Dokumentttext saknas (se originaldokument)")

# elif options == "simple":
# do something else smart

Expand Down
3 changes: 2 additions & 1 deletion ferenda/sources/legal/se/riksdagen.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,8 +323,9 @@ def extract_body(self, fp, basefile):
# fp can now be a pointer to a hocr file, a pdf2xml file,
# a html file or a StringIO object containing html taken
# from index.xml
+ options = self.get_parse_options(basefile)
if (os.path.exists(pdffile) and
- self.get_parse_options(basefile) != "metadataonly"):
+ options != "metadataonly"):
fp = self.parse_open(basefile)
parser = "ocr" if ".hocr." in util.name_from_fp(fp) else "xml"
reader = StreamingPDFReader().read(fp, parser=parser)
Expand Down

0 comments on commit a4695f4

Please sign in to comment.