Skip to content

Commit

Permalink
workaround to enable testing of PDFReader code even though pdftohtml is not installed
Browse files Browse the repository at this point in the history
  • Loading branch information
staffanm committed Nov 4, 2013
1 parent 62356a8 commit 213a809
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 6 deletions.
Binary file added test/files/pdfreader/intermediate/index.pdf
Binary file not shown.
14 changes: 14 additions & 0 deletions test/files/pdfreader/intermediate/index.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE pdf2xml SYSTEM "pdf2xml.dtd">

<pdf2xml producer="poppler" version="0.22.5">
<page number="1" position="absolute" top="0" left="0" height="1263" width="892">
<fontspec id="0" size="21" family="Times" color="#345a8a"/>
<fontspec id="1" size="16" family="Times" color="#000000"/>
<fontspec id="2" size="16" family="Times" color="#000000"/>
<fontspec id="3" size="16" family="Times" color="#000000"/>
<text top="146" left="135" width="155" height="29" font="0"><b>Document  title  </b></text>
<text top="178" left="135" width="4" height="21" font="1">  </text>
<text top="199" left="135" width="318" height="21" font="1">This  is  a  <i>simple  <b>document</b></i>  in  PDF  format.  </text>
</page>
</pdf2xml>
Expand Down
Binary file added test/files/pdfreader/intermediate/index001.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
19 changes: 15 additions & 4 deletions test/testPDFDocRepo.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@
import os
import shutil

from ferenda import util


from ferenda import util, errors

# SUT
from ferenda import PDFDocumentRepository
Expand All @@ -23,7 +21,20 @@ def test_parse(self):
util.ensure_dir(self.repo.store.downloaded_path("sample"))
shutil.copy2("test/files/pdfreader/sample.pdf",
self.repo.store.downloaded_path("sample"))
self.repo.parse("sample")
try:
self.repo.parse("sample")
except errors.ExternalCommandError:
# print("pdftohtml error: retrying")
# for systems that don't have pdftohtml, we copy the expected
# intermediate files, so that we can test the rest of the logic
targetdir = os.path.dirname(self.repo.store.intermediate_path("sample"))
# print("working around by copying to %s" % targetdir)
if os.path.exists(targetdir):
shutil.rmtree(targetdir)
shutil.copytree("test/files/pdfreader/intermediate",
targetdir)
self.repo.parse("sample")
# print("Workaround succeeded")
p = self.repo.store.datadir
self.assertTrue(os.path.exists(p+'/intermediate/sample/index001.png'))
self.assertTrue(os.path.exists(p+'/intermediate/sample/index.pdf'))
Expand Down
13 changes: 11 additions & 2 deletions test/testPDFReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from ferenda.compat import unittest
if os.getcwd() not in sys.path: sys.path.insert(0,os.getcwd())

from ferenda import errors
# SUT
from ferenda import PDFReader

Expand All @@ -22,8 +23,16 @@ def tearDown(self):
shutil.rmtree(self.datadir)

def test_basic(self):
self.reader.read("test/files/pdfreader/sample.pdf",
self.datadir)
try:
self.reader.read("test/files/pdfreader/sample.pdf",
self.datadir)
except errors.ExternalCommandError:
for fname in os.listdir("test/files/pdfreader/intermediate"):
to = fname.replace("index", "sample")
shutil.copy("test/files/pdfreader/intermediate/%s" % fname,
self.datadir + os.sep + to)
self.reader.read("test/files/pdfreader/sample.pdf",
self.datadir)
self.assertEqual(len(self.reader), 1)
# first page, first box
title = str(self.reader[0][0])
Expand Down

0 comments on commit 213a809

Please sign in to comment.