/
test_roundtrip.py
executable file
·134 lines (111 loc) · 4.12 KB
/
test_roundtrip.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#! /usr/bin/env python
# A part of pdfrw (https://github.com/pmaupin/pdfrw)
# Copyright (C) 2015 Patrick Maupin, Austin, Texas
# MIT license -- See LICENSE.txt for details
'''
Run from the directory above like so:
python -m tests.test_roundtrip
A PDF that has been determined to be good or bad
should be added to expected.txt with either a good
checksum, or just the word "fail".
These tests are incomplete, but they allow us to try
out various PDFs. There is a collection of difficult
PDFs available on github.
In order to use them:
1) Ensure that github.com/pmaupin/static_pdfs is on your path.
2) Use the imagemagick compare program to look at differences
between the static_pdfs/global directory and the tmp_results
directory after you run this.
'''
import os
import hashlib
import pdfrw
import static_pdfs
import expected
from pdfrw.py23_diffs import convert_store
try:
import unittest2 as unittest
except ImportError:
import unittest
class TestOnePdf(unittest.TestCase):

    def roundtrip(self, testname, basename, srcf, decompress=False,
                  compress=False, repaginate=False):
        '''Round-trip one PDF through pdfrw and check the output hash.

        Reads srcf with PdfReader, writes it back out under
        expected.result_dir/testname/ (optionally decompressing,
        recompressing, or repaginating), then compares the MD5 of the
        written file against the expectations in expected.results.
        Every outcome is appended to hashes.txt so new results can be
        inspected and promoted into expected.txt.
        '''
        dstd = os.path.join(expected.result_dir, testname)
        if not os.path.exists(dstd):
            os.makedirs(dstd)
        dstf = os.path.join(dstd, basename)
        hashfile = os.path.join(expected.result_dir, 'hashes.txt')
        hashkey = '%s/%s' % (testname, basename)
        # Named 'digest' rather than 'hash' to avoid shadowing the
        # builtin hash().
        digest = '------no-file-generated---------'
        expects = expected.results[hashkey]
        # If the test has been deliberately skipped,
        # we are done. Otherwise, execute it even
        # if we don't know about it yet, so we have
        # results to compare.
        result = 'fail'
        size = 0
        try:
            if 'skip' in expects:
                result = 'skip requested'
                return self.skipTest(result)
            elif 'xfail' in expects:
                result = 'xfail requested'
                return self.fail(result)
            exists = os.path.exists(dstf)
            if expects or not exists:
                if exists:
                    os.remove(dstf)
                trailer = pdfrw.PdfReader(srcf, decompress=decompress,
                                          verbose=False)
                writer = pdfrw.PdfWriter(dstf, compress=compress)
                if repaginate:
                    writer.addpages(trailer.pages)
                else:
                    writer.trailer = trailer
                writer.write()
                with open(dstf, 'rb') as f:
                    data = f.read()
                size = len(data)
                if data:
                    digest = hashlib.md5(data).hexdigest()
                else:
                    # Nothing was written -- discard the empty file.
                    os.remove(dstf)
            if expects:
                if len(expects) == 1:
                    # A single expectation yields a more informative
                    # assertEqual failure message than assertIn.
                    expects, = expects
                    self.assertEqual(digest, expects)
                else:
                    self.assertIn(digest, expects)
                result = 'pass'
            else:
                result = 'skip'
                self.skipTest('No hash available')
        finally:
            # Always record the outcome (pass/fail/skip) so hashes.txt
            # has a complete log of the run.
            result = '%8d %-20s %s %s\n' % (size, result, hashkey, digest)
            with open(hashfile, 'ab') as f:
                f.write(convert_store(result))
def build_tests():
    '''Attach one roundtrip test method to TestOnePdf per (mode, PDF) pair.'''

    def make_test(*args, **kwargs):
        # Factory binds the arguments for each generated test method.
        def test(self):
            self.roundtrip(*args, **kwargs)
        return test

    # (name, repaginate, decompress, compress) for each test flavor.
    modes = (
        ('simple', False, False, False),
        ('repaginate', True, False, False),
        ('decompress', False, True, False),
        ('compress', False, True, True),
    )
    for mode, repaginate, decompress, compress in modes:
        for srcf in static_pdfs.pdffiles[0]:
            basename = os.path.basename(srcf)
            setattr(TestOnePdf, 'test_%s_%s' % (mode, basename),
                    make_test(mode, basename, srcf,
                              repaginate=repaginate,
                              decompress=decompress,
                              compress=compress,
                              ))
# Generate the per-PDF test methods at import time so that unittest
# discovery (and python -m) can find them on the TestOnePdf class.
build_tests()
def main():
    '''Run the generated test suite via unittest's CLI entry point.'''
    unittest.main()
# Allow running directly: python -m tests.test_roundtrip
if __name__ == '__main__':
    main()