Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/mstamy2/PyPDF2
Browse files Browse the repository at this point in the history
  • Loading branch information
Hatell committed Jul 31, 2015
2 parents c74ff24 + 7456f0a commit fd2fe4d
Show file tree
Hide file tree
Showing 6 changed files with 224 additions and 67 deletions.
79 changes: 78 additions & 1 deletion CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,4 +1,81 @@
Version 1.24, 2014-12-31
Patch 1.25.1, 2015-07-20

- Fix bug when parsing inline images. Occurred when merging
certain pages with inline images

- Fixed type error when creating outlines by utilizing the
isString() test

Version 1.25, 2015-07-07
------------------------

BUGFIXES:

- Added Python 3 algorithm for ASCII85Decode. Fixes issue when
reading reportlab-generated files with Py 3 (jerickbixly)

- Recognize more escape sequences which would otherwise throw an
exception (manuelzs, robertsoakes)

- Fixed overflow error in generic.py. Occurred
when reading a too-large int in Python 2 (by Raja Jamwal)

- Allow access to files which were encrypted with an empty
password. Previously threw a "File has not been decrypted"
exception (Elena Williams)

- Do not attempt to decode an empty data stream. Previously
would cause an error in decode algorithms (vladir)

- Fixed some type issues specific to Py 2 or Py 3

- Fix issue when stream data begins with whitespace (soloma83)

- Recognize abbreviated filter names (AlmightyOatmeal and
Matthew Weiss)

- Copy decryption key from PdfFileReader to PdfFileMerger.
Allows usage of PdfFileMerger with encrypted files (twolfson)

- Fixed bug which occurred when a NameObject is present at end
of a file stream. Threw a "Stream has ended unexpectedly"
exception (speedplane)

FEATURES:

- Initial work on a test suite; to be expanded in the future.
Tests and Resources directories added, README updated (robertsoakes)

- Added document cloning methods to PdfFileWriter:
appendPagesFromReader, cloneReaderDocumentRoot, and
cloneDocumentFromReader. See official documentation (robertsoakes)

- Added method for writing to form fields: updatePageFormFieldValues.
This will be enhanced in the future. See official documentation
(robertsoakes)

- New addAttachment method. See documentation. Support for adding
and extracting embedded files to be enhanced in the future
(moshekaplan)

- Added methods to get page number of given PageObject or
Destination: getPageNumber and getDestinationPageNumber.
See documentation (mozbugbox)

OTHER ENHANCEMENTS:

- Enhanced type handling (Brent Amrhein)

- Enhanced exception handling in NameObject (sbywater)

- Enhanced extractText method output (peircej)

- Better exception handling

- Enhanced regex usage in NameObject class (speedplane)


Version 1.24, 2014-12-31
------------------------

- Bugfixes for reading files in Python 3 (by Anthony Tuininga and
Expand Down
2 changes: 1 addition & 1 deletion PyPDF2/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '1.24'
__version__ = '1.25.1'
126 changes: 75 additions & 51 deletions PyPDF2/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from cStringIO import StringIO
else:
from io import StringIO
import struct

try:
import zlib
Expand Down Expand Up @@ -256,55 +257,78 @@ def decode(data,decodeParams=None):

class ASCII85Decode(object):
def decode(data, decodeParms=None):
retval = ""
group = []
x = 0
hitEod = False
# remove all whitespace from data
data = [y for y in data if not (y in ' \n\r\t')]
while not hitEod:
c = data[x]
if len(retval) == 0 and c == "<" and data[x+1] == "~":
x += 2
continue
#elif c.isspace():
# x += 1
# continue
elif c == 'z':
assert len(group) == 0
retval += '\x00\x00\x00\x00'
x += 1
continue
elif c == "~" and data[x+1] == ">":
if len(group) != 0:
# cannot have a final group of just 1 char
assert len(group) > 1
cnt = len(group) - 1
group += [ 85, 85, 85 ]
hitEod = cnt
if version_info < ( 3, 0 ):
retval = ""
group = []
x = 0
hitEod = False
# remove all whitespace from data
data = [y for y in data if not (y in ' \n\r\t')]
while not hitEod:
c = data[x]
if len(retval) == 0 and c == "<" and data[x+1] == "~":
x += 2
continue
#elif c.isspace():
# x += 1
# continue
elif c == 'z':
assert len(group) == 0
retval += '\x00\x00\x00\x00'
x += 1
continue
elif c == "~" and data[x+1] == ">":
if len(group) != 0:
# cannot have a final group of just 1 char
assert len(group) > 1
cnt = len(group) - 1
group += [ 85, 85, 85 ]
hitEod = cnt
else:
break
else:
c = ord(c) - 33
assert c >= 0 and c < 85
group += [ c ]
if len(group) >= 5:
b = group[0] * (85**4) + \
group[1] * (85**3) + \
group[2] * (85**2) + \
group[3] * 85 + \
group[4]
assert b < (2**32 - 1)
c4 = chr((b >> 0) % 256)
c3 = chr((b >> 8) % 256)
c2 = chr((b >> 16) % 256)
c1 = chr(b >> 24)
retval += (c1 + c2 + c3 + c4)
if hitEod:
retval = retval[:-4+hitEod]
group = []
x += 1
return retval
else:
if isinstance(data, str):
data = data.encode('ascii')
n = b = 0
out = bytearray()
for c in data:
if ord('!') <= c and c <= ord('u'):
n += 1
b = b*85+(c-33)
if n == 5:
out += struct.pack(b'>L',b)
n = b = 0
elif c == ord('z'):
assert n == 0
out += b'\0\0\0\0'
elif c == ord('~'):
if n:
for _ in range(5-n):
b = b*85+84
out += struct.pack(b'>L',b)[:n-1]
break
else:
c = ord(c) - 33
assert c >= 0 and c < 85
group += [ c ]
if len(group) >= 5:
b = group[0] * (85**4) + \
group[1] * (85**3) + \
group[2] * (85**2) + \
group[3] * 85 + \
group[4]
assert b < (2**32 - 1)
c4 = chr((b >> 0) % 256)
c3 = chr((b >> 8) % 256)
c2 = chr((b >> 16) % 256)
c1 = chr(b >> 24)
retval += (c1 + c2 + c3 + c4)
if hitEod:
retval = retval[:-4+hitEod]
group = []
x += 1
return retval
return bytes(out)
decode = staticmethod(decode)


Expand All @@ -318,13 +342,13 @@ def decodeStreamData(stream):
# If there is not data to decode we should not try to decode the data.
if data:
for filterType in filters:
if filterType == "/FlateDecode":
if filterType == "/FlateDecode" or filterType == "/Fl":
data = FlateDecode.decode(data, stream.get("/DecodeParms"))
elif filterType == "/ASCIIHexDecode":
elif filterType == "/ASCIIHexDecode" or filterType == "/AHx":
data = ASCIIHexDecode.decode(data)
elif filterType == "/LZWDecode":
elif filterType == "/LZWDecode" or filterType == "/LZW":
data = LZWDecode.decode(data, stream.get("/DecodeParms"))
elif filterType == "/ASCII85Decode":
elif filterType == "/ASCII85Decode" or filterType == "/A85":
data = ASCII85Decode.decode(data)
elif filterType == "/Crypt":
decodeParams = stream.get("/DecodeParams", {})
Expand Down
5 changes: 3 additions & 2 deletions PyPDF2/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,7 @@ def writeToStream(self, stream, encryption_key):


class NameObject(str, PdfObject):
delimiterPattern = re.compile(b_("\s+|[()<>[\]{}/%]"))
delimiterPattern = re.compile(b_(r"\s+|[\(\)<>\[\]{}/%]"))
surfix = b_("/")

def hashValue(self):
Expand All @@ -504,7 +504,8 @@ def readFromStream(stream, pdf):
name = stream.read(1)
if name != NameObject.surfix:
raise utils.PdfReadError("name read error")
name += utils.readUntilRegex(stream, NameObject.delimiterPattern)
name += utils.readUntilRegex(stream, NameObject.delimiterPattern,
ignore_eof=True)
if debug: print(name)
try:
return NameObject(name.decode('utf-8'))
Expand Down
5 changes: 5 additions & 0 deletions PyPDF2/merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=T
# it is a PdfFileReader, copy that reader's stream into a
# BytesIO (or StreamIO) stream.
# If fileobj is none of the above types, it is not modified
decryption_key = None
if isString(fileobj):
fileobj = file(fileobj, 'rb')
my_file = True
Expand All @@ -123,11 +124,15 @@ def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=T
filecontent = StreamIO(fileobj.stream.read())
fileobj.stream.seek(orig_tell) # reset the stream to its original location
fileobj = filecontent
if hasattr(fileobj, '_decryption_key'):
decryption_key = fileobj._decryption_key
my_file = True

# Create a new PdfFileReader instance using the stream
# (either file or BytesIO or StringIO) created above
pdfr = PdfFileReader(fileobj, strict=self.strict)
if decryption_key is not None:
pdfr._decryption_key = decryption_key

# Find the range of pages to merge.
if pages == None:
Expand Down

0 comments on commit fd2fe4d

Please sign in to comment.