Merge branch 'master' of https://github.com/mstamy2/PyPDF2

py-pdf · Jul 31, 2015 · fd2fe4d · fd2fe4d
2 parents c74ff24 + 7456f0a
commit fd2fe4d
Show file tree

Hide file tree

Showing 6 changed files with 224 additions and 67 deletions.
diff --git a/CHANGELOG b/CHANGELOG
@@ -1,4 +1,81 @@
-Version 1.24, 2014-12-31
+Patch 1.25.1, 2015-07-20
+
+ - Fix bug when parsing inline images. Occurred when merging
+   certain pages with inline images
+
+ - Fixed type error when creating outlines by utilizing the
+   isString() test
+
+Version 1.25, 2015-07-07
+------------------------
+
+BUGFIXES:
+
+ - Added Python 3 algorithm for ASCII85Decode. Fixes issue when
+   reading reportlab-generated files with Py 3 (jerickbixly)
+
+ - Recognize more escape sequences, which would otherwise throw an
+   exception (manuelzs, robertsoakes)
+
+ - Fixed overflow error in generic.py. Occurred when reading
+   an overly large int in Python 2 (by Raja Jamwal)
+
+ - Allow access to files which were encrypted with an empty
+   password. Previously threw a "File has not been decrypted"
+   exception (Elena Williams)
+
+ - Do not attempt to decode an empty data stream. Previously
+   would cause an error in decode algorithms (vladir)
+
+ - Fixed some type issues specific to Py 2 or Py 3
+
+ - Fix issue when stream data begins with whitespace (soloma83)
+
+ - Recognize abbreviated filter names (AlmightyOatmeal and
+   Matthew Weiss)
+
+ - Copy decryption key from PdfFileReader to PdfFileMerger.
+   Allows usage of PdfFileMerger with encrypted files (twolfson)
+
+ - Fixed bug which occurred when a NameObject is present at end
+   of a file stream. Threw a "Stream has ended unexpectedly"
+   exception (speedplane)
+
+FEATURES:
+
+ - Initial work on a test suite; to be expanded in the future.
+   Tests and Resources directory added, README updated (robertsoakes)
+
+ - Added document cloning methods to PdfFileWriter:
+   appendPagesFromReader, cloneReaderDocumentRoot, and
+   cloneDocumentFromReader. See official documentation (robertsoakes)
+
+ - Added method for writing to form fields: updatePageFormFieldValues.
+   This will be enhanced in the future. See official documentation
+   (robertsoakes)
+
+ - New addAttachment method. See documentation. Support for adding
+   and extracting embedded files to be enhanced in the future
+   (moshekaplan)
+
+ - Added methods to get page number of given PageObject or
+   Destination: getPageNumber and getDestinationPageNumber.
+   See documentation (mozbugbox)
+
+OTHER ENHANCEMENTS:
+
+ - Enhanced type handling (Brent Amrhein)
+
+ - Enhanced exception handling in NameObject (sbywater)
+
+ - Enhanced extractText method output (peircej)
+
+ - Better exception handling
+
+ - Enhanced regex usage in NameObject class (speedplane)
+
+
+Version 1.24, 2014-12-31
 ------------------------
 
  - Bugfixes for reading files in Python 3 (by Anthony Tuininga and

diff --git a/PyPDF2/_version.py b/PyPDF2/_version.py
@@ -1 +1 @@
-__version__ = '1.24'
+__version__ = '1.25.1'
diff --git a/PyPDF2/filters.py b/PyPDF2/filters.py
@@ -40,6 +40,7 @@
     from cStringIO import StringIO
 else:
     from io import StringIO
+    import struct
 
 try:
     import zlib
@@ -256,55 +257,78 @@ def decode(data,decodeParams=None):
 
 class ASCII85Decode(object):
     def decode(data, decodeParms=None):
-        retval = ""
-        group = []
-        x = 0
-        hitEod = False
-        # remove all whitespace from data
-        data = [y for y in data if not (y in ' \n\r\t')]
-        while not hitEod:
-            c = data[x]
-            if len(retval) == 0 and c == "<" and data[x+1] == "~":
-                x += 2
-                continue
-            #elif c.isspace():
-            #    x += 1
-            #    continue
-            elif c == 'z':
-                assert len(group) == 0
-                retval += '\x00\x00\x00\x00'
-                x += 1
-                continue
-            elif c == "~" and data[x+1] == ">":
-                if len(group) != 0:
-                    # cannot have a final group of just 1 char
-                    assert len(group) > 1
-                    cnt = len(group) - 1
-                    group += [ 85, 85, 85 ]
-                    hitEod = cnt
+        if version_info < ( 3, 0 ):
+            retval = ""
+            group = []
+            x = 0
+            hitEod = False
+            # remove all whitespace from data
+            data = [y for y in data if not (y in ' \n\r\t')]
+            while not hitEod:
+                c = data[x]
+                if len(retval) == 0 and c == "<" and data[x+1] == "~":
+                    x += 2
+                    continue
+                #elif c.isspace():
+                #    x += 1
+                #    continue
+                elif c == 'z':
+                    assert len(group) == 0
+                    retval += '\x00\x00\x00\x00'
+                    x += 1
+                    continue
+                elif c == "~" and data[x+1] == ">":
+                    if len(group) != 0:
+                        # cannot have a final group of just 1 char
+                        assert len(group) > 1
+                        cnt = len(group) - 1
+                        group += [ 85, 85, 85 ]
+                        hitEod = cnt
+                    else:
+                        break
                 else:
+                    c = ord(c) - 33
+                    assert c >= 0 and c < 85
+                    group += [ c ]
+                if len(group) >= 5:
+                    b = group[0] * (85**4) + \
+                        group[1] * (85**3) + \
+                        group[2] * (85**2) + \
+                        group[3] * 85 + \
+                        group[4]
+                    assert b < (2**32 - 1)
+                    c4 = chr((b >> 0) % 256)
+                    c3 = chr((b >> 8) % 256)
+                    c2 = chr((b >> 16) % 256)
+                    c1 = chr(b >> 24)
+                    retval += (c1 + c2 + c3 + c4)
+                    if hitEod:
+                        retval = retval[:-4+hitEod]
+                    group = []
+                x += 1
+            return retval
+        else:
+            if isinstance(data, str):
+                data = data.encode('ascii')
+            n = b = 0
+            out = bytearray()
+            for c in data:
+                if ord('!') <= c and c <= ord('u'):
+                    n += 1
+                    b = b*85+(c-33)
+                    if n == 5:
+                        out += struct.pack(b'>L',b)
+                        n = b = 0
+                elif c == ord('z'):
+                    assert n == 0
+                    out += b'\0\0\0\0'
+                elif c == ord('~'):
+                    if n:
+                        for _ in range(5-n):
+                            b = b*85+84
+                        out += struct.pack(b'>L',b)[:n-1]
                     break
-            else:
-                c = ord(c) - 33
-                assert c >= 0 and c < 85
-                group += [ c ]
-            if len(group) >= 5:
-                b = group[0] * (85**4) + \
-                    group[1] * (85**3) + \
-                    group[2] * (85**2) + \
-                    group[3] * 85 + \
-                    group[4]
-                assert b < (2**32 - 1)
-                c4 = chr((b >> 0) % 256)
-                c3 = chr((b >> 8) % 256)
-                c2 = chr((b >> 16) % 256)
-                c1 = chr(b >> 24)
-                retval += (c1 + c2 + c3 + c4)
-                if hitEod:
-                    retval = retval[:-4+hitEod]
-                group = []
-            x += 1
-        return retval
+            return bytes(out)
     decode = staticmethod(decode)
 
 
@@ -318,13 +342,13 @@ def decodeStreamData(stream):
     # If there is not data to decode we should not try to decode the data.
     if data:
         for filterType in filters:
-            if filterType == "/FlateDecode":
+            if filterType == "/FlateDecode" or filterType == "/Fl":
                 data = FlateDecode.decode(data, stream.get("/DecodeParms"))
-            elif filterType == "/ASCIIHexDecode":
+            elif filterType == "/ASCIIHexDecode" or filterType == "/AHx":
                 data = ASCIIHexDecode.decode(data)
-            elif filterType == "/LZWDecode":
+            elif filterType == "/LZWDecode" or filterType == "/LZW":
                 data = LZWDecode.decode(data, stream.get("/DecodeParms"))
-            elif filterType == "/ASCII85Decode":
+            elif filterType == "/ASCII85Decode" or filterType == "/A85":
                 data = ASCII85Decode.decode(data)
             elif filterType == "/Crypt":
                 decodeParams = stream.get("/DecodeParams", {})

diff --git a/PyPDF2/generic.py b/PyPDF2/generic.py
@@ -487,7 +487,7 @@ def writeToStream(self, stream, encryption_key):
 
 
 class NameObject(str, PdfObject):
-    delimiterPattern = re.compile(b_("\s+|[()<>[\]{}/%]"))
+    delimiterPattern = re.compile(b_(r"\s+|[\(\)<>\[\]{}/%]"))
     surfix = b_("/")
 
     def hashValue(self):
@@ -504,7 +504,8 @@ def readFromStream(stream, pdf):
         name = stream.read(1)
         if name != NameObject.surfix:
             raise utils.PdfReadError("name read error")
-        name += utils.readUntilRegex(stream, NameObject.delimiterPattern)
+        name += utils.readUntilRegex(stream, NameObject.delimiterPattern, 
+            ignore_eof=True)
         if debug: print(name)
         try:
             return NameObject(name.decode('utf-8'))

diff --git a/PyPDF2/merger.py b/PyPDF2/merger.py
@@ -109,6 +109,7 @@ def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=T
         # it is a PdfFileReader, copy that reader's stream into a
         # BytesIO (or StreamIO) stream.
         # If fileobj is none of the above types, it is not modified
+        decryption_key = None
         if isString(fileobj):
             fileobj = file(fileobj, 'rb')
             my_file = True
@@ -123,11 +124,15 @@ def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=T
             filecontent = StreamIO(fileobj.stream.read())
             fileobj.stream.seek(orig_tell) # reset the stream to its original location
             fileobj = filecontent
+            if hasattr(fileobj, '_decryption_key'):
+                decryption_key = fileobj._decryption_key
             my_file = True
 
         # Create a new PdfFileReader instance using the stream
         # (either file or BytesIO or StringIO) created above
         pdfr = PdfFileReader(fileobj, strict=self.strict)
+        if decryption_key is not None:
+            pdfr._decryption_key = decryption_key
 
         # Find the range of pages to merge.
         if pages == None: