Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/mstamy2/PyPDF2
Browse files Browse the repository at this point in the history
  • Loading branch information
switham committed Feb 25, 2014
2 parents 1252414 + f3c9dc5 commit 84a8669
Show file tree
Hide file tree
Showing 10 changed files with 217 additions and 78 deletions.
22 changes: 16 additions & 6 deletions CHANGELOG
@@ -1,11 +1,14 @@
Version 1.20, 2014-01-??
Version 1.20, 2014-01-27
------------------------

- Many Python 3 support changes (with contributions from TWAC and cgammans)
- Official Python 3+ support (with contributions from TWAC and cgammans)
Support for Python versions 2.6 and 2.7 will be maintained

- Updated FAQ; link included in README
- Command line concatenation (see pdfcat in sample code) (by Steve Witham)

- Allow more (unnecessary) escape sequences
- New FAQ; link included in README

- Allow more (although unnecessary) escape sequences

- Prevent exception when reading a null object in decoding parameters

Expand All @@ -19,11 +22,18 @@ Version 1.20, 2014-01-??

- Additions to Sample Code and Sample PDFs

- changes to allow 2up script to work (by Dylan McNamee)
- changes to allow 2up script to work (see sample code) (by Dylan McNamee)

- changes to metadata encoding (by Chris Hiestand)

- New methods for links: addLink() (by Enrico Lambertini) and ignoreLinks()
- New methods for links: addLink() (by Enrico Lambertini) and removeLinks()

- Bugfix to handle nested bookmarks correctly (by Jamie Lentin)

- New methods removeImages() and removeText() available for PdfFileWriter
(by Tien Haï)

- Exception handling for illegal characters in Name Objects


Version 1.19, 2013-10-08
Expand Down
2 changes: 1 addition & 1 deletion PyPDF2/_version.py
@@ -1,2 +1,2 @@
__version__ = '1.20b'
__version__ = '1.20'

4 changes: 2 additions & 2 deletions PyPDF2/filters.py
Expand Up @@ -34,7 +34,7 @@
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"

from .utils import PdfReadError
from .utils import PdfReadError, ord_, chr_
from sys import version_info
if version_info < ( 3, 0 ):
from cStringIO import StringIO
Expand Down Expand Up @@ -118,7 +118,7 @@ def decode(data, decodeParms):
assert len(data) % rowlength == 0
prev_rowdata = (0,) * rowlength
for row in range(len(data) // rowlength):
rowdata = [ord(x) for x in data[(row*rowlength):((row+1)*rowlength)]]
rowdata = [ord_(x) for x in data[(row*rowlength):((row+1)*rowlength)]]
filterByte = rowdata[0]
if filterByte == 0:
pass
Expand Down
26 changes: 19 additions & 7 deletions PyPDF2/generic.py
Expand Up @@ -56,7 +56,7 @@ def readObject(stream, pdf):
return readStringFromStream(stream)
elif tok == b_('/'):
# name object
return NameObject.readFromStream(stream)
return NameObject.readFromStream(stream, pdf)
elif tok == b_('['):
# array object
return ArrayObject.readFromStream(stream, pdf)
Expand Down Expand Up @@ -85,7 +85,7 @@ def readObject(stream, pdf):
return NumberObject.readFromStream(stream)
peek = stream.read(20)
stream.seek(-len(peek), 1) # reset to start
if re.match(b_(r"(\d+)\s(\d+)\sR[^a-zA-Z]"), peek) != None:
if re.match(b_(r"(\d+)\s+(\d+)\s+R[^a-zA-Z]"), peek) != None:
return IndirectObject.readFromStream(stream, pdf)
else:
return NumberObject.readFromStream(stream)
Expand Down Expand Up @@ -204,9 +204,11 @@ def readFromStream(stream, pdf):
# stream has truncated prematurely
raise PdfStreamError("Stream has ended unexpectedly")
if tok.isspace():
if not generation:
continue
break
generation += tok
r = stream.read(1)
r = readNonWhitespace(stream)
if r != b_("R"):
raise utils.PdfReadError("Error reading indirect object reference at byte %s" % utils.hexStr(stream.tell()))
return IndirectObject(int(idnum), int(generation), pdf)
Expand All @@ -218,7 +220,7 @@ def __new__(cls, value="0", context=None):
try:
return decimal.Decimal.__new__(cls, utils.str_(value), context)
except:
return decimal.Decimal.__new__(cls, utils.str_(value))
return decimal.Decimal.__new__(cls, str(value))
def __repr__(self):
if self == self.to_integral():
return str(self.quantize(decimal.Decimal(1)))
Expand Down Expand Up @@ -452,7 +454,7 @@ def __init__(self, data):
def writeToStream(self, stream, encryption_key):
stream.write(b_(self))

def readFromStream(stream):
def readFromStream(stream, pdf):
debug = False
if debug: print((stream.tell()))
name = stream.read(1)
Expand All @@ -468,7 +470,17 @@ def readFromStream(stream):
break
name += tok
if debug: print(name)
return NameObject(name.decode('utf-8'))
try:
return NameObject(name.decode('utf-8'))
except UnicodeDecodeError as e:
# Name objects should represent irregular characters
# with a '#' followed by the symbol's hex number
if not pdf.strict:
warnings.warn("Illegal character in Name Object", utils.PdfReadWarning)
return NameObject(name)
else:
raise utils.PdfReadError("Illegal character in Name Object")

readFromStream = staticmethod(readFromStream)


Expand Down Expand Up @@ -909,7 +921,7 @@ def getWidth(self):
return self.getUpperRight_x() - self.getLowerLeft_x()

def getHeight(self):
return self.getUpperRight_y() - self.getLowerLeft_x()
return self.getUpperRight_y() - self.getLowerLeft_y()

lowerLeft = property(getLowerLeft, setLowerLeft, None, None)
lowerRight = property(getLowerRight, setLowerRight, None, None)
Expand Down
5 changes: 3 additions & 2 deletions PyPDF2/merger.py
Expand Up @@ -28,6 +28,7 @@
# POSSIBILITY OF SUCH DAMAGE.

from .generic import *
from .utils import string_type
from .pdf import PdfFileReader, PdfFileWriter
from .pagerange import PageRange
from sys import version_info
Expand Down Expand Up @@ -98,7 +99,7 @@ def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=T
# it is a PdfFileReader, copy that reader's stream into a
# StringIO stream.
# If fileobj is none of the above types, it is not modified
if type(fileobj) in (str, str):
if type(fileobj) == string_type:
fileobj = file(fileobj, 'rb')
my_file = True
elif isinstance(fileobj, file):
Expand Down Expand Up @@ -417,7 +418,7 @@ def findBookmark(self, bookmark, root=None):
res = self.findBookmark(bookmark, b)
if res:
return [i] + res
if b == bookmark or b['/Title'] == bookmark:
elif b == bookmark or b['/Title'] == bookmark:
return [i]

return None
Expand Down
11 changes: 7 additions & 4 deletions PyPDF2/pagerange.py
Expand Up @@ -9,14 +9,17 @@

import re

# "Str" maintains compatibility with Python 2.x.
# The next line is obfuscated like this so 2to3 won't change it.
Str = getattr(__builtins__, "basestring", str)

_INT_RE = r"(0|-?[1-9]\d*)" # A decimal int, don't allow "-0".
PAGE_RANGE_RE = "^({int}|({int}?(:{int}?(:{int}?)?)))$".format(int=_INT_RE)
# groups: 12 34 5 6 7 8


class ParseError(Exception):
def __init__(self, message):
super(self, ParseError).__init__(repr(message))
pass


PAGE_RANGE_HELP = """Remember, page indices start with zero.
Expand Down Expand Up @@ -68,7 +71,7 @@ def __init__(self, arg):
self._slice = arg.to_slice()
return

m = re.match(PAGE_RANGE_RE, arg)
m = isinstance(arg, Str) and re.match(PAGE_RANGE_RE, arg)
if not m:
raise ParseError(arg)
elif m.group(2):
Expand All @@ -87,7 +90,7 @@ def valid(input):
""" True if input is a valid initializer for a PageRange. """
return isinstance(input, slice) or \
isinstance(input, PageRange) or \
(isinstance(input, basestring)
(isinstance(input, Str)
and bool(re.match(PAGE_RANGE_RE, input)))

def to_slice(self):
Expand Down

0 comments on commit 84a8669

Please sign in to comment.