Skip to content

Commit

Permalink
Multiple Fix and enhancement (py-pdf#75)
Browse files Browse the repository at this point in the history
* Local imports

imports made local to have concurrent versions in parallel & upgrade version

* Fix Reading Files

fix regression since 1.27.0 in encoded stream loading and extraction of XRefStm and Prev in trailer

* Lazy file/stream selection

Provide capability to provide the filename/stream only when calling write

* Add Clone capability

Provide capability to copy pages and objects from a reader into a writer. Provide also capability to copy a full reader into a writer

* Add PageLabel access functions

implement PageLabels functions both in Reader (get) and Writer (get/add/remove)

* Object Standardization

Create _rootObject in Reader to match Writer and also introduce a getIndirectionFunction on both Reader and Writer

* Enhance NamedDestinations functions

Extend NamedDestinations to work with PDF 1.1 and 1.2+ with both get/insert/remove functions
Also complete/fix some issues with outlines

* enhance pages api for writer

Provide capability to work with a non-flattened page organisation for page insertion, retrieval and removal

* Add comment capability

implements functions to add comments to a Page

* Remove Links / Annots

improve removeLinks into removeAnnots to be able to select which type of annots to remove
removeLinks kept for compatibility

* add demo programs on comments

Add 2 test programs to test and demonstrate comments (merging comments and listing them in an Excel sheet)
  • Loading branch information
pubpub-zz committed Jun 21, 2020
1 parent a2561ce commit f72745e
Show file tree
Hide file tree
Showing 9 changed files with 1,239 additions and 113 deletions.
12 changes: 8 additions & 4 deletions pypdf/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
from pypdf.pdf import PdfFileReader, PdfFileWriter
from pypdf.merger import PdfFileMerger
from pypdf.pagerange import PageRange
from pypdf._version import __version__
from .pdf import PdfFileReader, PdfFileWriter
from .generic import *
from .merger import PdfFileMerger
from .pagerange import PageRange
from ._version import __version__


__all__ = [
# Names exported by ``from pypdf import *``.
# Core classes: reader, writer, merger and the PageRange helper.
"PdfFileReader", "PdfFileWriter", "PdfFileMerger", "PageRange",
# Frequently used names from the generic module, listed explicitly so they
# stay part of the star-import surface.
"BooleanObject","ArrayObject","IndirectObject","FloatObject","NumberObject","createStringObject",
"TextStringObject","NameObject","DictionaryObject","TreeObject","Destination","PageLabel","Bookmark",
# Submodule names.
# NOTE(review): "utils", "filters" and "xmp" are not imported explicitly in
# this file -- confirm they are loaded as a side effect of the other imports.
"pdf", "generic", "utils", "filters", "merger", "pagerange", "xmp"
]
2 changes: 1 addition & 1 deletion pypdf/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '1.27.0'
__version__ = '1.27.0PPzz'
10 changes: 5 additions & 5 deletions pypdf/filters.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# -*- coding: UTF-8 -*-
# vim: sw=4:expandtab:foldmethod=marker
#
# Copyright (c) 2006, Mathieu Fenniak
Expand Down Expand Up @@ -31,13 +30,14 @@
Implementation of stream filters for PDF.
"""

import math
import base64
import struct
from sys import version_info

from pypdf import generic
from pypdf.generic import *
from pypdf.utils import PdfReadError, pypdfOrd, paethPredictor, PdfStreamError
from . import generic
from .generic import *
from .utils import PdfReadError, pypdfOrd, paethPredictor,PdfStreamError

try:
import zlib
Expand Down Expand Up @@ -193,7 +193,7 @@ def decode(data, decodeParms=None):

prev_rowdata = rowdata

for d in rowdata:
for d in rowdata[1:]: ##ppZZ ???? err in latest version
if version_info < (3, 0):
output.write(chr(d))
else:
Expand Down
197 changes: 192 additions & 5 deletions pypdf/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,9 @@
import warnings
from io import BytesIO

from pypdf.utils import *
from pypdf.utils import pypdfBytes as b_, pypdfUnicode as u_
#from . import utils
from .utils import *
from .utils import pypdfUnicode as u_, pypdfBytes as b_

__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"
Expand Down Expand Up @@ -98,6 +99,10 @@ def getObject(self):
"""Resolves indirect references."""
return self

def clone(self, pdfD):  # PPzz
    """
    Clone this object into ``pdfD``.

    Placeholder implementation: it always raises, so an object type that
    does not define its own ``clone`` fails loudly instead of being copied
    incorrectly.

    :param pdfD: destination the copy would belong to (unused here).
    :raises NotImplementedError: always.  ``NotImplementedError`` is a
        subclass of ``Exception``, so callers catching ``Exception`` keep
        working.
    """
    # The former ``return self`` after the raise could never execute.
    raise NotImplementedError("clone PdfObject")

# TO-DO Add __repr__() implementations to the *Object classes
class NullObject(PdfObject):
Expand All @@ -114,10 +119,19 @@ def readFromStream(stream):
return NullObject()


def clone(self, pdfD):  # PPzz
    """
    Return the copy of this null object to be used in ``pdfD``.

    ``pdfD`` is not consulted: a brand-new ``NullObject`` is created on
    every call.
    """
    duplicate = NullObject()
    return duplicate


class BooleanObject(PdfObject):
def __init__(self, value):
    """
    Store ``value`` as the wrapped boolean.

    :param value: value kept, unmodified, in ``self.value``.
    """
    self.value = value

def clone(self, pdfD):  # PPzz
    """
    Return a new ``BooleanObject`` wrapping the same value.

    ``pdfD`` is accepted for interface uniformity and is not used.
    """
    flag = self.value
    return BooleanObject(flag)

def writeToStream(self, stream, encryption_key):
if self.value:
stream.write(b_("true"))
Expand All @@ -139,6 +153,17 @@ def readFromStream(stream):


class ArrayObject(list, PdfObject):

def clone(self, pdfD):  # PPzz
    """
    Build a copy of this array whose elements belong to ``pdfD``.

    Elements exposing a ``clone`` method are cloned into ``pdfD``; any other
    element is carried over unchanged.  Element order is preserved.

    :param pdfD: destination passed down to each element's ``clone``.
    :return: a new ``ArrayObject``.
    """
    arr = ArrayObject()
    # hasattr() asks for the one attribute needed instead of materialising
    # the whole dir() listing for every element.
    arr.extend(
        data.clone(pdfD) if hasattr(data, 'clone') else data
        for data in self
    )
    return arr

def writeToStream(self, stream, encryption_key):
stream.write(b_("["))

Expand Down Expand Up @@ -196,6 +221,22 @@ def __init__(self, idnum, generation, pdf):
self.generation = generation
self.pdf = pdf

def clone(self, pdfD):  # PPzz
    """
    Clone the referenced object into ``pdfD`` and return a reference to it.

    ``pdfD._IdTranslated`` (created on first use) maps a source object
    number to the number allocated in ``pdfD``.  An object number that was
    already translated is not cloned again, which also stops the recursion
    on reference cycles.

    :param pdfD: destination; must expose the ``_objects`` list.
    :return: an ``IndirectObject`` (generation 0) pointing into ``pdfD``.
    """
    # NOTE(review): the table is keyed by ``idnum`` only, so cloning from two
    # different source documents into the same pdfD looks like it could
    # collide -- confirm against the callers.
    translated = getattr(pdfD, "_IdTranslated", None)
    if translated is None:
        translated = {}
        pdfD._IdTranslated = translated

    if self.idnum in translated:
        n = translated[self.idnum]
    else:
        n = len(pdfD._objects) + 1
        # Register the number and a placeholder slot *before* recursing so
        # that a reference back to this object resolves to ``n``.
        translated[self.idnum] = n
        pdfD._objects.append("%d NotInit" % n)
        pdfD._objects[n - 1] = self.getObject().clone(pdfD)

    return IndirectObject(n, 0, pdfD)

def getObject(self):
    """
    Resolve this reference.

    The owning ``self.pdf`` is asked for the referenced object, and that
    result's own ``getObject()`` is returned.
    """
    target = self.pdf.getObject(self)
    return target.getObject()

Expand Down Expand Up @@ -265,6 +306,10 @@ def __new__(cls, value="0", context=None):
except:
return decimal.Decimal.__new__(cls, str(value))

def clone(self, pdfD):  # PPzz
    """
    Return a new ``FloatObject`` built from this object's numeric value.

    ``pdfD`` is accepted for interface uniformity and is not used.
    """
    numeric = self.asNumeric()
    return FloatObject(numeric)

def __repr__(self):
if self == self.to_integral():
return str(self.quantize(decimal.Decimal(1)))
Expand Down Expand Up @@ -294,6 +339,10 @@ def __new__(cls, value):
except OverflowError:
return int.__new__(cls, 0)

def clone(self, pdfD):  # PPzz
    """
    Return a new ``NumberObject`` built from this object's numeric value.

    ``pdfD`` is accepted for interface uniformity and is not used.
    """
    numeric = self.asNumeric()
    return NumberObject(numeric)

def asNumeric(self):
    """
    Return this number as a plain ``int``.

    The value is obtained by parsing the byte form of ``repr(self)``.
    """
    encoded = b_(repr(self))
    return int(encoded)

Expand Down Expand Up @@ -444,6 +493,10 @@ class ByteStringObject(bytes_type, PdfObject):
# returns self.
original_bytes = property(lambda self: self)

def clone(self, pdfD):  # PPzz
    """
    Return a new ``ByteStringObject`` holding the same bytes.

    ``pdfD`` is accepted for interface uniformity and is not used.
    """
    duplicate = ByteStringObject(self)
    return duplicate

def writeToStream(self, stream, encryption_key):
bytearr = self

Expand Down Expand Up @@ -471,6 +524,10 @@ class TextStringObject(string_type, PdfObject):
# back-calculate what the original encoded bytes were.
original_bytes = property(lambda self: self.getOriginalBytes())

def clone(self, pdfD):  # PPzz
    """
    Return a new string object created from this text.

    The copy is produced by ``createStringObject``; ``pdfD`` is accepted
    for interface uniformity and is not used.
    """
    duplicate = createStringObject(self)
    return duplicate

def getOriginalBytes(self):
# We're a text string object, but the library is trying to get our raw
# bytes. This can happen if we auto-detected this string as text, but
Expand Down Expand Up @@ -513,6 +570,10 @@ class NameObject(str, PdfObject):
delimiterPattern = re.compile(b_(r"\s+|[\(\)<>\[\]{}/%]"))
surfix = b_("/")

def clone(self, pdfD):  # PPzz
    """
    Return a new ``NameObject`` carrying the same name.

    ``pdfD`` is accepted for interface uniformity and is not used.
    """
    duplicate = NameObject(self)
    return duplicate

def writeToStream(self, stream, encryption_key):
stream.write(b_(self))

Expand Down Expand Up @@ -549,6 +610,15 @@ def readFromStream(stream, pdf):


class DictionaryObject(dict, PdfObject):

def clone(self, pdfD):  # PPzz
    """
    Build a copy of this dictionary whose content belongs to ``pdfD``.

    Every key and every value exposing a ``clone`` method is cloned into
    ``pdfD``; anything else is carried over unchanged.

    :param pdfD: destination passed down to each ``clone`` call.
    :return: a new ``DictionaryObject``.
    """
    def _copy(item):
        # Items without a clone() method are reused as they are.
        return item.clone(pdfD) if hasattr(item, 'clone') else item

    d = DictionaryObject()
    # Keys are cloned against the destination too; the key itself used to be
    # handed over where the destination was expected (``k.clone(k)``).
    d.update({_copy(k): _copy(v) for k, v in self.items()})
    return d

def rawGet(self, key):
    """
    Return the value stored under ``key`` through the plain ``dict`` lookup.

    :raises KeyError: if ``key`` is absent.
    """
    plain_lookup = dict.__getitem__
    return plain_lookup(self, key)

Expand Down Expand Up @@ -734,7 +804,15 @@ def readFromStream(stream, pdf):

class TreeObject(DictionaryObject):
def __init__(self):
DictionaryObject.__init__()
DictionaryObject.__init__(self)

def clone(self, pdfD):  # PPzz
    """
    Clone this tree into ``pdfD`` -- not supported yet.

    :param pdfD: destination the copy would belong to (unused here).
    :raises NotImplementedError: always, with the same arguments as before
        (``"clone TreeObject"`` and the object itself).  It is a subclass of
        ``Exception``, so callers catching ``Exception`` keep working.
    """
    # TODO: implement tree cloning.  The draft loop that followed the raise
    # could never run and has been removed.
    raise NotImplementedError("clone TreeObject", self)

def hasChildren(self):
    """Tell whether this object holds a '/First' entry."""
    if '/First' in self:
        return True
    return False
Expand Down Expand Up @@ -875,6 +953,14 @@ def __init__(self):
self._data = None
self.decodedSelf = None

def clone(self, pdfD):  # PPzz
    """
    Return a copy of this stream for use in ``pdfD``.

    A new instance of the same class is created; it receives the same
    ``_data`` and ``decodedSelf`` and all dictionary entries of this stream.

    :param pdfD: destination the copy is meant for.
    :return: the new stream object (previously ``self`` was returned by
        mistake and the freshly built copy was thrown away).
    """
    st = self.__class__()
    st._data = self._data
    st.decodedSelf = self.decodedSelf
    # NOTE(review): entries are copied as they are, without cloning them
    # into pdfD -- confirm whether values should go through clone(pdfD).
    st.update(self)
    return st

def writeToStream(self, stream, encryption_key):
self[NameObject("/Length")] = NumberObject(len(self._data))
DictionaryObject.writeToStream(self, stream, encryption_key)
Expand Down Expand Up @@ -906,7 +992,7 @@ def initializeFromDictionary(data):
return retval

def flateEncode(self):
from pypdf.filters import FlateCodec
from .filters import FlateCodec

if "/Filter" in self:
f = self["/Filter"]
Expand Down Expand Up @@ -934,7 +1020,7 @@ def __init__(self):
self.decodedSelf = None

def getData(self):
from pypdf.filters import decodeStreamData
from .filters import decodeStreamData

if self.decodedSelf:
# Cached version of decoded object
Expand Down Expand Up @@ -2098,6 +2184,7 @@ def __init__(self, title, page, typ, *args):
self[NameObject("/Title")] = title
self[NameObject("/Page")] = page
self[NameObject("/Type")] = typ
self.parent=None #PPzz

# from table 8.2 of the PDF 1.7 reference.
if typ == "/XYZ":
Expand Down Expand Up @@ -2195,6 +2282,106 @@ def writeToStream(self, stream, encryption_key):
:rtype: ``int``, or ``None`` if not available.
"""

class PageLabel(object):
    """
    Numbering rule of one page-label range.

    Attributes:
      * ``prefix``    -- text placed in front of the numeric part ('' if none)
      * ``numbering`` -- style name: '/D' decimal, '/R' / '/r' Roman,
        '/A' / '/a' letters, '' for no numeric part
      * ``pn``        -- position of the first page of the range
      * ``first``     -- offset added to a page position to obtain the number
        shown on that page (start value minus ``pn``)
    """

    # Errors meaning "entry missing or unusable": absent key, tuple too
    # short, None where text / a number is expected, non-numeric start value.
    _FALLBACK_ERRORS = (KeyError, IndexError, TypeError, ValueError,
                        AttributeError)

    # (value, symbol) pairs, largest first, for Roman numeral conversion.
    _ROMAN_SYMBOLS = (
        (1000, "M"), (900, "CM"), (500, "D"), (400, "CD"),
        (100, "C"), (90, "XC"), (50, "L"), (40, "XL"),
        (10, "X"), (9, "IX"), (5, "V"), (4, "IV"),
        (1, "I"),
    )

    def __init__(self, pn=0, defObject=None):
        """
        :param integer pn: first page of the group
        :param defObject: either a tuple ``(start value, prefix, numbering
            style)`` or the label dictionary read from the file (entries
            ``/St``, ``/P``, ``/S``).  Missing or ``None`` entries fall back
            to: no prefix, decimal numbering when there is no prefix (no
            numeric part otherwise), start value 1.
        """
        if defObject is None:
            # Only read below, never stored: a plain empty mapping is enough.
            defObject = {}
        fromTuple = isinstance(defObject, tuple)

        try:
            # ``+ ""`` makes a None prefix raise so that the default applies.
            self.prefix = defObject[1] + "" if fromTuple else defObject['/P']
        except self._FALLBACK_ERRORS:
            self.prefix = ''

        try:
            self.numbering = defObject[2] + "" if fromTuple else defObject['/S']
        except self._FALLBACK_ERRORS:
            self.numbering = '/D' if self.prefix == "" else ""

        self.pn = pn  # 1st page of the range
        try:
            if fromTuple:
                start = max(1, int(defObject[0]))
            else:
                start = int(defObject['/St'])
        except self._FALLBACK_ERRORS:
            start = 1
        self.first = start - pn

    def __repr__(self):
        return "PageLabel Obj(@%r :%s-%s)" % (self.first, self.prefix, self.numbering)

    def buildDefinition(self, pn=None):
        """
        Build the DictionaryObject to inject into the PDF.

        :param pn: start value to write as ``/St``: ``None`` writes the
            start value of this label, ``0`` writes no ``/St`` entry, any
            other value is written as given.
        :return: a new ``DictionaryObject``.
        """
        o = DictionaryObject()
        # No /S for the plain default (decimal, no prefix), and none when the
        # label has no numeric part: an empty name is not a valid style.
        if self.numbering and (self.numbering != '/D' or self.prefix != ''):
            o.update({NameObject("/S"): NameObject(self.numbering)})
        if self.prefix != '':
            # /P is a text string in the PDF page-label dictionary, not a name.
            o.update({NameObject("/P"): createStringObject(self.prefix)})
        if pn is None:
            o.update({NameObject("/St"): NumberObject(self.first + self.pn)})
        elif pn != 0:
            o.update({NameObject("/St"): NumberObject(pn)})
        return o

    def getLabel(self, pn):
        """
        Return the label of page ``pn``: the prefix followed by
        ``pn + self.first`` rendered in the numbering style.

        An unknown or empty style yields the prefix alone; Roman and letter
        styles yield an empty numeric part for values below 1.
        """
        number = pn + self.first
        style = self.numbering
        if style == '/D':
            st = str(number)
        elif style in ('/R', '/r'):
            st = self._toRoman(number)
        elif style in ('/A', '/a'):
            st = self._toLetters(number)
        else:
            st = ''
        if style in ('/r', '/a'):
            st = st.lower()
        return self.prefix + st

    @classmethod
    def _toRoman(cls, num):
        """Uppercase Roman numeral of ``num`` ('' when ``num`` < 1)."""
        if num <= 0:
            return ''
        parts = []
        for value, symbol in cls._ROMAN_SYMBOLS:
            count, num = divmod(num, value)
            parts.append(symbol * count)
        return ''.join(parts)

    @staticmethod
    def _toLetters(num):
        """
        Uppercase letter label of ``num`` ('' when ``num`` < 1).

        Follows the PDF page-label rule: A to Z for 1..26, AA to ZZ for
        27..52, AAA to ZZZ for 53..78, and so on (the letter is repeated).
        """
        if num <= 0:
            return ''
        repeat, letter = divmod(num - 1, 26)
        return chr(letter + 65) * (repeat + 1)


class Bookmark(Destination):
def writeToStream(self, stream, encryption_key):
Expand Down
7 changes: 5 additions & 2 deletions pypdf/merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,9 +207,12 @@ def append(self, fileobj, bookmark=None, pages=None, importBookmarks=True):
"""
self.merge(len(self._pages), fileobj, bookmark, pages, importBookmarks)

def write(self):
def write(self, fileobj=None):
"""
Writes all data that has been merged to the given output file.
:param fileobj: Output file. Can be a filename or any kind of
file-like object.
"""
for page in self._pages:
self._writer.addPage(page.pagedata)
Expand All @@ -222,7 +225,7 @@ def write(self):
self._writeBookmarks()

# Write the output to the file
self._writer.write()
self._writer.write(fileobj)

def close(self):
"""
Expand Down
Loading

0 comments on commit f72745e

Please sign in to comment.