Skip to content

Commit

Permalink
Flate png filters (#114)
Browse files Browse the repository at this point in the history
* Addition of Flate PNG reverse filters: Up, Average and Paeth

* Notes to run test-cases

* flate_png tests with filters

* Reverse filters are applied to every pixel except the first of each scanline.
Fix dict key.

* Apply filter to the whole scanline

* Reverting to original flate_png to assert all tests pass

* When flate_png_orig returns error, data is None

* Can't use print in python 3

* Can't use print in python 3

* import correct xrange based on python version

* During flate_png reconstruction, previous row must be the previous scanline reconstructed

* subfilter must read the same byte from pixel to its left: https://www.w3.org/TR/2003/REC-PNG-20031110/#9FtIntro

* Addition of tests for flate_png_impl: flate_png_impl doesn't do array to X conversion but instead it returns array.array('B')

* Fix flate_png_impl tests for filters 2 (sub), 3 (avg), and 4 (paeth) as they require at least 2 scanlines. Simplify UP filter.

* Use ord() when applicable

* Remove debugging code

* Tests for flate_png_impl using http://www.schaik.com/pngsuite/pngsuite_fil_png.html f01n2c08, f02n2c08, f03n2c08, f04n2c08

* Missing png.log files

* Fix file path for local .png.log files

* Simplify filters and add more tests

* Addition of Jupyter IPython notebook capable of rendering buffers (rasters) used/produced by test_flate_png.py tests: requires matplotlib

* Comments on ipython notebook

* Assert that flate_png_orig produced different output (yet correct looking PDF War of the Worlds) when png data was compressed and filtered using Sub (f=1). New flate_png also generates correct looking PDF War of the Worlds.

* Cleanup: remove flate_png_orig, update expected.txt checksums, and add basn0g08.png.log and its test

* Remove Encrypt check from test_roundtrip. If 'expected.txt' has a valid hash, the test is expected to pass and produce a file with same hash. Revert all PDFs with Encrypted content to 'skip' because the roundtrip PDF is 'blank'
  • Loading branch information
Henddher Pedroza authored and pmaupin committed Feb 16, 2018
1 parent 8774f15 commit 6c89216
Show file tree
Hide file tree
Showing 14 changed files with 2,817 additions and 35 deletions.
1 change: 1 addition & 0 deletions LICENSE.txt
Expand Up @@ -24,6 +24,7 @@ Copyright (c) 2016 Edward Betts. All rights reserved.
Copyright (c) 2016 Patrick Mazulo. All rights reserved.
Copyright (c) 2017 Haochen Wu. All rights reserved.
Copyright (c) 2017 Jon Lund Steffensen. All rights reserved.
Copyright (c) 2017 Henddher Pedroza. All rights reserved.


MIT License:
Expand Down
16 changes: 15 additions & 1 deletion README.rst
Expand Up @@ -657,10 +657,24 @@ To run the tests:
* cd into the tests directory, and then clone the package
github.com/pmaupin/static_pdfs into a subdirectory (also named
static_pdfs).
* Now the tests may be run from that directory using unittest, or
* Now the tests may be run from tests directory using unittest, or
py.test, or nose.
* travisci is used at github, and runs the tests with py.test

.. code-block:: bash

    $ pip install pytest
    $ pip install reportlab
    $ pwd
    <...>/pdfrw/tests
    $ git clone https://github.com/pmaupin/static_pdfs
    $ ln -s ../pdfrw
    $ pytest

To run a single test-case:

.. code-block:: bash

    $ pytest test_roundtrip.py -k "test_compress_9f98322c243fe67726d56ccfa8e0885b.pdf"

Other libraries
=====================

Expand Down
2 changes: 1 addition & 1 deletion pdfrw/objects/pdfdict.py
Expand Up @@ -144,7 +144,7 @@ def get(self, key, dictget=dict.get, isinstance=isinstance,
if value is not None:
dict.__setitem__(self, key, value)
else:
del self[name]
del self[key]
return value

def __getitem__(self, key):
Expand Down
119 changes: 98 additions & 21 deletions pdfrw/uncompress.py
Expand Up @@ -15,7 +15,7 @@
from .objects import PdfDict, PdfName, PdfArray
from .errors import log
from .py23_diffs import zlib, xrange, from_array, convert_load, convert_store

import math

def streamobjects(mylist, isinstance=isinstance, PdfDict=PdfDict):
for obj in mylist:
Expand Down Expand Up @@ -81,6 +81,98 @@ def uncompress(mylist, leave_raw=False, warnings=set(),
ok = False
return ok

def flate_png_impl(data, predictor=1, columns=1, colors=1, bpc=8):
    ''' Undo the PNG row filters on (already inflated) Flate data.

        data is treated as a sequence of scanlines, each made of one
        filter-type byte followed by
        ((columns * colors * bpc) + 7) // 8 data bytes.  Every
        scanline is reconstructed in turn according to its filter
        type (0 None, 1 Sub, 2 Up, 3 Average, 4 Paeth), using the
        previously *reconstructed* scanline as the prior row (all
        zeros for the first scanline).

        When predictor is 15 the data is zero-padded up to a whole
        number of scanlines; predictor is not otherwise examined.

        Returns (rows, None) on success, where rows is an
        array.array('B') holding the reconstructed scanlines with
        the filter-type bytes removed.  Returns (None, message) when
        the data is not a whole number of scanlines or when a
        scanline uses an unknown filter type.  The data passed in
        is copied, never modified.
    '''

    # http://www.libpng.org/pub/png/spec/1.2/PNG-Filters.html
    # https://www.w3.org/TR/2003/REC-PNG-20031110/#9Filters
    # Reconstruction functions
    # x: the byte being filtered;
    # a: the byte corresponding to x in the pixel immediately before
    #    the pixel containing x (or the byte immediately before x,
    #    when the bit depth is less than 8);
    # b: the byte corresponding to x in the previous scanline;
    # c: the byte corresponding to b in the pixel immediately before
    #    the pixel containing b (or the byte immediately before b,
    #    when the bit depth is less than 8).
    #
    # Every helper rewrites data[start:start + length] in place.

    def paeth_predictor(a, b, c):
        # Pick whichever of a (left), b (up), c (up-left) is closest
        # to a + b - c; ties are resolved in the order a, b, c.
        p = a + b - c
        pa = abs(p - a)
        pb = abs(p - b)
        pc = abs(p - c)
        if pa <= pb and pa <= pc:
            return a
        elif pb <= pc:
            return b
        else:
            return c

    def subfilter(data, prior_row_data, start, length, pixel_size):
        # filter type 1: Sub
        # Recon(x) = Filt(x) + Recon(a)
        # The first pixel has no left neighbour (a == 0), so it is
        # already reconstructed and the loop starts one pixel in.
        for i in xrange(start + pixel_size, start + length):
            data[i] = (data[i] + data[i - pixel_size]) % 256

    def upfilter(data, prior_row_data, start, length, pixel_size):
        # filter type 2: Up
        # Recon(x) = Filt(x) + Recon(b)
        for i in xrange(length):
            data[start + i] = (data[start + i] + prior_row_data[i]) % 256

    def avgfilter(data, prior_row_data, start, length, pixel_size):
        # filter type 3: Avg
        # Recon(x) = Filt(x) + floor((Recon(a) + Recon(b)) / 2)
        for i in xrange(length):
            left = data[start + i - pixel_size] if i >= pixel_size else 0
            up = prior_row_data[i]
            # Both operands are non-negative ints, so // is the floor.
            data[start + i] = (data[start + i] + (left + up) // 2) % 256

    def paethfilter(data, prior_row_data, start, length, pixel_size):
        # filter type 4: Paeth
        # Recon(x) = Filt(x) + PaethPredictor(Recon(a), Recon(b), Recon(c))
        for i in xrange(length):
            if i >= pixel_size:
                left = data[start + i - pixel_size]
                up_left = prior_row_data[i - pixel_size]
            else:
                left = up_left = 0
            up = prior_row_data[i]
            data[start + i] = (
                data[start + i] + paeth_predictor(left, up, up_left)) % 256

    # Filter type 0 (None) needs no work and is handled in the loop.
    reconstruct = {
        1: subfilter,
        2: upfilter,
        3: avgfilter,
        4: paethfilter,
    }

    columnbytes = ((columns * colors * bpc) + 7) // 8
    pixel_size = (colors * bpc + 7) // 8
    data = array.array('B', data)  # private, mutable copy
    rowlen = columnbytes + 1  # filter-type byte + scanline bytes
    if predictor == 15:
        padding = (rowlen - len(data)) % rowlen
        data.extend([0] * padding)
    if len(data) % rowlen:
        # Report through the error channel instead of an assert, which
        # would vanish under "python -O" and crash the caller otherwise.
        return None, ('PNG data length %d is not a multiple of row length %d'
                      % (len(data), rowlen))

    # Collect the reconstructed scanlines in one pass rather than
    # popping every filter-type byte out of data afterwards (each pop
    # shifts the remainder of the array).
    result = array.array('B')
    prior_row_data = array.array('B', [0] * columnbytes)
    for row_index in xrange(0, len(data), rowlen):
        filter_type = data[row_index]
        start = row_index + 1
        if filter_type:
            defilter = reconstruct.get(filter_type)
            if defilter is None:
                return None, 'Unsupported PNG filter %d' % filter_type
            defilter(data, prior_row_data, start, columnbytes, pixel_size)
        # The next scanline must see this one as reconstructed,
        # without its filter-type byte.
        prior_row_data = data[start:start + columnbytes]
        result.extend(prior_row_data)

    return result, None

def flate_png(data, predictor=1, columns=1, colors=1, bpc=8):
''' PNG prediction is used to make certain kinds of data
Expand All @@ -95,23 +187,8 @@ def flate_png(data, predictor=1, columns=1, colors=1, bpc=8):
this technique for Xref stream objects, which are
quite regular.
'''
columnbytes = ((columns * colors * bpc) + 7) // 8
data = array.array('B', data)
rowlen = columnbytes + 1
if predictor == 15:
padding = (rowlen - len(data)) % rowlen
data.extend([0] * padding)
assert len(data) % rowlen == 0
rows = xrange(0, len(data), rowlen)
for row_index in rows:
offset = data[row_index]
if offset >= 2:
if offset > 2:
return None, 'Unsupported PNG filter %d' % offset
offset = rowlen if row_index else 0
if offset:
for index in xrange(row_index + 1, row_index + rowlen):
data[index] = (data[index] + data[index - offset]) % 256
for row_index in reversed(rows):
data.pop(row_index)
return from_array(data), None
d, e = flate_png_impl(data, predictor, columns, colors, bpc)
if d is not None:
d = from_array(d)
return d, e

0 comments on commit 6c89216

Please sign in to comment.