From 430db6d57f18bf735090a2e6bafddcd7bdc3a76f Mon Sep 17 00:00:00 2001 From: Maksym Polshcha Date: Fri, 20 Dec 2019 10:01:20 -0500 Subject: [PATCH 01/10] version updated --- pdfreader/__init__.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pdfreader/__init__.py b/pdfreader/__init__.py index 3a18bce..72577e4 100644 --- a/pdfreader/__init__.py +++ b/pdfreader/__init__.py @@ -5,4 +5,4 @@ register_pdf_encodings() #: package version -__version__ = version = '0.1.3rc1' +__version__ = version = '0.1.3' diff --git a/setup.py b/setup.py index d1a94ea..764eb5a 100755 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ if sys.version_info[:2] < (3, 6): warnings.warn("Python version >= 3.6 required.") -version = '0.1.3rc1' +version = '0.1.3' import os.path From 23b18bff42e2a7a9b46318b0886e95e6a018c1f1 Mon Sep 17 00:00:00 2001 From: Maksym Polshcha Date: Fri, 20 Dec 2019 10:09:29 -0500 Subject: [PATCH 02/10] changelog added --- pdfreader/CHANGELOG.txt | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 pdfreader/CHANGELOG.txt diff --git a/pdfreader/CHANGELOG.txt b/pdfreader/CHANGELOG.txt new file mode 100644 index 0000000..d1c82d7 --- /dev/null +++ b/pdfreader/CHANGELOG.txt @@ -0,0 +1,6 @@ +pdfreader 0.1.3, 2019-12-20 +--------------------------- + - CMap-based text decoding issue fixed + - Multiple filters support for inline images added + - nbspace added to MacRomanEncoding and WinAnsiEncoding + - GraphicsState issue related to losing some state attributes fixed From ff01b931f48fb411f83a7654b2508151f3cfe97b Mon Sep 17 00:00:00 2001 From: Maksym Polshcha Date: Fri, 20 Dec 2019 10:15:48 -0500 Subject: [PATCH 03/10] manifest fixed --- MANIFEST.in | 1 + 1 file changed, 1 insertion(+) diff --git a/MANIFEST.in b/MANIFEST.in index c164a8d..ff00ae3 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -10,6 +10,7 @@ recursive-include doc *.conf recursive-include doc *.png recursive-include doc *.pdf prune venv +prune .venv prune doc/_build 
prune dist exclude example-crash-markdown.txt extract-logo.png fax-from-p8.png mask.png sample-cmap.txt sample-font.type1 \ No newline at end of file From 986d04440d396032a0acd4a00f271dbe33b78e25 Mon Sep 17 00:00:00 2001 From: Maksym Polshcha Date: Fri, 20 Dec 2019 10:22:49 -0500 Subject: [PATCH 04/10] minor doctest improvement --- doc/examples/extract_form_text.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/examples/extract_form_text.rst b/doc/examples/extract_form_text.rst index ceddbd0..a278131 100644 --- a/doc/examples/extract_form_text.rst +++ b/doc/examples/extract_form_text.rst @@ -53,7 +53,7 @@ listed under page resources. The viewer puts them on canvas: .. doctest:: - >>> list(viewer.canvas.forms.keys()) + >>> sorted(list(viewer.canvas.forms.keys())) ['Fm1', 'Fm2', ... 'Fm29', 'Fm30', 'Fm31'] As Form is a kind of "sub-document" every entry in *viewer.canvas.forms* dictionary maps to From 906b043c7813f9daf8e2473454f07775813af6e7 Mon Sep 17 00:00:00 2001 From: Maksym Polshcha Date: Fri, 20 Dec 2019 10:31:37 -0500 Subject: [PATCH 05/10] minor doctest improvement --- doc/examples/extract_fonts.rst | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/doc/examples/extract_fonts.rst b/doc/examples/extract_fonts.rst index 3ac6bbf..82a0123 100644 --- a/doc/examples/extract_fonts.rst +++ b/doc/examples/extract_fonts.rst @@ -32,13 +32,12 @@ Now let's see what fonts the very first page uses: .. doctest:: >>> page = next(doc.pages()) - >>> page.Resources.Font - {'T1_0': , ... } - + >>> page.Resources.Font.keys() + ['T1_0', 'T1_1', 'T1_2', 'TT0', 'TT1'] We see 5 fonts named `T1_0`, `T1_1`, `T1_2`, `TT0` and `TT1`. -As *pdfreader* is a lazy reader the font data has not been read yet. We just see the names and -the references to the objects. +As *pdfreader* is a lazy reader the font data has not been read yet. +We just see the names and the references to the objects. Let's have a look at font `T1_0`. 
From 6be51a7b9aa48d5107de7e41364630403419b376 Mon Sep 17 00:00:00 2001 From: Maksym Polshcha Date: Fri, 20 Dec 2019 10:34:09 -0500 Subject: [PATCH 06/10] minor doctest improvement --- doc/examples/extract_form_text.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/examples/extract_form_text.rst b/doc/examples/extract_form_text.rst index a278131..acccaae 100644 --- a/doc/examples/extract_form_text.rst +++ b/doc/examples/extract_form_text.rst @@ -54,7 +54,7 @@ listed under page resources. The viewer puts them on canvas: .. doctest:: >>> sorted(list(viewer.canvas.forms.keys())) - ['Fm1', 'Fm2', ... 'Fm29', 'Fm30', 'Fm31'] + ['Fm1', 'Fm10', 'Fm11', 'Fm12', 'Fm13', 'Fm14' ...'] As Form is a kind of "sub-document" every entry in *viewer.canvas.forms* dictionary maps to :class:`~pdfreader.viewer.SimpleCanvas` instance: From e8d60fb398533296e0daa3bdb7f36b37e3468d19 Mon Sep 17 00:00:00 2001 From: Maksym Polshcha Date: Fri, 20 Dec 2019 10:35:04 -0500 Subject: [PATCH 07/10] minor doctest improvement --- doc/examples/extract_fonts.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/examples/extract_fonts.rst b/doc/examples/extract_fonts.rst index 82a0123..a0deb0b 100644 --- a/doc/examples/extract_fonts.rst +++ b/doc/examples/extract_fonts.rst @@ -32,7 +32,7 @@ Now let's see what fonts the very first page uses: .. doctest:: >>> page = next(doc.pages()) - >>> page.Resources.Font.keys() + >>> sorted(page.Resources.Font.keys()) ['T1_0', 'T1_1', 'T1_2', 'TT0', 'TT1'] We see 5 fonts named `T1_0`, `T1_1`, `T1_2`, `TT0` and `TT1`. 
From f4b501542fa42236dbe29e3cb376e063a379938e Mon Sep 17 00:00:00 2001 From: Maksym Polshcha Date: Fri, 20 Dec 2019 10:40:01 -0500 Subject: [PATCH 08/10] minor doctest improvement --- doc/examples/extract_form_text.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/examples/extract_form_text.rst b/doc/examples/extract_form_text.rst index acccaae..769e82b 100644 --- a/doc/examples/extract_form_text.rst +++ b/doc/examples/extract_form_text.rst @@ -54,7 +54,7 @@ listed under page resources. The viewer puts them on canvas: .. doctest:: >>> sorted(list(viewer.canvas.forms.keys())) - ['Fm1', 'Fm10', 'Fm11', 'Fm12', 'Fm13', 'Fm14' ...'] + ['Fm1', 'Fm10', 'Fm11', 'Fm12', 'Fm13', 'Fm14',...] As Form is a kind of "sub-document" every entry in *viewer.canvas.forms* dictionary maps to :class:`~pdfreader.viewer.SimpleCanvas` instance: From be1024ca6e40c4526c33f4079fda69f1b7da73e5 Mon Sep 17 00:00:00 2001 From: Maksym Polshcha Date: Fri, 20 Dec 2019 10:51:11 -0500 Subject: [PATCH 09/10] tests fixed --- pdfreader/parsers/base.py | 7 +++++-- pdfreader/parsers/document.py | 6 ++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/pdfreader/parsers/base.py b/pdfreader/parsers/base.py index cb3f216..08f32a2 100644 --- a/pdfreader/parsers/base.py +++ b/pdfreader/parsers/base.py @@ -361,8 +361,11 @@ def dictionary(self): ... /Item4 (OK) ... >> ... >>''' - >>> BasicTypesParser(s, 0).dictionary() - {'Type': 'Example', 'Subtype': 'DictExample', 'Version': Decimal('0.01'), 'IntegerItem': 12, 'StringItem': b'a string', 'ArrayItem': [1, 2], 'ObjRef': , 'SubDictionary': {'Item1': True, 'Item2': False, 'Item3': None, 'Item4': b'OK'}} + >>> expected = {'Type': 'Example', 'Subtype': 'DictExample', 'Version': Decimal('0.01'), 'IntegerItem': 12, + ... 'StringItem': b'a string', 'ArrayItem': [1, 2], 'ObjRef': IndirectReference(12, 0), + ... 
'SubDictionary': {'Item1': True, 'Item2': False, 'Item3': None, 'Item4': b'OK'}} + >>> BasicTypesParser(s, 0).dictionary() == expected + True """ pfx = self.read(2) diff --git a/pdfreader/parsers/document.py b/pdfreader/parsers/document.py index 4b10447..73c807f 100644 --- a/pdfreader/parsers/document.py +++ b/pdfreader/parsers/document.py @@ -194,8 +194,10 @@ def trailer(self): ... /ID [<0102AA> <0102BB>] ... >>''' >>> p = PDFParser(s, 0) - >>> p.trailer() - , 'Info': , 'ID': ['0102AA', '0102BB']}> + >>> expected_params = {'Size': 22, 'Root': IndirectReference(2,0), + ... 'Info': IndirectReference(1,0), 'ID': ['0102AA', '0102BB']} + >>> p.trailer() == Trailer(expected_params) + True """ token = self.read(7) if token != b'trailer': From d49cc052bd2e3bd99d41584a727526a72dbeb167 Mon Sep 17 00:00:00 2001 From: Maksym Polshcha Date: Fri, 20 Dec 2019 10:57:04 -0500 Subject: [PATCH 10/10] language update --- doc/examples/extract_fonts.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/examples/extract_fonts.rst b/doc/examples/extract_fonts.rst index a0deb0b..b8c80a5 100644 --- a/doc/examples/extract_fonts.rst +++ b/doc/examples/extract_fonts.rst @@ -37,7 +37,7 @@ Now let's see what fonts the very first page uses: We see 5 fonts named `T1_0`, `T1_1`, `T1_2`, `TT0` and `TT1`. As *pdfreader* is a lazy reader the font data has not been read yet. -We just see the names and the references to the objects. +We just have the names and the references to the objects. Let's have a look at font `T1_0`.