Skip to content

Commit

Permalink
Merge branch 'main' into decimal-precision
Browse files Browse the repository at this point in the history
  • Loading branch information
programmarchy committed Sep 5, 2022
2 parents a71d15b + 4073b2a commit 2cfe102
Show file tree
Hide file tree
Showing 26 changed files with 642 additions and 119 deletions.
38 changes: 35 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,37 @@
# CHANGELOG

## Version 2.10.5, 2022-09-04

### New Features (ENH)
- Process XRefStm (#1297)
- Auto-detect RTL for text extraction (#1309)

### Bug Fixes (BUG)
- Avoid scaling cropbox twice (#1314)

### Robustness (ROB)
- Fix offset correction in revised PDF (#1318)
- Crop data of /U and /O in encryption dictionary to 48 bytes (#1317)
- MultiLine bfrange in cmap (#1299)
- Cope with 2 digit codes in bfchar (#1310)
- Accept '/annn' charset as ASCII code (#1316)
- Log errors during Float / NumberObject initialization (#1315)
- Cope with corrupted entries in xref table (#1300)

### Documentation (DOC)
- Migration guide (PyPDF2 1.x ➔ 2.x) (#1324)
- Creating a coverage report (#1319)
- Fix AnnotationBuilder.free_text example (#1311)
- Fix usage of page.scale by replacing it with page.scale_by (#1313)

### Maintenance (MAINT)
- PdfReaderProtocol (#1303)
- Throw PdfReadError if Trailer can't be read (#1298)
- Remove catching OverflowException (#1302)

Full Changelog: https://github.com/py-pdf/PyPDF2/compare/2.10.4...2.10.5


## Version 2.10.4, 2022-08-28

### Robustness (ROB)
Expand Down Expand Up @@ -419,7 +451,7 @@ The highlight of this release is improved support for file encryption
- Apply improvements to _utils suggested by perflint (#993)

### Robustness (ROB)
- utf-16-be\' codec can\'t decode (...) (#995)
- utf-16-be codec can't decode (...) (#995)

### Documentation (DOC)
- Remove reference to Scripts (#987)
Expand Down Expand Up @@ -465,7 +497,7 @@ e.g. Russian / Chinese / Japanese / Korean / Arabic.
- Optimize read_next_end_line (#646)

### Bug Fixes (BUG)
- Adobe Acrobat \'Would you like to save this file?\' (#970)
- Adobe Acrobat 'Would you like to save this file?' (#970)

### Documentation (DOC)
- Notes on annotations (#982)
Expand Down Expand Up @@ -905,7 +937,7 @@ large PDF files (#808) 🎉

### Maintenance (MAINT)
- Validate PDF magic byte in strict mode (#814)
- Make PdfFileMerger.addBookmark() behave life PdfFileWriters\' (#339)
- Make PdfFileMerger.addBookmark() behave like PdfFileWriter's (#339)
- Quadratic runtime while parsing reduced to linear (#808)

### Testing (TST)
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ clean:
rm -rf tests/__pycache__ PyPDF2/__pycache__ Image9.png htmlcov docs/_build dist dont_commit_merged.pdf dont_commit_writer.pdf PyPDF2.egg-info PyPDF2_pdfLocation.txt .pytest_cache .mypy_cache .benchmarks

test:
pytest tests --cov --cov-report term-missing -vv --cov-report html --durations=3 --timeout=30
pytest tests --cov --cov-report term-missing -vv --cov-report html --durations=3 --timeout=60 PyPDF2

testtype:
pytest tests --cov --cov-report term-missing -vv --cov-report html --durations=3 --timeout=30 --typeguard-packages=PyPDF2
Expand Down
77 changes: 53 additions & 24 deletions PyPDF2/_cmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,10 +180,13 @@ def parse_to_unicode(
return {}, space_code, []
process_rg: bool = False
process_char: bool = False
multiline_rg: Union[
None, Tuple[int, int]
] = None # tuple = (current_char, remaining size) ; cf #1285 for example of file
cm = prepare_cm(ft)
for l in cm.split(b"\n"):
process_rg, process_char = process_cm_line(
l.strip(b" "), process_rg, process_char, map_dict, int_entry
process_rg, process_char, multiline_rg = process_cm_line(
l.strip(b" "), process_rg, process_char, multiline_rg, map_dict, int_entry
)

for a, value in map_dict.items():
Expand Down Expand Up @@ -228,11 +231,12 @@ def process_cm_line(
l: bytes,
process_rg: bool,
process_char: bool,
multiline_rg: Union[None, Tuple[int, int]],
map_dict: Dict[Any, Any],
int_entry: List[int],
) -> Tuple[bool, bool]:
) -> Tuple[bool, bool, Union[None, Tuple[int, int]]]:
if l in (b"", b" ") or l[0] == 37: # 37 = %
return process_rg, process_char
return process_rg, process_char, multiline_rg
if b"beginbfrange" in l:
process_rg = True
elif b"endbfrange" in l:
Expand All @@ -242,22 +246,29 @@ def process_cm_line(
elif b"endbfchar" in l:
process_char = False
elif process_rg:
parse_bfrange(l, map_dict, int_entry)
multiline_rg = parse_bfrange(l, map_dict, int_entry, multiline_rg)
elif process_char:
parse_bfchar(l, map_dict, int_entry)
return process_rg, process_char
return process_rg, process_char, multiline_rg


def parse_bfrange(l: bytes, map_dict: Dict[Any, Any], int_entry: List[int]) -> None:
def parse_bfrange(
l: bytes,
map_dict: Dict[Any, Any],
int_entry: List[int],
multiline_rg: Union[None, Tuple[int, int]],
) -> Union[None, Tuple[int, int]]:
lst = [x for x in l.split(b" ") if x]
a = int(lst[0], 16)
b = int(lst[1], 16)
closure_found = False
nbi = len(lst[0])
map_dict[-1] = nbi // 2
fmt = b"%%0%dX" % nbi
if lst[2] == b"[":
for sq in lst[3:]:
if multiline_rg is not None:
a = multiline_rg[0] # a, b not in the current line
b = multiline_rg[1]
for sq in lst[1:]:
if sq == b"]":
closure_found = True
break
map_dict[
unhexlify(fmt % a).decode(
Expand All @@ -268,18 +279,36 @@ def parse_bfrange(l: bytes, map_dict: Dict[Any, Any], int_entry: List[int]) -> N
int_entry.append(a)
a += 1
else:
c = int(lst[2], 16)
fmt2 = b"%%0%dX" % max(4, len(lst[2]))
while a <= b:
map_dict[
unhexlify(fmt % a).decode(
"charmap" if map_dict[-1] == 1 else "utf-16-be",
"surrogatepass",
)
] = unhexlify(fmt2 % c).decode("utf-16-be", "surrogatepass")
int_entry.append(a)
a += 1
c += 1
a = int(lst[0], 16)
b = int(lst[1], 16)
if lst[2] == b"[":
for sq in lst[3:]:
if sq == b"]":
closure_found = True
break
map_dict[
unhexlify(fmt % a).decode(
"charmap" if map_dict[-1] == 1 else "utf-16-be",
"surrogatepass",
)
] = unhexlify(sq).decode("utf-16-be", "surrogatepass")
int_entry.append(a)
a += 1
else: # case without list
c = int(lst[2], 16)
fmt2 = b"%%0%dX" % max(4, len(lst[2]))
closure_found = True
while a <= b:
map_dict[
unhexlify(fmt % a).decode(
"charmap" if map_dict[-1] == 1 else "utf-16-be",
"surrogatepass",
)
] = unhexlify(fmt2 % c).decode("utf-16-be", "surrogatepass")
int_entry.append(a)
a += 1
c += 1
return None if closure_found else (a, b)


def parse_bfchar(l: bytes, map_dict: Dict[Any, Any], int_entry: List[int]) -> None:
Expand All @@ -290,7 +319,7 @@ def parse_bfchar(l: bytes, map_dict: Dict[Any, Any], int_entry: List[int]) -> No
# placeholder (see above) means empty string
if lst[1] != b".":
map_to = unhexlify(lst[1]).decode(
"utf-16-be", "surrogatepass"
"charmap" if len(lst[1]) < 4 else "utf-16-be", "surrogatepass"
) # join is here as some cases where the code was split
map_dict[
unhexlify(lst[0]).decode(
Expand Down
10 changes: 10 additions & 0 deletions PyPDF2/_codecs/adobe_glyphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -13425,3 +13425,13 @@
"/zukatakana": "\u30BA",
"/zwarakay": "\u0659",
}


def _complete() -> None:
    """Extend ``adobe_glyphs`` in place with entries the static table lacks.

    Adds the names ``/a0`` through ``/a255``, each mapped to the character
    with that code point, and maps ``/.notdef`` to a placeholder square.
    """
    # Only the dict's contents change (the name is never rebound), so no
    # ``global`` declaration is required to reach the module-level table.
    adobe_glyphs.update({f"/a{code}": chr(code) for code in range(256)})
    adobe_glyphs["/.notdef"] = "□"


_complete()
11 changes: 7 additions & 4 deletions PyPDF2/_encryption.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def __init__(self, key: bytes) -> None:
self.S[i], self.S[j] = self.S[j], self.S[i]

def encrypt(self, data: bytes) -> bytes:
S = [x for x in self.S]
S = list(self.S)
out = list(0 for _ in range(len(data)))
i, j = 0, 0
for k in range(len(data)):
Expand Down Expand Up @@ -516,10 +516,13 @@ def verify_owner_password(
should match the value in the P key.
"""
password = password[:127]
if AlgV5.calculate_hash(R, password, o_value[32:40], u_value) != o_value[:32]:
if (
AlgV5.calculate_hash(R, password, o_value[32:40], u_value[:48])
!= o_value[:32]
):
return b""
iv = bytes(0 for _ in range(16))
tmp_key = AlgV5.calculate_hash(R, password, o_value[40:], u_value)
tmp_key = AlgV5.calculate_hash(R, password, o_value[40:48], u_value[:48])
key = AES_CBC_decrypt(tmp_key, iv, oe_value)
return key

Expand All @@ -532,7 +535,7 @@ def verify_user_password(
if AlgV5.calculate_hash(R, password, u_value[32:40], b"") != u_value[:32]:
return b""
iv = bytes(0 for _ in range(16))
tmp_key = AlgV5.calculate_hash(R, password, u_value[40:], b"")
tmp_key = AlgV5.calculate_hash(R, password, u_value[40:48], b"")
return AES_CBC_decrypt(tmp_key, iv, ue_value)

@staticmethod
Expand Down
22 changes: 11 additions & 11 deletions PyPDF2/_merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,21 +541,22 @@ def _write_outline_item_on_page(
)

def _associate_dests_to_pages(self, pages: List[_MergedPage]) -> None:
for nd in self.named_dests:
for named_dest in self.named_dests:
pageno = None
np = nd["/Page"]
np = named_dest["/Page"]

if isinstance(np, NumberObject):
continue

for p in pages:
if np.get_object() == p.pagedata.get_object():
pageno = p.id
for page in pages:
if np.get_object() == page.pagedata.get_object():
pageno = page.id

if pageno is not None:
nd[NameObject("/Page")] = NumberObject(pageno)
else:
raise ValueError(f"Unresolved named destination '{nd['/Title']}'")
if pageno is None:
raise ValueError(
f"Unresolved named destination '{named_dest['/Title']}'"
)
named_dest[NameObject("/Page")] = NumberObject(pageno)

@deprecate_bookmark(bookmarks="outline")
def _associate_outline_items_to_pages(
Expand Down Expand Up @@ -612,12 +613,11 @@ def find_bookmark(
self,
outline_item: Dict[str, Any],
root: Optional[OutlineType] = None,
) -> Optional[List[int]]:
) -> Optional[List[int]]: # pragma: no cover
"""
.. deprecated:: 2.9.0
Use :meth:`find_outline_item` instead.
"""

return self.find_outline_item(outline_item, root)

def add_outline_item(
Expand Down

0 comments on commit 2cfe102

Please sign in to comment.