diff --git a/Tests/images/expected_to_read.jp2 b/Tests/images/expected_to_read.jp2 new file mode 100644 index 00000000000..d8029a0d3a0 Binary files /dev/null and b/Tests/images/expected_to_read.jp2 differ diff --git a/Tests/images/invalid_header_length.jp2 b/Tests/images/invalid_header_length.jp2 new file mode 100644 index 00000000000..c0c14f42160 Binary files /dev/null and b/Tests/images/invalid_header_length.jp2 differ diff --git a/Tests/images/not_enough_data.jp2 b/Tests/images/not_enough_data.jp2 new file mode 100644 index 00000000000..2d28bb5e96b Binary files /dev/null and b/Tests/images/not_enough_data.jp2 differ diff --git a/Tests/images/zero_dpi.jp2 b/Tests/images/zero_dpi.jp2 new file mode 100644 index 00000000000..079271fc6d8 Binary files /dev/null and b/Tests/images/zero_dpi.jp2 differ diff --git a/Tests/test_file_jpeg2k.py b/Tests/test_file_jpeg2k.py index 20280a57918..2abb2bcfbe0 100644 --- a/Tests/test_file_jpeg2k.py +++ b/Tests/test_file_jpeg2k.py @@ -4,7 +4,7 @@ import pytest -from PIL import Image, ImageFile, Jpeg2KImagePlugin, features +from PIL import Image, ImageFile, Jpeg2KImagePlugin, UnidentifiedImageError, features from .helper import ( assert_image_equal, @@ -151,6 +151,28 @@ def test_reduce(): assert im.size == (40, 30) +def test_load_dpi(): + with Image.open("Tests/images/test-card-lossless.jp2") as im: + assert im.info["dpi"] == (71.9836, 71.9836) + + with Image.open("Tests/images/zero_dpi.jp2") as im: + assert "dpi" not in im.info + + +def test_header_errors(): + for path in ( + "Tests/images/invalid_header_length.jp2", + "Tests/images/not_enough_data.jp2", + ): + with pytest.raises(UnidentifiedImageError): + with Image.open(path): + pass + + with pytest.raises(OSError): + with Image.open("Tests/images/expected_to_read.jp2"): + pass + + def test_layers_type(tmp_path): outfile = str(tmp_path / "temp_layers.jp2") for quality_layers in [[100, 50, 10], (100, 50, 10), None]: diff --git a/src/PIL/Jpeg2KImagePlugin.py b/src/PIL/Jpeg2KImagePlugin.py index 0b0d433db41..0904b241be7 100644 --- a/src/PIL/Jpeg2KImagePlugin.py +++ b/src/PIL/Jpeg2KImagePlugin.py @@ -6,6 +6,7 @@ # # History: # 2014-03-12 ajh Created +# 2021-06-30 rogermb Extract dpi information from the 'resc' header box # # Copyright (c) 2014 Coriolis Systems Limited # Copyright (c) 2014 Alastair Houghton @@ -19,6 +20,79 @@ from . import Image, ImageFile +class BoxReader: + """ + A small helper class to read fields stored in JPEG2000 header boxes + and to easily step into and read sub-boxes. + """ + + def __init__(self, fp, length=-1): + self.fp = fp + self.has_length = length >= 0 + self.length = length + self.remaining_in_box = -1 + + def _can_read(self, num_bytes): + if self.has_length and self.fp.tell() + num_bytes > self.length: + # Outside box: ensure we don't read past the known file length + return False + if self.remaining_in_box >= 0: + # Inside box contents: ensure read does not go past box boundaries + return num_bytes <= self.remaining_in_box + else: + return True # No length known, just read + + def _read_bytes(self, num_bytes): + if not self._can_read(num_bytes): + raise SyntaxError("Not enough data in header") + + data = self.fp.read(num_bytes) + if len(data) < num_bytes: + raise OSError( + f"Expected to read {num_bytes} bytes but only got {len(data)}." + ) + + if self.remaining_in_box > 0: + self.remaining_in_box -= num_bytes + return data + + def read_fields(self, field_format): + size = struct.calcsize(field_format) + data = self._read_bytes(size) + return struct.unpack(field_format, data) + + def read_boxes(self): + size = self.remaining_in_box + data = self._read_bytes(size) + return BoxReader(io.BytesIO(data), size) + + def has_next_box(self): + if self.has_length: + return self.fp.tell() + self.remaining_in_box < self.length + else: + return True + + def next_box_type(self): + # Skip the rest of the box if it has not been read + if self.remaining_in_box > 0: + self.fp.seek(self.remaining_in_box, os.SEEK_CUR) + self.remaining_in_box = -1 + + # Read the length and type of the next box + lbox, tbox = self.read_fields(">I4s") + if lbox == 1: + lbox = self.read_fields(">Q")[0] + hlen = 16 + else: + hlen = 8 + + if lbox < hlen or not self._can_read(lbox - hlen): + raise SyntaxError("Invalid header length") + + self.remaining_in_box = lbox - hlen + return tbox + + def _parse_codestream(fp): """Parse the JPEG 2000 codestream to extract the size and component count from the SIZ marker segment, returning a PIL (size, mode) tuple.""" @@ -53,55 +127,45 @@ def _parse_codestream(fp): return (size, mode) +def _res_to_dpi(num, denom, exp): + """Convert JPEG2000's (numerator, denominator, exponent-base-10) resolution, + calculated as (num / denom) * 10^exp and stored in dots per meter, + to floating-point dots per inch.""" + if denom != 0: + return (254 * num * (10 ** exp)) / (10000 * denom) + + def _parse_jp2_header(fp): - """Parse the JP2 header box to extract size, component count and - color space information, returning a (size, mode, mimetype) tuple.""" + """Parse the JP2 header box to extract size, component count, + color space information, and optionally DPI information, + returning a (size, mode, mimetype, dpi) tuple.""" # Find the JP2 header box + reader = BoxReader(fp) header = None mimetype = None - while True: - lbox, tbox = struct.unpack(">I4s", fp.read(8)) - if lbox == 1: - lbox = struct.unpack(">Q", fp.read(8))[0] - hlen = 16 - else: - hlen = 8 - - if lbox < hlen: - raise SyntaxError("Invalid JP2 header length") + while reader.has_next_box(): + tbox = reader.next_box_type() if tbox == b"jp2h": - header = fp.read(lbox - hlen) + header = reader.read_boxes() break elif tbox == b"ftyp": - if fp.read(4) == b"jpx ": + if reader.read_fields(">4s")[0] == b"jpx ": mimetype = "image/jpx" - fp.seek(lbox - hlen - 4, os.SEEK_CUR) - else: - fp.seek(lbox - hlen, os.SEEK_CUR) - - if header is None: - raise SyntaxError("could not find JP2 header") size = None mode = None bpc = None nc = None + dpi = None # 2-tuple of DPI info, or None + unkc = 0 # Colorspace information unknown - hio = io.BytesIO(header) - while True: - lbox, tbox = struct.unpack(">I4s", hio.read(8)) - if lbox == 1: - lbox = struct.unpack(">Q", hio.read(8))[0] - hlen = 16 - else: - hlen = 8 - - content = hio.read(lbox - hlen) + while header.has_next_box(): + tbox = header.next_box_type() if tbox == b"ihdr": - height, width, nc, bpc, c, unkc, ipr = struct.unpack(">IIHBBBB", content) + height, width, nc, bpc, c, unkc, ipr = header.read_fields(">IIHBBBB") size = (width, height) if unkc: if nc == 1 and (bpc & 0x7F) > 8: @@ -114,11 +178,10 @@ def _parse_jp2_header(fp): mode = "RGB" elif nc == 4: mode = "RGBA" - break elif tbox == b"colr": - meth, prec, approx = struct.unpack_from(">BBB", content) - if meth == 1: - cs = struct.unpack_from(">I", content, 3)[0] + meth, prec, approx = header.read_fields(">BBB") + if meth == 1 and unkc == 0: + cs = header.read_fields(">I")[0] if cs == 16: # sRGB if nc == 1 and (bpc & 0x7F) > 8: mode = "I;16" @@ -128,7 +191,6 @@ def _parse_jp2_header(fp): mode = "RGB" elif nc == 4: mode = "RGBA" - break elif cs == 17: # grayscale if nc == 1 and (bpc & 0x7F) > 8: mode = "I;16" @@ -136,18 +198,27 @@ def _parse_jp2_header(fp): mode = "L" elif nc == 2: mode = "LA" - break elif cs == 18: # sYCC if nc == 3: mode = "RGB" elif nc == 4: mode = "RGBA" + elif tbox == b"res ": + res = header.read_boxes() + while res.has_next_box(): + tres = res.next_box_type() + if tres == b"resc": + vrcn, vrcd, hrcn, hrcd, vrce, hrce = res.read_fields(">HHHHBB") + hres = _res_to_dpi(hrcn, hrcd, hrce) + vres = _res_to_dpi(vrcn, vrcd, vrce) + if hres is not None and vres is not None: + dpi = (hres, vres) break if size is None or mode is None: - raise SyntaxError("Malformed jp2 header") + raise SyntaxError("Malformed JP2 header") - return (size, mode, mimetype) + return (size, mode, mimetype, dpi) ## @@ -169,7 +240,9 @@ def _open(self): if sig == b"\x00\x00\x00\x0cjP \x0d\x0a\x87\x0a": self.codec = "jp2" header = _parse_jp2_header(self.fp) - self._size, self.mode, self.custom_mimetype = header + self._size, self.mode, self.custom_mimetype, dpi = header + if dpi is not None: + self.info["dpi"] = dpi else: raise SyntaxError("not a JPEG 2000 file")