From 5454f861e2b3c96fa1e6430dc952544670955f69 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 7 Oct 2025 20:15:26 +0300 Subject: [PATCH] gh-139700: Check consistency of the zip64 end of central directory record (GH-139702) Support records with "zip64 extensible data" if there are no bytes prepended to the ZIP file. (cherry picked from commit 162997bb70e067668c039700141770687bc8f267) Co-authored-by: Serhiy Storchaka --- Lib/test/test_zipfile/test_core.py | 82 ++++++++++++++++++- Lib/zipfile/__init__.py | 51 +++++++----- ...-10-07-19-31-34.gh-issue-139700.vNHU1O.rst | 3 + 3 files changed, 113 insertions(+), 23 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2025-10-07-19-31-34.gh-issue-139700.vNHU1O.rst diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index c033059a515db6..6acfefc74d6665 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -898,6 +898,8 @@ def make_zip64_file( self, file_size_64_set=False, file_size_extra=False, compress_size_64_set=False, compress_size_extra=False, header_offset_64_set=False, header_offset_extra=False, + extensible_data=b'', + end_of_central_dir_size=None, offset_to_end_of_central_dir=None, ): """Generate bytes sequence for a zip with (incomplete) zip64 data. @@ -951,6 +953,12 @@ def make_zip64_file( central_dir_size = struct.pack(' 2: inferred = concat + offset_cd @@ -289,16 +286,15 @@ def _EndRecData64(fpin, offset, endrec): """ Read the ZIP64 end-of-archive records and use that to update endrec """ - try: - fpin.seek(offset - sizeEndCentDir64Locator, 2) - except OSError: - # If the seek fails, the file is not large enough to contain a ZIP64 + offset -= sizeEndCentDir64Locator + if offset < 0: + # The file is not large enough to contain a ZIP64 # end-of-archive record, so just return the end record we were given. return endrec - + fpin.seek(offset) data = fpin.read(sizeEndCentDir64Locator) if len(data) != sizeEndCentDir64Locator: - return endrec + raise OSError("Unknown I/O error") sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data) if sig != stringEndArchive64Locator: return endrec @@ -306,16 +302,33 @@ def _EndRecData64(fpin, offset, endrec): if diskno != 0 or disks > 1: raise BadZipFile("zipfiles that span multiple disks are not supported") - # Assume no 'zip64 extensible data' - fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2) + offset -= sizeEndCentDir64 + if reloff > offset: + raise BadZipFile("Corrupt zip64 end of central directory locator") + # First, check the assumption that there is no prepended data. + fpin.seek(reloff) + extrasz = offset - reloff data = fpin.read(sizeEndCentDir64) if len(data) != sizeEndCentDir64: - return endrec + raise OSError("Unknown I/O error") + if not data.startswith(stringEndArchive64) and reloff != offset: + # Since we already have seen the Zip64 EOCD Locator, it's + # possible we got here because there is prepended data. + # Assume no 'zip64 extensible data' + fpin.seek(offset) + extrasz = 0 + data = fpin.read(sizeEndCentDir64) + if len(data) != sizeEndCentDir64: + raise OSError("Unknown I/O error") + if not data.startswith(stringEndArchive64): + raise BadZipFile("Zip64 end of central directory record not found") + sig, sz, create_version, read_version, disk_num, disk_dir, \ dircount, dircount2, dirsize, diroffset = \ struct.unpack(structEndArchive64, data) - if sig != stringEndArchive64: - return endrec + if (diroffset + dirsize != reloff or + sz + 12 != sizeEndCentDir64 + extrasz): + raise BadZipFile("Corrupt zip64 end of central directory record") # Update the original endrec using data from the ZIP64 record endrec[_ECD_SIGNATURE] = sig @@ -325,6 +338,7 @@ def _EndRecData64(fpin, offset, endrec): endrec[_ECD_ENTRIES_TOTAL] = dircount2 endrec[_ECD_SIZE] = dirsize endrec[_ECD_OFFSET] = diroffset + endrec[_ECD_LOCATION] = offset - extrasz return endrec @@ -358,7 +372,7 @@ def _EndRecData(fpin): endrec.append(filesize - sizeEndCentDir) # Try to read the "Zip64 end of central directory" structure - return _EndRecData64(fpin, -sizeEndCentDir, endrec) + return _EndRecData64(fpin, filesize - sizeEndCentDir, endrec) # Either this is not a ZIP file, or it is a ZIP file with an archive # comment. Search the end of the file for the "end of central directory" @@ -382,8 +396,7 @@ def _EndRecData(fpin): endrec.append(maxCommentStart + start) # Try to read the "Zip64 end of central directory" structure - return _EndRecData64(fpin, maxCommentStart + start - filesize, - endrec) + return _EndRecData64(fpin, maxCommentStart + start, endrec) # Unable to find a valid end of central directory structure return None @@ -2142,7 +2155,7 @@ def _write_end_record(self): " would require ZIP64 extensions") zip64endrec = struct.pack( structEndArchive64, stringEndArchive64, - 44, 45, 45, 0, 0, centDirCount, centDirCount, + sizeEndCentDir64 - 12, 45, 45, 0, 0, centDirCount, centDirCount, centDirSize, centDirOffset) self.fp.write(zip64endrec) diff --git a/Misc/NEWS.d/next/Security/2025-10-07-19-31-34.gh-issue-139700.vNHU1O.rst b/Misc/NEWS.d/next/Security/2025-10-07-19-31-34.gh-issue-139700.vNHU1O.rst new file mode 100644 index 00000000000000..a8e7a1f1878c6b --- /dev/null +++ b/Misc/NEWS.d/next/Security/2025-10-07-19-31-34.gh-issue-139700.vNHU1O.rst @@ -0,0 +1,3 @@ +Check consistency of the zip64 end of central directory record. Support +records with "zip64 extensible data" if there are no bytes prepended to the +ZIP file.