From c0f2302a29a5625f80c4777862fd8cfa3600c1b7 Mon Sep 17 00:00:00 2001 From: gchqdev227 <62302861+gchqdev227@users.noreply.github.com> Date: Mon, 9 Dec 2024 11:59:20 +0000 Subject: [PATCH 1/7] Allowed mtime to be passed as a datetime --- Lib/gzip.py | 7 +++++++ Lib/test/test_gzip.py | 23 +++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/Lib/gzip.py b/Lib/gzip.py index 1a3c82ce7e0711..e43c6aef0a6da8 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -5,6 +5,7 @@ # based on Andrew Kuchling's minigzip.py distributed with the zlib module +from datetime import datetime import struct, sys, time, os import zlib import builtins @@ -224,6 +225,8 @@ def __init__(self, filename=None, mode=None, -zlib.MAX_WBITS, zlib.DEF_MEM_LEVEL, 0) + if isinstance(mtime, datetime): + mtime = mtime.timestamp() self._write_mtime = mtime self._buffer_size = _WRITE_BUFFER_SIZE self._buffer = io.BufferedWriter(_WriteBufferStream(self), @@ -278,6 +281,8 @@ def _write_gzip_header(self, compresslevel): mtime = self._write_mtime if mtime is None: mtime = time.time() + elif isinstance(mtime, datetime): + mtime = mtime.timestamp() write32u(self.fileobj, int(mtime)) if compresslevel == _COMPRESS_LEVEL_BEST: xfl = b'\002' @@ -591,6 +596,8 @@ def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=0): gzip_data = zlib.compress(data, level=compresslevel, wbits=31) if mtime is None: mtime = time.time() + elif isinstance(mtime, datetime): + mtime = mtime.timestamp() # Reuse gzip header created by zlib, replace mtime and OS byte for # consistency. header = struct.pack("<4sLBB", gzip_data, int(mtime), gzip_data[8], 255) diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py index bf6e1703db8451..c93e9a0f28094c 100644 --- a/Lib/test/test_gzip.py +++ b/Lib/test/test_gzip.py @@ -8,6 +8,7 @@ import struct import sys import unittest +from datetime import datetime, timezone from subprocess import PIPE, Popen from test.support import import_helper from test.support import os_helper @@ -316,6 +317,17 @@ def test_mtime(self): self.assertEqual(dataRead, data1) self.assertEqual(fRead.mtime, mtime) + def test_mtime_as_datetime(self): + mtime = datetime(1973, 11, 29, 21, 33, 9, tzinfo=timezone.utc) + with gzip.GzipFile(self.filename, 'w', mtime = mtime) as fWrite: + fWrite.write(data1) + with gzip.GzipFile(self.filename) as fRead: + self.assertTrue(hasattr(fRead, 'mtime')) + self.assertIsNone(fRead.mtime) + dataRead = fRead.read() + self.assertEqual(dataRead, data1) + self.assertEqual(fRead.mtime, int(mtime.timestamp())) + def test_metadata(self): mtime = 123456789 @@ -713,6 +725,17 @@ def test_compress_mtime(self): f.read(1) # to set mtime attribute self.assertEqual(f.mtime, mtime) + def test_compress_mtime_as_datetime(self): + mtime = datetime(1973, 11, 29, 21, 33, 9, tzinfo=timezone.utc) + for data in [data1, data2]: + for args in [(), (1,), (6,), (9,)]: + with self.subTest(data=data, args=args): + datac = gzip.compress(data, *args, mtime=mtime) + self.assertEqual(type(datac), bytes) + with gzip.GzipFile(fileobj=io.BytesIO(datac), mode="rb") as f: + f.read(1) # to set mtime attribute + self.assertEqual(f.mtime, int(mtime.timestamp())) + def test_compress_mtime_default(self): # test for gh-125260 datac = gzip.compress(data1, mtime=0) From 0e565bcf6c7733f6ffc37f94d7beb544379c2b14 Mon Sep 17 00:00:00 2001 From: gchqdev227 <62302861+gchqdev227@users.noreply.github.com> Date: Tue, 7 Jan 2025 14:08:48 +0000 Subject: [PATCH 2/7] GzipFile now stores mtime internally as a datetime object --- Lib/gzip.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Lib/gzip.py b/Lib/gzip.py index e43c6aef0a6da8..12ea212135fbda 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -5,7 +5,7 @@ # based on Andrew Kuchling's minigzip.py distributed with the zlib module -from datetime import datetime +from datetime import datetime, timezone import struct, sys, time, os import zlib import builtins @@ -225,8 +225,6 @@ def __init__(self, filename=None, mode=None, -zlib.MAX_WBITS, zlib.DEF_MEM_LEVEL, 0) - if isinstance(mtime, datetime): - mtime = mtime.timestamp() self._write_mtime = mtime self._buffer_size = _WRITE_BUFFER_SIZE self._buffer = io.BufferedWriter(_WriteBufferStream(self), @@ -242,7 +240,8 @@ def __init__(self, filename=None, mode=None, @property def mtime(self): """Last modification time read from stream, or None""" - return self._buffer.raw._last_mtime + mtime = self._buffer.raw._last_mtime + return int(mtime.timestamp()) if mtime is not None else None def __repr__(self): s = repr(self.fileobj) @@ -484,7 +483,7 @@ def _read_gzip_header(fp): break if flag & FHCRC: _read_exact(fp, 2) # Read & discard the 16-bit header CRC - return last_mtime + return datetime.fromtimestamp(last_mtime, tz=timezone.utc) class _GzipReader(_compression.DecompressReader): From 9c24e77c3b8c4829dfbe5b6b1c8f5e0fa0315c47 Mon Sep 17 00:00:00 2001 From: gchqdev227 <62302861+gchqdev227@users.noreply.github.com> Date: Tue, 7 Jan 2025 14:09:17 +0000 Subject: [PATCH 3/7] Added indications in the docs that mtime can be a datetime object --- Doc/library/gzip.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Doc/library/gzip.rst b/Doc/library/gzip.rst index f24e73517e5767..c28143f0fe6d5e 100644 --- a/Doc/library/gzip.rst +++ b/Doc/library/gzip.rst @@ -105,6 +105,10 @@ The module defines the following items: If *mtime* is omitted or ``None``, the current time is used. Use *mtime* = 0 to generate a compressed stream that does not depend on creation time. + .. versionchanged:: 3.14 + The ``mtime`` parameter can now be a :class:`~datetime.datetime` object as well + as a :class:`float`. + See below for the :attr:`mtime` attribute that is set when decompressing. Calling a :class:`GzipFile` object's :meth:`!close` method does not close @@ -209,6 +213,10 @@ The module defines the following items: For the previous behaviour of using the current time, pass ``None`` to *mtime*. + .. versionchanged:: 3.14 + The ``mtime`` parameter can now be a :class:`~datetime.datetime` object as well + as a :class:`float`. + .. function:: decompress(data) Decompress the *data*, returning a :class:`bytes` object containing the From 85f8fa4874aa173ef2fab14f9f4a9c3473d27071 Mon Sep 17 00:00:00 2001 From: gchqdev227 <62302861+gchqdev227@users.noreply.github.com> Date: Tue, 7 Jan 2025 15:32:28 +0000 Subject: [PATCH 4/7] Added news entry --- .../next/Library/2025-01-07-15-31-37.gh-issue-128584.RNjQh2.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-01-07-15-31-37.gh-issue-128584.RNjQh2.rst diff --git a/Misc/NEWS.d/next/Library/2025-01-07-15-31-37.gh-issue-128584.RNjQh2.rst b/Misc/NEWS.d/next/Library/2025-01-07-15-31-37.gh-issue-128584.RNjQh2.rst new file mode 100644 index 00000000000000..9179a93a289705 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-01-07-15-31-37.gh-issue-128584.RNjQh2.rst @@ -0,0 +1,2 @@ +Allow the mtime parameters in gzip.compress and gzip.GzipFile to be datetime +objects. From e12f0217295bcf0477b5c976b531fb6caae66a67 Mon Sep 17 00:00:00 2001 From: gchqdev227 <62302861+gchqdev227@users.noreply.github.com> Date: Thu, 16 Jan 2025 10:13:39 +0000 Subject: [PATCH 5/7] Addressed general triage comments --- Doc/library/gzip.rst | 8 ++++---- .../2025-01-07-15-31-37.gh-issue-128584.RNjQh2.rst | 3 +-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/Doc/library/gzip.rst b/Doc/library/gzip.rst index c28143f0fe6d5e..d38f7eb8e8bf71 100644 --- a/Doc/library/gzip.rst +++ b/Doc/library/gzip.rst @@ -105,8 +105,8 @@ The module defines the following items: If *mtime* is omitted or ``None``, the current time is used. Use *mtime* = 0 to generate a compressed stream that does not depend on creation time. - .. versionchanged:: 3.14 - The ``mtime`` parameter can now be a :class:`~datetime.datetime` object as well + .. versionchanged:: next + The *mtime* parameter can now be a :class:`~datetime.datetime` object as well as a :class:`float`. See below for the :attr:`mtime` attribute that is set when decompressing. @@ -213,8 +213,8 @@ The module defines the following items: For the previous behaviour of using the current time, pass ``None`` to *mtime*. - .. versionchanged:: 3.14 - The ``mtime`` parameter can now be a :class:`~datetime.datetime` object as well + .. versionchanged:: next + The *mtime* parameter can now be a :class:`~datetime.datetime` object as well as a :class:`float`. .. function:: decompress(data) diff --git a/Misc/NEWS.d/next/Library/2025-01-07-15-31-37.gh-issue-128584.RNjQh2.rst b/Misc/NEWS.d/next/Library/2025-01-07-15-31-37.gh-issue-128584.RNjQh2.rst index 9179a93a289705..1dd6925960994f 100644 --- a/Misc/NEWS.d/next/Library/2025-01-07-15-31-37.gh-issue-128584.RNjQh2.rst +++ b/Misc/NEWS.d/next/Library/2025-01-07-15-31-37.gh-issue-128584.RNjQh2.rst @@ -1,2 +1 @@ -Allow the mtime parameters in gzip.compress and gzip.GzipFile to be datetime -objects. +Allow the *mtime* parameters in :func:`gzip.compress` and :class:`gzip.GzipFile` to be :class:`datetime.datetime` objects. From 431c18b4fbe59926c508c38138b6e47a792e6c40 Mon Sep 17 00:00:00 2001 From: gchqdev227 <62302861+gchqdev227@users.noreply.github.com> Date: Thu, 30 Jan 2025 10:11:32 +0000 Subject: [PATCH 6/7] ValueError is now raised if mtime is a naive datetime object --- Doc/library/gzip.rst | 6 ++++-- Lib/gzip.py | 4 ++++ Lib/test/test_gzip.py | 16 ++++++++++++++++ 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/Doc/library/gzip.rst b/Doc/library/gzip.rst index d38f7eb8e8bf71..1a3b094b590447 100644 --- a/Doc/library/gzip.rst +++ b/Doc/library/gzip.rst @@ -107,7 +107,8 @@ The module defines the following items: .. versionchanged:: next The *mtime* parameter can now be a :class:`~datetime.datetime` object as well - as a :class:`float`. + as a :class:`float`. If not :ref:`timezone aware ` then a + :class:`ValueError` will be raised. See below for the :attr:`mtime` attribute that is set when decompressing. @@ -215,7 +216,8 @@ The module defines the following items: .. versionchanged:: next The *mtime* parameter can now be a :class:`~datetime.datetime` object as well - as a :class:`float`. + as a :class:`float`. If not :ref:`timezone aware ` then a + :class:`ValueError` will be raised. .. function:: decompress(data) diff --git a/Lib/gzip.py b/Lib/gzip.py index 12ea212135fbda..17a83a2323818c 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -281,6 +281,8 @@ def _write_gzip_header(self, compresslevel): if mtime is None: mtime = time.time() elif isinstance(mtime, datetime): + if mtime.tzinfo is None: + raise ValueError("Refusing to write naive datetime to Gzip header") mtime = mtime.timestamp() write32u(self.fileobj, int(mtime)) if compresslevel == _COMPRESS_LEVEL_BEST: @@ -596,6 +598,8 @@ def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=0): if mtime is None: mtime = time.time() elif isinstance(mtime, datetime): + if mtime.tzinfo is None: + raise ValueError("Refusing to write naive datetime to Gzip header") mtime = mtime.timestamp() # Reuse gzip header created by zlib, replace mtime and OS byte for # consistency. diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py index c93e9a0f28094c..b5bb88f4c28ba2 100644 --- a/Lib/test/test_gzip.py +++ b/Lib/test/test_gzip.py @@ -328,6 +328,13 @@ def test_mtime_as_datetime(self): self.assertEqual(dataRead, data1) self.assertEqual(fRead.mtime, int(mtime.timestamp())) + def test_mtime_as_datetime_no_timezone(self): + mtime = datetime(1973, 11, 29, 21, 33, 9) + self.assertIsNone(mtime.tzinfo) + with self.assertRaises(ValueError): + with gzip.GzipFile(self.filename, 'w', mtime = mtime) as fWrite: + fWrite.write(data1) + def test_metadata(self): mtime = 123456789 @@ -736,6 +743,15 @@ def test_compress_mtime_as_datetime(self): f.read(1) # to set mtime attribute self.assertEqual(f.mtime, int(mtime.timestamp())) + def test_compress_mtime_as_datetime_no_timezone(self): + mtime = datetime(1973, 11, 29, 21, 33, 9) + self.assertIsNone(mtime.tzinfo) + for data in [data1, data2]: + for args in [(), (1,), (6,), (9,)]: + with self.subTest(data=data, args=args): + with self.assertRaises(ValueError): + gzip.compress(data, *args, mtime=mtime) + def test_compress_mtime_default(self): # test for gh-125260 datac = gzip.compress(data1, mtime=0) From 0ebcf6c6fec0f69f461fe1342a8b999cdfa0bb17 Mon Sep 17 00:00:00 2001 From: gchqdev227 <62302861+gchqdev227@users.noreply.github.com> Date: Thu, 30 Jan 2025 16:17:27 +0000 Subject: [PATCH 7/7] Checking for naive datetime now occurs in GzipFile.__init__ instead of GzipFile._write_gzip_header to avoid file staying open on raising ValueError. --- Lib/gzip.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/gzip.py b/Lib/gzip.py index 17a83a2323818c..0b3c3a6a3236e7 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -225,6 +225,8 @@ def __init__(self, filename=None, mode=None, -zlib.MAX_WBITS, zlib.DEF_MEM_LEVEL, 0) + if isinstance(mtime, datetime) and mtime.tzinfo is None: + raise ValueError("Refusing to write naive datetime to Gzip header") self._write_mtime = mtime self._buffer_size = _WRITE_BUFFER_SIZE self._buffer = io.BufferedWriter(_WriteBufferStream(self), @@ -281,8 +283,6 @@ def _write_gzip_header(self, compresslevel): if mtime is None: mtime = time.time() elif isinstance(mtime, datetime): - if mtime.tzinfo is None: - raise ValueError("Refusing to write naive datetime to Gzip header") mtime = mtime.timestamp() write32u(self.fileobj, int(mtime)) if compresslevel == _COMPRESS_LEVEL_BEST: