Skip to content

Commit

Permalink
[3.11] gh-115809: Improve TimedRotatingFileHandler.getFilesToDelete() (
Browse files Browse the repository at this point in the history
…GH-115812) (GH-116262)

Improve algorithm for computing which rolled-over log files to delete
in logging.TimedRotatingFileHandler. It is now reliable for handlers
without namer and with arbitrary deterministic namer that leaves
the datetime part in the file name unmodified.
(cherry picked from commit 87faec2)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
  • Loading branch information
miss-islington and serhiy-storchaka committed Mar 3, 2024
1 parent 82e7692 commit d3756ed
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 36 deletions.
51 changes: 24 additions & 27 deletions Lib/logging/handlers.py
Expand Up @@ -232,19 +232,19 @@ def __init__(self, filename, when='h', interval=1, backupCount=0,
if self.when == 'S':
self.interval = 1 # one second
self.suffix = "%Y-%m-%d_%H-%M-%S"
self.extMatch = r"^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}(\.\w+)?$"
extMatch = r"(?<!\d)\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}(?!\d)"
elif self.when == 'M':
self.interval = 60 # one minute
self.suffix = "%Y-%m-%d_%H-%M"
self.extMatch = r"^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}(\.\w+)?$"
extMatch = r"(?<!\d)\d{4}-\d{2}-\d{2}_\d{2}-\d{2}(?!\d)"
elif self.when == 'H':
self.interval = 60 * 60 # one hour
self.suffix = "%Y-%m-%d_%H"
self.extMatch = r"^\d{4}-\d{2}-\d{2}_\d{2}(\.\w+)?$"
extMatch = r"(?<!\d)\d{4}-\d{2}-\d{2}_\d{2}(?!\d)"
elif self.when == 'D' or self.when == 'MIDNIGHT':
self.interval = 60 * 60 * 24 # one day
self.suffix = "%Y-%m-%d"
self.extMatch = r"^\d{4}-\d{2}-\d{2}(\.\w+)?$"
extMatch = r"(?<!\d)\d{4}-\d{2}-\d{2}(?!\d)"
elif self.when.startswith('W'):
self.interval = 60 * 60 * 24 * 7 # one week
if len(self.when) != 2:
Expand All @@ -253,11 +253,17 @@ def __init__(self, filename, when='h', interval=1, backupCount=0,
raise ValueError("Invalid day specified for weekly rollover: %s" % self.when)
self.dayOfWeek = int(self.when[1])
self.suffix = "%Y-%m-%d"
self.extMatch = r"^\d{4}-\d{2}-\d{2}(\.\w+)?$"
extMatch = r"(?<!\d)\d{4}-\d{2}-\d{2}(?!\d)"
else:
raise ValueError("Invalid rollover interval specified: %s" % self.when)

self.extMatch = re.compile(self.extMatch, re.ASCII)
# extMatch is a pattern for matching a datetime suffix in a file name.
# After custom naming, it is no longer guaranteed to be separated by
# periods from other parts of the filename. The lookup statements
# (?<!\d) and (?!\d) ensure that the datetime suffix (which itself
# starts and ends with digits) is not preceded or followed by digits.
# This reduces the number of false matches and improves performance.
self.extMatch = re.compile(extMatch, re.ASCII)
self.interval = self.interval * interval # multiply by units requested
# The following line added because the filename passed in could be a
# path object (see Issue #27493), but self.baseFilename will be a string
Expand Down Expand Up @@ -376,31 +382,22 @@ def getFilesToDelete(self):
for fileName in fileNames:
if fileName[:plen] == prefix:
suffix = fileName[plen:]
if self.extMatch.match(suffix):
if self.extMatch.fullmatch(suffix):
result.append(os.path.join(dirName, fileName))
else:
# See bpo-44753: Don't use the extension when computing the prefix.
n, e = os.path.splitext(baseName)
prefix = n + '.'
plen = len(prefix)
for fileName in fileNames:
# Our files could be just about anything after custom naming, but
# likely candidates are of the form
# foo.log.DATETIME_SUFFIX or foo.DATETIME_SUFFIX.log
if (not fileName.startswith(baseName) and fileName.endswith(e) and
len(fileName) > (plen + 1) and not fileName[plen+1].isdigit()):
continue
# Our files could be just about anything after custom naming,
# but they should contain the datetime suffix.
# Try to find the datetime suffix in the file name and verify
# that the file name can be generated by this handler.
m = self.extMatch.search(fileName)
while m:
dfn = self.namer(self.baseFilename + "." + m[0])
if os.path.basename(dfn) == fileName:
result.append(os.path.join(dirName, fileName))
break
m = self.extMatch.search(fileName, m.start() + 1)

if fileName[:plen] == prefix:
suffix = fileName[plen:]
# See bpo-45628: The date/time suffix could be anywhere in the
# filename

parts = suffix.split('.')
for part in parts:
if self.extMatch.match(part):
result.append(os.path.join(dirName, fileName))
break
if len(result) < self.backupCount:
result = []
else:
Expand Down
37 changes: 28 additions & 9 deletions Lib/test/test_logging.py
Expand Up @@ -5834,7 +5834,7 @@ def test_compute_files_to_delete(self):
for i in range(10):
times.append(dt.strftime('%Y-%m-%d_%H-%M-%S'))
dt += datetime.timedelta(seconds=5)
prefixes = ('a.b', 'a.b.c', 'd.e', 'd.e.f')
prefixes = ('a.b', 'a.b.c', 'd.e', 'd.e.f', 'g')
files = []
rotators = []
for prefix in prefixes:
Expand All @@ -5847,10 +5847,22 @@ def test_compute_files_to_delete(self):
if prefix.startswith('a.b'):
for t in times:
files.append('%s.log.%s' % (prefix, t))
else:
rotator.namer = lambda name: name.replace('.log', '') + '.log'
elif prefix.startswith('d.e'):
def namer(filename):
dirname, basename = os.path.split(filename)
basename = basename.replace('.log', '') + '.log'
return os.path.join(dirname, basename)
rotator.namer = namer
for t in times:
files.append('%s.%s.log' % (prefix, t))
elif prefix == 'g':
def namer(filename):
dirname, basename = os.path.split(filename)
basename = 'g' + basename[6:] + '.oldlog'
return os.path.join(dirname, basename)
rotator.namer = namer
for t in times:
files.append('g%s.oldlog' % t)
# Create empty files
for fn in files:
p = os.path.join(wd, fn)
Expand All @@ -5860,18 +5872,23 @@ def test_compute_files_to_delete(self):
for i, prefix in enumerate(prefixes):
rotator = rotators[i]
candidates = rotator.getFilesToDelete()
self.assertEqual(len(candidates), 3)
self.assertEqual(len(candidates), 3, candidates)
if prefix.startswith('a.b'):
p = '%s.log.' % prefix
for c in candidates:
d, fn = os.path.split(c)
self.assertTrue(fn.startswith(p))
else:
elif prefix.startswith('d.e'):
for c in candidates:
d, fn = os.path.split(c)
self.assertTrue(fn.endswith('.log'))
self.assertTrue(fn.endswith('.log'), fn)
self.assertTrue(fn.startswith(prefix + '.') and
fn[len(prefix) + 2].isdigit())
elif prefix == 'g':
for c in candidates:
d, fn = os.path.split(c)
self.assertTrue(fn.endswith('.oldlog'))
self.assertTrue(fn.startswith('g') and fn[1].isdigit())

def test_compute_files_to_delete_same_filename_different_extensions(self):
# See GH-93205 for background
Expand All @@ -5895,6 +5912,8 @@ def test_compute_files_to_delete_same_filename_different_extensions(self):
rotators.append(rotator)
for t in times:
files.append('%s.%s' % (prefix, t))
for t in times:
files.append('a.log.%s.c' % t)
# Create empty files
for f in files:
(wd / f).touch()
Expand All @@ -5903,11 +5922,11 @@ def test_compute_files_to_delete_same_filename_different_extensions(self):
backupCount = i+1
rotator = rotators[i]
candidates = rotator.getFilesToDelete()
self.assertEqual(len(candidates), n_files - backupCount)
matcher = re.compile(r"^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}(\.\w+)?$")
self.assertEqual(len(candidates), n_files - backupCount, candidates)
matcher = re.compile(r"^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}\Z")
for c in candidates:
d, fn = os.path.split(c)
self.assertTrue(fn.startswith(prefix))
self.assertTrue(fn.startswith(prefix+'.'))
suffix = fn[(len(prefix)+1):]
self.assertRegex(suffix, matcher)

Expand Down
@@ -0,0 +1,4 @@
Improve algorithm for computing which rolled-over log files to delete in
:class:`logging.TimedRotatingFileHandler`. It is now reliable for handlers
without ``namer`` and with arbitrary deterministic ``namer`` that leaves the
datetime part in the file name unmodified.

0 comments on commit d3756ed

Please sign in to comment.