Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-115809: Improve TimedRotatingFileHandler.getFilesToDelete() #115812

Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
43 changes: 16 additions & 27 deletions Lib/logging/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,19 +232,19 @@ def __init__(self, filename, when='h', interval=1, backupCount=0,
if self.when == 'S':
self.interval = 1 # one second
self.suffix = "%Y-%m-%d_%H-%M-%S"
self.extMatch = r"^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}(\.\w+)?$"
extMatch = r"(?<!\d)\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}(?!\d)"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are the negative lookaheads just to capture the preceding and following parts around the date-time suffix? Please add a comment about why they're used. Otherwise, looks good, thanks!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They are not capturing. They are to ensure that the suffix is not preceded or followed by digits, that decreases the chance of accident wrong matches. In "12024-03-2024-03-01", it should not match "12024-03-2024-03-01", but "12024-03-2024-03-01". This code no longer requires that the datetime suffix is separated by dots from other parts, "foo2024-03-01bar.log" is now supported. Although, it is merely an optimization, because the following check filters out false matches.

I am not sure what kind of comment is needed here.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure what kind of comment is needed here.

Just what you said in your comment above, nothing more.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. Does it look good to you?

elif self.when == 'M':
self.interval = 60 # one minute
self.suffix = "%Y-%m-%d_%H-%M"
self.extMatch = r"^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}(\.\w+)?$"
extMatch = r"(?<!\d)\d{4}-\d{2}-\d{2}_\d{2}-\d{2}(?!\d)"
elif self.when == 'H':
self.interval = 60 * 60 # one hour
self.suffix = "%Y-%m-%d_%H"
self.extMatch = r"^\d{4}-\d{2}-\d{2}_\d{2}(\.\w+)?$"
extMatch = r"(?<!\d)\d{4}-\d{2}-\d{2}_\d{2}(?!\d)"
elif self.when == 'D' or self.when == 'MIDNIGHT':
self.interval = 60 * 60 * 24 # one day
self.suffix = "%Y-%m-%d"
self.extMatch = r"^\d{4}-\d{2}-\d{2}(\.\w+)?$"
extMatch = r"(?<!\d)\d{4}-\d{2}-\d{2}(?!\d)"
elif self.when.startswith('W'):
self.interval = 60 * 60 * 24 * 7 # one week
if len(self.when) != 2:
Expand All @@ -253,11 +253,11 @@ def __init__(self, filename, when='h', interval=1, backupCount=0,
raise ValueError("Invalid day specified for weekly rollover: %s" % self.when)
self.dayOfWeek = int(self.when[1])
self.suffix = "%Y-%m-%d"
self.extMatch = r"^\d{4}-\d{2}-\d{2}(\.\w+)?$"
extMatch = r"(?<!\d)\d{4}-\d{2}-\d{2}(?!\d)"
else:
raise ValueError("Invalid rollover interval specified: %s" % self.when)

self.extMatch = re.compile(self.extMatch, re.ASCII)
self.extMatch = re.compile(extMatch, re.ASCII)
self.interval = self.interval * interval # multiply by units requested
# The following line added because the filename passed in could be a
# path object (see Issue #27493), but self.baseFilename will be a string
Expand Down Expand Up @@ -375,31 +375,20 @@ def getFilesToDelete(self):
for fileName in fileNames:
if fileName[:plen] == prefix:
suffix = fileName[plen:]
if self.extMatch.match(suffix):
if self.extMatch.fullmatch(suffix):
result.append(os.path.join(dirName, fileName))
else:
# See bpo-44753: Don't use the extension when computing the prefix.
n, e = os.path.splitext(baseName)
prefix = n + '.'
plen = len(prefix)
for fileName in fileNames:
# Our files could be just about anything after custom naming, but
# likely candidates are of the form
# foo.log.DATETIME_SUFFIX or foo.DATETIME_SUFFIX.log
if (not fileName.startswith(baseName) and fileName.endswith(e) and
len(fileName) > (plen + 1) and not fileName[plen+1].isdigit()):
continue
# Our files could be just about anything after custom naming,
# they should contain the datetime suffix.
m = self.extMatch.search(fileName)
while m:
dfn = self.namer(self.baseFilename + "." + m[0])
if os.path.basename(dfn) == fileName:
result.append(os.path.join(dirName, fileName))
break
m = self.extMatch.search(fileName, m.start() + 1)

if fileName[:plen] == prefix:
suffix = fileName[plen:]
# See bpo-45628: The date/time suffix could be anywhere in the
# filename

parts = suffix.split('.')
for part in parts:
if self.extMatch.match(part):
result.append(os.path.join(dirName, fileName))
break
if len(result) < self.backupCount:
result = []
else:
Expand Down
37 changes: 28 additions & 9 deletions Lib/test/test_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -6154,7 +6154,7 @@ def test_compute_files_to_delete(self):
for i in range(10):
times.append(dt.strftime('%Y-%m-%d_%H-%M-%S'))
dt += datetime.timedelta(seconds=5)
prefixes = ('a.b', 'a.b.c', 'd.e', 'd.e.f')
prefixes = ('a.b', 'a.b.c', 'd.e', 'd.e.f', 'g')
files = []
rotators = []
for prefix in prefixes:
Expand All @@ -6167,10 +6167,22 @@ def test_compute_files_to_delete(self):
if prefix.startswith('a.b'):
for t in times:
files.append('%s.log.%s' % (prefix, t))
else:
rotator.namer = lambda name: name.replace('.log', '') + '.log'
elif prefix.startswith('d.e'):
def namer(filename):
dirname, basename = os.path.split(filename)
basename = basename.replace('.log', '') + '.log'
return os.path.join(dirname, basename)
rotator.namer = namer
for t in times:
files.append('%s.%s.log' % (prefix, t))
elif prefix == 'g':
def namer(filename):
dirname, basename = os.path.split(filename)
basename = 'g' + basename[6:] + '.oldlog'
return os.path.join(dirname, basename)
rotator.namer = namer
for t in times:
files.append('g%s.oldlog' % t)
# Create empty files
for fn in files:
p = os.path.join(wd, fn)
Expand All @@ -6180,18 +6192,23 @@ def test_compute_files_to_delete(self):
for i, prefix in enumerate(prefixes):
rotator = rotators[i]
candidates = rotator.getFilesToDelete()
self.assertEqual(len(candidates), 3)
self.assertEqual(len(candidates), 3, candidates)
if prefix.startswith('a.b'):
p = '%s.log.' % prefix
for c in candidates:
d, fn = os.path.split(c)
self.assertTrue(fn.startswith(p))
else:
elif prefix.startswith('d.e'):
for c in candidates:
d, fn = os.path.split(c)
self.assertTrue(fn.endswith('.log'))
self.assertTrue(fn.endswith('.log'), fn)
self.assertTrue(fn.startswith(prefix + '.') and
fn[len(prefix) + 2].isdigit())
elif prefix == 'g':
for c in candidates:
d, fn = os.path.split(c)
self.assertTrue(fn.endswith('.oldlog'))
self.assertTrue(fn.startswith('g') and fn[1].isdigit())

def test_compute_files_to_delete_same_filename_different_extensions(self):
# See GH-93205 for background
Expand All @@ -6215,6 +6232,8 @@ def test_compute_files_to_delete_same_filename_different_extensions(self):
rotators.append(rotator)
for t in times:
files.append('%s.%s' % (prefix, t))
for t in times:
files.append('a.log.%s.c' % t)
# Create empty files
for f in files:
(wd / f).touch()
Expand All @@ -6223,11 +6242,11 @@ def test_compute_files_to_delete_same_filename_different_extensions(self):
backupCount = i+1
rotator = rotators[i]
candidates = rotator.getFilesToDelete()
self.assertEqual(len(candidates), n_files - backupCount)
matcher = re.compile(r"^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}(\.\w+)?$")
self.assertEqual(len(candidates), n_files - backupCount, candidates)
matcher = re.compile(r"^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}\Z")
for c in candidates:
d, fn = os.path.split(c)
self.assertTrue(fn.startswith(prefix))
self.assertTrue(fn.startswith(prefix+'.'))
suffix = fn[(len(prefix)+1):]
self.assertRegex(suffix, matcher)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Improve algorithm for computing which rolled-over log files to delete in
:class:`logging.TimedRotatingFileHandler`. It is now reliable for handlers
without ``namer`` and with arbitrary deterministic ``namer`` that leaves the
datetime part in the file name unmodified.