Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improvements around test_pidfile_contents #1328

Draft
wants to merge 12 commits into
base: master
Choose a base branch
from
Empty file added newsfragments/3983.minor
Empty file.
Empty file added newsfragments/3984.minor
Empty file.
Empty file added newsfragments/4058.minor
Empty file.
64 changes: 55 additions & 9 deletions src/allmydata/test/cli/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
from __future__ import annotations

import re
from functools import partial
from sys import float_info
from six.moves import (
StringIO,
)

from hypothesis.strategies import text
from hypothesis import given, assume
from hypothesis.strategies import text, floats, integers, one_of
from hypothesis import given

from testtools.matchers import (
Contains,
Expand Down Expand Up @@ -50,6 +52,18 @@
SyncTestCase,
)

def not_found(pattern: re.Pattern, haystack: str) -> bool:
"""
Determine whether a pattern can be found in a string.

:param pattern: The pattern to search for.
:param haystack: The string to search for the pattern.

:return: True if and only if the pattern is *not* found.
"""
return pattern.search(haystack) is None


class DaemonizeTheRealServiceTests(SyncTestCase):
"""
Tests for ``DaemonizeTheRealService``.
Expand Down Expand Up @@ -264,17 +278,49 @@ def test_non_numeric_pid(self):
self.assertThat(runs, Equals([]))
self.assertThat(result_code, Equals(1))

good_file_content_re = re.compile(r"\w[0-9]*\w[0-9]*\w")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I greatly elaborated this regular expression and removed some subtle inconsistencies with the real parser but note that apart from getting much longer a significant change here is going from \w ("word character") to \s ("whitespace character") - a pretty dramatic inconsistency, I think.


@given(text())
def test_pidfile_contents(self, content):
good_file_content_re = re.compile(
r"^" # start at the beginning
r"\s*" # any amount of whitespace
r"[1-9][0-9]*" # a pid is a positive integer

r"\s+" # any amount of whitespace separates the two
# fields

r"[0-9]+(\.[0-9]*)?" # a time is a base ten floating point
# representation that's an integer-like sequence
# optionally followed by a decimal point and
# another integer-like sequence. 0.0 is allowed
# so there is no requirement for the integer-like
# sequences to begin with non-zero.

r"\s*" # any amount of whitespace
r"$", # stop at the end

re.MULTILINE, # If the whitespace happens to be
# newline-flavored, deal with it anyway.
)

@given(one_of(
# Search the whole space for an input that will trip us up
text().filter(
# But we're testing the exception path so exclude strings we
# specifically know are valid.
partial(not_found, good_file_content_re),
),
# Also try some more focused strategies where at least the structure
# and one field are valid.
integers(max_value=0).map(lambda p: f"{p} 123.45"),
floats(max_value=-float_info.min).map(lambda t: f"123 {t}"),
))
def test_pidfile_contents(self, content: str) -> None:
"""
invalid contents for a pidfile raise errors
"""
assume(not self.good_file_content_re.match(content))
pidfile = FilePath("pidfile")
pidfile.setContent(content.encode("utf8"))

# Since we've excluded structurally invalid values from the serialized
# string as well as syntactically invalid strings the only possible
# result should be InvalidPidFile.
with self.assertRaises(InvalidPidFile):
with check_pid_process(pidfile):
pass
check_pid_process(pidfile)
55 changes: 40 additions & 15 deletions src/allmydata/util/pid.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,18 @@
"""
Process IDentification-related helpers.
"""

from __future__ import annotations

import psutil
from attrs import frozen

# the docs are a little misleading, but this is either WindowsFileLock
# or UnixFileLock depending upon the platform we're currently on
from filelock import FileLock, Timeout

from twisted.python.filepath import FilePath


class ProcessInTheWay(Exception):
"""
Expand All @@ -23,36 +32,40 @@ class CannotRemovePidFile(Exception):
"""


def _pidfile_to_lockpath(pidfile):
def _pidfile_to_lockpath(pidfile: FilePath) -> FilePath:
"""
internal helper.
:returns FilePath: a path to use for file-locking the given pidfile
:returns: a path to use for file-locking the given pidfile
"""
return pidfile.sibling("{}.lock".format(pidfile.basename()))


def parse_pidfile(pidfile):
def parse_pidfile(pidfile: FilePath) -> tuple[int, float]:
"""
:param FilePath pidfile:
:returns tuple: 2-tuple of pid, creation-time as int, float
:param pidfile: The path to the file to parse.

:return: 2-tuple of pid, creation-time as int, float

:raises InvalidPidFile: on error
"""
with pidfile.open("r") as f:
content = f.read().decode("utf8").strip()
try:
pid, starttime = content.split()
pid = int(pid)
starttime = float(starttime)
pid_str, starttime_str = content.split()
pid = int(pid_str)
starttime = float(starttime_str)
except ValueError:
raise InvalidPidFile(
"found invalid PID file in {}".format(
pidfile
)
)
if pid <= 0 or starttime < 0:
raise InvalidPidFile(f"Found value out of bounds: pid={pid} time={starttime}")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

even though these values pass through our simple parser above, we know that they are semantically nonsensical so we can exclude them here rather than letting them reach other parts of the system.

return pid, starttime


def check_pid_process(pidfile):
def check_pid_process(pidfile: FilePath) -> None:
"""
If another instance appears to be running already, raise an
exception. Otherwise, write our PID + start time to the pidfile
Expand All @@ -68,7 +81,7 @@ def check_pid_process(pidfile):
# a short timeout is fine, this lock should only be active
# while someone is reading or deleting the pidfile .. and
# facilitates testing the locking itself.
with FileLock(lock_path.path, timeout=2):
with FileLock(_CouldBeBytesPath(lock_path.path), timeout=2):
# check if we have another instance running already
if pidfile.exists():
pid, starttime = parse_pidfile(pidfile)
Expand All @@ -85,7 +98,7 @@ def check_pid_process(pidfile):
except psutil.NoSuchProcess:
print(
"'{pidpath}' refers to {pid} that isn't running".format(
pidpath=pidfile.path,
pidpath=pidfile.asTextMode().path,
pid=pid,
)
)
Expand All @@ -98,23 +111,35 @@ def check_pid_process(pidfile):
f.write("{} {}\n".format(proc.pid, proc.create_time()).encode("utf8"))
except Timeout:
raise ProcessInTheWay(
"Another process is still locking {}".format(pidfile.path)
"Another process is still locking {}".format(pidfile.asTextMode().path)
)


def cleanup_pidfile(pidfile):
def cleanup_pidfile(pidfile: FilePath) -> None:
"""
Remove the pidfile specified (respecting locks). If anything at
all goes wrong, `CannotRemovePidFile` is raised.
"""
lock_path = _pidfile_to_lockpath(pidfile)
with FileLock(lock_path.path):
with FileLock(_CouldBeBytesPath(lock_path.path)):
try:
pidfile.remove()
except Exception as e:
raise CannotRemovePidFile(
"Couldn't remove '{pidfile}': {err}.".format(
pidfile=pidfile.path,
pidfile=pidfile.asTextMode().path,
err=e,
)
)

@frozen
class _CouldBeBytesPath(object):
"""
Sneak bytes past `FileLock.__init__`.

See https://github.com/tox-dev/py-filelock/issues/258
"""
_path: str | bytes

def __fspath__(self) -> str | bytes:
return self._path
Loading