Skip to content

Commit

Permalink
Merge pull request #115 from lsst/tickets/DM-20970
Browse files Browse the repository at this point in the history
DM-20970: Fix units in max_rss field for logInfo
  • Loading branch information
timj committed Feb 28, 2022
2 parents 45db490 + fe22f4a commit 4a0571d
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 28 deletions.
2 changes: 2 additions & 0 deletions doc/changes/DM-20970.api.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
The values for max resident set size stored in metadata are now consistently reported as bytes.
Previously the units were platform specific (kibibytes on Liux and bytes on macOS).
2 changes: 2 additions & 0 deletions doc/lsst.utils/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,5 @@ Python API reference
:no-main-docstr:
.. automodapi:: lsst.utils.packages
:no-main-docstr:
.. automodapi:: lsst.utils.usage
:no-main-docstr:
33 changes: 16 additions & 17 deletions python/lsst/utils/timer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import functools
import inspect
import logging
import resource
import time
from contextlib import contextmanager
from typing import (
Expand All @@ -38,7 +37,7 @@

from astropy import units as u

from .usage import get_current_mem_usage, get_peak_mem_usage
from .usage import _get_current_rusage, get_current_mem_usage, get_peak_mem_usage

if TYPE_CHECKING:
from .logging import LsstLoggers
Expand Down Expand Up @@ -232,13 +231,18 @@ def logInfo(
timestamps).
- ``CpuTime``: System + User CPU time (seconds). This should only be used
in differential measurements; the time reference point is undefined.
- ``MaxRss``: maximum resident set size.
- ``MaxRss``: maximum resident set size. Always in bytes.
All logged resource information is only for the current process; child
processes are excluded.
The metadata will be updated with a ``__version__`` field to indicate the
version of the items stored. If there is no version number it is assumed
to be version 0.
* Version 0: ``MaxResidentSetSize`` units are platform-dependent.
* Version 1: ``MaxResidentSetSize`` will be stored in bytes.
"""
cpuTime = time.process_time()
res = resource.getrusage(resource.RUSAGE_SELF)
if metadata is None and obj is not None:
try:
metadata = obj.metadata
Expand All @@ -248,24 +252,19 @@ def logInfo(
# Log messages already have timestamps.
utcStr = datetime.datetime.utcnow().isoformat()
_add_to_metadata(metadata, name=prefix + "Utc", value=utcStr)

# Force a version number into the metadata.
# v1: Ensure that max_rss field is always bytes.
metadata["__version__"] = 1
if stacklevel is not None:
# Account for the caller of this routine not knowing that we
# are going one down in the stack.
stacklevel += 1

usage = _get_current_rusage()
logPairs(
obj=obj,
pairs=[
(prefix + "CpuTime", cpuTime),
(prefix + "UserTime", res.ru_utime),
(prefix + "SystemTime", res.ru_stime),
(prefix + "MaxResidentSetSize", int(res.ru_maxrss)),
(prefix + "MinorPageFaults", int(res.ru_minflt)),
(prefix + "MajorPageFaults", int(res.ru_majflt)),
(prefix + "BlockInputs", int(res.ru_inblock)),
(prefix + "BlockOutputs", int(res.ru_oublock)),
(prefix + "VoluntaryContextSwitches", int(res.ru_nvcsw)),
(prefix + "InvoluntaryContextSwitches", int(res.ru_nivcsw)),
],
pairs=[(prefix + k[0].upper() + k[1:], v) for k, v in usage.dict().items()],
logLevel=logLevel,
metadata=metadata,
logger=logger,
Expand Down
102 changes: 91 additions & 11 deletions python/lsst/utils/usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,40 @@
"""Utilities for measuring resource consumption.
"""

__all__ = ["get_current_mem_usage", "get_peak_mem_usage"]

import dataclasses
import platform
import resource
from typing import Tuple
import time
from typing import Dict, Tuple, Union

import astropy.units as u
import psutil
from astropy import units as u

__all__ = ["get_current_mem_usage", "get_peak_mem_usage"]

def _get_rusage_multiplier() -> int:
"""Return the multiplier to use for memory usage returned by getrusage.
Returns
-------
unit : `int`
The multiplier that should be applied to the memory usage numbers
returned by `resource.getrusage` to convert them to bytes.
"""
system = platform.system().lower()
if system == "darwin":
# MacOS uses bytes
return 1
elif "solaris" in system or "sunos" in system:
# Solaris and SunOS use pages
return resource.getpagesize()
else:
# Assume Linux/FreeBSD etc, which use kibibytes
return 1024


_RUSAGE_MEMORY_MULTIPLIER = _get_rusage_multiplier()


def get_current_mem_usage() -> Tuple[u.Quantity, u.Quantity]:
Expand All @@ -40,8 +66,9 @@ def get_current_mem_usage() -> Tuple[u.Quantity, u.Quantity]:
not reflect the actual memory allocated to the process and its children.
"""
proc = psutil.Process()
usage_main = proc.memory_info().rss * u.byte
usage_child = sum([child.memory_info().rss for child in proc.children()]) * u.byte
with proc.oneshot():
usage_main = proc.memory_info().rss * u.byte
usage_child = sum([child.memory_info().rss for child in proc.children()]) * u.byte
return usage_main, usage_child


Expand All @@ -61,10 +88,63 @@ def get_peak_mem_usage() -> Tuple[u.Quantity, u.Quantity]:
a proxy. As such the value it reports is capped at available physical RAM
and may not reflect the actual maximal value.
"""
# Units getrusage(2) uses to report the maximum resident set size are
# platform dependent (kilobytes on Linux, bytes on OSX).
unit = u.kibibyte if platform.system() == "Linux" else u.byte

peak_main = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss * unit
peak_child = resource.getrusage(resource.RUSAGE_CHILDREN).ru_maxrss * unit
peak_main = _get_current_rusage().maxResidentSetSize * u.byte
peak_child = _get_current_rusage(for_children=True).maxResidentSetSize * u.byte
return peak_main, peak_child


@dataclasses.dataclass(frozen=True)
class _UsageInfo:
"""Summary of process usage."""

cpuTime: float
"""CPU time in seconds."""
userTime: float
"""User time in seconds."""
systemTime: float
"""System time in seconds."""
maxResidentSetSize: int
"""Maximum resident set size in bytes."""
minorPageFaults: int
majorPageFaults: int
blockInputs: int
blockOutputs: int
voluntaryContextSwitches: int
involuntaryContextSwitches: int

def dict(self) -> Dict[str, Union[float, int]]:
return dataclasses.asdict(self)


def _get_current_rusage(for_children: bool = False) -> _UsageInfo:
"""Get information about this (or the child) process.
Parameters
----------
for_children : `bool`, optional
Whether the information should be requested for child processes.
Default is for the current process.
Returns
-------
info : `_UsageInfo`
The information obtained from the process.
"""
who = resource.RUSAGE_CHILDREN if for_children else resource.RUSAGE_SELF
res = resource.getrusage(who)

# Convert the memory usage to bytes.
max_rss = res.ru_maxrss * _RUSAGE_MEMORY_MULTIPLIER

return _UsageInfo(
cpuTime=time.process_time(),
userTime=res.ru_utime,
systemTime=res.ru_stime,
maxResidentSetSize=max_rss,
minorPageFaults=res.ru_minflt,
majorPageFaults=res.ru_majflt,
blockInputs=res.ru_inblock,
blockOutputs=res.ru_oublock,
voluntaryContextSwitches=res.ru_nvcsw,
involuntaryContextSwitches=res.ru_nivcsw,
)
2 changes: 2 additions & 0 deletions tests/test_timer.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ def testLogInfo(self):
logInfo(None, prefix="Prefix", metadata=metadata, logger=logger, logLevel=logging.INFO)
self.assertEqual(cm.records[0].filename, THIS_FILE)
self.assertIn("PrefixUtc", metadata)
self.assertIn("PrefixMaxResidentSetSize", metadata)
self.assertEqual(metadata["__version__"], 1)

# Again with no log output.
logInfo(None, prefix="Prefix", metadata=metadata)
Expand Down

0 comments on commit 4a0571d

Please sign in to comment.