Skip to content

Commit

Permalink
Put memory measuring functions in a new module
Browse files Browse the repository at this point in the history
Moved functions related to measuring memory consumption in a separate
module lsst.utils.usage.
  • Loading branch information
mxk62 committed Feb 21, 2022
1 parent 14e8a07 commit a8a791b
Show file tree
Hide file tree
Showing 6 changed files with 190 additions and 67 deletions.
4 changes: 4 additions & 0 deletions mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ ignore_errors = True
ignore_missing_imports = True
ignore_errors = True

[mypy-astropy.*]
ignore_missing_imports = True
ignore_errors = True

[mypy-eups.*]
ignore_missing_imports = True

Expand Down
98 changes: 40 additions & 58 deletions python/lsst/utils/timer.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from __future__ import annotations

__all__ = ["logInfo", "timeMethod", "time_this", "get_mem_usage"]
__all__ = ["logInfo", "timeMethod", "time_this"]

import datetime
import functools
Expand All @@ -36,7 +36,9 @@
Tuple,
)

import psutil
from astropy import units as u

from .usage import get_current_mem_usage, get_peak_mem_usage

if TYPE_CHECKING:
from .logging import LsstLoggers
Expand Down Expand Up @@ -113,34 +115,6 @@ def _find_outside_stacklevel() -> int:
return stacklevel


def get_mem_usage(include_children: bool = False) -> int:
"""Report current memory usage.
Parameters
----------
include_children : `bool`, optional
Flag indicating if memory usage by child processes (if any) should be
included in the report. Be default, only memory usage of the calling
process is reported.
Returns
-------
mem : `int`
Memory usage expressed in bytes.
Notes
-----
Function reports current memory usage using resident set size as a proxy.
As such the value it reports is capped at available physical RAM and may
not reflect the actual memory allocated to the process.
"""
proc = psutil.Process()
usage = proc.memory_info().rss
if include_children:
usage += sum([child.memory_info().rss for child in proc.children()])
return usage


def logPairs(
obj: Any,
pairs: Collection[Tuple[str, Any]],
Expand Down Expand Up @@ -395,7 +369,8 @@ def time_this(
args: Iterable[Any] = (),
mem_usage: bool = False,
mem_child: bool = False,
mem_units: str = "B",
mem_unit: u.Quantity = u.byte,
mem_fmt: str = ".0f",
) -> Iterator[None]:
"""Time the enclosed block and issue a log message.
Expand All @@ -422,8 +397,10 @@ def time_this(
Defaults, to False.
mem_child : `bool`, optional
Flag indication whether to include memory usage of the child processes.
mem_units : {"B", "KB", "MB", "GB"}, optional
Units to use when reporting the memory usage. Defaults to bytes ("B").
mem_unit : `astropy.units.Unit`, optional
Unit to use when reporting the memory usage. Defaults to bytes.
mem_fmt : `str`, optional
Format specifier to use when displaying values related to memory usage.
"""
if log is None:
log = logging.getLogger()
Expand All @@ -434,14 +411,14 @@ def time_this(
success = False
start = time.time()
if mem_usage:
mem_start = get_mem_usage(include_children=mem_child)
current_usages_start = get_current_mem_usage()
peak_usages_start = get_peak_mem_usage()

try:
yield
success = True
finally:
end = time.time()
if mem_usage:
mem_end = get_mem_usage(include_children=mem_child)

# The message is pre-inserted to allow the logger to expand
# the additional args provided. Make that easier by converting
Expand All @@ -456,26 +433,31 @@ def time_this(

# Specify stacklevel to ensure the message is reported from the
# caller (1 is this file, 2 is contextlib, 3 is user)
msg += "%sTook %.4f seconds; "
args += (": " if msg else "", end - start)
if mem_usage:
scales = {
"B": (1.0, "%d"),
"KB": (float(2**10), "%.1f"),
"MB": (float(2**20), "%.2f"),
"GB": (float(2**30), "%.3f"),
}
if mem_units not in scales:
mem_units = "B"
scale, fmt = scales[mem_units]
report = f"%sTook %.4f seconds; memory usage {fmt} {mem_units} (increment: {fmt} {mem_units})"
log.log(
level,
msg + report,
*args,
": " if msg else "",
end - start,
mem_end / scale,
(mem_end - mem_start) / scale,
stacklevel=3,
)
else:
log.log(level, msg + "%sTook %.4f seconds", *args, ": " if msg else "", end - start, stacklevel=3)
if log.isEnabledFor(level):
current_usages_end = get_current_mem_usage()
peak_usages_end = get_peak_mem_usage()

current_deltas = [end - start for end, start in zip(current_usages_end, current_usages_start)]
peak_deltas = [end - start for end, start in zip(peak_usages_end, peak_usages_start)]

current_usage = current_usages_end[0]
current_delta = current_deltas[0]
peak_delta = peak_deltas[0]
if mem_child:
current_usage += current_usages_end[1]
current_delta += current_deltas[1]
peak_delta += peak_deltas[1]

if not mem_unit.is_equivalent(u.byte):
mem_unit = u.byte

msg += "current memory usage %s (delta: %s, peak delta: %s)"
args += (
f"{current_usage.to(mem_unit):{mem_fmt}}",
f"{current_delta.to(mem_unit):{mem_fmt}}",
f"{peak_delta.to(mem_unit):{mem_fmt}}",
)
log.log(level, msg, *args, stacklevel=3)
72 changes: 72 additions & 0 deletions python/lsst/utils/usage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# This file is part of utils.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# Use of this source code is governed by a 3-clause BSD-style
# license that can be found in the LICENSE file.

"""Utilities for measuring resource consumption.
"""

import platform
import resource
from typing import Tuple

import psutil
from astropy import units as u

__all__ = ["get_current_mem_usage", "get_peak_mem_usage"]


def get_current_mem_usage() -> Tuple[u.Quantity, u.Quantity]:
"""Report current memory usage.
Returns
-------
usage_main : `astropy.units.Quantity`
Current memory usage of the calling process expressed in bytes.
usage_child : `astropy.units.Quantity`
Current memory usage of the child processes (zero if there are none)
expressed in bytes.
Notes
-----
Function reports current memory usage using resident set size as a proxy.
As such the values it reports are capped at available physical RAM and may
not reflect the actual memory allocated to the process and its children.
"""
proc = psutil.Process()
usage_main = proc.memory_info().rss * u.byte
usage_child = sum([child.memory_info().rss for child in proc.children()]) * u.byte
return usage_main, usage_child


def get_peak_mem_usage() -> Tuple[u.Quantity, u.Quantity]:
"""Report peak memory usage.
Returns
-------
peak_main: `astropy.units.Quantity`
Peak memory usage (maximum resident set size) of the calling process,
in bytes.
peak_child: `astropy.units.Quantity`
Peak memory usage (resident set size) of the largest child process,
in bytes.
Notes
-----
Function reports peak memory usage using the maximum resident set size as
a proxy. As such the value it reports is capped at available physical RAM
and may not reflect the actual maximal value.
"""
# Units getrusage(2) uses to report the maximum resident set size are
# platform dependent (kilobytes on Linux, bytes on OSX).
unit = u.kibibyte if platform.system() == "Linux" else u.byte

peak_main = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss * unit
peak_child = resource.getrusage(resource.RUSAGE_CHILDREN).ru_maxrss * unit
return peak_main.to(u.byte), peak_child.to(u.byte)
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ numpy >= 1.17
psutil >= 5.7
deprecated >= 1.2
pyyaml >5.1
astropy >= 5.0
35 changes: 26 additions & 9 deletions tests/test_timer.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import unittest
from dataclasses import dataclass

from astropy import units as u
from lsst.utils.timer import logInfo, logPairs, time_this, timeMethod

log = logging.getLogger("test_timer")
Expand Down Expand Up @@ -196,29 +197,45 @@ def testTimer(self):
self.assertEqual(cm.records[0].levelname, "DEBUG")
self.assertIn("Took", cm.output[0])
self.assertIn("memory", cm.output[0])
self.assertIn("increment", cm.output[0])
self.assertIn("delta", cm.output[0])
self.assertIn("peak delta", cm.output[0])
self.assertIn("byte", cm.output[0])

# Report memory usage, use different, but known memory units.
# Report memory usage including child processes.
with self.assertLogs(level="DEBUG") as cm:
with time_this(level=logging.DEBUG, prefix=None, mem_usage=True, mem_units="GB"):
with time_this(level=logging.DEBUG, prefix=None, mem_usage=True, mem_child=True):
pass
self.assertEqual(cm.records[0].name, "root")
self.assertEqual(cm.records[0].levelname, "DEBUG")
self.assertIn("Took", cm.output[0])
self.assertIn("memory", cm.output[0])
self.assertIn("increment", cm.output[0])
self.assertIn(" GB", cm.output[0])
self.assertIn("delta", cm.output[0])
self.assertIn("peak delta", cm.output[0])
self.assertIn("byte", cm.output[0])

# Report memory usage, use unknown memory units.
# Report memory usage, use non-default, but a valid memory unit.
with self.assertLogs(level="DEBUG") as cm:
with time_this(level=logging.DEBUG, prefix=None, mem_usage=True, mem_units="foo"):
with time_this(level=logging.DEBUG, prefix=None, mem_usage=True, mem_unit=u.kilobyte):
pass
self.assertEqual(cm.records[0].name, "root")
self.assertEqual(cm.records[0].levelname, "DEBUG")
self.assertIn("Took", cm.output[0])
self.assertIn("memory", cm.output[0])
self.assertIn("increment", cm.output[0])
self.assertIn(" B", cm.output[0])
self.assertIn("delta", cm.output[0])
self.assertIn("peak delta", cm.output[0])
self.assertIn("kbyte", cm.output[0])

# Report memory usage, use an invalid memory unit.
with self.assertLogs(level="DEBUG") as cm:
with time_this(level=logging.DEBUG, prefix=None, mem_usage=True, mem_unit=u.gram):
pass
self.assertEqual(cm.records[0].name, "root")
self.assertEqual(cm.records[0].levelname, "DEBUG")
self.assertIn("Took", cm.output[0])
self.assertIn("memory", cm.output[0])
self.assertIn("delta", cm.output[0])
self.assertIn("peak delta", cm.output[0])
self.assertIn("byte", cm.output[0])

# Change logging level
with self.assertLogs(level="INFO") as cm:
Expand Down
47 changes: 47 additions & 0 deletions tests/test_usage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# This file is part of utils.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# Use of this source code is governed by a 3-clause BSD-style
# license that can be found in the LICENSE file.

import unittest

from astropy import units as u
from lsst.utils.usage import get_current_mem_usage, get_peak_mem_usage


class UsageTestCase(unittest.TestCase):
def testGetCurrentMemUsage(self):
main1, children1 = get_current_mem_usage()
self.assertGreater(main1, 0 * u.byte)
self.assertGreaterEqual(children1.value, 0)

self.assertEqual(main1.unit.name, "byte")
self.assertEqual(children1.unit.name, "byte")

# Allocate some memory.
arr = [None] * 1_000_000 # noqa: F841

main2, children2 = get_current_mem_usage()
self.assertGreater(main2, main1)
self.assertGreaterEqual(children2, children1)

def testGetPeakMemUsage(self):
main1, child1 = get_peak_mem_usage()
self.assertGreater(main1, 0 * u.byte)
self.assertGreaterEqual(child1, 0 * u.byte)

self.assertEqual(main1.unit.name, "byte")
self.assertEqual(child1.unit.name, "byte")

# Allocate some memory.
arr = [None] * 2_000_000 # noqa: F841

main2, child2 = get_peak_mem_usage()
self.assertGreater(main2, main1)
self.assertGreaterEqual(child2, child1)

0 comments on commit a8a791b

Please sign in to comment.