Merged
44 commits
b77f5ba
Preliminary work on the perf patterns
jjotero Jun 28, 2021
c2fdb86
Merge branch 'feat/sanity' into feat/perf-syntax
jjotero Jun 30, 2021
53eb6cb
Implement performance decorators
jjotero Jun 30, 2021
9444ddd
Merge branch 'master' of github.com:eth-cscs/reframe into feat/perf-s…
jjotero Jul 1, 2021
4b5e889
Merge branch 'bugfix/hook-override' into feat/perf-syntax
jjotero Jul 1, 2021
cf65b72
Update syntax compat logic
jjotero Jul 1, 2021
94916a6
Implement new perf syntax
jjotero Jul 2, 2021
d974764
Port jobreport to new syntax
jjotero Jul 2, 2021
98432db
Fix reference types
jjotero Jul 5, 2021
4b89a8d
Merge branch 'master' into feat/perf-syntax
jjotero Jul 19, 2021
20633fa
Add perf_key argument to @perf_function
jjotero Jul 19, 2021
0fc17dd
Add make_performance_function method
jjotero Jul 19, 2021
c5fa688
Make unit type-checking more robust
jjotero Jul 19, 2021
7607dea
Add signature checking on perf fns
jjotero Jul 19, 2021
e0b3ab8
Remove unused imports
jjotero Jul 19, 2021
277e73d
Add trivially callable util
jjotero Jul 20, 2021
e4f4092
Remove unused imports
jjotero Jul 20, 2021
cabacdd
Address PR comments
jjotero Jul 21, 2021
fb41d6a
Merge branch 'master' into feat/perf-syntax
jjotero Jul 26, 2021
cdf69cc
Allow units in references
jjotero Jul 26, 2021
98d43d2
Rename performance_report to performance_variables
jjotero Jul 30, 2021
17a2881
Cleanup the metaclass
jjotero Jul 30, 2021
b719629
Merge branch 'master' into feat/perf-syntax
jjotero Jul 30, 2021
01a1b78
Update check syntax
jjotero Jul 30, 2021
6f8694d
Transpose nested loop in meta
jjotero Aug 2, 2021
84deeb0
Prototype test syntax
jjotero Aug 3, 2021
e560cbd
Make perf_variables a variable
jjotero Aug 3, 2021
e0db6c5
Give perf_variables a default value
jjotero Aug 4, 2021
164c59e
Raise warning if any perf var fails
jjotero Aug 4, 2021
0d5f4cb
Address PR comments
jjotero Aug 4, 2021
de2ea43
Merge branch 'master' into feat/perf-other
jjotero Aug 4, 2021
bfdd758
Remove unused imports
jjotero Aug 4, 2021
7499b4d
Add unit tests
jjotero Aug 6, 2021
6463f80
Fix PEP complaints
jjotero Aug 6, 2021
3bcc349
move make_performance_function into sn module
jjotero Aug 9, 2021
6ce8bec
Merge branch 'master' into feat/perf-syntax
jjotero Aug 9, 2021
439ced6
Address PR comments
jjotero Aug 19, 2021
87f4583
Update docs
jjotero Aug 19, 2021
fa071f3
Merge branch 'master' into local/perf-syntax
jjotero Aug 19, 2021
00dfb53
Fix PEP complaints
jjotero Aug 19, 2021
5b19511
Update docs with PR comments
jjotero Aug 23, 2021
b7f926d
Update perf_variables docstring
jjotero Aug 23, 2021
fa2c92a
Merge branch 'master' into feat/perf-syntax
jjotero Aug 23, 2021
06813e9
Docs fine tuning
Aug 23, 2021
53 changes: 31 additions & 22 deletions cscs-checks/microbenchmarks/gpu/gpu_burn/gpu_burn_test.py
@@ -3,10 +3,11 @@
#
# SPDX-License-Identifier: BSD-3-Clause

-import os

import reframe as rfm
import reframe.utility.sanity as sn
+import reframe.utility.osext as osext
+from reframe.core.exceptions import SanityError

from hpctestlib.microbenchmarks.gpu.gpu_burn import GpuBurn
import cscstests.microbenchmarks.gpu.hooks as hooks
@@ -24,30 +25,29 @@ class gpu_burn_check(GpuBurn):
    num_tasks = 0
    reference = {
        'dom:gpu': {
-            'perf': (4115, -0.10, None, 'Gflop/s'),
+            'min_perf': (4115, -0.10, None, 'Gflop/s'),
        },
        'daint:gpu': {
-            'perf': (4115, -0.10, None, 'Gflop/s'),
+            'min_perf': (4115, -0.10, None, 'Gflop/s'),
        },
        'arolla:cn': {
-            'perf': (5861, -0.10, None, 'Gflop/s'),
+            'min_perf': (5861, -0.10, None, 'Gflop/s'),
        },
        'tsa:cn': {
-            'perf': (5861, -0.10, None, 'Gflop/s'),
+            'min_perf': (5861, -0.10, None, 'Gflop/s'),
        },
        'ault:amda100': {
-            'perf': (15000, -0.10, None, 'Gflop/s'),
+            'min_perf': (15000, -0.10, None, 'Gflop/s'),
        },
        'ault:amdv100': {
-            'perf': (5500, -0.10, None, 'Gflop/s'),
+            'min_perf': (5500, -0.10, None, 'Gflop/s'),
        },
        'ault:intelv100': {
-            'perf': (5500, -0.10, None, 'Gflop/s'),
+            'min_perf': (5500, -0.10, None, 'Gflop/s'),
        },
        'ault:amdvega': {
-            'perf': (3450, -0.10, None, 'Gflop/s'),
+            'min_perf': (3450, -0.10, None, 'Gflop/s'),
        },
+        '*': {'temp': (0, None, None, 'degC')}
    }

    maintainers = ['AJ', 'TM']
@@ -63,16 +63,25 @@ def set_num_gpus_per_node(self):
        hooks.set_num_gpus_per_node(self)

    @run_before('performance')
-    def report_nid_with_smallest_flops(self):
-        regex = r'\[(\S+)\] GPU\s+\d\(OK\): (\d+) GF/s'
-        rptf = os.path.join(self.stagedir, sn.evaluate(self.stdout))
-        self.nids = sn.extractall(regex, rptf, 1)
-        self.flops = sn.extractall(regex, rptf, 2, float)
-
-        # Find index of smallest flops and update reference dictionary to
-        # include our patched units
-        index = self.flops.evaluate().index(min(self.flops))
-        unit = f'GF/s ({self.nids[index]})'
-        for key, ref in self.reference.items():
-            if not key.endswith(':temp'):
-                self.reference[key] = (*ref[:3], unit)
+    def report_slow_nodes(self):
+        '''Report the base perf metrics and also all the slow nodes.'''
+
+        # Only report the nodes that don't meet the perf reference
+        with osext.change_dir(self.stagedir):
+            key = f'{self.current_partition.fullname}:min_perf'
+            if key in self.reference:
+                regex = r'\[(\S+)\] GPU\s+\d\(OK\): (\d+) GF/s'
+                nids = set(sn.extractall(regex, self.stdout, 1))
+
+                # Get the references
+                ref, lt, ut, *_ = self.reference[key]
+
+                # Flag the slow nodes
+                for nid in nids:
+                    try:
+                        node_perf = self.min_perf(nid)
+                        val = node_perf.evaluate(cache=True)
+                        sn.assert_reference(val, ref, lt, ut).evaluate()
+                    except SanityError:
+                        self.perf_variables[nid] = node_perf
12 changes: 8 additions & 4 deletions cscs-checks/system/jobreport/gpu_report.py
@@ -93,8 +93,12 @@ def gpu_usage_sanity(self):
            sn.assert_ge(sn.min(time_reported), self.burn_time)
        ])

+    @performance_function('nodes')
+    def total_nodes_reported(self):
+        return sn.count(self.nodes_reported)
+
    @run_before('performance')
-    def set_perf_patterns(self):
+    def set_perf_variables(self):
        '''The number of reported nodes can be used as a perf metric.

        For now, the low limit can go to zero, but this can be set to a more
@@ -103,9 +107,9 @@ def set_perf_patterns(self):

        self.reference = {
            '*': {
-                'nodes_reported': (self.num_tasks, self.perf_floor, 0, 'nodes')
+                'nodes_reported': (self.num_tasks, self.perf_floor, 0)
            },
        }
-        self.perf_patterns = {
-            'nodes_reported': sn.count(self.nodes_reported)
+        self.perf_variables = {
+            'nodes_reported': self.total_nodes_reported()
        }
29 changes: 27 additions & 2 deletions docs/deferrable_functions_reference.rst
@@ -7,10 +7,20 @@ Deferrable Functions Reference
*Deferrable functions* are functions whose execution may be postponed to a later time after they are called.
The key characteristic of these functions is that they store their arguments when they are called, and the execution itself does not occur until the function is evaluated, either explicitly or implicitly.

+ReFrame provides an ample set of deferrable utilities and it also allows users to write their own deferrable functions when needed.
+Please refer to ":doc:`deferrables`" for a hands-on explanation of how deferrable functions work and how to create custom deferrable functions.
+
+
+Explicit evaluation of deferrable functions
+-------------------------------------------
+
+Deferrable functions may be evaluated at any time by calling :func:`evaluate` on their return value or by passing the deferred function itself to the :func:`~reframe.utility.sanity.evaluate()` free function.
+These :func:`evaluate` functions take an optional :class:`bool` argument ``cache``, which enables caching of the evaluation result.
+Hence, once caching is enabled on a given deferrable function, any subsequent calls to :func:`evaluate` will simply return the previously cached result.
+
+.. versionchanged:: 3.8.0
+   Support for cached evaluation added.
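
As a quick illustration of the cached evaluation described above, here is a minimal sketch; the ``count_lines`` deferrable and the ``output.txt`` file are hypothetical:

.. code-block:: python

   import reframe.utility.sanity as sn

   @sn.deferrable
   def count_lines(filename):
       # Runs only when the deferred expression is evaluated
       with open(filename) as fp:
           return sum(1 for _ in fp)

   expr = count_lines('output.txt')  # nothing is executed yet
   n = expr.evaluate(cache=True)     # executes the function and caches the result
   m = expr.evaluate()               # returns the cached result without re-reading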


Implicit evaluation of deferrable functions
-------------------------------------------
@@ -48,9 +58,24 @@ Currently ReFrame provides three broad categories of deferrable functions:
  They include, but are not limited to, functions to iterate over regex matches in a file, extract and convert values from regex matches, compute statistical information on series of data etc.


-Users can write their own deferrable functions as well.
-The page ":doc:`deferrables`" explains in detail how deferrable functions work and how users can write their own.
+.. _deferrable-performance-functions:
+
+
+--------------------------------
+Deferrable performance functions
+--------------------------------
+
+.. versionadded:: 3.8.0
+
+Deferrable performance functions are a special type of deferrable function intended to measure a given quantity.
+Hence, each of these functions has an associated unit that can be used to interpret its return values.
+The unit of a deferrable performance function can be accessed through its public :attr:`unit` attribute.
+Regular deferrable functions can be promoted to deferrable performance functions with the :func:`~reframe.utility.sanity.make_performance_function` utility, which can also create performance functions directly from any callable.
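
For illustration, a minimal sketch of both promotion paths; the regex, file name and units are made up:

.. code-block:: python

   import reframe.utility.sanity as sn

   # Promote an existing deferred expression, attaching a unit to it
   flops = sn.extractsingle(r'perf:\s+(\S+)', 'out.txt', 1, float)
   perf_fn = sn.make_performance_function(flops, 'Gflop/s')
   print(perf_fn.unit)  # prints 'Gflop/s'

   # Create a performance function directly from a callable
   perf_fn2 = sn.make_performance_function(lambda: 42.0, 'MB/s')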


List of deferrable functions and utilities
------------------------------------------

.. py:decorator:: reframe.utility.sanity.deferrable(func)

15 changes: 15 additions & 0 deletions docs/regression_test_api.rst
@@ -322,6 +322,21 @@ Built-in functions

   .. versionadded:: 3.7.0

+.. py:decorator:: RegressionMixin.performance_function(unit, *, perf_key=None)
+
+   Decorate a member function as a performance function of the test.
+
+   This decorator converts the decorated method into a performance deferrable function (see ":ref:`deferrable-performance-functions`" for more details) whose evaluation is deferred to the performance stage of the regression test.
+   The decorated function must take a single argument without a default value (i.e. ``self``) and any number of arguments with default values.
+   A test may decorate multiple member functions as performance functions, and each decorated function must be given the unit of the performance quantity it extracts from the test.
+   These performance units must be of type :class:`str`.
+   Any performance function may be overridden in a derived class and multiple bases may define their own performance functions.
+   In the event of a name conflict, the derived class will follow Python's `MRO <https://docs.python.org/3/library/stdtypes.html#class.__mro__>`_ to choose the appropriate performance function.
+   However, defining more than one performance function with the same name in the same class is disallowed.
+
+   The full set of performance functions of a regression test is stored under :attr:`~reframe.core.pipeline.RegressionTest.perf_variables` as key-value pairs, where, by default, the key is the name of the decorated member function and the value is the deferred performance function itself.
+   Optionally, the key under which a performance function is stored in :attr:`~reframe.core.pipeline.RegressionTest.perf_variables` can be customised by passing the desired key as the ``perf_key`` argument to this decorator.
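
A minimal sketch of a test defining two performance functions follows; the test body, regexes and units are illustrative only:

.. code-block:: python

   import reframe as rfm
   import reframe.utility.sanity as sn


   class example_test(rfm.RegressionTest):
       # ...executable, sanity patterns, etc. omitted...

       @performance_function('Gflop/s', perf_key='peak_flops')
       def extract_flops(self):
           # Stored in perf_variables under 'peak_flops'
           return sn.extractsingle(r'flops:\s+(\S+)', self.stdout, 1, float)

       @performance_function('s')
       def elapsed_time(self):
           # Stored under its default key, 'elapsed_time'
           return sn.extractsingle(r'time:\s+(\S+)', self.stdout, 1, float)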

.. py:decorator:: RegressionMixin.deferrable(func)

Converts the decorated method into a deferrable function.
41 changes: 22 additions & 19 deletions hpctestlib/microbenchmarks/gpu/gpu_burn/__init__.py
@@ -51,12 +51,6 @@ class GpuBurn(rfm.RegressionTest, pin_prefix=True):
    build_system = 'Make'
    executable = './gpu_burn.x'
    num_tasks_per_node = 1
-    reference = {
-        '*': {
-            'perf': (0, None, None, 'Gflop/s'),
-            'temp': (0, None, None, 'degC')
-        }
-    }

    @run_before('compile')
    def set_gpu_build(self):
@@ -83,7 +77,6 @@ def set_gpu_build(self):
            raise ValueError('unknown gpu_build option')

    @property
-    @deferrable
    def num_tasks_assigned(self):
        '''Total number of times the gpu burn will run.

@@ -103,17 +96,27 @@ def count_successful_burns(self):
            r'^\s*\[[^\]]*\]\s*GPU\s*\d+\(OK\)', self.stdout)
        ), self.num_tasks_assigned)

-    @run_before('performance')
-    def set_perf_patterns(self):
-        '''Extract the minimum performance and maximum temperature recorded.
-
-        The performance and temperature data are reported in Gflops/s and
-        deg. Celsius respectively.
-        '''
-
-        patt = (r'^\s*\[[^\]]*\]\s*GPU\s+\d+\(\S*\):\s+(?P<perf>\S*)\s+GF\/s'
-                r'\s+(?P<temp>\S*)\s+Celsius')
-        self.perf_patterns = {
-            'perf': sn.min(sn.extractall(patt, self.stdout, 'perf', float)),
-            'temp': sn.max(sn.extractall(patt, self.stdout, 'temp', float)),
-        }
+    def _extract_perf_metric(self, metric, nid=None):
+        '''Utility to extract performance metrics.'''
+
+        if metric not in {'perf', 'temp'}:
+            raise ValueError(
+                f"unsupported value in 'metric' argument: {metric!r}"
+            )
+
+        if nid is None:
+            nid = r'[^\]]*'
+
+        patt = (rf'^\s*\[{nid}\]\s*GPU\s+\d+\(\S*\):\s+(?P<perf>\S*)\s+GF\/s'
+                rf'\s+(?P<temp>\S*)\s+Celsius')
+        return sn.extractall(patt, self.stdout, metric, float)
+
+    @performance_function('Gflop/s')
+    def min_perf(self, nid=None):
+        '''Lowest performance recorded.'''
+        return sn.min(self._extract_perf_metric('perf', nid))
+
+    @performance_function('degC')
+    def max_temp(self, nid=None):
+        '''Maximum temperature recorded.'''
+        return sn.max(self._extract_perf_metric('temp', nid))
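
With the two decorated functions above, min_perf and max_temp are registered automatically in the test's perf_variables, so derived tests only need to provide references for the keys they care about. A hypothetical minimal consumer (system name and reference values invented):

class my_gpu_check(GpuBurn):
    valid_systems = ['sys:gpu']          # hypothetical partition
    valid_prog_environs = ['builtin']
    num_tasks = 0
    reference = {
        'sys:gpu': {
            # Keys default to the performance function names
            'min_perf': (4000, -0.10, None, 'Gflop/s'),
            'max_temp': (0, None, None, 'degC')
        }
    }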
56 changes: 46 additions & 10 deletions reframe/core/deferrable.py
@@ -44,31 +44,36 @@ def __init__(self, fn, *args, **kwargs):

        # We cache the value of the last evaluation inside a tuple.
        # We don't cache the value directly, because it can be any.
-
-        # NOTE: The cache for the moment is only used by
-        # `__rfm_json_encode__`. Enabling caching in the evaluation is a
-        # reasonable optimization, but might break compatibility, so it needs
-        # to be thought thoroughly and communicated properly in the
-        # documentation.
        self._cached = ()
+        self._return_cached = False
+
+    def evaluate(self, cache=False):
+        # Return the cached value (if any)
+        if self._return_cached and not cache:
+            return self._cached[0]
+        elif cache:
+            self._return_cached = cache

-    def evaluate(self):
        fn_args = []
        for arg in self._args:
            fn_args.append(
-                arg.evaluate() if isinstance(arg, type(self)) else arg
+                arg.evaluate() if isinstance(arg, _DeferredExpression) else arg
            )

        fn_kwargs = {}
        for k, v in self._kwargs.items():
            fn_kwargs[k] = (
-                v.evaluate() if isinstance(v, type(self)) else v
+                v.evaluate() if isinstance(v, _DeferredExpression) else v
            )

        ret = self._fn(*fn_args, **fn_kwargs)
-        if isinstance(ret, type(self)):
+
+        # Evaluate the return for as long as a deferred expression returns
+        # another deferred expression.
+        while isinstance(ret, _DeferredExpression):
            ret = ret.evaluate()

+        # Cache the results for any subsequent evaluate calls.
+        self._cached = (ret,)
        return ret

@@ -355,3 +360,34 @@ def __abs__(a):
    @deferrable
    def __invert__(a):
        return ~a
+
+
+class _DeferredPerformanceExpression(_DeferredExpression):
+    '''Represents a performance function whose evaluation has been deferred.
+
+    It extends the :class:`_DeferredExpression` class by adding the ``unit``
+    attribute. This attribute represents the unit of the performance
+    metric to be extracted by the performance function.
+    '''
+
+    def __init__(self, fn, unit, *args, **kwargs):
+        super().__init__(fn, *args, **kwargs)
+
+        if not isinstance(unit, str):
+            raise TypeError(
+                'performance units must be a string'
+            )
+
+        self._unit = unit
+
+    @classmethod
+    def construct_from_deferred_expr(cls, expr, unit):
+        if not isinstance(expr, _DeferredExpression):
+            raise TypeError("'expr' argument is not an instance of the "
+                            "_DeferredExpression class")
+
+        return cls(expr._fn, unit, *(expr._args), **(expr._kwargs))
+
+    @property
+    def unit(self):
+        return self._unit
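
A rough sketch of how these internals compose, for review purposes; the regex and file name are made up, and the leading underscores mark this as internal API:

from reframe.core.deferrable import _DeferredPerformanceExpression
import reframe.utility.sanity as sn

# Attach a unit to an existing deferred expression
expr = sn.extractsingle(r'bw:\s+(\S+)', 'out.txt', 1, float)
perf_expr = _DeferredPerformanceExpression.construct_from_deferred_expr(expr, 'GB/s')
assert perf_expr.unit == 'GB/s'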
33 changes: 14 additions & 19 deletions reframe/core/hooks.py
@@ -128,25 +128,6 @@ def __repr__(self):
class HookRegistry:
    '''Global hook registry.'''

-    @classmethod
-    def create(cls, namespace):
-        '''Create a hook registry from a class namespace.
-
-        Hook functions have an `_rfm_attach` attribute that specify the stages
-        of the pipeline where they must be attached. Dependencies will be
-        resolved first in the post-setup phase if not assigned elsewhere.
-        '''
-
-        local_hooks = util.OrderedSet()
-        for v in namespace.values():
-            if hasattr(v, '_rfm_attach'):
-                local_hooks.add(Hook(v))
-            elif hasattr(v, '_rfm_resolve_deps'):
-                v._rfm_attach = ['post_setup']
-                local_hooks.add(Hook(v))
-
-        return cls(local_hooks)
-
    def __init__(self, hooks=None):
        self.__hooks = util.OrderedSet()
        if hooks is not None:
@@ -161,6 +142,20 @@ def __getattr__(self, name):

    def __iter__(self):
        return iter(self.__hooks)

+    def add(self, v):
+        '''Add a value to the hook registry if it meets the conditions.
+
+        Hook functions have an `_rfm_attach` attribute that specifies the
+        stages of the pipeline where they must be attached. Dependencies will
+        be resolved first in the post-setup phase if not assigned elsewhere.
+        '''
+
+        if hasattr(v, '_rfm_attach'):
+            self.__hooks.add(Hook(v))
+        elif hasattr(v, '_rfm_resolve_deps'):
+            v._rfm_attach = ['post_setup']
+            self.__hooks.add(Hook(v))
+
    def update(self, hooks, *, denied_hooks=None):
        '''Update the hook registry with the hooks from another hook registry.
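
Net effect of this refactoring: instead of building the registry in one shot from a class namespace via the removed create() classmethod, callers now feed candidate values in incrementally. A rough sketch of the new call pattern, where namespace stands for the metaclass namespace:

registry = HookRegistry()
for v in namespace.values():
    registry.add(v)   # replaces HookRegistry.create(namespace)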