diff --git a/cscs-checks/apps/jupyter/check_ipcmagic.py b/cscs-checks/apps/jupyter/check_ipcmagic.py index 1596c54ad6..40a5d3fc1b 100644 --- a/cscs-checks/apps/jupyter/check_ipcmagic.py +++ b/cscs-checks/apps/jupyter/check_ipcmagic.py @@ -11,55 +11,79 @@ @rfm.simple_test class IPCMagicCheck(rfm.RunOnlyRegressionTest): - def __init__(self): - self.descr = 'Distributed training with TensorFlow using ipyparallel' - self.valid_systems = ['daint:gpu', 'dom:gpu'] - self.valid_prog_environs = ['PrgEnv-gnu'] - self.modules = [ - # FIXME: Use the default ipcmagic version when fixed - f'ipcmagic/0.1-CrayGNU-{osext.cray_cdt_version()}', - f'Horovod/0.21.0-CrayGNU-{osext.cray_cdt_version()}-tf-2.4.0' - ] - self.num_tasks = 2 - self.num_tasks_per_node = 1 - self.executable = 'ipython' - self.executable_opts = ['tf-hvd-sgd-ipc-tf2.py'] - nids = sn.extractall(r'nid(?P\d+)', - self.stdout, 'nid', str) - self.sanity_patterns = sn.all([ - sn.assert_ne(nids, []), - sn.assert_ne(nids[0], nids[1]) - ]) - self.reference = { - 'daint:gpu': { - 'slope': (2.0, -0.1, 0.1, None), - 'offset': (0.0, -0.1, 0.1, None), - 'retries': (0, None, None, None), - 'time': (10, None, None, 's'), - }, - 'dom:gpu': { - 'slope': (2.0, -0.1, 0.1, None), - 'offset': (0.0, -0.1, 0.1, None), - 'retries': (0, None, None, None), - 'time': (10, None, None, 's'), - } - } - self.perf_patterns = { - 'slope': sn.extractsingle(r'slope=(?P\S+)', - self.stdout, 'slope', float), - 'offset': sn.extractsingle(r'offset=(?P\S+)', - self.stdout, 'offset', float), - 'retries': 4 - sn.count(sn.findall(r'IPCluster is already running', - self.stdout)), - 'time': sn.extractsingle(r'IPCluster is ready\!\s+' - r'\((?P