diff --git a/cscs-checks/system/io/ior_check.py b/cscs-checks/system/io/ior_check.py index cd2b6aab87..4abdda5d9d 100644 --- a/cscs-checks/system/io/ior_check.py +++ b/cscs-checks/system/io/ior_check.py @@ -1,231 +1,156 @@ import os +import getpass import reframe as rfm import reframe.utility.sanity as sn class IorCheck(rfm.RegressionTest): - def __init__(self, fs_mount_point): + def __init__(self, base_dir): super().__init__() - self.descr = 'IOR check (%s)' % fs_mount_point - self.tags = {'ops', fs_mount_point} - - if fs_mount_point == '/scratch/snx1600': - self.valid_systems = ['daint:gpu'] - self.num_tasks = 2400 - self.num_tasks_per_node = 12 - elif fs_mount_point == '/scratch/snx1600tds': - self.valid_systems = ['dom:gpu'] - self.num_tasks = 192 - self.num_tasks_per_node = 12 - elif fs_mount_point == '/scratch/snx2000': - self.valid_systems = ['dom:gpu'] - if self.current_system.name == 'dom': - self.num_tasks = 192 - self.num_tasks_per_node = 12 - else: - self.num_tasks = 1 - self.num_tasks_per_node = 1 - elif fs_mount_point == '/scratch/snx2000tds': - self.valid_systems = ['dom:gpu'] - if self.current_system.name == 'dom': - self.num_tasks = 192 - self.num_tasks_per_node = 12 - else: - self.num_tasks = 1 - self.num_tasks_per_node = 1 - elif fs_mount_point == '/scratch/snx3000': - self.valid_systems = ['daint:gpu'] - self.num_tasks = 720 - self.num_tasks_per_node = 12 - elif fs_mount_point == '/users': - self.valid_systems = ['daint:gpu', 'dom:gpu'] - self.num_tasks = 1 - self.num_tasks_per_node = 1 - self.tags |= {'maintenance'} - elif fs_mount_point == '/apps': - self.valid_systems = ['daint:gpu', 'dom:gpu'] - self.num_tasks = 1 - self.num_tasks_per_node = 1 - - self.valid_prog_environs = ['PrgEnv-cray'] - self.sourcesdir = os.path.join(self.current_system.resourcesdir, 'IOR') - self.executable = os.path.join('src', 'C', 'IOR') - self.build_system = 'Make' - self.build_system.options = ['posix', 'mpiio'] - self.build_system.max_concurrency = 1 - self.num_gpus_per_node = 0 - self.fs_mount_point = fs_mount_point - self.fs_reference = { - '/scratch/snx1600': { - 'read_bw': (64326, -0.2, None), - 'write_bw': (151368, -0.2, None) - }, + self.descr = 'IOR check (%s)' % base_dir + self.tags = {'ops', base_dir} + + self.base_dir = base_dir + self.username = getpass.getuser() + self.test_dir = os.path.join(self.base_dir, self.username, '.ior') + self.pre_run = ['mkdir -p ' + self.test_dir] + self.test_file = os.path.join(self.test_dir, 'ior.dat') + self.fs = { '/scratch/snx1600tds': { - 'read_bw': (1, -0.5, None), - 'write_bw': (1, -0.5, None) + 'valid_systems': ['dom:gpu'], + 'dom': { + 'num_tasks': 2, + # We will be using 1 task per node to avoid cache + # effects on read. The option "-C" could be used + # with many tasks per node. 8 tasks are enough + # to get ~peak perf (write 5.4 GB/s, read 4.3 GB/s) + } }, - '/scratch/snx2000': { - 'read_bw': (12310, -0.2, None), - 'write_bw': (11380, -0.2, None) + '/scratch/snx1600': { + 'valid_systems': ['daint:gpu'], + 'daint': {} }, - '/scratch/snx2000tds': { - 'read_bw': (1, -0.5, None), - 'write_bw': (1, -0.5, None) + '/scratch/snx3000tds': { + 'valid_systems': ['dom:gpu'], + 'dom': { + 'num_tasks': 4, + } }, '/scratch/snx3000': { - 'read_bw': (70753, -0.2, None), - 'write_bw': (83552, -0.2, None) - }, - '/apps': { - 'read_bw': (204, -1.0, None), - 'write_bw': (319, -1.0, None) + 'valid_systems': ['daint:gpu'], + 'daint': {} }, '/users': { - 'read_bw': (83, -1.0, None), - 'write_bw': (304, -1.0, None) + 'valid_systems': ['daint:gpu', 'dom:gpu', 'fulen:normal'], + 'ior_block_size': '8g', + 'daint': {}, + 'dom': {}, + 'fulen': { + 'valid_prog_environs': ['PrgEnv-gnu'] + } }, - '/mnt/lnec': { - 'read_bw': (0, None, None), - 'write_bw': (0, None, None) + '/scratch/shared/fulen': { + 'valid_systems': ['fulen:normal'], + 'ior_block_size': '48g', + 'fulen': { + 'num_tasks': 8, + 'valid_prog_environs': ['PrgEnv-gnu'] + } } } + # Setting some default values + for data in self.fs.values(): + data.setdefault('ior_block_size', '24g') + data.setdefault('ior_access_type', 'MPIIO') + data.setdefault( + 'reference', + { + 'read_bw': (0, None, None, 'MiB/s'), + 'write_bw': (0, None, None, 'MiB/s') + } + ) + data.setdefault('dummy', {}) # entry for unknown systems + + cur_sys = self.current_system.name + if cur_sys not in self.fs[base_dir]: + cur_sys = 'dummy' + + self.valid_systems = self.fs[base_dir]['valid_systems'] + self.num_tasks = self.fs[base_dir][cur_sys].get('num_tasks', 1) + tpn = self.fs[base_dir][cur_sys].get('num_tasks_per_node', 1) + self.num_tasks_per_node = tpn + + self.ior_block_size = self.fs[base_dir]['ior_block_size'] + self.ior_access_type = self.fs[base_dir]['ior_access_type'] + self.executable_opts = ['-B', '-F', '-C ', '-Q 1', '-t 4m', '-D 30', + '-b', self.ior_block_size, + '-a', self.ior_access_type, + '-o', self.test_file] + self.sourcesdir = os.path.join(self.current_system.resourcesdir, 'IOR') + self.executable = os.path.join('src', 'C', 'IOR') + self.build_system = 'Make' + + vpe = 'valid_prog_environs' + penv = self.fs[base_dir][cur_sys].get(vpe, ['PrgEnv-cray']) + self.valid_prog_environs = penv + + self.build_system.options = ['posix', 'mpiio'] + self.build_system.max_concurrency = 1 + self.num_gpus_per_node = 0 + # Default umask is 0022, which generates file permissions -rw-r--r-- # we want -rw-rw-r-- so we set umask to 0002 os.umask(2) - self.time_limit = (0, 7, 0) + self.time_limit = (0, 5, 0) # Our references are based on fs types but regression needs reference # per system. self.reference = { - '*': self.fs_reference[self.fs_mount_point] + '*': self.fs[base_dir]['reference'] } - self.maintainers = ['SO', 'MP'] - + self.maintainers = ['SO', 'GLR'] -@rfm.parameterized_test(['/scratch/snx1600', 'MPIIO'], - ['/scratch/snx1600tds', 'MPIIO'], - ['/scratch/snx2000', 'MPIIO'], - ['/scratch/snx2000tds', 'MPIIO'], - ['/scratch/snx3000', 'MPIIO'], - ['/users', 'POSIX'], - ['/apps', 'POSIX']) -class IorReadCheck(IorCheck): - def __init__(self, fs_mount_point, ior_type): - super().__init__(fs_mount_point) - self.test_file = os.path.join(self.fs_mount_point, '.ior', 'read', - 'ior_write.dat') - if ior_type == 'MPIIO': - self.executable_opts = ['-r', '-a MPIIO', '-B', '-E', '-F', - '-t 64m', '-b 32g', '-D 300', '-k', - '-o', self.test_file] - elif ior_type == 'POSIX': - self.executable_opts = ['-r', '-a POSIX', '-B', '-E', '-F', - '-t 1m', '-b 100m', '-D 60', '-k', - '-o', self.test_file] - - self.sanity_patterns = sn.assert_found(r'^Max Read: ', self.stdout) - self.perf_patterns = { - 'read_bw': sn.extractsingle( - r'^Max Read:\s+(?P\S+) MiB/sec', self.stdout, - 'read_bw', float) - } - self.tags |= {'read'} + if self.current_system.name == 'dom': + self.tags = {'production'} -@rfm.parameterized_test(['/scratch/snx1600', 'MPIIO'], - ['/scratch/snx1600tds', 'MPIIO'], - ['/scratch/snx2000', 'MPIIO'], - ['/scratch/snx2000tds', 'MPIIO'], - ['/scratch/snx3000', 'MPIIO'], - ['/users', 'POSIX'], - ['/apps', 'POSIX']) +@rfm.parameterized_test(['/scratch/snx1600tds'], + ['/scratch/snx1600'], + ['/scratch/snx3000tds'], + ['/scratch/snx3000'], + ['/users'], + ['/scratch/shared/fulen']) class IorWriteCheck(IorCheck): - def __init__(self, fs_mount_point, ior_type): - super().__init__(fs_mount_point) - self.test_file = os.path.join(self.fs_mount_point, '.ior', 'write', - 'ior_write.dat') - if ior_type == 'MPIIO': - self.executable_opts = ['-w', '-a MPIIO', '-B', '-E', '-F', - '-t 64m', '-b 46g', '-D 300', - '-o', self.test_file] - elif ior_type == 'POSIX': - self.executable_opts = ['-w', '-a POSIX', '-B', '-E', '-F', - '-t 1m', '-b 100m', '-D 60' - '-o', self.test_file] - + def __init__(self, base_dir): + super().__init__(base_dir) + self.executable_opts += ['-w', '-k'] self.sanity_patterns = sn.assert_found(r'^Max Write: ', self.stdout) self.perf_patterns = { 'write_bw': sn.extractsingle( r'^Max Write:\s+(?P\S+) MiB/sec', self.stdout, 'write_bw', float) } + # Convert from MiB/s to bytes/s self.tags |= {'write'} -# FIXME: This test is obsolete; it is kept only for reference. -class IoMonchAcceptanceBase(IorCheck): - def __init__(self, fs_mount_point, ior_type, num_tasks): - super().__init__(fs_mount_point) - self.test_file = os.path.join(self.fs_mount_point, os.getenv('USER'), - 'ior_write.dat') - self.valid_systems = ['monch:compute'] - self.valid_prog_environs = ['PrgEnv-gnu'] - self.num_tasks = num_tasks - self.num_tasks_per_node = 20 - reference_by_num_tasks = { - 40: { - 'read_bw': (4343.66, -0.2, None), - 'write_bw': (3625.23, -0.2, None) - }, - 80: { - 'read_bw': (9208.83, -0.2, None), - 'write_bw': (6725.39, -0.2, None) - }, - 160: { - 'read_bw': (12365.53, -0.2, None), - 'write_bw': (8073.84, -0.2, None) - }, - } - self.reference = { - 'monch:compute': reference_by_num_tasks[self.num_tasks] - } - self.tags = {'monch_acceptance'} - - -# FIXME: This test is obsolete; it is kept only for reference. -@rfm.parameterized_test(*(['/mnt/lnec', 'MPIIO', num_tasks] - for num_tasks in [40, 80, 160])) -class IorReadScratchMonchAcceptanceCheck(IoMonchAcceptanceBase): - def __init__(self, fs_mount_point, ior_type, num_tasks): - super().__init__(fs_mount_point, ior_type, num_tasks) - if ior_type == 'MPIIO': - self.executable_opts = ['-r', '-a MPIIO', '-B', '-E', '-F', - '-t 16m', '-b 8g', '-D 10', '-k', - '-o', self.test_file] +@rfm.parameterized_test(['/scratch/snx1600tds'], + ['/scratch/snx1600'], + ['/scratch/snx3000tds'], + ['/scratch/snx3000'], + ['/users'], + ['/scratch/shared/fulen']) +class IorReadCheck(IorCheck): + def __init__(self, base_dir): + super().__init__(base_dir) + self.executable_opts += ['-r'] self.sanity_patterns = sn.assert_found(r'^Max Read: ', self.stdout) self.perf_patterns = { 'read_bw': sn.extractsingle( r'^Max Read:\s+(?P\S+) MiB/sec', self.stdout, 'read_bw', float) } + # Convert from MiB/s to bytes/s self.tags |= {'read'} - - -# FIXME: This test is obsolete; it is kept only for reference. -@rfm.parameterized_test(*(['/mnt/lnec', 'MPIIO', num_tasks] - for num_tasks in [40, 80, 160])) -class IorWriteScratchMonchAcceptanceCheck(IoMonchAcceptanceBase): - def __init__(self, fs_mount_point, ior_type, num_tasks): - super().__init__(fs_mount_point, ior_type, num_tasks) - if ior_type == 'MPIIO': - self.executable_opts = ['-w', '-a MPIIO', '-B', '-E', '-F', - '-t 16m', '-b 8g', '-o', self.test_file] - self.sanity_patterns = sn.assert_found(r'^Max Write: ', self.stdout) - self.perf_patterns = { - 'write_bw': sn.extractsingle( - r'^Max Write:\s+(?P\S+) MiB/sec', self.stdout, - 'write_bw', float) - } - self.tags |= {'write'}