-
Notifications
You must be signed in to change notification settings - Fork 98
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #774 from jgphpc/UES-213_run
[test] Add sde roofline check
- Loading branch information
Showing
2 changed files
with
280 additions
and
0 deletions.
There are no files selected for viewing
151 changes: 151 additions & 0 deletions
151
cscs-checks/tools/profiling_and_debugging/intel_sde_berkeley_stream.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
import os | ||
|
||
import reframe as rfm | ||
import reframe.utility.sanity as sn | ||
|
||
|
||
class SdeBaseTest(rfm.RegressionTest):
    '''Shared configuration for the STREAM roofline checks run under Intel SDE.

    The benchmark is compiled from ``stream_mpi.c``, renamed to
    ``./stream.exe`` and launched through ``sde``. After the run,
    ``SDE/parse-sde.sh`` condenses the SDE output files into a report
    (``./stream.exe.rpt``) from which FLOPs and bytes are extracted.

    Concrete tests must set ``self.sdeflags`` (the command-line options given
    to ``sde``); it is read in :meth:`setup`.

    Background reading:
    - https://software.intel.com/en-us/articles/
      intel-software-development-emulator
    - https://bitbucket.org/dwdoerf/stream-ai-example/src/master/
    - https://www.nersc.gov/
      users/application-performance/measuring-arithmetic-intensity
    '''

    def __init__(self):
        super().__init__()
        self.descr = 'Roofline Analysis test with Intel SDE'
        self.maintainers = ['JG']
        self.tags = {'scs'}

        # Build: a single C source taken from the system resources directory.
        resources = self.current_system.resourcesdir
        self.sourcesdir = os.path.join(resources, 'roofline', 'sde')
        self.build_system = 'SingleSource'
        self.sourcepath = 'stream_mpi.c'
        self.build_system.ldflags = [
            '-g', '-O3', '-qopenmp', '-restrict',
            '-qopt-streaming-stores', 'always',
        ]

        # Run: the launched program is `sde`; the benchmark binary that was
        # built under that name is moved aside and passed to sde as target.
        launcher = 'sde'
        target = './stream.exe'
        self.executable = launcher
        self.target_executable = target
        self.sde = '%s.sde' % target
        self.rpt = '%s.rpt' % target

        exp = '/apps/dom/UES/jenkins/7.0.UP00/mc/easybuild/experimental'
        rename_binary = 'mv %s %s' % (launcher, target)
        use_modules = 'module use %s/modules/all' % exp
        self.pre_run = [rename_binary, use_modules, 'module load sde',
                        'sde -help']
        self.post_run = ['SDE/parse-sde.sh %s.* &> %s' % (self.sde, self.rpt)]

        # Default sanity check; subclasses replace it with reference checks.
        self.sanity_patterns = sn.assert_found('Total FLOPs =', self.rpt)

    @property
    @sn.sanity_function
    def arithmetic_intensity(self):
        '''Ratio of total FLOPs to total bytes, both read from the report.'''
        flops_re = r'^--->Total FLOPs = (?P<flops>\d+)'
        bytes_re = r'^--->Total Bytes = (?P<bytes>\d+)'
        total_flops = sn.extractsingle(flops_re, self.rpt, 'flops', int)
        total_bytes = sn.extractsingle(bytes_re, self.rpt, 'bytes', int)
        return total_flops/total_bytes

    @property
    @sn.sanity_function
    def gflops(self):
        '''Total FLOPs divided by (Triad time * kernel repetitions * 10**9).

        FLOPs come from the report; the Triad time (the pattern's
        ``avgtime`` group) and the repetition count come from stdout.
        '''
        total_flops = sn.extractsingle(
            r'^--->Total FLOPs = (?P<flops>\d+)', self.rpt, 'flops', int)
        triad_time = sn.extractsingle(
            r'^Triad:\s+\d+\.\d+\s+(?P<avgtime>\d+\.\d+)',
            self.stdout, 'avgtime', float)
        repetitions = sn.extractsingle(
            r'^Each kernel will be executed (?P<step>\d+)',
            self.stdout, 'step', int)
        return total_flops/(triad_time*repetitions*10**9)

    def setup(self, partition, environ, **job_opts):
        '''Pass the sde options to the executable and set the CPU binding.'''
        # `sdeflags` is provided by the concrete subclass.
        self.executable_opts = self.sdeflags
        super().setup(partition, environ, **job_opts)
        self.job.options = (['--cpu-bind=verbose'] if self.num_tasks == 36
                            else ['--cpu-bind=verbose,none'])
|
||
|
||
@rfm.parameterized_test(*[[ranks, size]
                          for ranks in [2]
                          for size in [100000000]])
# For parameter space study, you may want to use:
# for ranks in [36, 18, 12, 9, 6, 4, 3, 2, 1]
# for size in [400000000, 200000000, 100000000]])
class SdeBroadwellJ1Test(SdeBaseTest):
    '''STREAM roofline check on dom:mc with one task per core.

    36 OpenMP threads are divided evenly among ``num_ranks`` MPI ranks and
    multithreading is disabled.
    '''

    def __init__(self, num_ranks, arraysize):
        super().__init__()
        threads_per_rank = 36 // num_ranks
        tasks_per_core = 1

        self.valid_systems = ['dom:mc']
        self.valid_prog_environs = ['PrgEnv-intel']

        # Benchmark size and repetition count are fixed at compile time.
        array_size_flag = '-DSTREAM_ARRAY_SIZE=%s' % arraysize
        self.build_system.cppflags = ['-D_SDE', array_size_flag,
                                      '-DNTIMES=50']

        # Job geometry: all ranks on a single, exclusively used node.
        self.exclusive = True
        self.use_multithreading = False
        self.num_tasks = num_ranks
        self.num_tasks_per_node = num_ranks
        self.num_cpus_per_task = threads_per_rank
        self.num_tasks_per_core = tasks_per_core

        name_template = 'sde_n.{:010d}_MPI.{:03d}_OpenMP.{:03d}_j.{:01d}'
        self.name = name_template.format(arraysize, num_ranks,
                                         threads_per_rank, tasks_per_core)
        self.variables = {
            'CRAYPE_LINK_TYPE': 'dynamic',
            'OMP_NUM_THREADS': str(threads_per_rank),
        }

        # Whole sde command line, kept as a single option string.
        sde_options = ('%s -d -iform 1 -omix %s -i -top_blocks 500 '
                       '-global_region -start_ssc_mark 111:repeat '
                       '-stop_ssc_mark 222:repeat -- %s')
        self.sdeflags = [
            sde_options % ('-bdw', self.sde, self.target_executable)
        ]

        # References for Intel Broadwell CPU (E5-2695 v4):
        ref_ai = 0.0825
        ref_gflops = 9.773
        reference_checks = [
            sn.assert_reference(self.gflops, ref_gflops, -0.1, 0.3),
            sn.assert_reference(self.arithmetic_intensity, ref_ai, -0.1, 0.3),
        ]
        self.sanity_patterns = sn.all(reference_checks)
|
||
|
||
@rfm.parameterized_test(*[[ranks, size]
                          for ranks in [2]
                          for size in [100000000]])
# For parameter space study, you may want to use:
# for ranks in [72, 36, 24, 18, 12, 9, 8, 6, 4, 3, 2, 1]
# for size in [400000000, 200000000, 100000000]])
class SdeBroadwellJ2Test(SdeBaseTest):
    '''STREAM roofline check on dom:mc with two tasks per core.

    72 OpenMP threads are divided evenly among ``num_ranks`` MPI ranks and
    multithreading is enabled.
    '''

    def __init__(self, num_ranks, arraysize):
        super().__init__()
        threads_per_rank = 72 // num_ranks
        tasks_per_core = 2

        self.valid_systems = ['dom:mc']
        self.valid_prog_environs = ['PrgEnv-intel']

        # Benchmark size and repetition count are fixed at compile time.
        array_size_flag = '-DSTREAM_ARRAY_SIZE=%s' % arraysize
        self.build_system.cppflags = ['-D_SDE', array_size_flag,
                                      '-DNTIMES=50']

        # Job geometry: all ranks on a single, exclusively used node.
        self.exclusive = True
        self.use_multithreading = True
        self.num_tasks = num_ranks
        self.num_tasks_per_node = num_ranks
        self.num_cpus_per_task = threads_per_rank
        self.num_tasks_per_core = tasks_per_core

        name_template = 'sde_n.{:010d}_MPI.{:03d}_OpenMP.{:03d}_j.{:01d}'
        self.name = name_template.format(arraysize, num_ranks,
                                         threads_per_rank, tasks_per_core)
        self.variables = {
            'CRAYPE_LINK_TYPE': 'dynamic',
            'OMP_NUM_THREADS': str(threads_per_rank),
        }

        # Whole sde command line, kept as a single option string.
        sde_options = ('%s -d -iform 1 -omix %s -i -top_blocks 500 '
                       '-global_region -start_ssc_mark 111:repeat '
                       '-stop_ssc_mark 222:repeat -- %s')
        self.sdeflags = [
            sde_options % ('-bdw', self.sde, self.target_executable)
        ]

        # References for Intel Broadwell CPU (E5-2695 v4):
        ref_ai = 0.0822
        ref_gflops = 9.602
        reference_checks = [
            sn.assert_reference(self.gflops, ref_gflops, -0.1, 0.3),
            sn.assert_reference(self.arithmetic_intensity, ref_ai, -0.1, 0.3),
        ]
        self.sanity_patterns = sn.all(reference_checks)
129 changes: 129 additions & 0 deletions
129
cscs-checks/tools/profiling_and_debugging/intel_sde_roofline.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
import os | ||
|
||
import reframe as rfm | ||
import reframe.utility.sanity as sn | ||
|
||
|
||
@rfm.parameterized_test(*[[repeat, toolsversion, datalayout]
                          for repeat in ['100000']
                          for toolsversion in ['8.35.0']
                          for datalayout in ['G3_AOS_SCALAR', 'G3_SOA_SCALAR',
                                             'G3_AOS_VECTOR', 'G3_SOA_VECTOR']
                          ])
class IntelRooflineSdeTest(rfm.RegressionTest):
    '''This test checks the values reported by Intel SDE for roofline modeling:
    - https://software.intel.com/en-us/articles/
      intel-software-development-emulator
    - https://bitbucket.org/dwdoerf/stream-ai-example/src/master/
    - https://www.nersc.gov/
      users/application-performance/measuring-arithmetic-intensity

    Parameters:
        repeat: value substituted for ``XXXX`` in the patched source template.
        toolsversion: SDE version string expected in the job's stdout.
        datalayout: value substituted for ``YYYY`` in the source template;
            also selects the performance references.
    '''

    def __init__(self, repeat, toolsversion, datalayout):
        super().__init__()
        self.descr = 'Roofline Analysis test with Intel SDE'
        self.valid_systems = ['dom:mc']
        # Reporting MFLOPS is not available on Intel Haswell cpus, see
        # https://www.intel.fr/content/dam/www/public/us/en/documents/manuals/
        # 64-ia-32-architectures-software-developer-vol-1-manual.pdf
        self.valid_prog_environs = ['PrgEnv-intel']
        self.sourcesdir = os.path.join(self.current_system.resourcesdir,
                                       'roofline', 'intel_advisor')
        self.build_system = 'SingleSource'
        self.sourcepath = '_roofline.cpp'
        # The launched program is `sde`; the benchmark binary built under
        # that name is renamed in pre_run and passed to sde as its target.
        self.executable = 'sde'
        self.target_executable = './roof.exe'
        self.sde = '%s.sde' % self.target_executable
        self.rpt = '%s.rpt' % self.target_executable
        # Patch the template, then generate the actual source by filling in
        # the repeat count and the data layout.
        self.prebuild_cmd = [
            'patch < SDE/roofline_template.patch',
            'sed -e "s-XXXX-%s-" -e "s-YYYY-%s-" %s &> %s' %
            (repeat, datalayout, 'roofline_template.cpp', '_roofline.cpp')
        ]
        self.build_system.cppflags = ['-D_SDE']
        self.build_system.ldflags = ['-g', '-O3', '-qopenmp', '-restrict',
                                     '-qopt-streaming-stores', 'always',
                                     '-std=c++11']
        self.exclusive = True
        self.num_tasks = 1
        self.num_tasks_per_node = 1
        self.num_cpus_per_task = 1
        self.num_tasks_per_core = 1
        self.use_multithreading = False
        self.variables = {
            'CRAYPE_LINK_TYPE': 'dynamic',
            'OMP_NUM_THREADS': str(self.num_cpus_per_task),
        }
        exp = '/apps/dom/UES/jenkins/7.0.UP00/mc/easybuild/experimental'
        self.pre_run = [
            'mv %s %s' % (self.executable, self.target_executable),
            'module use %s/modules/all' % exp,
            'module load sde',
            'sde -help'
        ]
        self.sdeflags = ['%s -d -iform 1 -omix %s -i -top_blocks 500 '
                         '-global_region -start_ssc_mark 111:repeat '
                         '-stop_ssc_mark 222:repeat -- %s' %
                         ('-bdw', self.sde, self.target_executable)]
        self.executable_opts = self.sdeflags
        # Condense the SDE output files into the report read by the
        # performance functions below.
        self.post_run = ['SDE/parse-sde.sh %s.* &> %s' % (self.sde, self.rpt)]
        self.maintainers = ['JG']
        self.tags = {'scs'}
        # Both conditions must hold. Previously the report check was assigned
        # first and then silently overwritten by the version check.
        self.sanity_patterns = sn.all([
            sn.assert_found('Total FLOPs =', self.rpt),
            sn.assert_eq(sn.extractsingle(
                r'^Intel\(R\) Software Development Emulator\. Version: '
                r'(?P<toolsversion>\d+\.\d+\.\d+)', self.stdout,
                'toolsversion'), toolsversion),
        ])
        # References for Intel Broadwell CPU (E5-2695 v4):
        references = {
            'G3_AOS_SCALAR': {
                'dom:mc': {
                    'gflops': (0.596, -0.1, 0.3, 'Gflop/s'),
                    'ai': (0.16, -0.05, 0.05, 'flop/byte')
                }
            },
            'G3_SOA_SCALAR': {
                'dom:mc': {
                    'gflops': (0.612, -0.1, 0.3, 'Gflop/s'),
                    'ai': (0.16, -0.05, 0.05, 'flop/byte')
                }
            },
            'G3_AOS_VECTOR': {
                'dom:mc': {
                    'gflops': (1.152, -0.1, 0.3, 'Gflop/s'),
                    'ai': (0.125, -0.05, 0.05, 'flop/byte')
                }
            },
            'G3_SOA_VECTOR': {
                'dom:mc': {
                    'gflops': (1.125, -0.1, 0.3, 'Gflop/s'),
                    'ai': (0.16, -0.05, 0.05, 'flop/byte')
                }
            },
        }
        self.reference = references[datalayout]
        self.perf_patterns = {
            'gflops': self.gflops,
            'ai': self.arithmetic_intensity,
        }

    @property
    @sn.sanity_function
    def arithmetic_intensity(self):
        '''Ratio of total FLOPs to total bytes, both read from the report.'''
        flops = sn.extractsingle(r'^--->Total FLOPs = (?P<flops>\d+)',
                                 self.rpt, 'flops', int)
        # Named `nbytes` to avoid shadowing the `bytes` builtin.
        nbytes = sn.extractsingle(r'^--->Total Bytes = (?P<bytes>\d+)',
                                  self.rpt, 'bytes', int)
        return flops/nbytes

    @property
    @sn.sanity_function
    def gflops(self):
        '''Total FLOPs from the report per second of elapsed time, in 10**9.

        The elapsed time is printed in milliseconds on stdout.
        '''
        flops = sn.extractsingle(r'^--->Total FLOPs = (?P<flops>\d+)',
                                 self.rpt, 'flops', int)
        msec = sn.extractsingle(r'^elapsed time: (?P<msec>\d+)ms', self.stdout,
                                'msec', float)
        return (flops/((msec/1000))/10**9)