Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
4a45871
add container support to the pipeline
rsarm Jul 2, 2019
fbc5bc6
fix style
rsarm Jul 3, 2019
f1c4af7
fix bugs
rsarm Jul 3, 2019
1809cd8
fix offline comments
rsarm Jul 4, 2019
7827509
fix comments
rsarm Jul 4, 2019
f3fadee
add container_platform to cscs config
rsarm Jul 5, 2019
6e215f3
merge master and fix conflicts
rsarm Jul 5, 2019
e2ceff8
fix comments
rsarm Jul 5, 2019
f4f6d49
fix comments
rsarm Jul 8, 2019
f40496a
add config for container platform
rsarm Jul 29, 2019
46a1cb6
moving logic to base class
rsarm Sep 18, 2019
a0c19fd
add shifter and singularity
rsarm Sep 18, 2019
4f895f4
adding sarus
rsarm Sep 19, 2019
ff80b7f
fix bug
rsarm Sep 19, 2019
3aae3a7
Merge branch 'master' into containers-pipeline
Sep 20, 2019
257a32b
bug fix
rsarm Sep 20, 2019
6c97bd2
fix some of the comments
rsarm Sep 26, 2019
845684d
fix wrong mount syntax in singularity
rsarm Sep 26, 2019
10c82ae
fix comments
rsarm Sep 27, 2019
dab07db
fix comments
rsarm Sep 30, 2019
c0de8d7
Merge branch 'master' into containers-pipeline
Sep 30, 2019
e776916
fix comments
rsarm Oct 4, 2019
d005c50
Merge branch 'master' into containers-pipeline
Oct 10, 2019
c2f1c63
Fix PEP8 issues
Oct 10, 2019
0f5a95b
Rename some functions of the container platforms interface
Oct 10, 2019
ae0960d
Merge branch 'master' into containers-pipeline
Oct 11, 2019
9d30bdc
Fix PEP8 issues
Oct 11, 2019
27bc77c
Enhance unit tests
Oct 11, 2019
5907b36
Fix unit tests for Docker backend
Oct 13, 2019
04f12fa
Merge branch 'master' into containers-pipeline
Oct 13, 2019
cf33912
Remove unnecessary member variables
Oct 14, 2019
ac5d658
Merge branch 'containers-pipeline' of github.com:rsarm/reframe into c…
Oct 14, 2019
afac2ff
Skip Singularity unit test on Cray CLE6
Oct 14, 2019
e1a1683
Merge branch 'containers-pipeline' of github.com:rsarm/reframe into c…
Oct 14, 2019
b099fb4
Fix unit tests deprecation warning
Oct 14, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 32 additions & 1 deletion config/cscs.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,11 @@ class ReframeSettings:
},
'compute': {
'scheduler': 'nativeslurm',
'container_platforms': {
'ShifterNG': {
'modules': ['shifter-ng']
}
},
'environs': ['PrgEnv-cray', 'PrgEnv-gnu',
'PrgEnv-intel', 'PrgEnv-pgi'],
'descr': 'Intel Xeon Phi',
Expand All @@ -94,6 +99,14 @@ class ReframeSettings:

'gpu': {
'scheduler': 'nativeslurm',
'container_platforms': {
'ShifterNG': {
'modules': ['shifter-ng']
},
'Singularity': {
'modules': ['singularity']
}
},
'modules': ['daint-gpu'],
'access': ['--constraint=gpu'],
'environs': ['PrgEnv-cray', 'PrgEnv-gnu',
Expand All @@ -107,6 +120,14 @@ class ReframeSettings:

'mc': {
'scheduler': 'nativeslurm',
'container_platforms': {
'ShifterNG': {
'modules': ['shifter-ng']
},
'Singularity': {
'modules': ['singularity']
}
},
'modules': ['daint-mc'],
'access': ['--constraint=mc'],
'environs': ['PrgEnv-cray', 'PrgEnv-gnu',
Expand Down Expand Up @@ -140,6 +161,11 @@ class ReframeSettings:

'gpu': {
'scheduler': 'nativeslurm',
'container_platforms': {
'Singularity': {
'modules': ['singularity']
},
},
'modules': ['daint-gpu'],
'access': ['--constraint=gpu'],
'environs': ['PrgEnv-cray', 'PrgEnv-cray_classic',
Expand All @@ -154,6 +180,11 @@ class ReframeSettings:

'mc': {
'scheduler': 'nativeslurm',
'container_platforms': {
'Singularity': {
'modules': ['singularity']
},
},
'modules': ['daint-mc'],
'access': ['--constraint=mc'],
'environs': ['PrgEnv-cray', 'PrgEnv-cray_classic',
Expand Down Expand Up @@ -293,7 +324,7 @@ class ReframeSettings:
'modules': [],
'access': [],
'environs': ['builtin-gcc'],
'descr': 'Login nodes'
'descr': 'Login nodes',
}
}
}
Expand Down
29 changes: 20 additions & 9 deletions reframe/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,14 +199,25 @@ def create_env(system, partition, name):
part_access = partconfig.get('access', [])
part_resources = partconfig.get('resources', {})
part_max_jobs = partconfig.get('max_jobs', 1)
system.add_partition(SystemPartition(name=part_name,
descr=part_descr,
scheduler=part_scheduler,
launcher=part_launcher,
access=part_access,
environs=part_environs,
resources=part_resources,
local_env=part_local_env,
max_jobs=part_max_jobs))
part = SystemPartition(name=part_name,
descr=part_descr,
scheduler=part_scheduler,
launcher=part_launcher,
access=part_access,
environs=part_environs,
resources=part_resources,
local_env=part_local_env,
max_jobs=part_max_jobs)

container_platforms = partconfig.get('container_platforms', {})
for cp, env_spec in container_platforms.items():
cp_env = m_env.Environment(
name='__rfm_env_%s' % cp,
modules=env_spec.get('modules', []),
variables=env_spec.get('variables', {})
)
part.add_container_env(cp, cp_env)

system.add_partition(part)

self._systems[sys_name] = system
103 changes: 82 additions & 21 deletions reframe/core/containers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,24 @@ class ContainerPlatform(abc.ABC):
'''The abstract base class of any container platform.

Concrete container platforms inherit from this class and must override the
:func:`emit_prepare_cmds` and :func:`emit_launch_cmds` abstract functions.
:func:`emit_prepare_commands()` and :func:`launch_command()` abstract
methods.
'''

registry = fields.TypedField('registry', str, type(None))
image = fields.TypedField('image', str, type(None))
requires_mpi = fields.TypedField('requires_mpi', bool)
commands = fields.TypedField('commands', typ.List[str])
mount_points = fields.TypedField('mount_points',
typ.List[typ.Tuple[str, str]])
workdir = fields.TypedField('workdir', str, type(None))

def __init__(self):
self.registry = None
self.image = None
self.requires_mpi = False
self.commands = []
self.mount_points = []
self.workdir = None
self.workdir = '/rfm_workdir'

@abc.abstractmethod
def emit_prepare_cmds(self):
def emit_prepare_commands(self):
'''Returns commands that are necessary before running with this
container platform.

Expand All @@ -41,7 +38,7 @@ def emit_prepare_cmds(self):
'''

@abc.abstractmethod
def emit_launch_cmds(self):
def launch_command(self):
'''Returns the command for running with this container platform.

:raises: `ContainerError` in case of errors.
Expand All @@ -50,10 +47,7 @@ def emit_launch_cmds(self):
This method is relevant only to developers of new container
platforms.
'''
if self.registry:
self.image = '/'.join([self.registry, self.image])

@abc.abstractmethod
def validate(self):
'''Validates this container platform.

Expand All @@ -71,21 +65,88 @@ def validate(self):


class Docker(ContainerPlatform):
'''An implementation of ContainerPlatform to run containers with Docker.'''
'''An implementation of :class:`ContainerPlatform` for running containers
with Docker.'''

def emit_prepare_cmds(self):
pass
def emit_prepare_commands(self):
return []

def emit_launch_cmds(self):
super().emit_launch_cmds()
docker_opts = ['-v "%s":"%s"' % mp for mp in self.mount_points]
run_cmd = 'docker run %s %s bash -c ' % (' '.join(docker_opts),
self.image)
def launch_command(self):
super().launch_command()
run_opts = ['-v "%s":"%s"' % mp for mp in self.mount_points]
run_cmd = 'docker run --rm %s %s bash -c ' % (' '.join(run_opts),
self.image)
return run_cmd + "'" + '; '.join(
['cd ' + self.workdir] + self.commands) + "'"

def validate(self):
super().validate()

class ShifterNG(ContainerPlatform):
    '''Container platform backend that runs containers through ShifterNG.

    The name of the command-line program is kept in ``self._command`` so
    that a subclass can reuse the same command layout with another program.
    '''

    #: Whether MPI support is requested from the container runtime.
    #:
    #: When true, ``--mpi`` is appended to the options of the run command.
    #:
    #: :type: boolean
    #: :default: :class:`False`
    with_mpi = fields.TypedField('with_mpi', bool)

    def __init__(self):
        super().__init__()
        self.with_mpi = False

        # Program used for both the `pull` and the `run` sub-commands
        self._command = 'shifter'

    def emit_prepare_commands(self):
        '''Return the commands to run before launching the container.

        :returns: a single-element list with the command that pulls
            ``self.image``.
        '''
        pull_cmd = self._command + ' pull %s' % self.image
        return [pull_cmd]

    def launch_command(self):
        '''Return the full shell command that runs ``self.commands`` inside
        the container.

        Every mount point becomes a bind-mount option, and the container
        commands are executed through ``bash -c`` after changing to
        ``self.workdir``.
        '''
        super().launch_command()

        # One bind-mount option per (source, destination) pair
        mount_fmt = '--mount=type=bind,source="%s",destination="%s"'
        options = []
        for mount in self.mount_points:
            options.append(mount_fmt % mount)

        if self.with_mpi:
            options.append('--mpi')

        prefix = self._command + ' run %s %s bash -c ' % (
            ' '.join(options), self.image
        )

        # The container script always starts by entering the working
        # directory; the whole script is passed single-quoted to bash
        script = '; '.join(['cd ' + self.workdir] + self.commands)
        return prefix + "'" + script + "'"


class Sarus(ShifterNG):
    '''Container platform backend that runs containers through Sarus.

    The behaviour is inherited from :class:`ShifterNG`; only the name of
    the command-line program differs.
    '''

    def __init__(self):
        super().__init__()

        # Replace the program name set by the parent constructor
        self._command = 'sarus'


class Singularity(ContainerPlatform):
    '''Container platform backend that runs containers through
    Singularity.'''

    #: Whether CUDA support is requested from the container runtime.
    #:
    #: When true, ``--nv`` is appended to the options of the exec command.
    #:
    #: :type: boolean
    #: :default: :class:`False`
    with_cuda = fields.TypedField('with_cuda', bool)

    def __init__(self):
        super().__init__()
        self.with_cuda = False

    def emit_prepare_commands(self):
        '''Return the commands to run before launching the container.

        :returns: an empty list; this backend emits no preparation commands.
        '''
        return []

    def launch_command(self):
        '''Return the full shell command that runs ``self.commands`` inside
        the container.

        Every mount point becomes a ``-B`` bind option, and the container
        commands are executed through ``bash -c`` after changing to
        ``self.workdir``.
        '''
        super().launch_command()

        # One bind option per (source, destination) pair
        bind_fmt = '-B"%s:%s"'
        options = []
        for mount in self.mount_points:
            options.append(bind_fmt % mount)

        if self.with_cuda:
            options.append('--nv')

        prefix = 'singularity exec %s %s bash -c ' % (
            ' '.join(options), self.image
        )

        # The container script always starts by entering the working
        # directory; the whole script is passed single-quoted to bash
        script = '; '.join(['cd ' + self.workdir] + self.commands)
        return prefix + "'" + script + "'"


class ContainerPlatformField(fields.TypedField):
Expand Down
52 changes: 50 additions & 2 deletions reframe/core/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import reframe.utility.os_ext as os_ext
import reframe.utility.typecheck as typ
from reframe.core.buildsystems import BuildSystemField
from reframe.core.containers import ContainerPlatform, ContainerPlatformField
from reframe.core.deferrable import deferrable, _DeferredExpression, evaluate
from reframe.core.exceptions import (BuildError, DependencyError,
PipelineError, SanityError,
Expand Down Expand Up @@ -221,6 +222,19 @@ class RegressionTest(metaclass=RegressionTestMeta):
#: :default: ``[]``
executable_opts = fields.TypedField('executable_opts', typ.List[str])

#: The container platform to be used for this test.
#:
#: If the `self.container_platform` is defined on the test, both
#: `self.executable` and `self.executable_opts` are ignored.
#:
#: :type: :class:`str` or
#: :class:`reframe.core.containers.ContainerPlatform`.
#: :default: :class:`None`.
#:
#: .. versionadded:: 2.19
container_platform = ContainerPlatformField(
'container_platform', type(None))

#: List of shell commands to execute before launching this job.
#:
#: These commands do not execute in the context of ReFrame.
Expand Down Expand Up @@ -609,6 +623,7 @@ def _rfm_init(self, name=None, prefix=None):
self.tags = set()
self.maintainers = []
self._perfvalues = {}
self.container_platform = None

# Strict performance check, if applicable
self.strict_check = True
Expand Down Expand Up @@ -1071,6 +1086,27 @@ def run(self):
if not self.current_system or not self._current_partition:
raise PipelineError('no system or system partition is set')

if self.container_platform:
try:
cp_name = type(self.container_platform).__name__
cp_env = self._current_partition.container_environs[cp_name]
except KeyError as e:
raise PipelineError(
'container platform not configured '
'on the current partition: %s' % e) from None

self.container_platform.validate()
self.container_platform.mount_points += [
(self._stagedir, self.container_platform.workdir)
]

# We replace executable and executable_opts in case of containers
self.executable = self.container_platform.launch_command()
self.executable_opts = []
prepare_container = self.container_platform.emit_prepare_commands()
if prepare_container:
self.pre_run += prepare_container

self.job.num_tasks = self.num_tasks
self.job.num_tasks_per_node = self.num_tasks_per_node
self.job.num_tasks_per_core = self.num_tasks_per_core
Expand All @@ -1083,8 +1119,20 @@ def run(self):
commands = [*self.pre_run, ' '.join(exec_cmd), *self.post_run]
user_environ = env.Environment(type(self).__name__,
self.modules, self.variables.items())
environs = [self._current_partition.local_env, self._current_environ,
user_environ, self._cdt_environ]
environs = [
self._current_partition.local_env,
self._current_environ,
user_environ,
self._cdt_environ
]
if self.container_platform and cp_env:
environs = [
self._current_partition.local_env,
self._current_environ,
cp_env,
user_environ,
self._cdt_environ
]

with os_ext.change_dir(self._stagedir):
try:
Expand Down
10 changes: 10 additions & 0 deletions reframe/core/systems.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ class SystemPartition:
_environs = fields.TypedField('_environs', typ.List[Environment])
_resources = fields.TypedField('_resources', typ.Dict[str, typ.List[str]])
_local_env = fields.TypedField('_local_env', Environment, type(None))
_container_environs = fields.TypedField('_container_environs',
typ.Dict[str, Environment])

# maximum concurrent jobs
_max_jobs = fields.TypedField('_max_jobs', int)
Expand All @@ -35,6 +37,7 @@ def __init__(self, name, descr=None, scheduler=None, launcher=None,
self._resources = dict(resources)
self._max_jobs = max_jobs
self._local_env = local_env
self._container_environs = {}

# Parent system
self._system = None
Expand All @@ -52,6 +55,10 @@ def descr(self):
def environs(self):
return utility.SequenceView(self._environs)

@property
def container_environs(self):
    '''The container environments registered on this partition.

    The mapping is keyed by the name passed to
    :func:`add_container_env` and is returned wrapped in a
    ``utility.MappingView`` rather than as the underlying dictionary.
    '''
    registered = self._container_environs
    return utility.MappingView(registered)

@property
def fullname(self):
'''Return the fully-qualified name of this partition.
Expand Down Expand Up @@ -111,6 +118,9 @@ def launcher(self):
'''
return self._launcher

def add_container_env(self, env_name, environ):
    '''Register ``environ`` under ``env_name`` in this partition's
    container environments.

    An existing entry with the same name is overwritten.
    '''
    registered = self._container_environs
    registered[env_name] = environ

# Instantiate managed resource `name` with `value`.
def get_resource(self, name, **values):
ret = []
Expand Down
Loading