Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions ci-scripts/ci-runner.bash
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,18 @@ else

checked_exec ./test_reframe.py --rfm-user-config=config/cscs.py

echo "==================================="
echo "Running unit tests with PBS backend"
echo "==================================="

if [[ $(hostname) =~ dom ]]; then
PATH_save=$PATH
export PATH=/apps/dom/UES/karakasv/slurm-wrappers/bin:$PATH
checked_exec ./test_reframe.py --rfm-user-config=config/cscs-pbs.py
export PATH=$PATH_save
fi


# Find modified or added user checks
userchecks=( $(git log --name-status --oneline --no-merges -1 | \
awk '/^[AM]/ { print $2 } /^R0[0-9][0-9]/ { print $3 }' | \
Expand Down
161 changes: 161 additions & 0 deletions config/cscs-pbs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
#
# Minimal CSCS configuration for testing the PBS backend
#


class ReframeSettings:
    """Minimal CSCS settings for testing ReFrame's PBS scheduler backend.

    Exposes the same read-only interface as the main ``config/cscs.py``
    settings, but wires the ``dom`` compute partitions to the
    ``pbs+mpiexec`` scheduler/launcher combination instead of Slurm.
    All configuration data is held in private class attributes and is
    published through read-only properties.
    """

    # NOTE(review): the `version` property below reads `self._version`,
    # which was not defined anywhere in this file and would raise
    # AttributeError on first access.  Defined here to match
    # reframe.VERSION ('2.13-dev1') -- TODO confirm the intended value.
    _version = '2.13-dev1'

    _reframe_module = 'reframe'
    _job_poll_intervals = [1, 2, 3]  # seconds between successive job state polls
    _job_submit_timeout = 60         # seconds to wait for a job submission to complete
    _checks_path = ['checks/']
    _checks_path_recurse = True

    # Site configuration: the systems/partitions ReFrame may run on and
    # the programming environments available on each of them.
    _site_configuration = {
        'systems': {
            'dom': {
                'descr': 'Dom TDS',
                'hostnames': ['dom'],
                'modules_system': 'tmod',
                'resourcesdir': '/apps/common/regression/resources',
                'partitions': {
                    'login': {
                        'scheduler': 'local',
                        'modules': [],
                        'access': [],
                        'environs': ['PrgEnv-cray', 'PrgEnv-gnu',
                                     'PrgEnv-intel', 'PrgEnv-pgi'],
                        'descr': 'Login nodes',
                        'max_jobs': 4
                    },

                    # Compute partitions use the PBS backend under test;
                    # 'proc=...' entries are resource specs passed to
                    # qsub's `-l select` option.
                    'gpu': {
                        'scheduler': 'pbs+mpiexec',
                        'modules': ['daint-gpu'],
                        'access': ['proc=gpu'],
                        'environs': ['PrgEnv-cray', 'PrgEnv-gnu',
                                     'PrgEnv-intel', 'PrgEnv-pgi'],
                        'descr': 'Hybrid nodes (Haswell/P100)',
                        'max_jobs': 100,
                    },

                    'mc': {
                        'scheduler': 'pbs+mpiexec',
                        'modules': ['daint-mc'],
                        'access': ['proc=mc'],
                        'environs': ['PrgEnv-cray', 'PrgEnv-gnu',
                                     'PrgEnv-intel', 'PrgEnv-pgi'],
                        'descr': 'Multicore nodes (Broadwell)',
                        'max_jobs': 100,
                    },
                }
            },

            'generic': {
                'descr': 'Generic example system',
                'partitions': {
                    'login': {
                        'scheduler': 'local',
                        'modules': [],
                        'access': [],
                        'environs': ['builtin-gcc'],
                        'descr': 'Login nodes'
                    }
                }
            }
        },

        # Environments under '*' are resolvable from every system.
        'environments': {
            '*': {
                'PrgEnv-cray': {
                    'type': 'ProgEnvironment',
                    'modules': ['PrgEnv-cray'],
                },

                'PrgEnv-gnu': {
                    'type': 'ProgEnvironment',
                    'modules': ['PrgEnv-gnu'],
                },

                'PrgEnv-intel': {
                    'type': 'ProgEnvironment',
                    'modules': ['PrgEnv-intel'],
                },

                'PrgEnv-pgi': {
                    'type': 'ProgEnvironment',
                    'modules': ['PrgEnv-pgi'],
                },

                'builtin': {
                    'type': 'ProgEnvironment',
                    'cc': 'cc',
                    'cxx': '',
                    'ftn': '',
                },

                'builtin-gcc': {
                    'type': 'ProgEnvironment',
                    'cc': 'gcc',
                    'cxx': 'g++',
                    'ftn': 'gfortran',
                }
            }
        },
    }

    # Logging: a full DEBUG trace in reframe.log, INFO-level progress on
    # stdout ('&1') and mirrored into reframe.out.
    _logging_config = {
        'level': 'DEBUG',
        'handlers': {
            'reframe.log': {
                'level': 'DEBUG',
                'format': '[%(asctime)s] %(levelname)s: '
                          '%(check_info)s: %(message)s',
                'append': False,
            },

            # Output handling
            '&1': {
                'level': 'INFO',
                'format': '%(message)s'
            },
            'reframe.out': {
                'level': 'INFO',
                'format': '%(message)s',
                'append': False,
            }
        }
    }

    @property
    def version(self):
        """Version stamp of this settings file."""
        return self._version

    @property
    def reframe_module(self):
        """Name of the environment module providing ReFrame itself."""
        return self._reframe_module

    @property
    def job_poll_intervals(self):
        """Back-off intervals (seconds) used when polling job state."""
        return self._job_poll_intervals

    @property
    def job_submit_timeout(self):
        """Maximum seconds to wait for a job submission."""
        return self._job_submit_timeout

    @property
    def checks_path(self):
        """Paths that are searched for regression checks."""
        return self._checks_path

    @property
    def checks_path_recurse(self):
        """Whether ``checks_path`` entries are searched recursively."""
        return self._checks_path_recurse

    @property
    def site_configuration(self):
        """The systems/partitions/environments site configuration."""
        return self._site_configuration

    @property
    def logging_config(self):
        """Logging handler configuration."""
        return self._logging_config


# Module-level singleton picked up by the framework.
settings = ReframeSettings()
1 change: 0 additions & 1 deletion config/cscs.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,6 @@ class ReframeSettings:
}
},

# Generic system used for cli unit tests
'generic': {
'descr': 'Generic example system',
'partitions': {
Expand Down
2 changes: 1 addition & 1 deletion cscs-checks/cuda/cuda_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def __init__(self, name, **kwargs):
self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu']
self.sourcesdir = os.path.join(self.current_system.resourcesdir,
'CUDA', 'essentials')
self.modules = ['cudatoolkit']
self.modules = ['craype-accel-nvidia60']
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's irrelevant here. I should remove it.

self.maintainers = ['AJ', 'VK']
self.num_gpus_per_node = 1
self.tags = {'production'}
Expand Down
75 changes: 45 additions & 30 deletions docs/configure.rst
Original file line number Diff line number Diff line change
Expand Up @@ -128,36 +128,14 @@ The available partition attributes are the following:
* ``descr``: A detailed description of the partition (default is the partition name).

* ``scheduler``: The job scheduler and parallel program launcher combination that is used on this partition to launch jobs.
The syntax of this attribute is ``<scheduler>+<launcher>``. The available values for the job scheduler are the following:

* ``slurm``: Jobs on this partition will be launched using `Slurm <https://www.schedmd.com/>`__.
This scheduler relies on job accounting (``sacct`` command) in order to reliably query the job status.
* ``squeue``: Jobs on this partition will be launched using `Slurm <https://www.schedmd.com/>`__, but no job accounting is required.
The job status is obtained using the ``squeue`` command.
This scheduler is less reliable than the one based on the ``sacct`` command, but the framework does its best to query the job state as reliably as possible.
* ``local``: Jobs on this partition will be launched locally as OS processes.

The available values for the parallel program launchers are the following:

* ``srun``: Programs on this partition will be launched using a bare ``srun`` command *without* any job allocation options passed to it.
This launcher may only be used with the ``slurm`` scheduler.

* ``srunalloc``: Programs on this partition will be launched using the ``srun`` command *with* job allocation options passed automatically to it.
This launcher may also be used with the ``local`` scheduler.
* ``alps``: Programs on this partition will be launched using the ``aprun`` command.
* ``mpirun``: Programs on this partition will be launched using the ``mpirun`` command.
* ``mpiexec``: Programs on this partition will be launched using the ``mpiexec`` command.
* ``local``: Programs on this partition will be launched as-is without using any parallel program launcher.

There exist also the following aliases for specific combinations of job schedulers and parallel program launchers:

* ``nativeslurm``: This is equivalent to ``slurm+srun``.
* ``local``: This is equivalent to ``local+local``.
The syntax of this attribute is ``<scheduler>+<launcher>``.
A list of the supported `schedulers <#supported-scheduler-backends>`__ and `parallel launchers <#supported-parallel-launchers>`__ can be found at the end of this section.

* ``access``: A list of scheduler options that will be passed to the generated job script for gaining access to that logical partition (default ``[]``).

* ``environs``: A list of environments, with which ReFrame will try to run any regression tests written for this partition (default ``[]``).
The environment names must be resolved inside the ``environments`` section of the ``_site_configuration`` dictionary (see `Environments Configuration <#environments-configuration>`__ for more information).

* ``modules``: A list of modules to be loaded before running a regression test on that partition (default ``[]``).

* ``variables``: A set of environment variables to be set before running a regression test on that partition (default ``{}``).
Expand Down Expand Up @@ -227,20 +205,57 @@ The available partition attributes are the following:
}
}

.. note::
For the `PBS <#supported-scheduler-backends>`__ backend, options accepted in the ``access`` and ``resources`` attributes may either refer to actual ``qsub`` options or be plain resource specifications to be passed to the ``-l select`` option.
The backend treats an option as a ``qsub`` option if it starts with a ``-``.

.. note::
.. versionchanged:: 2.8
A new syntax for the ``scheduler`` values was introduced as well as more parallel program launchers.
The old values for the ``scheduler`` key will continue to be supported.

.. note::
.. versionadded:: 2.8.1
The ``squeue`` backend scheduler was added.

.. note::
.. versionchanged:: 2.9
Better support for custom job resources.



Supported scheduler backends
============================

ReFrame supports the following job schedulers:


* ``slurm``: Jobs on the configured partition will be launched using `Slurm <https://www.schedmd.com/>`__.
This scheduler relies on job accounting (``sacct`` command) in order to reliably query the job status.
* ``squeue``: *[new in 2.8.1]*
Jobs on the configured partition will be launched using `Slurm <https://www.schedmd.com/>`__, but no job accounting is required.
The job status is obtained using the ``squeue`` command.
This scheduler is less reliable than the one based on the ``sacct`` command, but the framework does its best to query the job state as reliably as possible.

* ``pbs``: *[new in 2.13]* Jobs on the configured partition will be launched using a `PBS-based <https://en.wikipedia.org/wiki/Portable_Batch_System>`__ scheduler.
* ``local``: Jobs on the configured partition will be launched locally as OS processes.


Supported parallel launchers
============================

ReFrame supports the following parallel job launchers:

* ``srun``: Programs on the configured partition will be launched using a bare ``srun`` command *without* any job allocation options passed to it.
This launcher may only be used with the ``slurm`` scheduler.
* ``srunalloc``: Programs on the configured partition will be launched using the ``srun`` command *with* job allocation options passed automatically to it.
This launcher may also be used with the ``local`` scheduler.
* ``alps``: Programs on the configured partition will be launched using the ``aprun`` command.
* ``mpirun``: Programs on the configured partition will be launched using the ``mpirun`` command.
* ``mpiexec``: Programs on the configured partition will be launched using the ``mpiexec`` command.
* ``local``: Programs on the configured partition will be launched as-is without using any parallel program launcher.

There exist also the following aliases for specific combinations of job schedulers and parallel program launchers:

* ``nativeslurm``: This is equivalent to ``slurm+srun``.
* ``local``: This is equivalent to ``local+local``.


Environments Configuration
--------------------------

Expand Down
6 changes: 3 additions & 3 deletions reframe/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@


VERSION = '2.13-dev1'
_required_pyver = (3, 5, 0)
INSTALL_PREFIX = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
MIN_PYTHON_VERSION = (3, 5, 0)

# Check python version
if sys.version_info[:3] < _required_pyver:
if sys.version_info[:3] < MIN_PYTHON_VERSION:
sys.stderr.write('Unsupported Python version: '
'Python >= %d.%d.%d is required\n' % _required_pyver)
'Python >= %d.%d.%d is required\n' % MIN_PYTHON_VERSION)
sys.exit(1)


Expand Down
3 changes: 1 addition & 2 deletions reframe/core/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -811,8 +811,7 @@ def _setup_job(self, **job_opts):
pre_run=self.pre_run,
post_run=self.post_run,
sched_exclusive_access=self.exclusive_access,
**job_opts
)
**job_opts)

# Get job options from managed resources and prepend them to
# job_opts. We want any user supplied options to be able to
Expand Down
Loading