diff --git a/.gitignore b/.gitignore index 50e2935246..8da67fabdd 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,9 @@ var/ .installed.cfg *.egg +# Ignore Mac DS_Store files +.DS_Store + # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. diff --git a/docs/Makefile b/docs/Makefile index e5993e8b24..308f1faed0 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -2,22 +2,21 @@ # # You can set these variables from the command line. +BUILD_CMD = build +DOCS = docs +DOCS_ALL = _build/_allversions +MASTER = master PYTHON = python SPHINXOPTS = SPHINXBUILD = -msphinx SPHINXPROJ = ReFrame +SPHINX_VERS = sphinx-versioning SOURCEDIR = . BUILDDIR = $(VERSION) PANDOC = pandoc PANDOCOPTS = --columns 1000 RM = /bin/rm -rf - -ifeq ($(finstring "darwin", $(OSTYPE)), "darwin") -SYMLINK_DIR=ln -sfh -else -# Assume a GNU/Linux system here -SYMLINK_DIR=ln -sfn -endif +TAG_VERS = '^v\d+(\.\d+)*[a-z]*' MDS = about.md \ advanced.md \ @@ -58,24 +57,29 @@ TARGET_DOCS := \ coverage all: $(RSTS) + @$(SPHINX_VERS) -l conf.py build docs/ html/ + @rsync -az old/ html/_old/ + @./link_old_docs.sh + +latest: $(RSTS) @make html @touch html/.nojekyll - @echo 'Linking to old documentation pages' - @cd html && $(SYMLINK_DIR) ../old _old && cd - > /dev/null + @rsync -az old/ html/_old/ %.rst: %.md $(PANDOC) $(PANDOCOPTS) --from=markdown --to=rst --output=$(@) $(@:.rst=.md) clean: @echo 'Removing md files' - -$(RM) $(RSTS) doctrees + -$(RM) $(RSTS) distclean: clean @echo 'Removing directories' - -$(RM) $(TARGET_DOCS) + -$(RM) $(TARGET_DOCS) doctrees $(TARGET_DOCS): Makefile @$(PYTHON) $(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -.PHONY: clean Makefile + +.PHONY: all distclean clean latest Makefile diff --git a/docs/_templates/footer.html b/docs/_templates/footer.html index 5d5a86d276..7e8c93a7de 100644 --- a/docs/_templates/footer.html +++ 
b/docs/_templates/footer.html @@ -62,7 +62,7 @@
- +
diff --git a/docs/_templates/layout.html b/docs/_templates/layout.html index 4880c5d08f..77f9952134 100644 --- a/docs/_templates/layout.html +++ b/docs/_templates/layout.html @@ -12,9 +12,9 @@ Useful Links

{% endblock %} - - diff --git a/docs/conf.py b/docs/conf.py index 4d0a869421..c935730b5d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -18,6 +18,7 @@ # documentation root, use os.path.abspath to make it absolute, like shown here. # import os +import re import sys sys.path.insert(0, os.path.abspath('..')) from recommonmark.parser import CommonMarkParser @@ -74,9 +75,9 @@ # built documents. # # The short X.Y version. -version = '2.7' +version = '2.8' # The full version, including alpha/beta/rc tags. -release = '2.7' +release = '2.8' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -88,7 +89,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'old', 'html/_old'] +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'old', 'html/*'] # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' @@ -236,3 +237,8 @@ author, 'ReFrame', 'ReFrame is a new framework for writing regression tests for HPC systems.', 'Miscellaneous'), ] + + +# Versioning +scv_whitelist_branches = ('master',) +scv_whitelist_tags = (re.compile(r'^v\d+\.\d+(\.\d+)?[a-z]?'),) diff --git a/docs/configure.md b/docs/configure.md index bbee58573d..adbb582faa 100644 --- a/docs/configure.md +++ b/docs/configure.md @@ -21,6 +21,7 @@ The following example shows a minimal configuration for the [Piz Daint](http://w 'daint': { 'descr': 'Piz Daint', 'hostnames': ['daint'], + 'modules_system': 'tmod', 'partitions': { 'login': { 'scheduler': 'local', @@ -88,6 +89,8 @@ The valid attributes of a system are the following: * `descr`: A detailed description of the system (default is the system name). 
* `hostnames`: This is a list of hostname patterns that will be used by ReFrame when it tries to [auto-detect](#system-auto-detection) the current system (default `[]`). +* `modules_system`: _(New in version 2.8)_ The modules system that should be used for loading environment modules on this system. + The only available modules system backend is currently `tmod`, which corresponds to the [TCL implementation](http://modules.sourceforge.net/) of the environment modules (default `None`). * `prefix`: Default regression prefix for this system (default `.`). * `stagedir`: Default stage directory for this system (default `None`). * `outputdir`: Default output directory for this system (default `None`). @@ -109,16 +112,33 @@ The partitions of a system are defined similarly to systems as a set of key/valu The available partition attributes are the following: * `descr`: A detailed description of the partition (default is the partition name). -* `scheduler`: The job scheduler to use for launching jobs on this partition. - Available values are the following: - * `local` (**default**): Jobs on this partition will be launched locally as OS processes. - When a job is launched with this scheduler, ReFrame will create a wrapper shell script for running the check on the local machine. - * `nativeslurm`: Jobs on this partition will be launched using Slurm and the `srun` command for creating MPI processes. - * `slurm+alps`: Jobs on this partition will be launched using Slurm and the `aprun` command for creating MPI processes. + +* `scheduler`: _(Changed in version 2.8)_ The job scheduler and parallel program launcher combination that is used on this partition to launch jobs. + The syntax of this attribute is `+`. + The available values for the job scheduler are the following: + - `slurm`: Jobs on this partition will be launched using [Slurm](https://www.schedmd.com/). + - `local`: Jobs on this partition will be launched locally as OS processes. 
+ + The available values for the parallel program launchers are the following: + - `srun`: Programs on this partition will be launched using a bare `srun` command *without* any job allocation options passed to it. + This launcher may only be used with the `slurm` scheduler. + - `srunalloc`: Programs on this partition will be launched using the `srun` command *with* job allocation options passed automatically to it. + This launcher may also be used with the `local` scheduler. + - `alps`: Programs on this partition will be launched using the `aprun` command. + - `mpirun`: Programs on this partition will be launched using the `mpirun` command. + - `mpiexec`: Programs on this partition will be launched using the `mpiexec` command. + - `local`: Programs on this partition will be launched as-is without using any parallel program launcher. + + There exist also the following aliases for specific combinations of job schedulers and parallel program launchers: + - `nativeslurm`: This is equivalent to `slurm+srun`. + - `local`: This is equivalent to `local+local`. + * `access`: A list of scheduler options that will be passed to the generated job script for gaining access to that logical partition (default `[]`). + * `environs`: A list of environments, with which ReFrame will try to run any regression tests written for this partition (default `[]`). The environment names must be resolved inside the `environments` section of the `_site_configuration` dictionary (see [Environments Configuration](#environments-configuration) for more information). * `modules`: A list of modules to be loaded before running a regression test on that partition (default `[]`). + * `variables`: A set of environment variables to be set before running a regression test on that partition (default `{}`). 
Environment variables can be set as follows (notice that both the variable name and its value are strings): @@ -128,9 +148,11 @@ The available partition attributes are the following: 'OTHER': 'foo' } ``` + * `max_jobs`: The maximum number of concurrent regression tests that may be active (not completed) on this partition. This option is relevant only when Reframe executes with the [asynchronous execution policy](running.html#asynchronous-execution-of-regression-checks). -* `resources`: A set of custom resource specifications and how these can be requested from the partition's scheduler (default `{}`). + +* `resources`: _(Changed in version 2.8)_ A set of custom resource specifications and how these can be requested from the partition's scheduler (default `{}`). This variable is a set of key/value pairs with the key being the resource name and the value being a list of options to be passed to the partition's job scheduler. The option strings can contain "references" to the resource being required using the syntax `{resource_name}`. In such cases, the `{resource_name}` will be replaced by the value of that resource defined in the regression test that is being run. @@ -144,11 +166,19 @@ The available partition attributes are the following: } ``` -When ReFrame will run a test that defines `self.num_gpus_per_node = 8`, the generated job script will have the following line in its preamble: +A regression test then may request this resource as follows: + +```python +self.extra_resources = {'num_gpus_per_node': '8'} +``` + +and the generated job script will have the following line in its preamble: ```bash #SBATCH --gres=gpu:8 ``` +Refer to the [reference guide](reference.html#reframe.core.pipeline.RegressionTest.extra_resources) for more information on the use of the `extra_resources` regression test attribute. 
+ ## Environments Configuration The environments available for testing in different systems are defined under the `environments` key of the top-level `_site_configuration` dictionary. diff --git a/docs/index.rst b/docs/index.rst index 1dd0b09187..af5f6baf74 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -14,7 +14,7 @@ Reframe also offers a high-level and flexible abstraction for writing sanity and Writing system regression tests in a high-level modern programming language, like Python, poses a great advantage in organizing and maintaining the tests. Users can create their own test hierarchies or test factories for generating multiple tests at the same time and they can also customize them in a simple and expressive way. -For versions 2.6.1 and older, please refer to `this documentation <_old/index.html>`__. +For versions 2.6.1 and older, please refer to `this documentation `__. Use Cases ========= diff --git a/docs/link_old_docs.sh b/docs/link_old_docs.sh new file mode 100755 index 0000000000..e54e1852d2 --- /dev/null +++ b/docs/link_old_docs.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +prefix=${1:-html} + +if [[ x$OSTYPE =~ x"darwin" ]]; then + symlink="ln -sfh" +else + symlink="ln -sfn" +fi + +link_old_docs() +{ + echo "linking old docs for $1 ..." + cd $1 && $symlink ../_old _old && cd - > /dev/null +} + + +link_old_docs $prefix/master + +for d in $prefix/v*; do + # Verify that $d is actually a version + if ! echo $(basename $d) | grep -e 'v[0-9]\+\.[0-9]\+' > /dev/null; then + echo Skipping non version directory $d ... + continue + fi + + link_old_docs $d +done diff --git a/docs/pipeline.md b/docs/pipeline.md index 728e4dd915..03bd3c6377 100644 --- a/docs/pipeline.md +++ b/docs/pipeline.md @@ -66,11 +66,9 @@ At this phase, all these directories are created. ### Prepare a job for the test At this point a _job descriptor_ will be created for the test. 
A job descriptor in ReFrame is an abstraction of the job scheduler's functionality relevant to the regression framework. It is responsible for submitting a job in a job queue and waiting for its completion. -Currently, the ReFrame framework supports three job scheduler backends: +ReFrame supports two job scheduler backends that can be combined with several different parallel program launchers. +For a complete list of the job scheduler/parallel launchers combinations, please refer to ["Partition Configuration"](configure.html#partition-configuration). -* __local__, which is basically a *pseudo-scheduler* that just spawns local OS processes, -* __nativeslurm__, which is the native [Slurm](https://slurm.schedmd.com) job scheduler and -* __slurm+alps__, which uses [Slurm](https://slurm.schedmd.com) for job submission, but uses [Cray's ALPS](http://docs.cray.com/books/S-2529-116//S-2529-116.pdf) for launching MPI processes on the compute nodes. ## 2. The Compilation Phase diff --git a/docs/reference.rst b/docs/reference.rst index 56bc8e8051..965525f7c8 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -20,3 +20,7 @@ Reference Guide .. automodule:: reframe.core.launchers :members: :show-inheritance: + +.. 
automodule:: reframe.core.launchers.registry + :members: + :show-inheritance: diff --git a/docs/requirements.txt b/docs/requirements.txt index f8d55db5c3..9101e49b86 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -25,6 +25,7 @@ sphinx-bootstrap-theme>=0.5.3 sphinx-fakeinv>=1.0.0 sphinx-rtd-theme>=0.2.4 sphinxcontrib-websupport>=1.0.1 +sphinxcontrib-versioning>=2.2.1 tornado>=4.5.2 urllib3>=1.22 watchdog>=0.8.3 diff --git a/docs/running.md b/docs/running.md index 4e14f69c07..6706789c71 100644 --- a/docs/running.md +++ b/docs/running.md @@ -34,7 +34,7 @@ An example listing of checks is the following that lists all the tests found und The ouput looks like: ``` Command line: ./bin/reframe -c tutorial/ -l -Reframe version: 2.6.1 +Reframe version: 2.7 Launched by user: karakasv Launched on host: daint103 Reframe paths @@ -90,7 +90,7 @@ To run the regression tests you should specify the _run_ action though the `-r` The output of the regression run looks like the following: ``` Command line: ./bin/reframe -c tutorial/example1.py -r -Reframe version: 2.6.1 +Reframe version: 2.7 Launched by user: karakasv Launched on host: daint103 Reframe paths @@ -218,7 +218,7 @@ For example, you can select only the `example7_check` from the tutorial as follo ``` Command line: ./bin/reframe -c tutorial/ -n example7_check -l -Reframe version: 2.6.1 +Reframe version: 2.7 Launched by user: karakasv Launched on host: daint103 Reframe paths @@ -239,7 +239,7 @@ Similarly, you can exclude this test by passing the `-x example7_check` option: ``` Command line: ./bin/reframe -c tutorial/ -x example7_check -l -Reframe version: 2.6.1 +Reframe version: 2.7 Launched by user: karakasv Launched on host: daint103 Reframe paths @@ -357,7 +357,7 @@ This is useful if you want to check the directories that ReFrame will create. 
``` ``` Command line: ./bin/reframe --prefix /foo -t foo -l -Reframe version: 2.6.1 +Reframe version: 2.7 Launched by user: karakasv Launched on host: daint103 Reframe paths @@ -429,7 +429,7 @@ _logging_config = { 'reframe.log' : { 'level' : 'DEBUG', 'format' : '[%(asctime)s] %(levelname)s: ' - '%(check_name)s: %(message)s', + '%(testcase_name)s: %(message)s', 'append' : False, }, @@ -465,11 +465,15 @@ The configurable properties of a log record handler are the following: * `level` (default: `'debug'`): The lowest level of log records that this handler can process. * `format` (default: `'%(message)s'`): Format string for the printout of the log record. - ReFrame supports all the [format strings](https://docs.python.org/3.6/library/logging.html#logrecord-attributes) from Python's logging library and provides two additional ones: + ReFrame supports all the [format strings](https://docs.python.org/3.6/library/logging.html#logrecord-attributes) from Python's logging library and provides the following additional ones: - `check_name`: Prints the name of the regression test on behalf of which ReFrame is currently executing. If ReFrame is not in the context of regression test, `reframe` will be printed. - `check_jobid`: Prints the job or process id of the job or process associated with currently executing regression test. If a job or process is not yet created, `-1` will be printed. + - `testcase_name`: Prints the name of the test case that is currently executing. + Test case is essentially a tuple consisting of the test name, the current system and partition and the current programming environment. + This format string prints out like `@ using `. + * `datefmt` (default: `'%FT%T'`) The format that will be used for outputting timestamps (i.e., the `%(asctime)s` field). Acceptable formats must conform to standard library's [time.strftime()](https://docs.python.org/3.6/library/time.html#time.strftime) function. 
* `append` (default: `False`) Controls whether ReFrame should append to this file or not. @@ -483,15 +487,14 @@ The configurable properties of a log record handler are the following: ReFrame supports additional logging for performance tests specifically, in order to record historical performance data. For each performance test, a log file of the form `.log` is created under the ReFrame's [log directory](#configuring-reframe-directories) where the test's performance is recorded. -The default format used for this file is `'[%(asctime)s] %(check_name)s (jobid=%(check_jobid)s): %(message)s'` and ReFrame always appends to this file. +The default format used for this file is `'[%(asctime)s] %(testcase_name)s (jobid=%(check_jobid)s): %(message)s'` and ReFrame always appends to this file. Currently, it is not possible for users to configure performance logging. The resulting log file looks like the following: ``` -[2017-10-21T00:48:42] example7_check (jobid=4073910): value: 49.253851, reference: (50.0, -0.1, 0.1) -[2017-10-24T21:19:21] example7_check (jobid=4163846): value: 49.690761, reference: (50.0, -0.1, 0.1) -[2017-10-24T21:19:33] example7_check (jobid=4163852): value: 50.037254, reference: (50.0, -0.1, 0.1) -[2017-10-24T21:20:00] example7_check (jobid=4163856): value: 49.622199, reference: (50.0, -0.1, 0.1) +[2017-12-01T15:31:20] example7_check@daint:gpu using PrgEnv-cray (jobid=649790): value: 47.797996, reference: (50.0, -0.1, 0.1) +[2017-12-01T15:31:24] example7_check@daint:gpu using PrgEnv-gnu (jobid=649791): value: 49.048228, reference: (50.0, -0.1, 0.1) +[2017-12-01T15:31:24] example7_check@daint:gpu using PrgEnv-pgi (jobid=649792): value: 48.575334, reference: (50.0, -0.1, 0.1) ``` The interpretation of the performance values depends on the individual tests. 
@@ -516,7 +519,7 @@ Here is an example output of ReFrame using asynchronous execution policy: ``` ommand line: ./reframe.py -c tutorial/ --exec-policy=async -r -Reframe version: 2.6.1 +Reframe version: 2.7 Launched by user: karakasv Launched on host: daint104 Reframe paths diff --git a/docs/started.md b/docs/started.md index 4ddce0f3db..73192062c3 100644 --- a/docs/started.md +++ b/docs/started.md @@ -1,22 +1,22 @@ # Getting Started ## Requirements + * Python 3.5 or higher. Python 2 is not supported. -* A functional Tcl [modules](http://modules.sourceforge.net/) software management environment with Python bindings. - The following need to be present or functional: - - `MODULESHOME` variable must be set. - - `modulecmd python` must be supported. + +> _Changed in version 2.8:_ A functional TCL modules system is no more required. +> ReFrame can now operate without a modules system at all. + ### Optional -* The [nose](https://pypi.python.org/pypi/nose) Python module must be installed if you want to run the unit tests of the framework. -* If you want to use the framework for launching tests on a cluster, a functional job submission management system is required. - Currently only [Slurm](https://www.schedmd.com/) is supported with either a native Slurm job launcher or the Cray ALPS launcher. - * In the case of Slurm, job accounting storage (`sacct` command) is required. +* For running the unit tests of the framework, the [nose](https://pypi.python.org/pypi/nose) Python module is needed. + You are advised to run the [unit tests](#running-the-unit-tests) of the framework after installing it on a new system to make sure that everything works fine. 
+ ## Getting the Framework To get the latest stable version of the framework, you can just clone it from the [github](https://github.com/eth-cscs/reframe) project page: @@ -61,7 +61,6 @@ You will notice in the output that all the job submission related tests have bee The test suite detects if the current system has a job submission system and is configured for ReFrame (see [Configuring ReFrame for your site](configure.html)) and it will skip all the unsupported unit tests. As soon as you configure ReFrame for your system, you can rerun the test suite to check that job submission unit tests pass as well. Note here that some unit tests may still be skipped depending on the configured job submission system. -For example, the Slurm+ALPS tests will also be skipped on a system configured with native SLURM. ## Where to Go from Here diff --git a/docs/tutorial.md b/docs/tutorial.md index c7917ca39e..d5de0ce952 100644 --- a/docs/tutorial.md +++ b/docs/tutorial.md @@ -117,7 +117,7 @@ def _get_checks(**kwargs): ``` A regression test written in ReFrame is essentially a Python class that must eventually derive from `reframe.core.pipeline.RegressionTest`. -In order to make the test available to the framework, every file defining regression tests must define the special function `_get_checks()`, which it should return a list of instantiated regression tests. +In order to make the test available to the framework, every file defining regression tests must define the special function `_get_checks()`, which should return a list of instantiated regression tests. This method will be called by the framework upon loading your file, in order to retrieve the regression tests defined. The framework will pass some special arguments to the `_get_checks()` function through the `kwargs` parameter, which are needed for the correct initialization of the regression test. @@ -131,7 +131,7 @@ class SerialTest(RegressionTest): The `__init__()` method is the constructor of your test. 
It is usually the only method you need to implement for your tests, especially if you don't want to customize any of the regression test pipeline stages. -The first statement in the `SerialTest` constructor calls the constructor of the base class, passing it as arguments the name of the regression test (`example1_check` here), the prefix of the test (the directory that the regression test file resides) and any other arguments passed to the `SerialTest`'s constructor. +The first statement in the `SerialTest` constructor calls the constructor of the base class, passing as arguments the name of the regression test (`example1_check` here), the path to the test directory and any other arguments passed to the `SerialTest`'s constructor. You can consider these first three lines and especially the way you should call the constructor of the base class, as boilerplate code. As you will see, it remains the same across all our examples, except, of course, for the check name. @@ -181,7 +181,7 @@ The exact compiler that is going to be used depends on the programming environme For example, given our configuration, if it is run with `PrgEnv-cray`, the Cray C compiler will be used, if it is run with `PrgEnv-gnu`, the GCC compiler will be used etc. A user can associate compilers with programming environments in the ReFrame's [settings file](configure.html#the-configuration-file). 
-The next line in our first regression test specifies a list of options to be used for running the generated executable: +The next line in our first regression test specifies a list of options to be used for running the generated executable (the matrix dimension and the number of iterations in this particular example): ```python self.executable_opts = ['1024', '100'] @@ -241,7 +241,7 @@ Here we will only show you how to run a specific tutorial test: If everything is configured correctly for your system, you should get an output similar to the following: ``` -Reframe version: X.X.X +Reframe version: 2.7 Launched by user: Launched on host: daint104 Reframe paths @@ -336,7 +336,7 @@ def _get_checks(**kwargs): return [OpenMPTestIfElse(**kwargs)] ``` -There are two new things introduced with this example: +This example introduces two new concepts: 1. We need to set the `OMP_NUM_THREADS` environment variable, in order to specify the number of threads to use with our program. 2. We need to specify different flags for the different compilers provided by the programming environments we are testing. @@ -670,8 +670,8 @@ The first thing we do is to extract the norm printed in the standard output. ``` The `extractsingle()` sanity function extracts some information from a single occurrence (by default the first) of a pattern in a filename. -In our case, this function will extract the `norm` [group](https://docs.python.org/3.6/library/re.html#regular-expression-syntax) from the match of the regular expression `r'The L2 norm of the resulting vector is:\s+(?P\S+)'` in standard output, it will convert it to float and it will return it. -Unnamed groups in regular expressions are also supported, which you can reference them by their group number. 
+In our case, this function will extract the `norm` [capturing group](https://docs.python.org/3.6/library/re.html#regular-expression-syntax) from the match of the regular expression `r'The L2 norm of the resulting vector is:\s+(?P\S+)'` in standard output, it will convert it to float and it will return it. +Unnamed capturing groups in regular expressions are also supported, which you can reference by their group number. For example, we could have written the same statement as follows: ```python @@ -679,9 +679,9 @@ For example, we could have written the same statement as follows: r'The L2 norm of the resulting vector is:\s+(\S+)', self.stdout, 1, float) ``` -Notice that we replaced the `'norm'` argument with `1`, which is the group number. +Notice that we replaced the `'norm'` argument with `1`, which is the capturing group number. -> NOTE: In regular expressions, group `0` is always the whole match. +> NOTE: In regular expressions, capturing group `0` corresponds always to the whole match. > In sanity functions dealing with regular expressions, this will yield the whole line that matched. A useful counterpart of `extractsingle()` is the `extractall()` function, which instead of a single occurrence, returns a list of all the occurrences found. @@ -780,7 +780,7 @@ Let's have a closer look at each of them: The `perf_patterns` attribute is a dictionary, whose keys are _performance variables_ (i.e., arbitrary names assigned to the performance values we are looking for), and its values are _sanity expressions_ that specify how to obtain these performance values from the output. A sanity expression is a Python expression that uses the result of one or more _sanity functions_. -In our example, we name the performance value we are looking for simply as `perf` and we extract its value by converting to float the regex group named `Gflops` from the line that was matched in the standard output. 
+In our example, we name the performance value we are looking for simply as `perf` and we extract its value by converting to float the regex capturing group named `Gflops` from the line that was matched in the standard output. Each of the performance variables defined in `perf_patterns` must be resolved in the `reference` dictionary of reference values. When the framework obtains a performance value from the output of the test it searches for a reference value in the `reference` dictionary, and then it checks whether the user supplied tolerance is respected. diff --git a/docs/usecases.md b/docs/usecases.md index d544ed5fc0..9c15954e52 100644 --- a/docs/usecases.md +++ b/docs/usecases.md @@ -35,17 +35,17 @@ A bit of this effect can be seen in the following Table where the total amount o We also present a snapshot of the first public release of ReFrame ([v2.2](https://github.com/eth-cscs/reframe/releases/tag/v2.2)). -Maintenance Burden | Shell-Script Based | ReFrame (v2.2) | ReFrame (v2.7) | ---------------------------|--------------------|----------------|----------------| -Total tests | 179 | 122 | 172 | -Total size of tests | 14635 loc | 2985 loc | 4493 loc | -Avg. test file size | 179 loc | 93 loc | 87 loc | -Avg. effective test size | 179 loc | 25 loc | 25 loc | +Maintenance Burden | Shell-Script Based | ReFrame (May 2017) | ReFrame (Nov 2017) | +--------------------------|--------------------|--------------------|--------------------| +Total tests | 179 | 122 | 172 | +Total size of tests | 14635 loc | 2985 loc | 4493 loc | +Avg. test file size | 179 loc | 93 loc | 87 loc | +Avg. effective test size | 179 loc | 25 loc | 25 loc | The difference in the total amount of regression test code is dramatic. From the 15K lines of code of the old shell script based regression testing suite, ReFrame tests use only 3K lines of code (first release) achieving a higher coverage. 
-> NOTE: The higher test count of the older suite refers to test cases, i.e., running the same test for different programming environments, whereas for ReFrame the counts does not account for this. +> NOTE: The higher test count of the older suite refers to test cases, i.e., running the same test for different programming environments, whereas for ReFrame the counts do not account for this. Each regression test file in ReFrame is 80–90 loc on average. However, eash regression test file may contain or generate more than one related tests, thus leading to the effective decrease of the line count per test to only 25 loc. diff --git a/reframe/__init__.py b/reframe/__init__.py index 055d32ae7a..1aae6e4723 100644 --- a/reframe/__init__.py +++ b/reframe/__init__.py @@ -14,27 +14,3 @@ sys.stderr.write('Unsupported Python version: ' 'Python >= %d.%d.%d is required\n' % required_version) sys.exit(1) - - -if 'MODULESHOME' not in os.environ: - sys.stderr.write('MODULESHOME is not set. ' - 'Do you have modules framework installed? Exiting...\n') - sys.exit(1) - - -MODULECMD = 'modulecmd' -MODULECMD_PYTHON = MODULECMD + ' python' -try: - _completed = subprocess.run(args=MODULECMD_PYTHON.split(), - stderr=subprocess.PIPE, - universal_newlines=True) - if re.search('Unknown shell type', _completed.stderr, re.MULTILINE): - sys.stderr.write( - 'Python is not supported by this modules framework.\n') - sys.exit(1) - -except OSError: - # modulecmd was not found - sys.stderr.write("Could not run modulecmd. 
Tried `%s' and failed.\n" % - MODULECMD_PYTHON) - sys.exit(1) diff --git a/reframe/core/debug.py b/reframe/core/debug.py index aad3cc2684..fa77d85e16 100644 --- a/reframe/core/debug.py +++ b/reframe/core/debug.py @@ -45,15 +45,13 @@ def repr(obj, indent=4, max_depth=2): if _depth[tid] == max_depth: attr_list = ['...'] else: - attr_list = ['%s%s=%r' % (indent_width * ' ', attr, val) + attr_list = ['{0}={1}'.format(attr, val) for attr, val in sorted(obj.__dict__.items())] - repr_fmt = '%(module_name)s.%(class_name)s(%(attr_repr)s)@0x%(addr)x' - ret = repr_fmt % { - 'module_name': obj.__module__, - 'class_name': type(obj).__name__, - 'attr_repr': ',\n'.join(attr_list), - 'addr': id(obj) - } + repr_fmt = '{module_name}.{class_name}({attr_repr})@0x{addr:x}' + ret = repr_fmt.format(module_name=obj.__module__, + class_name=type(obj).__name__, + attr_repr=', '.join(attr_list), + addr=id(obj)) _decrease_indent() return ret diff --git a/reframe/core/environments.py b/reframe/core/environments.py index 4b04c5acbe..c45e38961c 100644 --- a/reframe/core/environments.py +++ b/reframe/core/environments.py @@ -8,7 +8,7 @@ CommandError, CompilationError) from reframe.core.fields import * -from reframe.core.modules import * +from reframe.core.modules import get_modules_system class Environment: @@ -77,11 +77,12 @@ def set_variable(self, name, value): def load(self): # conflicted module list must be filled at the time of load for m in self._modules: - if module_present(m): + if get_modules_system().is_module_loaded(m): self._preloaded.add(m) - self._conflicted += module_force_load(m) + self._conflicted += get_modules_system().load_module(m, force=True) for conflict in self._conflicted: + # FIXME: explicit modules system commands are no more portable self._load_stmts += ['module unload %s' % conflict] self._load_stmts += ['module load %s' % m] @@ -107,11 +108,11 @@ def unload(self): # Unload modules in reverse order for m in reversed(self._modules): if m not in self._preloaded: - 
module_unload(m) + get_modules_system().unload_module(m) # Reload the conflicted packages, previously removed for m in self._conflicted: - module_load(m) + get_modules_system().load_module(m) self._loaded = False @@ -145,8 +146,9 @@ def __repr__(self): return debug.repr(self) def __str__(self): - return ('Name: %s\nModules: %s\nEnvironment: %s' % - (self._name, self._modules, self._variables)) + ret = "{0}(name='{1}', modules={2}, variables={3})" + return ret.format(type(self).__name__, self.name, + self.modules, self.variables) def swap_environments(src, dst): @@ -157,7 +159,7 @@ def swap_environments(src, dst): class EnvironmentSnapshot(Environment): def __init__(self, name='env_snapshot'): self._name = name - self._modules = module_list() + self._modules = get_modules_system().loaded_modules() self._variables = dict(os.environ) self._conflicted = [] diff --git a/reframe/core/exceptions.py b/reframe/core/exceptions.py index f02d5119ab..45ea555d1c 100644 --- a/reframe/core/exceptions.py +++ b/reframe/core/exceptions.py @@ -109,10 +109,6 @@ class JobSubmissionError(CommandError): pass -class JobResourcesError(ReframeError): - pass - - class ReframeDeprecationWarning(DeprecationWarning): """Warning for deprecated features of the ReFrame framework.""" diff --git a/reframe/core/fields.py b/reframe/core/fields.py index 9a8778c096..494c12ca9e 100644 --- a/reframe/core/fields.py +++ b/reframe/core/fields.py @@ -23,7 +23,11 @@ def __repr__(self): return debug.repr(self) def __get__(self, obj, objtype): - return obj.__dict__[self._name] + try: + return obj.__dict__[self._name] + except KeyError: + raise AttributeError("%s object has no attribute '%s'" % + (objtype.__name__, self._name)) def __set__(self, obj, value): obj.__dict__[self._name] = value @@ -281,11 +285,16 @@ def __set__(self, obj, value): super().__set__(obj, copy.deepcopy(value)) -class ReadOnlyField(Field): - """Holds a read-only field. 
Attempts to set it will raise an exception""" +class ConstantField(Field): + """Holds a constant. + + Attempt to set it will raise an exception. + + :arg value: the value of this field. + """ def __init__(self, value): - super().__init__('_readonly_') + super().__init__('__readonly') self._value = value def __get__(self, obj, objtype): diff --git a/reframe/core/launchers.py b/reframe/core/launchers.py deleted file mode 100644 index 8303ebf750..0000000000 --- a/reframe/core/launchers.py +++ /dev/null @@ -1,161 +0,0 @@ -import abc -import math -import reframe.core.debug as debug - -from reframe.core.fields import TypedListField - - -class JobLauncher(abc.ABC): - """A job launcher. - - A job launcher is the executable that actually launches a distributed - program to multiple nodes, e.g., ``mpirun``, ``srun`` etc. - - This is an abstract class. - Users may not instantiate this class directly. - - :arg job: The job descriptor to associate with this launcher. - The launcher may need the job descriptor in order to obtain information - for the job submission. - Users needing to create a launcher inside a :class:`RegressionTest` - should always pass ``self.job`` to this argument. - :type job: :class:`reframe.core.schedulers.Job` - :arg options: Options to be passed to the launcher invocation. - :type options: :class:`list` of :class:`str` - """ - - #: List of options to be passed to the job launcher - #: - #: :type: :class:`list` of :class:`str` - #: :default: ``[]`` - options = TypedListField('options', str) - - def __init__(self, job, options=[]): - self._job = job - self.options = list(options) - - def __repr__(self): - return debug.repr(self) - - @property - def job(self): - """The job descriptor associated with this launcher. - - :type: :class:`reframe.core.schedulers.Job` - """ - return self._job - - @property - @abc.abstractmethod - def executable(self): - """The executable name of this launcher. 
- - :type: :class:`str` - """ - - @property - def fixed_options(self): - """Options to be always passed to this job launcher's executable. - - :type: :class:`list` of :class:`str` - :default: ``[]`` - """ - return [] - - def emit_run_command(self, target_executable, builder, **builder_opts): - options = ' '.join(self.fixed_options + self.options) - return builder.verbatim('%s %s %s' % - (self.executable, options, target_executable), - **builder_opts) - - -class NativeSlurmLauncher(JobLauncher): - @property - def executable(self): - return 'srun' - - -class AlpsLauncher(JobLauncher): - @property - def executable(self): - return 'aprun' - - @property - def fixed_options(self): - return ['-B'] - - -class LauncherWrapper(JobLauncher): - """Wrap a launcher object so as to modify its invocation. - - This is useful for parallel debuggers. - For example, to launch a regression test using the DDT debugger, you can do - the following: - - .. - def setup(self, partition, environ, **job_opts): - super().setup(partition, environ, **job_opts) - self.job.launcher = LauncherWrapper(self.job.launcher, 'ddt', - ['--offline']) - - :arg target_launcher: The launcher to wrap. - :arg wrapper_command: The wrapper command. - :arg wrapper_options: List of options to pass to the wrapper command. 
- """ - - def __init__(self, target_launcher, wrapper_command, wrapper_options=[]): - super().__init__(target_launcher.job, target_launcher.options) - self._target_launcher = target_launcher - self._wrapper_command = wrapper_command - self._wrapper_options = list(wrapper_options) - - @property - def executable(self): - return self._wrapper_command - - @property - def fixed_options(self): - return (self._wrapper_options + - [self._target_launcher.executable] + - self._target_launcher.fixed_options) - - -class LocalLauncher(JobLauncher): - @property - def executable(self): - return '' - - def emit_run_command(self, cmd, builder, **builder_opts): - # Just emit the command - return builder.verbatim(cmd, **builder_opts) - - -class VisitLauncher(JobLauncher): - """ReFrame launcher for the `VisIt `__ visualization - software. - """ - - def __init__(self, job, options=[]): - super().__init__(job, options) - if self._job: - # The self._job.launcher must be stored at the moment of the - # VisitLauncher construction, because the user will afterwards set - # the newly created VisitLauncher as new self._job.launcher! - self._target_launcher = self._job.launcher - - @property - def executable(self): - return 'visit' - - @property - def fixed_options(self): - options = [] - if (self._target_launcher and - not isinstance(self._target_launcher, LocalLauncher)): - num_nodes = math.ceil( - self._job.num_tasks / self._job.num_tasks_per_node) - options.append('-np %s' % self._job.num_tasks) - options.append('-nn %s' % num_nodes) - options.append('-l %s' % self._target_launcher.executable) - - return options diff --git a/reframe/core/launchers/__init__.py b/reframe/core/launchers/__init__.py new file mode 100644 index 0000000000..b83529eb0e --- /dev/null +++ b/reframe/core/launchers/__init__.py @@ -0,0 +1,87 @@ +import abc + +from reframe.core.fields import StringField, TypedListField + + +class JobLauncher(abc.ABC): + """A job launcher. 
+ + A job launcher is the executable that actually launches a distributed + program to multiple nodes, e.g., ``mpirun``, ``srun`` etc. + + .. note:: + This is an abstract class. + Regression tests may not instantiate this class directly. + + .. note:: + .. versionchanged:: 2.8 + Job launchers do not get a reference to a job during their + initialization. + """ + + #: List of options to be passed to the job launcher invocation. + #: + #: :type: :class:`list` of :class:`str` + #: :default: ``[]`` + options = TypedListField('options', str) + + def __init__(self, options=[]): + self.options = list(options) + + @abc.abstractmethod + def command(self, job): + # The launcher command. + # + # :arg job: A :class:`reframe.core.schedulers.Job` that may be used by + # this launcher to properly emit its options. + # Subclasses may override this method and emit options according the + # num of tasks associated to the job etc. + # :returns: a list of command line arguments (including the launcher + # executable). + pass + + def emit_run_command(self, job, builder): + return builder.verbatim( + ' '.join(self.command(job) + self.options + [job.command])) + + +class LauncherWrapper(JobLauncher): + """Wrap a launcher object so as to modify its invocation. + + This is useful for parallel debuggers. + For example, to launch a regression test using the `DDT + `_ debugger, you can do the + following: + + :: + + def setup(self, partition, environ, **job_opts): + super().setup(partition, environ, **job_opts) + self.job.launcher = LauncherWrapper(self.job.launcher, 'ddt', + ['--offline']) + + If the current system partition uses native Slurm for job submission, this + setup will generate the following command in the submission script: + + :: + + ddt --offline srun + + If the current partition uses ``mpirun`` instead, it will generate + + :: + + ddt --offline mpirun -np ... + + :arg target_launcher: The launcher to wrap. + :arg wrapper_command: The wrapper command. 
+ :arg wrapper_options: List of options to pass to the wrapper command. + """ + + def __init__(self, target_launcher, wrapper_command, wrapper_options=[]): + super().__init__(target_launcher.options) + self._target_launcher = target_launcher + self._wrapper_command = [wrapper_command] + list(wrapper_options) + + def command(self, job): + return self._wrapper_command + self._target_launcher.command(job) diff --git a/reframe/core/launchers/local.py b/reframe/core/launchers/local.py new file mode 100644 index 0000000000..51a49835c9 --- /dev/null +++ b/reframe/core/launchers/local.py @@ -0,0 +1,13 @@ +from reframe.core.launchers import JobLauncher + +from reframe.core.launchers.registry import register_launcher + + +@register_launcher('local', local=True) +class LocalLauncher(JobLauncher): + def __init__(self, options=[]): + # Ignore options passed by users + super().__init__([]) + + def command(self, job): + return [] diff --git a/reframe/core/launchers/mpi.py b/reframe/core/launchers/mpi.py new file mode 100644 index 0000000000..1fd7197090 --- /dev/null +++ b/reframe/core/launchers/mpi.py @@ -0,0 +1,85 @@ +import math + +from reframe.core.launchers import JobLauncher +from reframe.core.launchers.registry import register_launcher + + +@register_launcher('srun') +class SrunLauncher(JobLauncher): + def command(self, job): + return ['srun'] + + +@register_launcher('alps') +class AlpsLauncher(JobLauncher): + def command(self, job): + return ['aprun', '-B'] + + +@register_launcher('mpirun') +class MpirunLauncher(JobLauncher): + def command(self, job): + return ['mpirun', '-np', str(job.num_tasks)] + + +@register_launcher('mpiexec') +class MpiexecLauncher(JobLauncher): + def command(self, job): + return ['mpiexec', '-n', str(job.num_tasks)] + + +@register_launcher('srunalloc') +class SrunAllocationLauncher(JobLauncher): + def command(self, job): + ret = ['srun'] + if job.name: + ret += ['--job-name=%s' % job.name] + + if job.time_limit: + ret += ['--time=%d:%d:%d' % 
job.time_limit] + + if job.stdout: + ret += ['--output=%s' % job.stdout] + + if job.stderr: + ret += ['--error=%s' % job.stderr] + + if job.num_tasks: + ret += ['--ntasks=%s' % str(job.num_tasks)] + + if job.num_tasks_per_node: + ret += ['--ntasks-per-node=%s' % str(job.num_tasks_per_node)] + + if job.num_tasks_per_core: + ret += ['--ntasks-per-core=%s' % str(job.num_tasks_per_core)] + + if job.num_tasks_per_socket: + ret += ['--ntasks-per-socket=%s' % str(job.num_tasks_per_socket)] + + if job.num_cpus_per_task: + ret += ['--cpus-per-task=%s' % str(job.num_cpus_per_task)] + + if job.sched_partition: + ret += ['--partition=%s' % str(job.sched_partition)] + + if job.sched_exclusive_access: + ret += ['--exclusive'] + + if job.use_smt is not None: + hint = 'multithread' if job.use_smt else 'nomultithread' + ret += ['--hint=%s' % hint] + + if job.sched_partition: + ret += ['--partition=%s' % str(job.sched_partition)] + + if job.sched_account: + ret += ['--account=%s' % str(job.sched_account)] + + if job.sched_nodelist: + ret += ['--nodelist=%s' % str(job.sched_nodelist)] + + if job.sched_exclude_nodelist: + ret += ['--exclude=%s' % str(job.sched_exclude_nodelist)] + + return ret + diff --git a/reframe/core/launchers/registry.py b/reframe/core/launchers/registry.py new file mode 100644 index 0000000000..6bffca6003 --- /dev/null +++ b/reframe/core/launchers/registry.py @@ -0,0 +1,73 @@ +import reframe.core.fields as fields + + +# Name registry for job launchers +_LAUNCHERS = {} + + +def register_launcher(name, local=False): + """Class decorator for registering new job launchers. + + :arg name: The registration name of this launcher + :arg local: :class:`True` if launcher may only submit local jobs, + :class:`False` otherwise. + + .. note:: + .. versionadded:: 2.8 + + This method is only relevant to developers of new job launchers. 
+ """ + def _register_launcher(cls): + if name in _LAUNCHERS: + raise ReframeError( + "a job launcher is already registered with name '%s'" % name) + + cls.is_local = fields.ConstantField(bool(local)) + cls.registered_name = fields.ConstantField(name) + _LAUNCHERS[name] = cls + return cls + + return _register_launcher + + +def getlauncher(name): + """Get launcher by its registered name. + + The available names are those specified in the + :doc:`configuration file `. + + This method may become handy in very special situations, e.g., testing an + application that needs to replace the system partition launcher or if a + different launcher must be used for a different programming environment. + + For example, if you want to replace the current partition's launcher with + the local one, here is how you can achieve it: + + :: + + def setup(self, partition, environ, **job_opts): + super().setup(partition, environ, **job_opts) + self.job.launcher = getlauncher('local')() + + + Note that this method returns a launcher class type and not an instance of + that class. + You have to instantiate it explicitly before assigning it to the + :attr:`launcher` attribute of the job. + + :arg name: The name of the launcher to retrieve. + :returns: The class of the launcher requested, which is a subclass of + :class:`reframe.core.launchers.JobLauncher`. + + .. note:: + .. 
versionadded:: 2.8 + """ + try: + return _LAUNCHERS[name] + except KeyError: + raise ConfigurationError("no such job launcher: '%s'" % name) + + +# Import the launchers modules to trigger their registration +import reframe.core.launchers.local +import reframe.core.launchers.mpi diff --git a/reframe/core/logging.py b/reframe/core/logging.py index 228692e514..66f239167f 100644 --- a/reframe/core/logging.py +++ b/reframe/core/logging.py @@ -158,8 +158,18 @@ def setLevel(self, level): def makeRecord(self, name, level, fn, lno, msg, args, exc_info, func=None, extra=None, sinfo=None): # Setup dynamic fields of the check - if self.check and self.check.job: - extra['check_jobid'] = self.check.job.jobid + if self.check: + testcase_name = self.check.name + if self.check.job: + extra['check_jobid'] = self.check.job.jobid + + if self.check.current_partition: + testcase_name += '@%s' % self.check.current_partition.fullname + + if self.check.current_environ: + testcase_name += ' using %s' % self.check.current_environ.name + + extra['testcase_name'] = testcase_name record = super().makeRecord(name, level, fn, lno, msg, args, exc_info, func, extra, sinfo) @@ -200,7 +210,8 @@ def __init__(self, logger=None, check=None): logger, { 'check_name': check.name if check else 'reframe', - 'check_jobid': '-1' + 'check_jobid': '-1', + 'testcase_name': check.name if check else 'reframe' } ) if self.logger: @@ -226,20 +237,52 @@ def verbose(self, message, *args, **kwargs): null_logger = LoggerAdapter() _logger = None -_frontend_logger = null_logger +_context_logger = null_logger + + +class _LoggingContext: + def __init__(self, check=None, level=DEBUG): + global _context_logger + + self._level = level + self._orig_logger = _context_logger + if check is not None: + _context_logger = LoggerAdapter(_logger, check) + + def __enter__(self): + return _context_logger + + def __exit__(self, exc_type, exc_value, traceback): + global _context_logger + + # Log any exceptions thrown with the current 
context logger + if exc_type is not None: + msg = 'caught {0}: {1}' + exc_fullname = '%s.%s' % (exc_type.__module__, exc_type.__name__) + getlogger().log(self._level, msg.format(exc_fullname, exc_value)) + + # Restore context logger + _context_logger = self._orig_logger + + # Propagate any exception thrown + return exc_type is None + + +def logging_context(check=None, exc_log_level=DEBUG): + return _LoggingContext(check, exc_log_level) def configure_logging(config): global _logger - global _frontend_logger + global _context_logger if config is None: _logger = None - _frontend_logger = null_logger + _context_logger = null_logger return _logger = load_from_dict(config) - _frontend_logger = LoggerAdapter(_logger) + _context_logger = LoggerAdapter(_logger) def save_log_files(dest): @@ -249,10 +292,5 @@ def save_log_files(dest): shutil.copy(hdlr.baseFilename, dest, follow_symlinks=True) -def getlogger(logger_kind, *args, **kwargs): - if logger_kind == 'frontend': - return _frontend_logger - elif logger_kind == 'check': - return LoggerAdapter(_logger, *args, **kwargs) - else: - raise ReframeError('unknown kind of logger: %s' % logger_kind) +def getlogger(): + return _context_logger diff --git a/reframe/core/modules.py b/reframe/core/modules.py index 20bad6d3b7..5cff298a13 100644 --- a/reframe/core/modules.py +++ b/reframe/core/modules.py @@ -2,32 +2,30 @@ # Utilities for manipulating the modules subsystem # +import abc import os -import subprocess import re -import reframe -import reframe.core.debug as debug import reframe.utility.os as os_ext +import subprocess -from reframe.core.exceptions import ModuleError +from reframe.core.exceptions import ModuleError, ReframeError class Module: """Module wrapper. - We basically need it for defining operators for use in standard Python - algorithms.""" + This class represents internally a module. Concrete module system + implementation should deal only with that. 
+ """ def __init__(self, name): if not name: raise ModuleError('no module name specified') - name_parts = name.split('/') - self._name = name_parts[0] - if len(name_parts) > 1: - self._version = name_parts[1] - else: - self._version = None + try: + self._name, self._version = name.split('/', maxsplit=1) + except ValueError: + self._name, self._version = name, None @property def name(self): @@ -37,120 +35,334 @@ def name(self): def version(self): return self._version + @property + def fullname(self): + if self.version is not None: + return '/'.join((self.name, self.version)) + else: + return self.name + + def __hash__(self): + # Here we hash only over the name of the module, because foo/1.2 and + # simply foo compare equal. In case of hash conflicts (e.g., foo/1.2 + # and foo/1.3), the equality operator will resolve it. + return hash(self.name) + def __eq__(self, other): if not isinstance(other, type(self)): return NotImplemented - if self._version is None or other._version is None: - return self._name == other._name + if not self.version or not other.version: + return self.name == other.name + else: + return self.name == other.name and self.version == other.version + + def __repr__(self): + return '%s(%s)' % (type(self).__name__, self.fullname) + + def __str__(self): + return self.fullname + + +class ModulesSystem: + """Implements the frontend of the module systems.""" + + def __init__(self, backend): + self._backend = backend + + @property + def backend(self): + return(self._backend) + + def loaded_modules(self): + """Return a list of loaded modules. + + This method returns a list of strings. + """ + return [str(m) for m in self._backend.loaded_modules()] + + def conflicted_modules(self, name): + """Return the list of conflicted modules. + + This method returns a list of strings. + """ + return [str(m) for m in self._backend.conflicted_modules(Module(name))] + + def load_module(self, name, force=False): + """Load the module `name'. 
+ + If ``force`` is set, forces the loading, + unloading first any conflicting modules currently loaded. + + Returns the list of unloaded modules as strings.""" + module = Module(name) + + loaded_modules = self._backend.loaded_modules() + if module in loaded_modules: + # Do not try to load the module if it is already present + return [] + + # Get the list of the modules that need to be unloaded + unload_list = set() + if force: + conflict_list = self._backend.conflicted_modules(module) + unload_list = set(loaded_modules) & set(conflict_list) + + for m in unload_list: + self._backend.unload_module(m) + + self._backend.load_module(module) + return [str(m) for m in unload_list] + + def unload_module(self, name): + """Unload module ``name``.""" + self._backend.unload_module(Module(name)) + + def is_module_loaded(self, name): + """Check presence of module ``name``.""" + return self._backend.is_module_loaded(Module(name)) + + @property + def name(self): + """Return the name of this module system.""" + return self._backend.name() + + @property + def version(self): + """Return the version of this module system.""" + return self._backend.version() + + def unload_all(self): + """Unload all loaded modules.""" + return self._backend.unload_all() + + @property + def searchpath(self): + """The module system search path as a list of directories.""" + return self._backend.searchpath() + + def searchpath_add(self, *dirs): + """Add ``dirs`` to the module system search path.""" + return self._backend.searchpath_add(*dirs) + + def searchpath_remove(self, *dirs): + """Remove ``dirs`` from the module system search path.""" + return self._backend.searchpath_remove(*dirs) + + def __str__(self): + return str(self._backend) + - return self._name == other._name and self._version == other._version +class ModulesSystemImpl(abc.ABC): + """Abstract base class for module systems.""" + + @abc.abstractmethod + def loaded_modules(self): + """Return a list of loaded modules. 
+ + This method returns a list of Module instances. + """ + + @abc.abstractmethod + def conflicted_modules(self, module): + """Return the list of conflicted modules. + + This method returns a list of Module instances. + """ + + @abc.abstractmethod + def load_module(self, module): + """Load the module `name'. + + If ``force`` is set, forces the loading, + unloading first any conflicting modules currently loaded. + + Returns the unloaded modules as a list of module instances.""" + + @abc.abstractmethod + def unload_module(self, module): + """Unload module ``module``.""" + + @abc.abstractmethod + def is_module_loaded(self, module): + """Check presence of module ``module``.""" + + @abc.abstractmethod + def name(self): + """Return the name of this module system.""" + + @abc.abstractmethod + def version(self): + """Return the version of this module system.""" + + @abc.abstractmethod + def unload_all(self): + """Unload all loaded modules.""" + + @abc.abstractmethod + def searchpath(self): + """The module system search path as a list of directories.""" + + @abc.abstractmethod + def searchpath_add(self, *dirs): + """Add ``dirs`` to the module system search path.""" + + @abc.abstractmethod + def searchpath_remove(self, *dirs): + """Remove ``dirs`` from the module system search path.""" def __repr__(self): - return debug.repr(self) + return type(self).__name__ + '()' def __str__(self): - if self._version: - return '%s/%s' % (self._name, self._version) - else: - return self._name + return self.name() + ' ' + self.version() -def module_equal(rhs, lhs): - return Module(rhs) == Module(lhs) +class TModImpl(ModulesSystemImpl): + """Module system for TMod (Tcl).""" + def __init__(self): + # Try to figure out if we are indeed using the TCL version + try: + completed = os_ext.run_command('modulecmd -V') + except OSError as e: + raise ReframeError( + 'could not find a sane Tmod installation: %s' % e) -def module_list(): - try: - # LOADEDMODULES may be defined but empty - return [m for 
m in os.environ['LOADEDMODULES'].split(':') if m] - except KeyError: - return [] + version_match = re.search(r'^VERSION=(\S+)', completed.stdout, + re.MULTILINE) + tcl_version_match = re.search(r'^TCL_VERSION=(\S+)', completed.stdout, + re.MULTILINE) + + if version_match is None or tcl_version_match is None: + raise ReframeError('could not find a sane Tmod installation') + + self._version = version_match.group(1) + self._command = 'modulecmd python' + try: + # Try the Python bindings now + completed = os_ext.run_command(self._command) + except OSError as e: + raise ReframeError( + 'could not get the Python bindings for Tmod: ' % e) + + if re.search(r'Unknown shell type', completed.stderr): + raise ReframeError( + 'Python is not supported by this Tmod installation') + def name(self): + return 'tmod' + + def version(self): + return self._version -def module_conflict_list(name): - """Return the list of conflicted packages""" - conflict_list = [] - completed = os_ext.run_command( - cmd='%s show %s' % (reframe.MODULECMD_PYTHON, name)) + def _run_module_command(self, *args): + command = [self._command, *args] + return os_ext.run_command(' '.join(command)) - # Search for lines starting with 'conflict' - for line in completed.stderr.split('\n'): - match = re.search('^conflict\s+(?P\S+)', line) - if match: - conflict_list.append(match.group('module_name')) + def _exec_module_command(self, *args): + completed = self._run_module_command(*args) + exec(completed.stdout) - return conflict_list + def loaded_modules(self): + try: + # LOADEDMODULES may be defined but empty + return [Module(m) + for m in os.environ['LOADEDMODULES'].split(':') if m] + except KeyError: + return [] + def conflicted_modules(self, module): + conflict_list = [] + completed = self._run_module_command('show', str(module)) + return [Module(m.group(1)) + for m in re.finditer(r'^conflict\s+(\S+)', + completed.stderr, re.MULTILINE)] -def module_present(name): - for m in module_list(): - if module_equal(m, 
name): - return True + def is_module_loaded(self, module): + return module in self.loaded_modules() - return False + def load_module(self, module): + self._exec_module_command('load', str(module)) + if not self.is_module_loaded(module): + raise ModuleError('could not load module %s' % module) + def unload_module(self, module): + self._exec_module_command('unload', str(module)) + if self.is_module_loaded(module): + raise ModuleError('could not unload module %s' % module) -def module_load(name): - completed = os_ext.run_command( - cmd='%s load %s' % (reframe.MODULECMD_PYTHON, name)) - exec(completed.stdout) + def unload_all(self): + self._exec_module_command('purge') - if not module_present(name): - raise ModuleError('Could not load module %s' % name) + def searchpath(self): + return os.environ['MODULEPATH'].split(':') + def searchpath_add(self, *dirs): + self._exec_module_command('use', *dirs) -def module_force_load(name): - """Forces the loading of package `name', unloading first any conflicting - currently loaded modules. + def searchpath_remove(self, *dirs): + self._exec_module_command('unuse', *dirs) - Returns the a list of unloaded packages - """ - # Do not try to load the module if it is already present - if module_present(name): + +class NoModImpl(ModulesSystemImpl): + """A convenience class that implements a no-op a modules system.""" + + def loaded_modules(self): + return [] + + def conflicted_modules(self, module): return [] - # Discard the version information of the loaded modules - loaded_modules = set([m.split('/')[0] for m in module_list()]) - conflict_list = set(module_conflict_list(name)) - unload_list = loaded_modules & conflict_list - for m in unload_list: - module_unload(m) + def load_module(self, module): + pass + + def unload_module(self, module): + pass + + def is_module_loaded(self, module): + # + # Always return `True`, since this pseudo modules system effectively + # assumes that everything needed is loaded. 
+ # + return True - module_load(name) - return list(unload_list) + def name(self): + return 'nomod' + def version(self): + return '1.0' -def module_unload(name): - completed = os_ext.run_command( - cmd='%s unload %s' % (reframe.MODULECMD_PYTHON, name)) - exec(completed.stdout) + def unload_all(self): + pass - if module_present(name): - raise ModuleError('Could not unload module %s' % name) + def searchpath(self): + return [] + def searchpath_add(self, *dirs): + pass -def module_purge(): - completed = os_ext.run_command( - cmd='%s purge' % reframe.MODULECMD_PYTHON) - exec(completed.stdout) + def searchpath_remove(self, *dirs): + pass -def module_path_add(dirs): - """ - Adds list of dirs to module path - """ - args = ' '.join(dirs) - completed = os_ext.run_command( - cmd='%s use %s' % (reframe.MODULECMD_PYTHON, args)) - exec(completed.stdout) +# The module system used by the framework +_modules_system = None -def module_path_remove(dirs): - """ - Removes list of dirs from module path - """ - args = ' '.join(dirs) - completed = os_ext.run_command( - cmd='%s unuse %s' % (reframe.MODULECMD_PYTHON, args)) - exec(completed.stdout) +def init_modules_system(modules_kind=None): + global _modules_system + + if modules_kind is None: + _modules_system = ModulesSystem(NoModImpl()) + elif modules_kind == 'tmod': + _modules_system = ModulesSystem(TModImpl()) + else: + raise ReframeError('unknown module system') + + +def get_modules_system(): + if _modules_system is None: + raise ReframeError('no modules system is configured') + + return _modules_system diff --git a/reframe/core/pipeline.py b/reframe/core/pipeline.py index 7eca462a27..d10c123799 100644 --- a/reframe/core/pipeline.py +++ b/reframe/core/pipeline.py @@ -5,20 +5,21 @@ import copy import glob import os +import re import shutil import reframe.core.debug as debug +import reframe.core.fields as fields import reframe.core.logging as logging import reframe.settings as settings import reframe.utility.os as os_ext from 
reframe.core.deferrable import deferrable, _DeferredExpression, evaluate from reframe.core.environments import Environment -from reframe.core.exceptions import ReframeFatalError, SanityError -from reframe.core.fields import * -from reframe.core.launchers import * -from reframe.core.logging import getlogger, LoggerAdapter, null_logger -from reframe.core.schedulers import * +from reframe.core.exceptions import ReframeError, SanityError +from reframe.core.schedulers import Job +from reframe.core.schedulers.registry import getscheduler +from reframe.core.launchers.registry import getlauncher from reframe.core.shell import BashScriptBuilder from reframe.core.systems import System, SystemPartition from reframe.frontend.resources import ResourcesManager @@ -148,26 +149,26 @@ def __init__(self, my_test_args, **kwargs): #: The name of the test. #: #: :type: Alphanumeric string. - name = AlphanumericField('name') + name = fields.AlphanumericField('name') #: List of programming environmets supported by this test. #: #: :type: :class:`list` of :class:`str` #: :default: ``[]`` - valid_prog_environs = TypedListField('valid_prog_environs', str) + valid_prog_environs = fields.TypedListField('valid_prog_environs', str) #: List of systems supported by this test. #: The general syntax for systems is ``[:`) or :class:`None` #: :default: :class:`None` - sanity_patterns = AnyField( - 'sanity_patterns', [(TypedField, _DeferredExpression), - (SanityPatternField,)], allow_none=True + sanity_patterns = fields.AnyField( + 'sanity_patterns', [(fields.TypedField, _DeferredExpression), + (fields.SanityPatternField,)], allow_none=True ) # FIXME: Here we first check for the new syntax. The other way around @@ -384,9 +389,9 @@ def __init__(self, my_test_args, **kwargs): #: `) as values. #: :class:`None` is also allowed. 
#: :default: :class:`None` - perf_patterns = AnyField( - 'perf_patterns', [(TypedDictField, str, _DeferredExpression), - (SanityPatternField,)], allow_none=True + perf_patterns = fields.AnyField( + 'perf_patterns', [(fields.TypedDictField, str, _DeferredExpression), + (fields.SanityPatternField,)], allow_none=True ) #: List of modules to be loaded before running this test. @@ -395,7 +400,7 @@ def __init__(self, my_test_args, **kwargs): #: #: :type: :class:`list` of :class:`str` #: :default: ``[]`` - modules = TypedListField('modules', str) + modules = fields.TypedListField('modules', str) #: Environment variables to be set before running this test. #: @@ -403,7 +408,7 @@ def __init__(self, my_test_args, **kwargs): #: #: :type: dictionary with :class:`str` keys/values #: :default: ``{}`` - variables = TypedDictField('variables', str, str) + variables = fields.TypedDictField('variables', str, str) #: Time limit for this test. #: @@ -412,24 +417,63 @@ def __init__(self, my_test_args, **kwargs): #: #: :type: a three-tuple with the above properties. #: :default: ``(0, 10, 0)`` - time_limit = TimerField('time_limit') + time_limit = fields.TimerField('time_limit') - resources = TypedField('resouces', ResourcesManager) + #: Extra resources for this test. + #: + #: This field is for specifying custom resources needed by this test. + #: These resources are defined in the :doc:`configuration ` + #: of a system partition. + #: For example, assume that ``num_accels`` is defined as follows in the + #: configuration file: + #: + #: :: + #: + #: 'resources': { + #: 'num_accels': [ + #: '--gres=gpu:{num_accels}' + #: ] + #: } + #: + #: A regression test then may define :attr:`extra_resources` as follows in + #: order to get two accelerator devices: + #: + #: :: + #: + #: self.extra_resources = {'num_accels': 2} + #: + #: The framework will then pass the option ``--gres=gpu:2`` to the backend + #: scheduler. 
+ #: + #: If the resource name specified in this variable does not match a resource + #: name in the partition configuration, it will be simply ignored. + #: The :attr:`num_gpus_per_node` attribute translates internally to the + #: ``num_gpus_per_node`` resource, so that setting ``self.num_gpus_per_node = + #: 2`` is equivalent to the following: + #: + #: :: + #: + #: self.extra_resources = {'num_gpus_per_node': 2} + #: + #: :type: dictionary with :class:`str` keys/values + #: :default: ``{}`` + #: + #: .. note:: + #: .. versionadded:: 2.8 + extra_resources = fields.TypedDictField('extra_resources', str, str) # Private properties - _prefix = StringField('_prefix') - _stagedir = StringField('_stagedir', allow_none=True) - _stdout = StringField('_stdout', allow_none=True) - _stderr = StringField('_stderr', allow_none=True) - _logger = TypedField('_logger', LoggerAdapter) - _perf_logfile = StringField('_perf_logfile', allow_none=True) - _current_system = TypedField('_current_system', System) - _current_partition = TypedField('_current_partition', SystemPartition, - allow_none=True) - _current_environ = TypedField('_current_environ', Environment, - allow_none=True) - _job = TypedField('_job', Job, allow_none=True) - _job_resources = TypedDictField('_job_resources', str, str) + _prefix = fields.StringField('_prefix') + _stagedir = fields.StringField('_stagedir', allow_none=True) + _stdout = fields.StringField('_stdout', allow_none=True) + _stderr = fields.StringField('_stderr', allow_none=True) + _perf_logfile = fields.StringField('_perf_logfile', allow_none=True) + _current_system = fields.TypedField('_current_system', System) + _current_partition = fields.TypedField('_current_partition', + SystemPartition, allow_none=True) + _current_environ = fields.TypedField('_current_environ', Environment, + allow_none=True) + _job = fields.TypedField('_job', Job, allow_none=True) def __init__(self, name, prefix, system, resources): self.name = name @@ -489,23 +533,19 @@ def 
__init__(self, name, prefix, system, resources): # Associated job self._job = None - self._job_resources = {} - self._launcher_type = None + self.extra_resources = {} # Dynamic paths of the regression check; will be set in setup() - self._resources = resources - self._stagedir = None - self._stdout = None - self._stderr = None + self._resources_mgr = resources + self._stagedir = None + self._stdout = None + self._stderr = None # Compilation task output self._compile_task = None - # Check-specific logging - self._logger = null_logger - # Performance logging - self._perf_logger = null_logger + self._perf_logger = logging.null_logger self._perf_logfile = None # Export read-only views to interesting fields @@ -556,7 +596,7 @@ def logger(self): You can use this logger to log information for your test. """ - return self._logger + return logging.getlogger() @property def prefix(self): @@ -631,12 +671,12 @@ def is_local(self): """Check if the test will execute locally. A test executes locally if the :attr:`local` attribute is set or if the - current partition's scheduler is the ``local`` one. + current partition's scheduler does not support job submission. 
""" if self._current_partition is None: return self.local - return self.local or self._current_partition.scheduler == 'local' + return self.local or self._current_partition.scheduler.is_local def _sanitize_basename(self, name): """Create a basename safe to be used as path component @@ -647,7 +687,6 @@ def _sanitize_basename(self, name): def _setup_environ(self, environ): """Setup the current environment and load it.""" - self._logger.debug('setting up the environment') self._current_environ = environ # Add user modules and variables to the environment @@ -658,24 +697,22 @@ def _setup_environ(self, environ): self._current_environ.set_variable(k, v) # First load the local environment of the partition - self._logger.debug('loading environment for partition %s' % - self._current_partition.fullname) + self.logger.debug('loading environment for the current partition') self._current_partition.local_env.load() - self._logger.debug('loading environment %s' % - self._current_environ.name) + self.logger.debug("loading test's environment") self._current_environ.load() def _setup_paths(self): """Setup the check's dynamic paths.""" - self._logger.debug('setting up paths') + self.logger.debug('setting up paths') - self._stagedir = self._resources.stagedir( + self._stagedir = self._resources_mgr.stagedir( self._sanitize_basename(self._current_partition.name), self.name, self._sanitize_basename(self._current_environ.name) ) - self.outputdir = self._resources.outputdir( + self.outputdir = self._resources_mgr.outputdir( self._sanitize_basename(self._current_partition.name), self.name, self._sanitize_basename(self._current_environ.name) @@ -686,28 +723,24 @@ def _setup_paths(self): def _setup_job(self, **job_opts): """Setup the job related to this check.""" - self._logger.debug('setting up the job descriptor') - self._logger.debug( - 'job scheduler backend: %s' % - ('local' if self.is_local() else self._current_partition.scheduler)) + self.logger.debug('setting up the job descriptor') 
+ + msg = 'job scheduler backend: {0}' + self.logger.debug( + msg.format('local' if self.is_local() else + self._current_partition.scheduler.registered_name)) # num_gpus_per_node is a managed resource if self.num_gpus_per_node > 0: - self._job_resources.setdefault('num_gpus_per_node', - str(self.num_gpus_per_node)) - - # If check is local, use the LocalLauncher, otherwise try to infer the - # launcher from the system info - if self.is_local(): - self._launcher_type = LocalLauncher - elif self._current_partition.scheduler == 'nativeslurm': - self._launcher_type = NativeSlurmLauncher - elif self._current_partition.scheduler == 'slurm+alps': - self._launcher_type = AlpsLauncher + self.extra_resources.setdefault('num_gpus_per_node', + self.num_gpus_per_node) + + if self.local: + scheduler_type = getscheduler('local') + launcher_type = getlauncher('local') else: - # Oops - raise ReframeFatalError('Oops: unsupported launcher: %s' % - self._current_partition.scheduler) + scheduler_type = self._current_partition.scheduler + launcher_type = self._current_partition.launcher job_name = '%s_%s_%s_%s' % ( self.name, @@ -717,57 +750,45 @@ ) job_script_filename = os.path.join(self._stagedir, job_name + '.sh') - if self.is_local(): - self._job = LocalJob( - job_name=job_name, - job_environ_list=[ - self._current_partition.local_env, - self._current_environ - ], - job_script_builder=BashScriptBuilder(), - script_filename=job_script_filename, - stdout=self._stdout, - stderr=self._stderr, - time_limit=self.time_limit, - **job_opts) - else: - self._job = SlurmJob( - job_name=job_name, - job_environ_list=[ - self._current_partition.local_env, - self._current_environ - ], - job_script_builder=BashScriptBuilder(login=True), - script_filename=job_script_filename, - num_tasks=self.num_tasks, - num_tasks_per_node=self.num_tasks_per_node, - num_cpus_per_task=self.num_cpus_per_task, - num_tasks_per_core=self.num_tasks_per_core, - 
num_tasks_per_socket=self.num_tasks_per_socket, - use_smt=self.use_multithreading, - exclusive_access=self.exclusive_access, - launcher_type=self._launcher_type, - stdout=self._stdout, - stderr=self._stderr, - time_limit=self.time_limit, - **job_opts) - - # Get job options from managed resources and prepend them to - # job_opts. We want any user supplied options to be able to - # override those set by the framework. - resources_opts = [] - for r, v in self._job_resources.items(): - resources_opts.extend( - self._current_partition.get_resource(r, v)) - - self._job.options = (self._current_partition.access + - resources_opts + self._job.options) + self._job = scheduler_type( + name=job_name, + command=' '.join([self.executable] + self.executable_opts), + launcher=launcher_type(), + environs=[ + self._current_partition.local_env, + self._current_environ + ], + workdir=self._stagedir, + num_tasks=self.num_tasks, + num_tasks_per_node=self.num_tasks_per_node, + num_tasks_per_core=self.num_tasks_per_core, + num_tasks_per_socket=self.num_tasks_per_socket, + num_cpus_per_task=self.num_cpus_per_task, + use_smt=self.use_multithreading, + time_limit=self.time_limit, + script_filename=job_script_filename, + stdout=self._stdout, + stderr=self._stderr, + sched_exclusive_access=self.exclusive_access, + **job_opts + ) + + # Get job options from managed resources and prepend them to + # job_opts. We want any user supplied options to be able to + # override those set by the framework. 
+ resources_opts = [] + for r, v in self.extra_resources.items(): + resources_opts.extend( + self._current_partition.get_resource(r, v)) + + self._job.options = (self._current_partition.access + + resources_opts + self._job.options) # FIXME: This is a temporary solution to address issue #157 def _setup_perf_logging(self): - self._logger.debug('setting up performance logging') + self.logger.debug('setting up performance logging') self._perf_logfile = os.path.join( - self._resources.logdir(self._current_partition.name), + self._resources_mgr.logdir(self._current_partition.name), self.name + '.log' ) @@ -776,14 +797,14 @@ def _setup_perf_logging(self): 'handlers': { self._perf_logfile: { 'level': 'DEBUG', - 'format': '[%(asctime)s] %(check_name)s ' + 'format': '[%(asctime)s] %(testcase_name)s ' '(jobid=%(check_jobid)s): %(message)s', 'append': True, } } } - self._perf_logger = LoggerAdapter( + self._perf_logger = logging.LoggerAdapter( logger=logging.load_from_dict(perf_logging_config), check=self ) @@ -798,10 +819,6 @@ def setup(self, partition, environ, **job_opts): ``job_opts`` to the base class method. :raises reframe.core.exceptions.ReframeError: In case of errors. 
""" - # Logging prevents deep copy, so we initialize the check's logger late - # during the check's setup phase - self._logger = getlogger('check', check=self) - self._current_partition = partition self._setup_environ(environ) self._setup_paths() @@ -810,19 +827,19 @@ def setup(self, partition, environ, **job_opts): self._setup_perf_logging() def _copy_to_stagedir(self, path): - self._logger.debug('copying %s to stage directory (%s)' % - (path, self._stagedir)) - self._logger.debug('symlinking files: %s' % self.readonly_files) + self.logger.debug('copying %s to stage directory (%s)' % + (path, self._stagedir)) + self.logger.debug('symlinking files: %s' % self.readonly_files) os_ext.copytree_virtual(path, self._stagedir, self.readonly_files) def prebuild(self): for cmd in self.prebuild_cmd: - self._logger.debug('executing prebuild command: %s' % cmd) + self.logger.debug('executing prebuild commands') os_ext.run_command(cmd, check=True) def postbuild(self): for cmd in self.postbuild_cmd: - self._logger.debug('executing postbuild command: %s' % cmd) + self.logger.debug('executing postbuild commands') os_ext.run_command(cmd, check=True) def compile(self, **compile_opts): @@ -863,20 +880,19 @@ def compile(self, **compile_opts): os.chdir(self._stagedir) try: self.prebuild() - self._logger.debug('compilation started') self._compile_task = self._current_environ.compile( sourcepath=target_sourcepath, executable=os.path.join(self._stagedir, self.executable), **compile_opts) - self._logger.debug('compilation stdout:\n%s' % - self._compile_task.stdout) - self._logger.debug('compilation stderr:\n%s' % - self._compile_task.stderr) + self.logger.debug('compilation stdout:\n%s' % + self._compile_task.stdout) + self.logger.debug('compilation stderr:\n%s' % + self._compile_task.stderr) self.postbuild() finally: # Always restore working directory os.chdir(wd_save) - self._logger.debug('compilation finished') + self.logger.debug('compilation finished') def run(self): """The run phase 
of the regression test pipeline. @@ -887,13 +903,12 @@ def run(self): if not self._current_system or not self._current_partition: raise ReframeError('no system or system partition is set') - self._job.submit(cmd='%s %s' % - (self.executable, ' '.join(self.executable_opts)), - workdir=self._stagedir) + self._job.prepare(BashScriptBuilder(login=True)) + self._job.submit() msg = ('spawned job (%s=%s)' % ('pid' if self.is_local() else 'jobid', self._job.jobid)) - self._logger.debug(msg) + self.logger.debug(msg) def poll(self): """Poll the test's state. @@ -916,7 +931,7 @@ def wait(self): :raises reframe.core.exceptions.ReframeError: In case of errors. """ self._job.wait() - self._logger.debug('spawned job finished') + self.logger.debug('spawned job finished') def check_sanity(self): """The sanity checking phase of the regression test pipeline. @@ -991,7 +1006,7 @@ def _check_performance_new(self): def _copy_to_outputdir(self): """Copy checks interesting files to the output directory.""" - self._logger.debug('copying interesting files to output directory') + self.logger.debug('copying interesting files to output directory') shutil.copy(self._stdout, self.outputdir) shutil.copy(self._stderr, self.outputdir) if self._job: @@ -1013,17 +1028,17 @@ def cleanup(self, remove_files=False, unload_env=True): """ aliased = os.path.samefile(self._stagedir, self.outputdir) if aliased: - self._logger.debug('skipping copy to output dir ' - 'since they alias each other') + self.logger.debug('skipping copy to output dir ' + 'since they alias each other') else: self._copy_to_outputdir() if remove_files: - self._logger.debug('removing stage directory') + self.logger.debug('removing stage directory') shutil.rmtree(self._stagedir) if unload_env: - self._logger.debug("unloading test's environment") + self.logger.debug("unloading test's environment") self._current_environ.unload() self._current_partition.local_env.unload() @@ -1127,7 +1142,7 @@ def _match_patterns(self, multi_patterns, 
reference, scan_info): # Restore the handler patterns['\e'] = eof_handler - self._logger.debug('output scan info:\n' + scan_info.scan_report()) + self.logger.debug('output scan info:\n' + scan_info.scan_report()) return ret def __str__(self): diff --git a/reframe/core/schedulers.py b/reframe/core/schedulers.py deleted file mode 100644 index 67905ee30c..0000000000 --- a/reframe/core/schedulers.py +++ /dev/null @@ -1,693 +0,0 @@ -# -# Scheduler implementations -# - -import abc -import itertools -import os -import re -import numbers -import signal -import stat -import subprocess -import time - -import reframe.core.debug as debug -import reframe.utility.os as os_ext - -from datetime import datetime -from reframe.core.exceptions import (ReframeError, - JobSubmissionError, - JobResourcesError) -from reframe.core.fields import TypedField, TypedListField -from reframe.core.launchers import LocalLauncher -from reframe.settings import settings - - -class _TimeoutExpired(ReframeError): - pass - - -class Job(abc.ABC): - """A job descriptor. - - Users may not create jobs directly.""" - - #: Options to be passed to the backend job scheduler. - #: - #: :type: :class:`list` of :class:`str` - #: :default: ``[]`` - options = TypedListField('options', str) - - #: List of shell commands to execute before launching this job. - #: - #: These commands do not execute in the context of ReFrame. - #: Instead, they are emitted in the generated job script just before the - #: actual job launch command. - #: - #: :type: :class:`list` of :class:`str` - #: :default: ``[]`` - pre_run = TypedListField('pre_run', str) - - #: List of shell commands to execute after launching this job. - #: - #: See :attr:`pre_run` for a more detailed description of the semantics. - #: - #: :type: :class:`list` of :class:`str` - #: :default: ``[]`` - post_run = TypedListField('post_run', str) - - # FIXME: This is not very meaningful, but allows a meaningful - # documentation. 
- # - #: The job launcher used to launch this job. - #: - #: :type: :class:`reframe.core.launchers.JobLauncher` - launcher = TypedField('launcher', object) - - def __init__(self, - job_name, - job_environ_list, - job_script_builder, - launcher_type, - num_tasks, - time_limit=(0, 10, 0), - script_filename=None, - stdout=None, - stderr=None, - sched_options=[], - launcher_options=[], - **kwargs): - # Mutable fields - self.options = list(sched_options) - - self.launcher = launcher_type(self, launcher_options) - - # Commands to be run before and after the job is launched - self.pre_run = [] - self.post_run = [] - - self._name = job_name - self._environs = list(job_environ_list) or [] - self._script_builder = job_script_builder - self._num_tasks = num_tasks - self._script_filename = script_filename or '%s.sh' % self._name - self._stdout = stdout or '%s.out' % self._name - self._stderr = stderr or '%s.err' % self._name - self._time_limit = time_limit - - # Live job information; to be filled during job's lifetime - self._jobid = -1 - self._state = None - self._exitcode = None - - def __repr__(self): - return debug.repr(self) - - # Read-only properties - @property - def exitcode(self): - return self._exitcode - - @property - def jobid(self): - return self._jobid - - @property - def name(self): - return self._name - - @property - def num_tasks(self): - return self._num_tasks - - @property - def script_filename(self): - return self._script_filename - - @property - def state(self): - return self._state - - @property - def stdout(self): - return self._stdout - - @property - def stderr(self): - return self._stderr - - @property - def time_limit(self): - return self._time_limit - - def emit_preamble(self, builder): - for e in self._environs: - e.emit_load_instructions(self._script_builder) - - for stmt in self.pre_run: - builder.verbatim(stmt) - - def emit_postamble(self, builder): - for stmt in self.post_run: - builder.verbatim(stmt) - - @abc.abstractmethod - def _submit(self, 
script): - """Submit a script file for execution. - - Keyword arguments: - script -- the name of a script file to be submitted - """ - - # Wait for the job to finish. - @abc.abstractmethod - def wait(self): - pass - - # Return `True` if job has finished. - @abc.abstractmethod - def finished(self): - pass - - # Cancel the job. - @abc.abstractmethod - def cancel(self): - pass - - def submit(self, cmd, workdir='.'): - # Build the submission script and submit it - self.emit_preamble(self._script_builder) - self._script_builder.verbatim('cd %s' % workdir) - self.launcher.emit_run_command(cmd, self._script_builder) - self.emit_postamble(self._script_builder) - - script_file = open(self._script_filename, 'w+') - script_file.write(self._script_builder.finalise()) - script_file.close() - self._submit(script_file) - - -class JobState: - def __init__(self, state): - self._state = state - - def __repr__(self): - return debug.repr(self) - - def __eq__(self, other): - if not isinstance(other, type(self)): - return NotImplemented - - return self._state == other._state - - def __str__(self): - return self._state - - -class JobResources: - """Managed job resources. - - Custom resources usually configured per system by the system - administrators. 
- """ - - def __init__(self, resources): - self._resources = resources - - def __repr__(self): - return debug.repr(self) - - def get(self, name, **kwargs): - """Get resource option string for the resource ``name``.""" - try: - return self._resources.format(**kwargs) - except KeyError: - return None - - def getall(self, resources_spec): - """Get all resource option strings for resources in ``resource_spec``.""" - ret = [] - for opt, kwargs in resources_spec.items(): - opt_str = self.get(opt, **kwargs) - if opt_str: - ret.append(opt_str) - - return ret - - -# Local job states -class LocalJobState(JobState): - pass - - -LOCAL_JOB_SUCCESS = LocalJobState('SUCCESS') -LOCAL_JOB_FAILURE = LocalJobState('FAILURE') -LOCAL_JOB_TIMEOUT = LocalJobState('TIMEOUT') - - -class LocalJob(Job): - def __init__(self, - time_limit=(0, 10, 0), - **kwargs): - super().__init__(num_tasks=1, - launcher_type=LocalLauncher, - **kwargs) - # Launched process - self.cancel_grace_period = 2 - self._wait_poll_secs = 0.1 - self._proc = None - - def _submit(self, script): - # `chmod +x' first, because we will execute the script locally - os.chmod(script.name, os.stat(script.name).st_mode | stat.S_IEXEC) - - # Run from the absolute path - self._f_stdout = open(self._stdout, 'w+') - self._f_stderr = open(self._stderr, 'w+') - - # The new process starts also a new session (session leader), so that - # we can later kill any other processes that this might spawn by just - # killing this one. 
- self._proc = os_ext.run_command_async(os.path.abspath(script.name), - stdout=self._f_stdout, - stderr=self._f_stderr, - start_new_session=True) - # Update job info - self._jobid = self._proc.pid - - def _kill_all(self): - """Send SIGKILL to all the processes of the spawned job.""" - try: - os.killpg(self._jobid, signal.SIGKILL) - except (ProcessLookupError, PermissionError): - # The process group may already be dead or assigned to a different - # group, so ignore this error - pass - - def _term_all(self): - """Send SIGTERM to all the processes of the spawned job.""" - os.killpg(self._jobid, signal.SIGTERM) - - def _wait_all(self, timeout=0): - """Wait for all the processes of spawned job to finish. - - Keyword arguments: - - timeout -- Timeout period for this wait call in seconds (may be a real - number, too). If `None` or `0`, no timeout will be set. - """ - t_wait = datetime.now() - self._proc.wait(timeout=timeout or None) - t_wait = datetime.now() - t_wait - try: - # Wait for all processes in the process group to finish - while not timeout or t_wait.total_seconds() < timeout: - t_poll = datetime.now() - os.killpg(self._jobid, 0) - time.sleep(self._wait_poll_secs) - t_poll = datetime.now() - t_poll - t_wait += t_poll - - # Final check - os.killpg(self._jobid, 0) - raise _TimeoutExpired - except (ProcessLookupError, PermissionError): - # Ignore also EPERM errors in case this process id is assigned - # elsewhere and we cannot query its status - return - - def cancel(self): - """Cancel the current job. - - The SIGTERM signal will be sent first to all the processes of this job - and after a grace period (default 2s) the SIGKILL signal will be send. - - This function waits for the spawned process tree to finish. 
- """ - if self._jobid == -1: - return - - self._term_all() - - # Set the time limit to the grace period and let wait() do the final - # killing - self._time_limit = (0, 0, self.cancel_grace_period) - self.wait() - - def wait(self, timeout=None): - """Wait for the spawned job to finish. - - As soon as the parent job process finishes, all of its spawned - subprocesses will be forced to finish, too. - - Upon return, the whole process tree of the spawned job process will be - cleared, unless any of them has called `setsid()`. - - Keyword arguments: - - timeout -- Timeout period for this wait call in seconds. If `None` the - default time limit will be used. - """ - if self._state is not None: - # Job has been already waited for - return - - if timeout is None: - # Convert time_limit to seconds - h, m, s = self._time_limit - timeout = h * 3600 + m * 60 + s - - try: - self._wait_all(timeout=timeout) - self._exitcode = self._proc.returncode - if self._exitcode != 0: - self._state = LOCAL_JOB_FAILURE - else: - self._state = LOCAL_JOB_SUCCESS - except (_TimeoutExpired, subprocess.TimeoutExpired): - self._state = LOCAL_JOB_TIMEOUT - finally: - # Cleanup all the processes of this job - self._kill_all() - self._wait_all() - self._f_stdout.close() - self._f_stderr.close() - - def finished(self): - """Check if the spawned process has finished. - - This function does not wait the process. It just queries its state. If - the process has finished, you *must* call wait() to properly cleanup - after it. 
- """ - self._proc.poll() - - if self._proc.returncode is None: - return False - - return True - - -class SlurmJobState(JobState): - def __init__(self, state): - super().__init__(state) - - -# Slurm Job states -SLURM_JOB_BOOT_FAIL = SlurmJobState('BOOT_FAIL') -SLURM_JOB_CANCELLED = SlurmJobState('CANCELLED') -SLURM_JOB_COMPLETED = SlurmJobState('COMPLETED') -SLURM_JOB_CONFIGURING = SlurmJobState('CONFIGURING') -SLURM_JOB_COMPLETING = SlurmJobState('COMPLETING') -SLURM_JOB_FAILED = SlurmJobState('FAILED') -SLURM_JOB_NODE_FAILED = SlurmJobState('NODE_FAILED') -SLURM_JOB_PENDING = SlurmJobState('PENDING') -SLURM_JOB_PREEMPTED = SlurmJobState('PREEMPTED') -SLURM_JOB_RESIZING = SlurmJobState('RESIZING') -SLURM_JOB_RUNNING = SlurmJobState('RUNNING') -SLURM_JOB_SUSPENDED = SlurmJobState('SUSPENDED') -SLURM_JOB_TIMEOUT = SlurmJobState('TIMEOUT') - - -class SlurmJob(Job): - def __init__(self, - use_smt=None, - sched_nodelist=None, - sched_exclude=None, - sched_partition=None, - sched_reservation=None, - sched_account=None, - num_tasks_per_node=None, - num_cpus_per_task=None, - num_tasks_per_core=None, - num_tasks_per_socket=None, - exclusive_access=False, - **kwargs): - super().__init__(**kwargs) - self._partition = sched_partition - self._use_smt = use_smt - self._exclusive_access = exclusive_access - self._nodelist = sched_nodelist - self._exclude = sched_exclude - self._reservation = sched_reservation - self._account = sched_account - self._prefix = '#SBATCH' - self._signal = None - self._job_init_poll_num_tries = 0 - - self._num_tasks_per_node = num_tasks_per_node - self._num_cpus_per_task = num_cpus_per_task - self._num_tasks_per_core = num_tasks_per_core - self._num_tasks_per_socket = num_tasks_per_socket - self._completion_states = [SLURM_JOB_BOOT_FAIL, - SLURM_JOB_CANCELLED, - SLURM_JOB_COMPLETED, - SLURM_JOB_FAILED, - SLURM_JOB_NODE_FAILED, - SLURM_JOB_PREEMPTED, - SLURM_JOB_TIMEOUT] - self._pending_states = [SLURM_JOB_CONFIGURING, - SLURM_JOB_PENDING] - # Reasons 
to cancel a pending job: if the job is expected to remain - # pending for a much longer time then usual (mostly if a sysadmin - # intervention is required) - self._cancel_reasons = ['FrontEndDown', - 'Licenses', # May require sysadmin - 'NodeDown', - 'PartitionDown', - 'PartitionInactive', - 'PartitionNodeLimit', - 'QOSJobLimit', - 'QOSResourceLimit', - 'ReqNodeNotAvail', # Inaccurate SLURM doc - 'QOSUsageThreshold'] - self._is_cancelling = False - - @property - def account(self): - return self._account - - @property - def exclude_list(self): - return self._exclude - - @property - def exclusive_access(self): - return self._exclusive_access - - @property - def nodelist(self): - return self._nodelist - - @property - def num_cpus_per_task(self): - return self._num_cpus_per_task - - @property - def num_tasks_per_core(self): - return self._num_tasks_per_core - - @property - def num_tasks_per_node(self): - return self._num_tasks_per_node - - @property - def num_tasks_per_socket(self): - return self._num_tasks_per_socket - - @property - def partition(self): - return self._partition - - @property - def reservation(self): - return self._reservation - - @property - def use_smt(self): - return self._use_smt - - @property - def signal(self): - return self._signal - - def emit_preamble(self, builder): - builder.verbatim('%s --job-name="%s"' % (self._prefix, self._name)) - builder.verbatim('%s --time=%s' % - (self._prefix, '%d:%d:%d' % self._time_limit)) - builder.verbatim('%s --ntasks=%d' % (self._prefix, self._num_tasks)) - if self._num_tasks_per_node: - builder.verbatim('%s --ntasks-per-node=%d' % - (self._prefix, self._num_tasks_per_node)) - - if self._num_cpus_per_task: - builder.verbatim('%s --cpus-per-task=%d' % - (self._prefix, self._num_cpus_per_task)) - - if self._num_tasks_per_core: - builder.verbatim('%s --ntasks-per-core=%d' % - (self._prefix, self._num_tasks_per_core)) - - if self._num_tasks_per_socket: - builder.verbatim('%s --ntasks-per-socket=%d' % - 
(self._prefix, self._num_tasks_per_socket)) - - if self._partition: - builder.verbatim('%s --partition=%s' % - (self._prefix, self._partition)) - - if self._exclusive_access: - builder.verbatim('%s --exclusive' % self._prefix) - - if self._account: - builder.verbatim( - '%s --account=%s' % (self._prefix, self._account)) - - if self._nodelist: - builder.verbatim( - '%s --nodelist=%s' % (self._prefix, self._nodelist)) - - if self._exclude: - builder.verbatim( - '%s --exclude=%s' % (self._prefix, self._exclude)) - - if self._use_smt is not None: - hint = 'multithread' if self._use_smt else 'nomultithread' - builder.verbatim('%s --hint=%s' % (self._prefix, hint)) - - if self._reservation: - builder.verbatim('%s --reservation=%s' % (self._prefix, - self._reservation)) - if self._stdout: - builder.verbatim('%s --output="%s"' % (self._prefix, self._stdout)) - - if self._stderr: - builder.verbatim('%s --error="%s"' % (self._prefix, self._stderr)) - - for opt in self.options: - builder.verbatim('%s %s' % (self._prefix, opt)) - - super().emit_preamble(builder) - - def _submit(self, script): - cmd = 'sbatch %s' % script.name - completed = os_ext.run_command( - cmd, check=True, timeout=settings.job_submit_timeout) - - jobid_match = re.search('Submitted batch job (?P\d+)', - completed.stdout) - if not jobid_match: - raise JobSubmissionError(command=cmd, - stdout=completed.stdout, - stderr=completed.stderr, - exitcode=completed.returncode) - - # Job id's are treated as string; keep in mind - self._jobid = jobid_match.group('jobid') - if not self._stdout: - self._stdout = 'slurm-%s.out' % self._jobid - - if not self._stderr: - self._stderr = self._stdout - - def _update_state(self): - """Check the status of the job.""" - intervals = itertools.cycle(settings.job_init_poll_intervals) - state_match = None - max_tries = settings.job_init_poll_max_tries - while (not state_match and - self._job_init_poll_num_tries < max_tries): - # Query job state persistently. 
When you first submit, the job may - # not be yet registered in the database; so try some times We - # restrict the `sacct' query to today (`-S' option), so as to avoid - # possible older and stale slurm database entries. - completed = os_ext.run_command( - 'sacct -S %s -P -j %s -o jobid,state,exitcode' % - (datetime.now().strftime('%F'), self._jobid), - check=True) - state_match = re.search( - '^(?P\d+)\|(?P\S+)([^\|]*)\|' - '(?P\d+)\:(?P\d+)', - completed.stdout, re.MULTILINE) - if not state_match: - self._job_init_poll_num_tries += 1 - time.sleep(next(intervals)) - - if not state_match: - raise ReframeError('Querying initial job state timed out') - - assert self._jobid == state_match.group('jobid') - - self._state = SlurmJobState(state_match.group('state')) - self._exitcode = int(state_match.group('exitcode')) - self._signal = int(state_match.group('signal')) - - def _cancel_if_blocked(self): - if self._is_cancelling or self._state not in self._pending_states: - return - - completed = os_ext.run_command('squeue -j %s -o "%%i|%%T|%%r" ' % - self._jobid, check=True) - # Note: the reason may given as "ReqNodeNotAvail, - # UnavailableNodes:nid00[408,411-415]" by squeue. In this case, - # we take only the string up to the comma. - state_match = re.search( - '^(?P\d+)\|(?P\S+)\|' - '(?P\w+)(\W+(?P.*))?', - completed.stdout, re.MULTILINE) - # If squeue does not return any job info (state_match is empty), - # it means normally that the job has finished meanwhile. So we - # can exit this function. 
- if not state_match: - return - - assert self._jobid == state_match.group('jobid') - # Assure that the job is still in a pending state - state = SlurmJobState(state_match.group('state')) - reason = state_match.group('reason') - if state in self._pending_states and reason in self._cancel_reasons: - self.cancel() - reason_msg = ('job canceled because it was blocked in pending ' - 'state due to the following SLURM reason: ' + reason) - reason_details = state_match.group('reason_details') - if reason_details: - reason_msg += ', ' + reason_details - raise JobResourcesError(reason_msg) - - def wait(self): - intervals = itertools.cycle(settings.job_state_poll_intervals) - - # Quickly return in case we have finished already - if self._state in self._completion_states: - return - - self._update_state() - self._cancel_if_blocked() - while self._state not in self._completion_states: - time.sleep(next(intervals)) - self._update_state() - self._cancel_if_blocked() - - def cancel(self): - """Cancel job execution. 
- - This call waits until the job has finished.""" - if self._jobid == -1: - return - - os_ext.run_command('scancel %s' % self._jobid, - check=True, timeout=settings.job_submit_timeout) - self._is_cancelling = True - self.wait() - - def finished(self): - try: - self._update_state() - # We postpone exception handling: we ignore the exception at this point - # and mark the job as unfinished in order to deal with it later - except ReframeError: - return False - else: - return self._state in self._completion_states - diff --git a/reframe/core/schedulers/__init__.py b/reframe/core/schedulers/__init__.py new file mode 100644 index 0000000000..22ad64fd8a --- /dev/null +++ b/reframe/core/schedulers/__init__.py @@ -0,0 +1,267 @@ +# +# Scheduler implementations +# + +import abc + +import reframe.core.debug as debug +import reframe.utility.os as os_ext +import reframe.core.fields as fields + +from reframe.core.exceptions import ConfigurationError +from reframe.core.launchers import JobLauncher +from reframe.core.logging import getlogger +from reframe.core.shell import BashScriptBuilder + + +class JobState: + def __init__(self, state): + self._state = state + + def __repr__(self): + return debug.repr(self) + + def __eq__(self, other): + if not isinstance(other, type(self)): + return NotImplemented + + return self._state == other._state + + def __str__(self): + return self._state + + +class Job(abc.ABC): + """A job descriptor. + + .. note:: + This is an abstract class. + Users may not create jobs directly. + """ + + #: Options to be passed to the backend job scheduler. + #: + #: :type: :class:`list` of :class:`str` + #: :default: ``[]`` + options = fields.TypedListField('options', str) + + #: List of shell commands to execute before launching this job. + #: + #: These commands do not execute in the context of ReFrame. + #: Instead, they are emitted in the generated job script just before the + #: actual job launch command. 
+ #: + #: :type: :class:`list` of :class:`str` + #: :default: ``[]`` + pre_run = fields.TypedListField('pre_run', str) + + #: List of shell commands to execute after launching this job. + #: + #: See :attr:`pre_run` for a more detailed description of the semantics. + #: + #: :type: :class:`list` of :class:`str` + #: :default: ``[]`` + post_run = fields.TypedListField('post_run', str) + + #: The parallel program launcher that will be used to launch the parallel + #: executable of this job. + #: + #: :type: :class:`reframe.core.launchers.JobLauncher` + launcher = fields.TypedField('launcher', JobLauncher) + + _jobid = fields.IntegerField('_jobid', allow_none=True) + _exitcode = fields.IntegerField('_exitcode', allow_none=True) + _state = fields.TypedField('_state', JobState, allow_none=True) + + # The sched_* arguments are exposed also to the frontend + def __init__(self, + name, + command, + launcher, + environs=[], + workdir='.', + num_tasks=1, + num_tasks_per_node=None, + num_tasks_per_core=None, + num_tasks_per_socket=None, + num_cpus_per_task=None, + use_smt=None, + time_limit=(0, 10, 0), + script_filename=None, + stdout=None, + stderr=None, + sched_account=None, + sched_partition=None, + sched_reservation=None, + sched_nodelist=None, + sched_exclude_nodelist=None, + sched_exclusive_access=None, + sched_options=[]): + + # Mutable fields + self.options = list(sched_options) + + # Commands to be run before and after the job is launched + self.pre_run = [] + self.post_run = [] + self.launcher = launcher + + self._name = name + self._command = command + self._environs = list(environs) + self._workdir = workdir + self._num_tasks = num_tasks + self._num_tasks_per_node = num_tasks_per_node + self._num_tasks_per_core = num_tasks_per_core + self._num_tasks_per_socket = num_tasks_per_socket + self._num_cpus_per_task = num_cpus_per_task + self._use_smt = use_smt + self._script_filename = script_filename or '%s.sh' % self._name + self._stdout = stdout or '%s.out' % 
self._name + self._stderr = stderr or '%s.err' % self._name + self._time_limit = time_limit + + # Backend scheduler related information + self._sched_nodelist = sched_nodelist + self._sched_exclude_nodelist = sched_exclude_nodelist + self._sched_partition = sched_partition + self._sched_reservation = sched_reservation + self._sched_account = sched_account + self._sched_exclusive_access = sched_exclusive_access + + # Live job information; to be filled during job's lifetime by the + # scheduler + self._jobid = None + self._exitcode = None + self._state = None + + def __repr__(self): + return debug.repr(self) + + # Read-only properties + @property + def exitcode(self): + return self._exitcode + + @property + def jobid(self): + return self._jobid + + @property + def state(self): + return self._state + + @property + def name(self): + return self._name + + @property + def command(self): + return self._command + + @property + def workdir(self): + return self._workdir + + @property + def environs(self): + return self._environs + + @property + def num_tasks(self): + return self._num_tasks + + @property + def script_filename(self): + return self._script_filename + + @property + def stdout(self): + return self._stdout + + @property + def stderr(self): + return self._stderr + + @property + def time_limit(self): + return self._time_limit + + @property + def num_cpus_per_task(self): + return self._num_cpus_per_task + + @property + def num_tasks_per_core(self): + return self._num_tasks_per_core + + @property + def num_tasks_per_node(self): + return self._num_tasks_per_node + + @property + def num_tasks_per_socket(self): + return self._num_tasks_per_socket + + @property + def use_smt(self): + return self._use_smt + + @property + def sched_nodelist(self): + return self._sched_nodelist + + @property + def sched_exclude_nodelist(self): + return self._sched_exclude_nodelist + + @property + def sched_partition(self): + return self._sched_partition + + @property + def 
sched_reservation(self): + return self._sched_reservation + + @property + def sched_account(self): + return self._sched_account + + @property + def sched_exclusive_access(self): + return self._sched_exclusive_access + + def emit_preamble(self, builder): + for e in self._environs: + e.emit_load_instructions(builder) + + for c in self.pre_run: + builder.verbatim(c) + + def emit_postamble(self, script_builder): + for c in self.post_run: + script_builder.verbatim(c) + + def prepare(self, script_builder): + self.emit_preamble(script_builder) + script_builder.verbatim('cd %s' % self._workdir) + self.launcher.emit_run_command(self, script_builder) + self.emit_postamble(script_builder) + with open(self.script_filename, 'w') as fp: + fp.write(script_builder.finalise()) + + @abc.abstractmethod + def submit(self): + pass + + @abc.abstractmethod + def wait(self): + pass + + @abc.abstractmethod + def cancel(self): + pass + + @abc.abstractmethod + def finished(self): + pass diff --git a/reframe/core/schedulers/local.py b/reframe/core/schedulers/local.py new file mode 100644 index 0000000000..f3697d2fa0 --- /dev/null +++ b/reframe/core/schedulers/local.py @@ -0,0 +1,168 @@ +import os +import signal +import stat +import subprocess +import time + +import reframe.core.schedulers as sched +import reframe.utility.os as os_ext + +from datetime import datetime +from reframe.core.exceptions import ReframeError +from reframe.core.logging import getlogger +from reframe.core.schedulers.registry import register_scheduler + + +# Local job states +class LocalJobState(sched.JobState): + pass + + +LOCAL_JOB_SUCCESS = LocalJobState('SUCCESS') +LOCAL_JOB_FAILURE = LocalJobState('FAILURE') +LOCAL_JOB_TIMEOUT = LocalJobState('TIMEOUT') + + +class _TimeoutExpired(ReframeError): + pass + + +@register_scheduler('local') +class LocalJob(sched.Job): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.cancel_grace_period = 2 + self._wait_poll_secs = 0.1 + self._proc = None # 
Launched process + + def submit(self): + # `chmod +x' first, because we will execute the script locally + os.chmod(self._script_filename, + os.stat(self._script_filename).st_mode | stat.S_IEXEC) + + # Run from the absolute path + self._f_stdout = open(self.stdout, 'w+') + self._f_stderr = open(self.stderr, 'w+') + + # The new process starts also a new session (session leader), so that + # we can later kill any other processes that this might spawn by just + # killing this one. + self._proc = os_ext.run_command_async( + os.path.abspath(self._script_filename), + stdout=self._f_stdout, + stderr=self._f_stderr, + start_new_session=True) + + # Update job info + self._jobid = self._proc.pid + + def _kill_all(self): + """Send SIGKILL to all the processes of the spawned job.""" + try: + os.killpg(self._jobid, signal.SIGKILL) + except (ProcessLookupError, PermissionError): + # The process group may already be dead or assigned to a different + # group, so ignore this error + getlogger().debug( + 'pid %s already dead or assigned elsewhere' % self._jobid) + + def _term_all(self): + """Send SIGTERM to all the processes of the spawned job.""" + os.killpg(self._jobid, signal.SIGTERM) + + def _wait_all(self, timeout=0): + """Wait for all the processes of spawned job to finish. + + Keyword arguments: + + timeout -- Timeout period for this wait call in seconds (may be a real + number, too). If `None` or `0`, no timeout will be set. 
+ """ + t_wait = datetime.now() + self._proc.wait(timeout=timeout or None) + t_wait = datetime.now() - t_wait + try: + # Wait for all processes in the process group to finish + while not timeout or t_wait.total_seconds() < timeout: + t_poll = datetime.now() + os.killpg(self._jobid, 0) + time.sleep(self._wait_poll_secs) + t_poll = datetime.now() - t_poll + t_wait += t_poll + + # Final check + os.killpg(self._jobid, 0) + raise _TimeoutExpired + except (ProcessLookupError, PermissionError): + # Ignore also EPERM errors in case this process id is assigned + # elsewhere and we cannot query its status + getlogger().debug( + 'pid %s already dead or assigned elsewhere' % self._jobid) + return + + def cancel(self): + """Cancel job. + + The SIGTERM signal will be sent first to all the processes of this job + and after a grace period (default 2s) the SIGKILL signal will be send. + + This function waits for the spawned process tree to finish. + """ + if self._jobid is None: + raise ReframeError('no job is spawned yet') + + self._term_all() + + # Set the time limit to the grace period and let wait() do the final + # killing + self._time_limit = (0, 0, self.cancel_grace_period) + self.wait() + + def wait(self): + """Wait for the spawned job to finish. + + As soon as the parent job process finishes, all of its spawned + subprocesses will be forced to finish, too. + + Upon return, the whole process tree of the spawned job process will be + cleared, unless any of them has called `setsid()`. 
+ """ + if self._jobid is None: + raise ReframeError('no job is spawned yet') + + if self._state is not None: + # Job has been already waited for + return + + # Convert job's time_limit to seconds + h, m, s = self.time_limit + timeout = h * 3600 + m * 60 + s + try: + self._wait_all(timeout) + self._exitcode = self._proc.returncode + if self._exitcode != 0: + self._state = LOCAL_JOB_FAILURE + else: + self._state = LOCAL_JOB_SUCCESS + except (_TimeoutExpired, subprocess.TimeoutExpired): + getlogger().debug('job timed out') + self._state = LOCAL_JOB_TIMEOUT + finally: + # Cleanup all the processes of this job + self._kill_all() + self._wait_all() + self._f_stdout.close() + self._f_stderr.close() + + def finished(self): + """Check if the spawned process has finished. + + This function does not wait the process. It just queries its state. If + the process has finished, you *must* call wait() to properly cleanup + after it. + """ + self._proc.poll() + if self._proc.returncode is None: + return False + + return True diff --git a/reframe/core/schedulers/registry.py b/reframe/core/schedulers/registry.py new file mode 100644 index 0000000000..e8c48c72ea --- /dev/null +++ b/reframe/core/schedulers/registry.py @@ -0,0 +1,33 @@ +import reframe.core.fields as fields + + +# Name registry for job schedulers +_SCHEDULERS = {} + + +def register_scheduler(name, local=False): + """Class decorator for registering new schedulers.""" + + def _register_scheduler(cls): + if name in _SCHEDULERS: + raise ReframeError( + "a scheduler is already registered with name '%s'" % name) + + cls.is_local = fields.ConstantField(bool(local)) + cls.registered_name = fields.ConstantField(name) + _SCHEDULERS[name] = cls + return cls + + return _register_scheduler + + +def getscheduler(name): + try: + return _SCHEDULERS[name] + except KeyError: + raise ConfigurationError("no such scheduler: '%s'" % name) + + +# Import the schedulers modules to trigger their registration +import reframe.core.schedulers.local 
+import reframe.core.schedulers.slurm diff --git a/reframe/core/schedulers/slurm.py b/reframe/core/schedulers/slurm.py new file mode 100644 index 0000000000..e2dd7f1bd0 --- /dev/null +++ b/reframe/core/schedulers/slurm.py @@ -0,0 +1,207 @@ +import itertools +import re +import time + +import reframe.core.schedulers as sched +import reframe.utility.os as os_ext + +from datetime import datetime +from reframe.core.exceptions import JobSubmissionError, ReframeError +from reframe.core.logging import getlogger +from reframe.core.schedulers.registry import register_scheduler +from reframe.settings import settings + + +class SlurmJobState(sched.JobState): + pass + + +# Slurm Job states +SLURM_JOB_BOOT_FAIL = SlurmJobState('BOOT_FAIL') +SLURM_JOB_CANCELLED = SlurmJobState('CANCELLED') +SLURM_JOB_COMPLETED = SlurmJobState('COMPLETED') +SLURM_JOB_CONFIGURING = SlurmJobState('CONFIGURING') +SLURM_JOB_COMPLETING = SlurmJobState('COMPLETING') +SLURM_JOB_FAILED = SlurmJobState('FAILED') +SLURM_JOB_NODE_FAILED = SlurmJobState('NODE_FAILED') +SLURM_JOB_PENDING = SlurmJobState('PENDING') +SLURM_JOB_PREEMPTED = SlurmJobState('PREEMPTED') +SLURM_JOB_RESIZING = SlurmJobState('RESIZING') +SLURM_JOB_RUNNING = SlurmJobState('RUNNING') +SLURM_JOB_SUSPENDED = SlurmJobState('SUSPENDED') +SLURM_JOB_TIMEOUT = SlurmJobState('TIMEOUT') + + +@register_scheduler('slurm') +class SlurmJob(sched.Job): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._prefix = '#SBATCH' + self._completion_states = [SLURM_JOB_BOOT_FAIL, + SLURM_JOB_CANCELLED, + SLURM_JOB_COMPLETED, + SLURM_JOB_FAILED, + SLURM_JOB_NODE_FAILED, + SLURM_JOB_PREEMPTED, + SLURM_JOB_TIMEOUT] + self._pending_states = [SLURM_JOB_CONFIGURING, + SLURM_JOB_PENDING] + + # Reasons to cancel a pending job: if the job is expected to remain + # pending for a much longer time then usual (mostly if a sysadmin + # intervention is required) + self._cancel_reasons = ['FrontEndDown', + 'Licenses', # May require sysadmin + 
'NodeDown', + 'PartitionDown', + 'PartitionInactive', + 'PartitionNodeLimit', + 'QOSJobLimit', + 'QOSResourceLimit', + 'ReqNodeNotAvail', # Inaccurate SLURM doc + 'QOSUsageThreshold'] + self._is_cancelling = False + + def _emit_job_option(self, var, option, builder): + if var is not None: + builder.verbatim(self._prefix + ' ' + option.format(var)) + + def emit_preamble(self, builder): + self._emit_job_option(self.name, '--job-name="{0}"', builder) + self._emit_job_option('%d:%d:%d' % self.time_limit, + '--time={0}', builder) + self._emit_job_option(self.num_tasks, '--ntasks={0}', builder) + self._emit_job_option(self.num_tasks_per_node, + '--ntasks-per-node={0}', builder) + self._emit_job_option(self.num_tasks_per_core, + '--ntasks-per-core={0}', builder) + self._emit_job_option(self.num_tasks_per_socket, + '--ntasks-per-socket={0}', builder) + self._emit_job_option(self.num_cpus_per_task, + '--cpus-per-task={0}', builder) + self._emit_job_option(self.sched_partition, '--partition={0}', builder) + self._emit_job_option(self.sched_exclusive_access, + '--exclusive', builder) + self._emit_job_option(self.sched_account, '--account={0}', builder) + self._emit_job_option(self.sched_nodelist, '--nodelist={0}', builder) + self._emit_job_option(self.sched_exclude_nodelist, + '--exclude={0}', builder) + if self.use_smt is None: + hint = None + else: + hint = 'multithread' if self.use_smt else 'nomultithread' + + self._emit_job_option(hint, '--hint={0}', builder) + self._emit_job_option(self.sched_reservation, + '--reservation={0}', builder) + self._emit_job_option(self.stdout, '--output={0}', builder) + self._emit_job_option(self.stderr, '--error={0}', builder) + + for opt in self.options: + builder.verbatim('%s %s' % (self._prefix, opt)) + + super().emit_preamble(builder) + + def submit(self): + cmd = 'sbatch %s' % self.script_filename + completed = os_ext.run_command( + cmd, check=True, timeout=settings.job_submit_timeout) + jobid_match = re.search('Submitted batch job 
(?P<jobid>\d+)', + completed.stdout) + if not jobid_match: + raise JobSubmissionError(command=cmd, + stdout=completed.stdout, + stderr=completed.stderr, + exitcode=completed.returncode) + self._jobid = int(jobid_match.group('jobid')) + + def _update_state(self): + """Check the status of the job.""" + + completed = os_ext.run_command( + 'sacct -S %s -P -j %s -o jobid,state,exitcode' % + (datetime.now().strftime('%F'), self._jobid), + check=True) + state_match = re.search(r'^(?P<jobid>\d+)\|(?P<state>\S+)([^\|]*)\|' + r'(?P<exitcode>\d+)\:(?P<signal>\d+)', + completed.stdout, re.MULTILINE) + if state_match is None: + getlogger().debug('job state not matched (stdout follows)\n%s' % + completed.stdout) + return + + self._state = SlurmJobState(state_match.group('state')) + if self._state in self._completion_states: + self._exitcode = int(state_match.group('exitcode')) + + def _cancel_if_blocked(self): + if self._is_cancelling or self._state not in self._pending_states: + return + + completed = os_ext.run_command('squeue -j %s -o %%r' % self._jobid, + check=True) + + # Get the reason description by removing the header from the result + try: + reason_descr = completed.stdout.split('\n')[1] + except IndexError: + # Can't retrieve job's state.
Perhaps it has finished already and + # does not show up in the output of squeue + return + + # The reason description may have two parts as follows: + # "ReqNodeNotAvail, UnavailableNodes:nid00[408,411-415]" + try: + reason, reason_details = reason_descr.split(',', maxsplit=1) + except ValueError: + # no reason details + reason, reason_details = reason_descr, None + + if reason in self._cancel_reasons: + self.cancel() + reason_msg = ('job cancelled because it was blocked due to ' + 'a perhaps non-recoverable reason: ' + reason) + if reason_details is not None: + reason_msg += ', ' + reason_details + + raise ReframeError(reason_msg) + + def wait(self): + if self._jobid is None: + raise ReframeError('no job is spawned yet') + + # Quickly return in case we have finished already + if self._state in self._completion_states: + return + + intervals = itertools.cycle(settings.job_poll_intervals) + self._update_state() + self._cancel_if_blocked() + while self._state not in self._completion_states: + time.sleep(next(intervals)) + self._update_state() + self._cancel_if_blocked() + + def cancel(self): + """Cancel job execution. + + This call waits until the job has finished.""" + getlogger().debug('cancelling job (id=%s)' % self._jobid) + if self._jobid is None: + raise ReframeError('no job is spawned yet') + + os_ext.run_command('scancel %s' % self._jobid, + check=True, timeout=settings.job_submit_timeout) + self._is_cancelling = True + self.wait() + + def finished(self): + try: + self._update_state() + except ReframeError as e: + # We ignore these exceptions at this point and we simply mark the + # job as unfinished. 
+ getlogger().debug('ignoring error during polling: %s' % e) + return False + else: + return self._state in self._completion_states diff --git a/reframe/core/systems.py b/reframe/core/systems.py index a5677f0abf..8016ee4c59 100644 --- a/reframe/core/systems.py +++ b/reframe/core/systems.py @@ -12,7 +12,6 @@ class SystemPartition: _name = NonWhitespaceField('_name') _descr = StringField('_descr') - _scheduler = NonWhitespaceField('_scheduler', allow_none=True) _access = TypedListField('_access', str) _environs = TypedListField('_environs', Environment) _resources = TypedDictField('_resources', str, (list, str)) @@ -21,11 +20,13 @@ class SystemPartition: # maximum concurrent jobs _max_jobs = IntegerField('_max_jobs') - def __init__(self, name, descr=None, scheduler=None, access=[], - environs=[], resources={}, local_env=None, max_jobs=1): + def __init__(self, name, descr=None, scheduler=None, launcher=None, + access=[], environs=[], resources={}, local_env=None, + max_jobs=1): self._name = name self._descr = descr or name self._scheduler = scheduler + self._launcher = launcher self._access = list(access) self._environs = list(environs) self._resources = dict(resources) @@ -95,15 +96,29 @@ def resources(self): @property def scheduler(self): - """The backend scheduler of this partition. + """The type of the backend scheduler of this partition. - This is the name of the scheduler defined in the - `partition configuration `__. + :returns: a subclass of :class:`reframe.core.schedulers.Job`. - :type: `str` + .. note:: + .. versionchanged:: 2.8 + + Prior versions returned a string representing the scheduler and job + launcher combination. """ return self._scheduler + @property + def launcher(self): + """The type of the backend launcher of this partition. + + :returns: a subclass of :class:`reframe.core.launchers.JobLauncher`. + + .. note:: + .. versionadded:: 2.8 + """ + return self._launcher + # Instantiate managed resource `name` with `value`. 
def get_resource(self, name, value): ret = [] @@ -129,6 +144,7 @@ def __eq__(self, other): return (self._name == other.name and self._scheduler == other._scheduler and + self._launcher == other._launcher and self._access == other._access and self._environs == other._environs and self._resources == other._resources and @@ -147,6 +163,7 @@ class System: _descr = StringField('_descr') _hostnames = TypedListField('_hostnames', str) _partitions = TypedListField('_partitions', SystemPartition) + _modules_system = AlphanumericField('_modules_system', allow_none=True) prefix = StringField('prefix') stagedir = StringField('stagedir', allow_none=True) @@ -164,11 +181,12 @@ class System: def __init__(self, name, descr=None, hostnames=[], partitions=[], prefix='.', stagedir=None, outputdir=None, logdir=None, - resourcesdir='.'): + resourcesdir='.', modules_system=None): self._name = name self._descr = descr or name self._hostnames = list(hostnames) self._partitions = list(partitions) + self._modules_system = modules_system self.prefix = prefix self.stagedir = stagedir self.outputdir = outputdir @@ -188,6 +206,10 @@ def descr(self): def hostnames(self): return self._hostnames + @property + def modules_system(self): + return self._modules_system + @property def name(self): """The name of this system.""" diff --git a/reframe/frontend/cli.py b/reframe/frontend/cli.py index c9da0ca47e..01935e3b71 100644 --- a/reframe/frontend/cli.py +++ b/reframe/frontend/cli.py @@ -7,7 +7,7 @@ import reframe.utility.os as os_ext from reframe.core.exceptions import ModuleError -from reframe.core.modules import module_force_load, module_unload +from reframe.core.modules import get_modules_system from reframe.core.logging import getlogger from reframe.frontend.argparse import ArgumentParser from reframe.frontend.executors import Runner @@ -16,6 +16,7 @@ from reframe.frontend.loader import (RegressionCheckLoader, SiteConfiguration, autodetect_system) +from reframe.core.modules import 
init_modules_system from reframe.frontend.printer import PrettyPrinter from reframe.frontend.resources import ResourcesManager from reframe.settings import settings @@ -218,6 +219,9 @@ def main(): list_supported_systems(site_config.systems.values(), printer) sys.exit(1) + # Init modules system + init_modules_system(system.modules_system) + if options.mode: try: mode_args = site_config.modes[options.mode] @@ -353,10 +357,10 @@ def main(): # Act on checks # Unload regression's module and load user-specified modules - module_unload(settings.module_name) + get_modules_system().unload_module(settings.module_name) for m in options.user_modules: try: - module_force_load(m) + get_modules_system().load_module(m, force=True) except ModuleError: printer.info("Could not load module `%s': Skipping..." % m) diff --git a/reframe/frontend/executors/__init__.py b/reframe/frontend/executors/__init__.py index fdf5ecb267..f25cfa1149 100644 --- a/reframe/frontend/executors/__init__.py +++ b/reframe/frontend/executors/__init__.py @@ -7,6 +7,7 @@ ReframeFatalError, ReframeError, SanityError ) from reframe.core.fields import StringField, TypedField +from reframe.core.logging import logging_context from reframe.core.pipeline import RegressionTest from reframe.frontend.printer import PrettyPrinter from reframe.frontend.statistics import TestStats @@ -85,25 +86,42 @@ def current_stage(self): def setup(self, partition, environ, **job_opts): self._current_stage = 'setup' - self._check.setup(partition, environ, **job_opts) + with logging_context(check=self._check) as logger: + logger.debug('entering setup stage') + self._check.setup(partition, environ, **job_opts) def compile(self): self._current_stage = 'compile' - self._check.compile() + with logging_context(check=self._check) as logger: + logger.debug('entering compilation stage') + self._check.compile() def run(self): self._current_stage = 'run' - self._check.run() + with logging_context(check=self._check) as logger: + 
logger.debug('entering running stage') + self._check.run() def wait(self): self._current_stage = 'wait' - self._check.wait() + with logging_context(check=self._check) as logger: + logger.debug('entering waiting stage') + self._check.wait() + + def poll(self): + with logging_context(check=self._check) as logger: + logger.debug('polling check') + ret = self._check.poll() + return ret def check_sanity(self): # check_sanity() may be overriden by the user tests; we log this phase # here then self._current_stage = 'sanity' - ret = self._check.check_sanity() + with logging_context(check=self._check) as logger: + logger.debug('entering sanity checking stage') + ret = self._check.check_sanity() + return ret def check_performance(self): @@ -114,7 +132,9 @@ def check_performance(self): # FIXME: the logic has become a bit ugly here in order to support # both sanity syntaxes. It should be simplified again as soon as # the old syntax is dropped. - ret = self._check.check_performance() + with logging_context(check=self._check) as logger: + logger.debug('entering performance checking stage') + ret = self._check.check_performance() except SanityError: # This is to handle the new sanity systax if self._check.strict_check: @@ -126,7 +146,10 @@ def check_performance(self): def cleanup(self, remove_files=False, unload_env=True): self._current_stage = 'cleanup' - self._check.cleanup(remove_files, unload_env) + with logging_context(check=self._check) as logger: + logger.debug('entering cleanup stage') + self._check.cleanup(remove_files, unload_env) + self._current_stage = 'completed' diff --git a/reframe/frontend/executors/policies.py b/reframe/frontend/executors/policies.py index 41621b1e48..b4b8975518 100644 --- a/reframe/frontend/executors/policies.py +++ b/reframe/frontend/executors/policies.py @@ -37,7 +37,7 @@ def run_check(self, check, partition, environ): sched_partition=self.sched_partition, sched_reservation=self.sched_reservation, sched_nodelist=self.sched_nodelist, - 
sched_exclude=self.sched_exclude_nodelist, + sched_exclude_nodelist=self.sched_exclude_nodelist, sched_options=self.sched_options ) @@ -131,8 +131,6 @@ def __init__(self): # Job limit per partition self._max_jobs = {} - self._logger = getlogger('frontend') - def _compile_run_testcase(self, testcase): try: executor = testcase.executor @@ -206,7 +204,6 @@ def _print_executor_status(self, status, executor): partname = executor.check.current_partition.fullname envname = executor.check.current_environ.name msg = '%s on %s using %s' % (checkname, partname, envname) - self._logger.debug('%s %s' % (status.lower(), msg)) self.printer.status(status, msg) def run_check(self, check, partition, environ): @@ -221,7 +218,7 @@ def run_check(self, check, partition, environ): sched_partition=self.sched_partition, sched_reservation=self.sched_reservation, sched_nodelist=self.sched_nodelist, - sched_exclude=self.sched_exclude_nodelist, + sched_exclude_nodelist=self.sched_exclude_nodelist, sched_options=self.sched_options ) @@ -229,8 +226,8 @@ def run_check(self, check, partition, environ): partname = partition.fullname if self._running_cases_counts[partname] >= partition.max_jobs: # Make sure that we still exceeded the job limit - self._logger.debug('reached job limit (%s) for partition %s' % - (partition.max_jobs, partname)) + getlogger().debug('reached job limit (%s) for partition %s' % + (partition.max_jobs, partname)) self._update_running_counts() if self._running_cases_counts[partname] < partition.max_jobs: @@ -264,10 +261,12 @@ def run_check(self, check, partition, environ): def _update_running_counts(self): """Update the counts of running checks per partition.""" + getlogger().debug('updating counts for running test cases') freed_slots = {} for rc in self._running_cases: - check = rc.testcase.executor.check - if not rc.zombie and check.poll(): + executor = rc.testcase.executor + check = executor.check + if not rc.zombie and executor.poll(): # Tests without a job descriptor are 
considered finished rc.zombie = True partname = check.current_partition.fullname @@ -276,9 +275,10 @@ def _update_running_counts(self): freed_slots[partname] += 1 for p, ns in freed_slots.items(): - self._logger.debug('freed %s slot(s) on partition %s' % (ns, p)) + getlogger().debug('freed %s slot(s) on partition %s' % (ns, p)) def _reschedule(self, ready_testcase, load_env=True): + getlogger().debug('scheduling test case for running') testcase = ready_testcase.testcase executor = testcase.executor partname = executor.check.current_partition.fullname @@ -303,8 +303,8 @@ def _reschedule_all(self): ]) if num_schedule_jobs: - self._logger.debug('rescheduling %s job(s) on %s' % - (num_schedule_jobs, partname)) + getlogger().debug('rescheduling %s job(s) on %s' % + (num_schedule_jobs, partname)) for i in range(num_schedule_jobs): ready_case = self._ready_cases[partname].pop() @@ -312,15 +312,15 @@ def _reschedule_all(self): self._reschedule(ready_case) def _waitany(self): - intervals = itertools.cycle(settings.job_state_poll_intervals) + intervals = itertools.cycle(settings.job_poll_intervals) while True: for i, running in enumerate(self._running_cases): testcase = running.testcase executor = testcase.executor running_check = executor.check - if running_check.poll(): + if executor.poll(): try: - running_check.wait() + executor.wait() return running except (KeyboardInterrupt, ReframeFatalError, AssertionError): # These errors should be propagated as-is diff --git a/reframe/frontend/loader.py b/reframe/frontend/loader.py index deab9c6d58..453e72c66d 100644 --- a/reframe/frontend/loader.py +++ b/reframe/frontend/loader.py @@ -15,6 +15,8 @@ from reframe.core.exceptions import ConfigurationError, ReframeError from reframe.core.systems import System, SystemPartition from reframe.core.fields import ScopedDict, ScopedDictField +from reframe.core.schedulers.registry import getscheduler +from reframe.core.launchers.registry import getlauncher from reframe.settings import settings 
@@ -162,6 +164,22 @@ def systems(self): def modes(self): return self._modes + def get_schedsystem_config(self, descr): + # Handle the special shortcuts first + if descr == 'nativeslurm': + return getscheduler('slurm'), getlauncher('srun') + + if descr == 'local': + return getscheduler('local'), getlauncher('local') + + try: + sched_descr, launcher_descr = descr.split('+') + except ValueError as e: + raise ConfigurationError( + 'invalid syntax for the scheduling system') from e + + return getscheduler(sched_descr), getlauncher(launcher_descr) + def load_from_dict(self, site_config): if not isinstance(site_config, collections.abc.Mapping): raise ConfigurationError('site configuration is not a dict') @@ -235,6 +253,7 @@ def create_env(system, partition, name): sys_outputdir = config.get('outputdir', None) sys_logdir = config.get('logdir', None) sys_resourcesdir = config.get('resourcesdir', '.') + sys_modules_system = config.get('modules_system', None) # Expand variables if sys_prefix: @@ -259,7 +278,8 @@ def create_env(system, partition, name): stagedir=sys_stagedir, outputdir=sys_outputdir, logdir=sys_logdir, - resourcesdir=sys_resourcesdir) + resourcesdir=sys_resourcesdir, + modules_system=sys_modules_system) for part_name, partconfig in config.get('partitions', {}).items(): if not isinstance(partconfig, collections.abc.Mapping): raise ConfigurationError( @@ -268,7 +288,9 @@ def create_env(system, partition, name): ) part_descr = partconfig.get('descr', part_name) - part_scheduler = partconfig.get('scheduler', 'local') + part_scheduler, part_launcher = self.get_schedsystem_config( + partconfig.get('scheduler', 'local+local') + ) part_local_env = Environment( name='__rfm_env_%s' % part_name, modules=partconfig.get('modules', []), @@ -284,6 +306,7 @@ def create_env(system, partition, name): system.add_partition(SystemPartition(name=part_name, descr=part_descr, scheduler=part_scheduler, + launcher=part_launcher, access=part_access, environs=part_environs, 
resources=part_resources, diff --git a/reframe/frontend/printer.py b/reframe/frontend/printer.py index cb78214833..28ef6aa250 100644 --- a/reframe/frontend/printer.py +++ b/reframe/frontend/printer.py @@ -56,7 +56,7 @@ def __init__(self): self.colorize = True self.line_width = 78 self.status_width = 10 - self._logger = getlogger('frontend') + self._logger = getlogger() def __repr__(self): return debug.repr(self) diff --git a/reframe/settings.py b/reframe/settings.py index 78d7cfa917..3cbb7f3f2c 100644 --- a/reframe/settings.py +++ b/reframe/settings.py @@ -7,11 +7,9 @@ class RegressionSettings: - _version = '2.7' + _version = '2.8' _module_name = 'reframe' - _job_state_poll_intervals = [1, 2, 3] - _job_init_poll_intervals = [1] - _job_init_poll_max_tries = 30 + _job_poll_intervals = [1, 2, 3] _job_submit_timeout = 60 _checks_path = ['checks/'] _checks_path_recurse = True @@ -60,7 +58,7 @@ class RegressionSettings: 'reframe.log': { 'level': 'DEBUG', 'format': '[%(asctime)s] %(levelname)s: ' - '%(check_name)s: %(message)s', + '%(testcase_name)s: %(message)s', 'append': False, }, @@ -86,16 +84,8 @@ def module_name(self): return self._module_name @property - def job_state_poll_intervals(self): - return self._job_state_poll_intervals - - @property - def job_init_poll_intervals(self): - return self._job_init_poll_intervals - - @property - def job_init_poll_max_tries(self): - return self._job_init_poll_max_tries + def job_poll_intervals(self): + return self._job_poll_intervals @property def job_submit_timeout(self): diff --git a/reframe/utility/os.py b/reframe/utility/os.py index 02b6b6a2ca..26d80a05c6 100644 --- a/reframe/utility/os.py +++ b/reframe/utility/os.py @@ -7,11 +7,14 @@ import shlex import shutil import subprocess +import tempfile -from reframe.core.exceptions import * +from reframe.core.exceptions import CommandError, ReframeError +from reframe.core.logging import getlogger def run_command(cmd, check=False, timeout=None): + getlogger().debug('executing OS 
command: ' + cmd) try: return subprocess.run(shlex.split(cmd), stdout=subprocess.PIPE, @@ -57,6 +60,7 @@ def run_command_async(cmd, stderr=subprocess.PIPE, bufsize=1, **popen_args): + getlogger().debug('executing OS command asynchronously: ' + cmd) return subprocess.Popen(args=shlex.split(cmd), stdout=stdout, stderr=stderr, @@ -177,3 +181,9 @@ def samefile(path1, path2): return os.path.samefile(path1, path2) return follow_link(path1) == follow_link(path2) + + +def mkstemp_path(*args, **kwargs): + fd, path = tempfile.mkstemp(*args, **kwargs) + os.close(fd) + return path diff --git a/tutorial/config/settings.py b/tutorial/config/settings.py index 33084641b5..82d7ddd84a 100644 --- a/tutorial/config/settings.py +++ b/tutorial/config/settings.py @@ -9,11 +9,9 @@ class RegressionSettings: - _version = '2.7' + _version = '2.8' _module_name = 'reframe' - _job_state_poll_intervals = [1, 2, 3] - _job_init_poll_intervals = [1] - _job_init_poll_max_tries = 30 + _job_poll_intervals = [1, 2, 3] _job_submit_timeout = 60 _checks_path = ['checks/'] _checks_path_recurse = True @@ -113,16 +111,8 @@ def module_name(self): return self._module_name @property - def job_state_poll_intervals(self): - return self._job_state_poll_intervals - - @property - def job_init_poll_intervals(self): - return self._job_init_poll_intervals - - @property - def job_init_poll_max_tries(self): - return self._job_init_poll_max_tries + def job_poll_intervals(self): + return self._job_poll_intervals @property def job_submit_timeout(self): diff --git a/unittests/fixtures.py b/unittests/fixtures.py index 4c945b0b9f..9956094035 100644 --- a/unittests/fixtures.py +++ b/unittests/fixtures.py @@ -3,13 +3,17 @@ # import os +from reframe.core.schedulers.registry import getscheduler from reframe.frontend.loader import autodetect_system, SiteConfiguration +from reframe.core.modules import (get_modules_system, + init_modules_system, NoModImpl) from reframe.settings import settings -TEST_RESOURCES = 
os.path.join(os.path.dirname(os.path.realpath(__file__)), - 'resources') -TEST_MODULES = os.path.join(os.path.dirname(os.path.realpath(__file__)), - 'modules') + +TEST_RESOURCES = os.path.join( + os.path.dirname(os.path.realpath(__file__)), 'resources') +TEST_MODULES = os.path.join( + os.path.dirname(os.path.realpath(__file__)), 'modules') TEST_SITE_CONFIG = { 'systems': { 'testsys': { @@ -74,7 +78,18 @@ } -def get_setup_config(): +def init_native_modules_system(): + init_modules_system(HOST.modules_system if HOST else None) + + +# Guess current system and initialize its modules system +_site_config = SiteConfiguration() +_site_config.load_from_dict(settings.site_configuration) +HOST = autodetect_system(_site_config) +init_native_modules_system() + + +def get_test_config(): """Get a regression tests setup configuration. Returns a tuple of system, partition and environment that you can pass to @@ -83,9 +98,9 @@ def get_setup_config(): site_config = SiteConfiguration() site_config.load_from_dict(TEST_SITE_CONFIG) - system = site_config.systems['testsys'] + system = site_config.systems['testsys'] partition = system.partition('gpu') - environ = partition.environment('builtin-gcc') + environ = partition.environment('builtin-gcc') return (system, partition, environ) @@ -96,35 +111,32 @@ def force_remove_file(filename): pass -def guess_system(): - site_config = SiteConfiguration() - site_config.load_from_dict(settings.site_configuration) - return autodetect_system(site_config) - - # FIXME: This may conflict in the unlikely situation that a user defines a # system named `kesch` with a partition named `pn`. -def system_with_scheduler(sched_type, skip_partitions=['kesch:pn']): - """Retrieve a partition from the current system with a specific scheduler. +def partition_with_scheduler(name, skip_partitions=['kesch:pn']): + """Retrieve a partition from the current system whose registered name is + ``name``. 
- If `sched_type` is `None`, the first partition with a non-local scheduler - will be returned. + If ``name`` is :class:`None`, any partition with a non-local scheduler will + be returned. + Partitions specified in ``skip_partitions`` will be skipped from searching. + """ - Partitions in `skip_partitions` will be skipped from searching. Items of - `skip_partitions` are of the form `:`.""" - system = guess_system() - if not system: + if HOST is None: return None - for p in system.partitions: - canon_name = '%s:%s' % (system.name, p) - if canon_name in skip_partitions: + for p in HOST.partitions: + if p.fullname in skip_partitions: continue - if sched_type is None and p.scheduler != 'local': + if name is None and not p.scheduler.is_local: return p - if p.scheduler == sched_type: + if p.scheduler.registered_name == name: return p return None + + +def has_sane_modules_system(): + return not isinstance(get_modules_system().backend, NoModImpl) diff --git a/unittests/test_cli.py b/unittests/test_cli.py index d8086645ee..1468d0bfbf 100644 --- a/unittests/test_cli.py +++ b/unittests/test_cli.py @@ -9,13 +9,14 @@ import reframe.utility.os as os_ext import reframe.core.logging as logging +import unittests.fixtures as fixtures from contextlib import redirect_stdout, redirect_stderr from io import StringIO from reframe.core.environments import EnvironmentSnapshot +from reframe.core.modules import init_modules_system from reframe.frontend.loader import SiteConfiguration, autodetect_system from reframe.settings import settings -from unittests.fixtures import guess_system, system_with_scheduler def run_command_inline(argv, funct, *args, **kwargs): @@ -24,9 +25,7 @@ def run_command_inline(argv, funct, *args, **kwargs): captured_stdout = StringIO() captured_stderr = StringIO() sys.argv = argv - exitcode = None - print(' '.join(argv)) with redirect_stdout(captured_stdout): with redirect_stderr(captured_stderr): try: @@ -34,9 +33,11 @@ def run_command_inline(argv, funct, *args, 
**kwargs): except SystemExit as e: exitcode = e.code finally: - # restore environment and command-line arguments + # restore environment, command-line arguments, and the native + # modules system environ_save.load() sys.argv = argv_save + fixtures.init_native_modules_system() return (exitcode, captured_stdout.getvalue(), @@ -115,7 +116,6 @@ def _run_reframe(self): def _stage_exists(self, check_name, partitions, environs): stagedir = os.path.join(self.prefix, 'stage') - for p in partitions: for e in environs: path = os.path.join(stagedir, p, check_name, e) @@ -147,12 +147,13 @@ def test_check_success(self): self.assertEqual(0, returncode) self.assert_log_file_is_saved() - @unittest.skipIf(not system_with_scheduler(None), + @unittest.skipIf(not fixtures.partition_with_scheduler(None), 'job submission not supported') def test_check_submit_success(self): # This test will run on the auto-detected system - system = guess_system() - partition = system_with_scheduler(None) + system = fixtures.HOST + partition = fixtures.partition_with_scheduler(None) + init_modules_system(system.modules_system) self.local = False self.system = partition.fullname diff --git a/unittests/test_core.py b/unittests/test_core.py deleted file mode 100644 index c3dd78a56d..0000000000 --- a/unittests/test_core.py +++ /dev/null @@ -1,319 +0,0 @@ -import os -import tempfile -import stat -import unittest -import reframe.core.debug as debug -import reframe.utility.os as os_ext - -from reframe.core.environments import Environment, EnvironmentSnapshot, \ - ProgEnvironment -from reframe.core.modules import * -from reframe.core.exceptions import CompilationError -from reframe.core.modules import * -from reframe.core.shell import BashScriptBuilder -from unittests.fixtures import TEST_RESOURCES, TEST_MODULES, force_remove_file - - -class TestEnvironment(unittest.TestCase): - def assertEnvironmentVariable(self, name, value): - if name not in os.environ: - self.fail('environment variable %s not set' % name) - - 
self.assertEqual(os.environ[name], value) - - def assertModulesLoaded(self, modules): - for m in modules: - self.assertTrue(module_present(m)) - - def assertModulesNotLoaded(self, modules): - for m in modules: - self.assertFalse(module_present(m)) - - def setUp(self): - module_path_add([TEST_MODULES]) - - # Always add a base module; this is a workaround for the modules - # environment's inconsistent behaviour, that starts with an empty - # LOADEDMODULES variable and ends up removing it completely if all - # present modules are removed. - module_load('testmod_base') - - os.environ['_fookey1'] = 'origfoo' - os.environ['_fookey1b'] = 'foovalue1' - os.environ['_fookey2b'] = 'foovalue2' - self.environ_save = EnvironmentSnapshot() - self.environ = Environment(name='TestEnv1', modules=['testmod_foo']) - self.environ.set_variable(name='_fookey1', value='value1') - self.environ.set_variable(name='_fookey2', value='value2') - self.environ.set_variable(name='_fookey1', value='value3') - self.environ.set_variable(name='_fookey3b', value='$_fookey1b') - self.environ.set_variable(name='_fookey4b', value='${_fookey2b}') - self.environ_other = Environment(name='TestEnv2', - modules=['testmod_boo']) - self.environ_other.set_variable(name='_fookey11', value='value11') - - def tearDown(self): - module_path_remove([TEST_MODULES]) - self.environ_save.load() - - def test_setup(self): - self.assertEqual(len(self.environ.modules), 1) - self.assertEqual(len(self.environ.variables.keys()), 4) - self.assertEqual(self.environ.variables['_fookey1'], 'value3') - self.assertEqual(self.environ.variables['_fookey2'], 'value2') - self.assertIn('testmod_foo', self.environ.modules) - - def test_environment_snapshot(self): - self.assertRaises(RuntimeError, - self.environ_save.add_module, 'testmod_foo') - self.assertRaises(RuntimeError, self.environ_save.set_variable, - 'foo', 'bar') - self.assertRaises(RuntimeError, self.environ_save.unload) - self.environ.load() - self.environ_other.load() - 
self.environ_save.load() - self.assertEqual(self.environ_save, EnvironmentSnapshot()) - - def test_load_restore(self): - self.environ.load() - self.assertEnvironmentVariable(name='_fookey1', value='value3') - self.assertEnvironmentVariable(name='_fookey2', value='value2') - self.assertEnvironmentVariable(name='_fookey3b', value='foovalue1') - self.assertEnvironmentVariable(name='_fookey4b', value='foovalue2') - self.assertModulesLoaded(self.environ.modules) - self.assertTrue(self.environ.is_loaded) - - self.environ.unload() - self.assertEqual(self.environ_save, EnvironmentSnapshot()) - self.assertFalse(module_present('testmod_foo')) - self.assertEnvironmentVariable(name='_fookey1', value='origfoo') - - def test_load_present(self): - module_load('testmod_boo') - self.environ.load() - self.environ.unload() - self.assertTrue(module_present('testmod_boo')) - - def test_equal(self): - env1 = Environment('env1', modules=['foo', 'bar']) - env2 = Environment('env1', modules=['bar', 'foo']) - self.assertEqual(env1, env2) - - def test_not_equal(self): - env1 = Environment('env1', modules=['foo', 'bar']) - env2 = Environment('env2', modules=['foo', 'bar']) - self.assertNotEqual(env1, env2) - - def test_conflicting_environments(self): - envfoo = Environment(name='envfoo', - modules=['testmod_foo', 'testmod_boo']) - envbar = Environment(name='envbar', modules=['testmod_bar']) - envfoo.load() - envbar.load() - for m in envbar.modules: - self.assertTrue(module_present(m)) - - for m in envfoo.modules: - self.assertFalse(module_present(m)) - - def test_conflict_environ_after_module_load(self): - module_load('testmod_foo') - envfoo = Environment(name='envfoo', modules=['testmod_foo']) - envfoo.load() - envfoo.unload() - self.assertTrue(module_present('testmod_foo')) - - def test_conflict_environ_after_module_force_load(self): - module_load('testmod_foo') - envbar = Environment(name='envbar', modules=['testmod_bar']) - envbar.load() - envbar.unload() - 
self.assertTrue(module_present('testmod_foo')) - - def test_swap(self): - from reframe.core.environments import swap_environments - - self.environ.load() - swap_environments(self.environ, self.environ_other) - self.assertFalse(self.environ.is_loaded) - self.assertTrue(self.environ_other.is_loaded) - - -class TestProgEnvironment(unittest.TestCase): - def setUp(self): - self.environ_save = EnvironmentSnapshot() - self.executable = os.path.join(TEST_RESOURCES, 'hello') - - def tearDown(self): - # Remove generated executable ingoring file-not-found errors - force_remove_file(self.executable) - self.environ_save.load() - - def assertHelloMessage(self, executable=None): - if not executable: - executable = self.executable - - self.assertTrue(os_ext.grep_command_output(cmd=executable, - pattern='Hello, World\!')) - force_remove_file(executable) - - def compile_with_env(self, env, skip_fortran=False): - srcdir = os.path.join(TEST_RESOURCES, 'src') - env.cxxflags = '-O2' - env.load() - env.compile(sourcepath=os.path.join(srcdir, 'hello.c'), - executable=self.executable) - self.assertHelloMessage() - - env.compile(sourcepath=os.path.join(srcdir, 'hello.cpp'), - executable=self.executable) - self.assertHelloMessage() - - if not skip_fortran: - env.compile(sourcepath=os.path.join(srcdir, 'hello.f90'), - executable=self.executable) - self.assertHelloMessage() - - env.unload() - - def compile_dir_with_env(self, env, skip_fortran=False): - srcdir = os.path.join(TEST_RESOURCES, 'src') - env.cxxflags = '-O3' - env.load() - - executables = ['hello_c', 'hello_cpp'] - if skip_fortran: - env.compile(srcdir, makefile='Makefile.nofort') - else: - env.compile(srcdir) - executables.append('hello_fort') - - for e in executables: - self.assertHelloMessage(os.path.join(srcdir, e)) - - env.compile(sourcepath=srcdir, options='clean') - env.unload() - - def test_compile(self): - # Compile a 'Hello, World' with the builtin gcc/g++ - env = ProgEnvironment(name='builtin-gcc', - cc='gcc', cxx='g++', 
ftn=None) - try: - self.compile_with_env(env, skip_fortran=True) - self.compile_dir_with_env(env, skip_fortran=True) - except CompilationError as e: - self.fail("Compilation failed\n") - - -class TestShellScriptBuilder(unittest.TestCase): - def setUp(self): - self.script_file = tempfile.NamedTemporaryFile(mode='w+', delete=False) - os.chmod(self.script_file.name, - os.stat(self.script_file.name).st_mode | stat.S_IEXEC) - - def tearDown(self): - os.remove(self.script_file.name) - - def test_bash_builder(self): - builder = BashScriptBuilder() - builder.set_variable('var1', '13') - builder.set_variable('var2', '2') - builder.set_variable('foo', '33', suppress=True) - builder.verbatim('((var3 = var1 + var2)); echo hello $var3') - self.script_file.write(builder.finalise()) - self.script_file.close() - self.assertTrue( - os_ext.grep_command_output(self.script_file.name, 'hello 15')) - self.assertFalse( - os_ext.grep_command_output(self.script_file.name, 'foo')) - - -class TestModules(unittest.TestCase): - def setUp(self): - self.environ_save = EnvironmentSnapshot() - module_path_add([TEST_MODULES]) - - def tearDown(self): - self.environ_save.load() - module_unload('testmod_foo') - module_unload('testmod_bar') - module_path_remove([TEST_MODULES]) - - def test_module_path(self): - self.assertTrue(os_ext.inpath(TEST_MODULES, os.environ['MODULEPATH'])) - - module_path_remove([TEST_MODULES]) - self.assertFalse(os_ext.inpath(TEST_MODULES, os.environ['MODULEPATH'])) - - def test_module_equal(self): - self.assertTrue(module_equal('foo', 'foo')) - self.assertTrue(module_equal('foo/1.2', 'foo/1.2')) - self.assertTrue(module_equal('foo', 'foo/1.2')) - self.assertFalse(module_equal('foo/1.2', 'foo/1.3')) - self.assertFalse(module_equal('foo', 'bar')) - self.assertFalse(module_equal('foo', 'foobar')) - - def test_module_load(self): - self.assertRaises(ModuleError, module_load, 'foo') - self.assertFalse(module_present('foo')) - - module_load('testmod_foo') - 
self.assertTrue(module_present('testmod_foo')) - self.assertIn('TESTMOD_FOO', os.environ) - - module_unload('testmod_foo') - self.assertFalse(module_present('testmod_foo')) - self.assertNotIn('TESTMOD_FOO', os.environ) - - def test_module_force_load(self): - module_load('testmod_foo') - - unloaded = module_force_load('testmod_foo') - self.assertEqual(len(unloaded), 0) - self.assertTrue(module_present('testmod_foo')) - - unloaded = module_force_load('testmod_bar') - self.assertTrue(module_present('testmod_bar')) - self.assertFalse(module_present('testmod_foo')) - self.assertIn('testmod_foo', unloaded) - self.assertIn('TESTMOD_BAR', os.environ) - - def test_module_purge(self): - module_load('testmod_base') - module_purge() - self.assertNotIn('LOADEDMODULES', os.environ) - - -class TestDebugRepr(unittest.TestCase): - def test_builtin_types(self): - # builtin types must use the default repr() - self.assertEqual(repr(1), debug.repr(1)) - self.assertEqual(repr(1.2), debug.repr(1.2)) - self.assertEqual(repr([1, 2, 3]), debug.repr([1, 2, 3])) - self.assertEqual(repr({1, 2, 3}), debug.repr({1, 2, 3})) - self.assertEqual(repr({1, 2, 3}), debug.repr({1, 2, 3})) - self.assertEqual(repr({'a': 1, 'b': {2, 3}}), - debug.repr({'a': 1, 'b': {2, 3}})) - - def test_obj_repr(self): - class C: - def __repr__(self): - return debug.repr(self) - - class D: - def __repr__(self): - return debug.repr(self) - - c = C() - c._a = -1 - c.a = 1 - c.b = {1, 2, 3} - c.d = D() - c.d.a = 2 - c.d.b = 3 - - rep = repr(c) - self.assertIn('unittests.test_core', rep) - self.assertIn('_a=%r' % c._a, rep) - self.assertIn('b=%r' % c.b, rep) - self.assertIn('D(...)', rep) diff --git a/unittests/test_environments.py b/unittests/test_environments.py new file mode 100644 index 0000000000..ce12410812 --- /dev/null +++ b/unittests/test_environments.py @@ -0,0 +1,214 @@ +import os +import unittest +import reframe.core.environments as renv +import reframe.utility.os as os_ext + +from reframe.core.modules import 
get_modules_system +from reframe.core.exceptions import CompilationError +from unittests.fixtures import (TEST_RESOURCES, TEST_MODULES, HOST, + force_remove_file, has_sane_modules_system) + + +class TestEnvironment(unittest.TestCase): + def assertEnvironmentVariable(self, name, value): + if name not in os.environ: + self.fail('environment variable %s not set' % name) + + self.assertEqual(os.environ[name], value) + + def assertModulesLoaded(self, modules): + for m in modules: + self.assertTrue(get_modules_system().is_module_loaded(m)) + + def assertModulesNotLoaded(self, modules): + for m in modules: + self.assertFalse(get_modules_system().is_module_loaded(m)) + + def setUp(self): + get_modules_system().searchpath_add(TEST_MODULES) + + # Always add a base module; this is a workaround for the modules + # environment's inconsistent behaviour, that starts with an empty + # LOADEDMODULES variable and ends up removing it completely if all + # present modules are removed. + get_modules_system().load_module('testmod_base') + + os.environ['_fookey1'] = 'origfoo' + os.environ['_fookey1b'] = 'foovalue1' + os.environ['_fookey2b'] = 'foovalue2' + self.environ_save = renv.EnvironmentSnapshot() + self.environ = renv.Environment( + name='TestEnv1', modules=['testmod_foo']) + self.environ.set_variable(name='_fookey1', value='value1') + self.environ.set_variable(name='_fookey2', value='value2') + self.environ.set_variable(name='_fookey1', value='value3') + self.environ.set_variable(name='_fookey3b', value='$_fookey1b') + self.environ.set_variable(name='_fookey4b', value='${_fookey2b}') + self.environ_other = renv.Environment(name='TestEnv2', + modules=['testmod_boo']) + self.environ_other.set_variable(name='_fookey11', value='value11') + + def tearDown(self): + get_modules_system().searchpath_remove(TEST_MODULES) + self.environ_save.load() + + def test_setup(self): + if has_sane_modules_system(): + self.assertEqual(len(self.environ.modules), 1) + self.assertIn('testmod_foo', 
self.environ.modules) + + self.assertEqual(len(self.environ.variables.keys()), 4) + self.assertEqual(self.environ.variables['_fookey1'], 'value3') + self.assertEqual(self.environ.variables['_fookey2'], 'value2') + + def test_environment_snapshot(self): + self.assertRaises(RuntimeError, + self.environ_save.add_module, 'testmod_foo') + self.assertRaises(RuntimeError, self.environ_save.set_variable, + 'foo', 'bar') + self.assertRaises(RuntimeError, self.environ_save.unload) + self.environ.load() + self.environ_other.load() + self.environ_save.load() + self.assertEqual(self.environ_save, renv.EnvironmentSnapshot()) + + def test_load_restore(self): + self.environ.load() + self.assertEnvironmentVariable(name='_fookey1', value='value3') + self.assertEnvironmentVariable(name='_fookey2', value='value2') + self.assertEnvironmentVariable(name='_fookey3b', value='foovalue1') + self.assertEnvironmentVariable(name='_fookey4b', value='foovalue2') + self.assertTrue(self.environ.is_loaded) + if has_sane_modules_system(): + self.assertModulesLoaded(self.environ.modules) + + self.environ.unload() + self.assertEqual(self.environ_save, renv.EnvironmentSnapshot()) + self.assertEnvironmentVariable(name='_fookey1', value='origfoo') + if has_sane_modules_system(): + self.assertFalse( + get_modules_system().is_module_loaded('testmod_foo')) + + @unittest.skipIf(not has_sane_modules_system(), + 'no modules systems supported') + def test_load_already_present(self): + get_modules_system().load_module('testmod_boo') + self.environ.load() + self.environ.unload() + self.assertTrue(get_modules_system().is_module_loaded('testmod_boo')) + + def test_equal(self): + env1 = renv.Environment('env1', modules=['foo', 'bar']) + env2 = renv.Environment('env1', modules=['bar', 'foo']) + self.assertEqual(env1, env2) + + def test_not_equal(self): + env1 = renv.Environment('env1', modules=['foo', 'bar']) + env2 = renv.Environment('env2', modules=['foo', 'bar']) + self.assertNotEqual(env1, env2) + + 
@unittest.skipIf(not has_sane_modules_system(), + 'no modules systems supported') + def test_conflicting_environments(self): + envfoo = renv.Environment(name='envfoo', + modules=['testmod_foo', 'testmod_boo']) + envbar = renv.Environment(name='envbar', modules=['testmod_bar']) + envfoo.load() + envbar.load() + for m in envbar.modules: + self.assertTrue(get_modules_system().is_module_loaded(m)) + + for m in envfoo.modules: + self.assertFalse(get_modules_system().is_module_loaded(m)) + + @unittest.skipIf(not has_sane_modules_system(), + 'no modules systems supported') + def test_conflict_environ_after_module_load(self): + get_modules_system().load_module('testmod_foo') + envfoo = renv.Environment(name='envfoo', modules=['testmod_foo']) + envfoo.load() + envfoo.unload() + self.assertTrue(get_modules_system().is_module_loaded('testmod_foo')) + + @unittest.skipIf(not has_sane_modules_system(), + 'no modules systems supported') + def test_conflict_environ_after_module_force_load(self): + get_modules_system().load_module('testmod_foo') + envbar = renv.Environment(name='envbar', modules=['testmod_bar']) + envbar.load() + envbar.unload() + self.assertTrue(get_modules_system().is_module_loaded('testmod_foo')) + + def test_swap(self): + from reframe.core.environments import swap_environments + + self.environ.load() + swap_environments(self.environ, self.environ_other) + self.assertFalse(self.environ.is_loaded) + self.assertTrue(self.environ_other.is_loaded) + + +class TestProgEnvironment(unittest.TestCase): + def setUp(self): + self.environ_save = renv.EnvironmentSnapshot() + self.executable = os.path.join(TEST_RESOURCES, 'hello') + + def tearDown(self): + # Remove generated executable ingoring file-not-found errors + force_remove_file(self.executable) + self.environ_save.load() + + def assertHelloMessage(self, executable=None): + if not executable: + executable = self.executable + + self.assertTrue(os_ext.grep_command_output(cmd=executable, + pattern='Hello, World\!')) + 
force_remove_file(executable) + + def compile_with_env(self, env, skip_fortran=False): + srcdir = os.path.join(TEST_RESOURCES, 'src') + env.cxxflags = '-O2' + env.load() + env.compile(sourcepath=os.path.join(srcdir, 'hello.c'), + executable=self.executable) + self.assertHelloMessage() + + env.compile(sourcepath=os.path.join(srcdir, 'hello.cpp'), + executable=self.executable) + self.assertHelloMessage() + + if not skip_fortran: + env.compile(sourcepath=os.path.join(srcdir, 'hello.f90'), + executable=self.executable) + self.assertHelloMessage() + + env.unload() + + def compile_dir_with_env(self, env, skip_fortran=False): + srcdir = os.path.join(TEST_RESOURCES, 'src') + env.cxxflags = '-O3' + env.load() + + executables = ['hello_c', 'hello_cpp'] + if skip_fortran: + env.compile(srcdir, makefile='Makefile.nofort') + else: + env.compile(srcdir) + executables.append('hello_fort') + + for e in executables: + self.assertHelloMessage(os.path.join(srcdir, e)) + + env.compile(sourcepath=srcdir, options='clean') + env.unload() + + def test_compile(self): + # Compile a 'Hello, World' with the builtin gcc/g++ + env = renv.ProgEnvironment(name='builtin-gcc', + cc='gcc', cxx='g++', ftn=None) + try: + self.compile_with_env(env, skip_fortran=True) + self.compile_dir_with_env(env, skip_fortran=True) + except CompilationError as e: + self.fail("Compilation failed\n") diff --git a/unittests/test_fields.py b/unittests/test_fields.py index 603ac7c780..a8d9256670 100644 --- a/unittests/test_fields.py +++ b/unittests/test_fields.py @@ -6,6 +6,15 @@ class TestFields(unittest.TestCase): + def test_not_set_attribute(self): + class FieldTester: + var = Field('var') + + c = FieldTester() + self.assertRaises(AttributeError, exec, "a = c.var", + globals(), locals()) + self.assertRaises(AttributeError, getattr, c, 'var') + def test_copy_on_write_field(self): class FieldTester: cow = CopyOnWriteField('cow') @@ -23,12 +32,13 @@ class FieldTester: var[1].append(5) self.assertEqual(tester.cow, [1, [2, 
4], 3]) - def test_readonly_field(self): + def test_constant_field(self): class FieldTester: - ro = ReadOnlyField('foo') + ro = ConstantField('foo') tester = FieldTester() self.assertEqual(tester.ro, 'foo') + self.assertEqual(FieldTester.ro, 'foo') self.assertRaises(FieldError, exec, "tester.ro = 'bar'", globals(), locals()) @@ -318,6 +328,7 @@ def __init__(self, value): def test_sanity_field(self): warnings.simplefilter('ignore', ReframeDeprecationWarning) + class FieldTester: field = SanityPatternField('field') field_maybe_none = SanityPatternField('field_maybe_none', diff --git a/unittests/test_launchers.py b/unittests/test_launchers.py index 440fbbbf66..d5a2978ccc 100644 --- a/unittests/test_launchers.py +++ b/unittests/test_launchers.py @@ -1,139 +1,205 @@ -import re +import abc import unittest -from reframe.core.launchers import * -from reframe.core.schedulers import * +import reframe.core.launchers as launchers + +from reframe.core.launchers.registry import getlauncher +from reframe.core.schedulers import Job +from reframe.core.schedulers.local import LocalJob from reframe.core.shell import BashScriptBuilder -# The classes that inherit from _TestLauncher only test the launcher commands; -# nothing is actually launched (this is done in test_schedulers.py). 
-class _TestLauncher(unittest.TestCase): +class FakeJob(Job): + def submit(self): + pass + + def wait(self): + pass + + def cancel(self): + pass + + def finished(self): + pass + + +class _TestLauncher(abc.ABC, unittest.TestCase): + """Base class for launcher tests.""" + def setUp(self): self.builder = BashScriptBuilder() - # Pattern to match: must include only horizontal spaces [ \t] - # (\h in perl; in python \h might be introduced in future) - self.expected_launcher_patt = None - self.launcher_options = ['--foo'] - self.target_executable = 'hostname' + self.job = FakeJob(name='fake_job', + command='ls -l', + launcher=self.launcher, + num_tasks=4, + num_tasks_per_node=2, + num_tasks_per_core=1, + num_tasks_per_socket=1, + num_cpus_per_task=2, + use_smt=True, + time_limit=(0, 10, 0), + script_filename='fake_script', + stdout='fake_stdout', + stderr='fake_stderr', + sched_account='fake_account', + sched_partition='fake_partition', + sched_reservation='fake_reservation', + sched_nodelist="mynode", + sched_exclude_nodelist='fake_exclude_nodelist', + sched_exclusive_access='fake_exclude_access', + sched_options=['fake_options']) + + self.minimal_job = FakeJob(name='fake_job', + command='ls -l', + launcher=self.launcher) @property - def launcher_command(self): - return ' '.join([self.launcher.executable] + - self.launcher.fixed_options) + @abc.abstractmethod + def launcher(self): + """The launcher to be tested.""" @property - def expected_shell_script_patt(self): - return '^[ \t]*%s[ \t]+--foo[ \t]+hostname[ \t]*$' % \ - self.launcher_command + @abc.abstractmethod + def expected_command(self): + """The command expected to be emitted by the launcher.""" - def test_launcher(self): - self.assertIsNotNone(self.launcher) - self.assertIsNotNone( - # No MULTILINE mode here; a launcher must not contain new lines. 
- re.search(self.expected_launcher_patt, - self.launcher_command) - ) + @property + @abc.abstractmethod + def expected_minimal_command(self): + """The command expected to be emitted by the launcher.""" - def test_launcher_emit_command(self): - self.launcher.options = self.launcher_options - self.launcher.emit_run_command(self.target_executable, self.builder) - shell_script_text = self.builder.finalise() - self.assertIsNotNone(self.launcher) - self.assertIsNotNone( - re.search(self.expected_shell_script_patt, shell_script_text, - re.MULTILINE) - ) + def test_emit_command(self): + emitted_command = self.launcher.emit_run_command(self.job, + self.builder) + self.assertEqual(self.expected_command, emitted_command) + def test_emit_minimal_command(self): + emitted_command = self.launcher.emit_run_command(self.minimal_job, + self.builder) + self.assertEqual(self.expected_minimal_command, emitted_command) -class TestNativeSlurmLauncher(_TestLauncher): - def setUp(self): - super().setUp() - self.launcher = NativeSlurmLauncher(None) - self.expected_launcher_patt = '^[ \t]*srun[ \t]*$' +class TestSrunLauncher(_TestLauncher): + @property + def launcher(self): + return getlauncher('srun')(options=['--foo']) + + @property + def expected_command(self): + return 'srun --foo ls -l' + + @property + def expected_minimal_command(self): + return 'srun --foo ls -l' + + +class TestSrunallocLauncher(_TestLauncher): + + @property + def launcher(self): + return getlauncher('srunalloc')(options=['--foo']) + + @property + def expected_command(self): + return ('srun ' + '--job-name=fake_job ' + '--time=0:10:0 ' + '--output=fake_stdout ' + '--error=fake_stderr ' + '--ntasks=4 ' + '--ntasks-per-node=2 ' + '--ntasks-per-core=1 ' + '--ntasks-per-socket=1 ' + '--cpus-per-task=2 ' + '--partition=fake_partition ' + '--exclusive ' + '--hint=multithread ' + '--partition=fake_partition ' + '--account=fake_account ' + '--nodelist=mynode ' + '--exclude=fake_exclude_nodelist ' + '--foo ' + 'ls -l') + + 
@property + def expected_minimal_command(self): + return ('srun ' + '--job-name=fake_job ' + '--time=0:10:0 ' + '--output=fake_job.out ' + '--error=fake_job.err ' + '--ntasks=1 ' + '--foo ' + 'ls -l') class TestAlpsLauncher(_TestLauncher): - def setUp(self): - super().setUp() - self.launcher = AlpsLauncher(None) - self.expected_launcher_patt = '^[ \t]*aprun[ \t]+-B[ \t]*$' + @property + def launcher(self): + return getlauncher('alps')(options=['--foo']) + @property + def expected_command(self): + return 'aprun -B --foo ls -l' -class TestLauncherWrapperAlps(_TestLauncher): - def setUp(self): - super().setUp() - self.launcher = LauncherWrapper(AlpsLauncher(None), - 'ddt', '-o foo.out'.split()) - self.expected_launcher_patt = '^[ \t]*ddt[ \t]+-o[ \t]+foo.out' \ - '[ \t]+aprun[ \t]+-B[ \t]*$' + @property + def expected_minimal_command(self): + return 'aprun -B --foo ls -l' -class TestLauncherWrapperNativeSlurm(_TestLauncher): - def setUp(self): - super().setUp() - self.launcher = LauncherWrapper(NativeSlurmLauncher(None), - 'ddt', '-o foo.out'.split()) - self.expected_launcher_patt = '^[ \t]*ddt[ \t]+-o[ \t]+foo.out' \ - '[ \t]+srun[ \t]*$' +class TestMpirunLauncher(_TestLauncher): + @property + def launcher(self): + return getlauncher('mpirun')(options=['--foo']) + @property + def expected_command(self): + return 'mpirun -np 4 --foo ls -l' -class TestLocalLauncher(_TestLauncher): - def setUp(self): - super().setUp() - self.launcher = LocalLauncher(None) + @property + def expected_minimal_command(self): + return 'mpirun -np 1 --foo ls -l' - def test_launcher(self): - self.assertEqual('', self.launcher_command) +class TestMpiexecLauncher(_TestLauncher): @property - def expected_shell_script_patt(self): - return '^[ \t]*hostname[ \t]*$' + def launcher(self): + return getlauncher('mpiexec')(options=['--foo']) + @property + def expected_command(self): + return 'mpiexec -n 4 --foo ls -l' -class TestAbstractLauncher(_TestLauncher): - def setUp(self): - pass + @property + def 
expected_minimal_command(self): + return 'mpiexec -n 1 --foo ls -l' - def test_launcher(self): - # ABCs do not allow at all instantiation of abstract classes - self.assertRaises(TypeError, JobLauncher, None) - def test_launcher_emit_command(self): - pass +class TestLauncherWrapperAlps(_TestLauncher): + @property + def launcher(self): + return launchers.LauncherWrapper( + getlauncher('alps')(options=['--foo']), + 'ddt', ['--offline'] + ) + + @property + def expected_command(self): + return 'ddt --offline aprun -B --foo ls -l' + @property + def expected_minimal_command(self): + return 'ddt --offline aprun -B --foo ls -l' -class TestVisitLauncherNativeSlurm(_TestLauncher): - def setUp(self): - super().setUp() - self.job = SlurmJob(job_name='visittest', - job_environ_list=[], - job_script_builder=self.builder, - num_tasks=5, - num_tasks_per_node=2, - launcher_type=NativeSlurmLauncher) - self.launcher = VisitLauncher(self.job) - self.expected_launcher_patt = '^[ \t]*visit[ \t]+-np[ \t]+5[ \t]+' \ - '-nn[ \t]+3[ \t]+-l[ \t]+srun[ \t]*$' - self.launcher_options = ['-o data.nc'] - self.target_executable = '' +class TestLocalLauncher(_TestLauncher): @property - def expected_shell_script_patt(self): - return '^[ \t]*%s[ \t]+-o[ \t]+data.nc[ \t]*$' % self.launcher_command + def launcher(self): + return getlauncher('local')(['--foo']) + @property + def expected_command(self): + return 'ls -l' -class TestVisitLauncherLocal(_TestLauncher): - def setUp(self): - super().setUp() - self.job = LocalJob(job_name='visittest', - job_environ_list=[], - job_script_builder=self.builder) - self.launcher = VisitLauncher(self.job) - self.expected_launcher_patt = '^[ \t]*visit[ \t]*$' - self.launcher_options = ['-o data.nc'] - self.target_executable = '' - - @property - def expected_shell_script_patt(self): - return '^[ \t]*%s[ \t]+-o[ \t]+data.nc[ \t]*$' % self.launcher_command + @property + def expected_minimal_command(self): + return 'ls -l' diff --git a/unittests/test_logging.py 
b/unittests/test_logging.py index abce324a16..4fe6daef4f 100644 --- a/unittests/test_logging.py +++ b/unittests/test_logging.py @@ -95,22 +95,22 @@ def test_logger_levels(self): class TestLoggerConfiguration(unittest.TestCase): def setUp(self): - - self.logfile = 'reframe.log' + tmpfd, self.logfile = tempfile.mkstemp(dir='.') + os.close(tmpfd) self.logging_config = { 'level': 'INFO', 'handlers': { self.logfile: { 'level': 'WARNING', - 'format': '[%(asctime)s] %(levelname)s: %(message)s', + 'format': '[%(asctime)s] %(levelname)s: ' + '%(check_name)s: %(message)s', 'datefmt': '%F', 'append': True, } } } - self.logger = None self.check = RegressionTest( - 'random_check', '.', System('foosys'), ResourcesManager() + 'random_check', '.', System('gagsys'), ResourcesManager() ) def tearDown(self): @@ -118,7 +118,7 @@ def tearDown(self): os.remove(self.logfile) def found_in_logfile(self, string): - for handler in self.logger.handlers: + for handler in getlogger().logger.handlers: handler.flush() handler.close() @@ -128,46 +128,44 @@ def found_in_logfile(self, string): return found - def set_logger(self): - from reframe.core.logging import load_from_dict - self.logger = load_from_dict(self.logging_config) - def close_handlers(self): - for h in self.logger.handlers: + for h in getlogger().logger.handlers: h.close() def flush_handlers(self): - for h in self.logger.handlers: + for h in getlogger().logger.handlers: h.flush() def test_valid_level(self): - self.set_logger() - self.assertEqual(INFO, self.logger.getEffectiveLevel()) + configure_logging(self.logging_config) + self.assertEqual(INFO, getlogger().getEffectiveLevel()) def test_no_handlers(self): del self.logging_config['handlers'] - self.assertRaises(ConfigurationError, self.set_logger) + self.assertRaises(ConfigurationError, + configure_logging, self.logging_config) def test_empty_handlers(self): self.logging_config['handlers'] = {} - self.assertRaises(ConfigurationError, self.set_logger) + 
self.assertRaises(ConfigurationError, + configure_logging, self.logging_config) def test_handler_level(self): - self.set_logger() - self.logger.info('foo') - self.logger.warning('bar') + configure_logging(self.logging_config) + getlogger().info('foo') + getlogger().warning('bar') self.assertFalse(self.found_in_logfile('foo')) self.assertTrue(self.found_in_logfile('bar')) def test_handler_append(self): - self.set_logger() - self.logger.warning('foo') + configure_logging(self.logging_config) + getlogger().warning('foo') self.close_handlers() # Reload logger - self.set_logger() - self.logger.warning('bar') + configure_logging(self.logging_config) + getlogger().warning('bar') self.assertTrue(self.found_in_logfile('foo')) self.assertTrue(self.found_in_logfile('bar')) @@ -185,22 +183,21 @@ def test_handler_noappend(self): } } - self.set_logger() - self.logger.warning('foo') + configure_logging(self.logging_config) + getlogger().warning('foo') self.close_handlers() # Reload logger - self.set_logger() - self.logger.warning('bar') + configure_logging(self.logging_config) + getlogger().warning('bar') self.assertFalse(self.found_in_logfile('foo')) self.assertTrue(self.found_in_logfile('bar')) - # FIXME: this test is not robust + # FIXME: this test is not so robust def test_date_format(self): - self.set_logger() - self.logger.warning('foo') - self.flush_handlers() + configure_logging(self.logging_config) + getlogger().warning('foo') self.assertTrue(self.found_in_logfile(datetime.now().strftime('%F'))) def test_stream_handler_stdout(self): @@ -210,10 +207,10 @@ def test_stream_handler_stdout(self): '&1': {}, } } - self.set_logger() - - self.assertEqual(len(self.logger.handlers), 1) - handler = self.logger.handlers[0] + configure_logging(self.logging_config) + raw_logger = getlogger().logger + self.assertEqual(len(raw_logger.handlers), 1) + handler = raw_logger.handlers[0] self.assertTrue(isinstance(handler, StreamHandler)) self.assertEqual(handler.stream, sys.stdout) @@ -225,10 
+222,11 @@ def test_stream_handler_stderr(self): '&2': {}, } } - self.set_logger() - self.assertEqual(len(self.logger.handlers), 1) - handler = self.logger.handlers[0] + configure_logging(self.logging_config) + raw_logger = getlogger().logger + self.assertEqual(len(raw_logger.handlers), 1) + handler = raw_logger.handlers[0] self.assertTrue(isinstance(handler, StreamHandler)) self.assertEqual(handler.stream, sys.stderr) @@ -241,8 +239,8 @@ def test_multiple_handlers(self): self.logfile: {}, } } - self.set_logger() - self.assertEqual(len(self.logger.handlers), 2) + configure_logging(self.logging_config) + self.assertEqual(len(getlogger().logger.handlers), 2) def test_global_noconfig(self): # This is to test the case when no configuration is set, but since the @@ -250,14 +248,39 @@ def test_global_noconfig(self): # 'no-config' state by passing `None` to `configure_logging()` configure_logging(None) - frontend_logger = getlogger('frontend') - check_logger = getlogger('check', self.check) - self.assertEqual(None, frontend_logger.logger) - self.assertEqual(None, check_logger.logger) + self.assertIs(getlogger(), null_logger) def test_global_config(self): configure_logging(self.logging_config) - frontend_logger = getlogger('frontend') - check_logger = getlogger('check', self.check) - self.assertNotEqual(None, frontend_logger.logger) - self.assertNotEqual(None, check_logger.logger) + self.assertIsNot(getlogger(), null_logger) + + def test_logging_context(self): + configure_logging(self.logging_config) + with logging_context() as logger: + self.assertIs(logger, getlogger()) + self.assertIsNot(logger, null_logger) + getlogger().error('error from context') + + self.assertTrue(self.found_in_logfile('reframe')) + self.assertTrue(self.found_in_logfile('error from context')) + + def test_logging_context_check(self): + configure_logging(self.logging_config) + with logging_context(check=self.check): + getlogger().error('error from context') + + 
self.assertTrue(self.found_in_logfile('random_check')) + self.assertTrue(self.found_in_logfile('error from context')) + + def test_logging_context_error(self): + configure_logging(self.logging_config) + try: + with logging_context(exc_log_level=ERROR): + raise ReframeError('error from context') + + self.fail('logging_context did not propagate the exception') + except ReframeError: + pass + + self.assertTrue(self.found_in_logfile('reframe')) + self.assertTrue(self.found_in_logfile('error from context')) diff --git a/unittests/test_modules.py b/unittests/test_modules.py new file mode 100644 index 0000000000..af741dd208 --- /dev/null +++ b/unittests/test_modules.py @@ -0,0 +1,131 @@ +import abc +import os +import unittest +import reframe.core.modules as modules + +from reframe.core.environments import EnvironmentSnapshot +from reframe.core.exceptions import ModuleError, ReframeError +from unittests.fixtures import TEST_MODULES, has_sane_modules_system + + +class _TestModulesSystem(unittest.TestCase): + def setUp(self): + self.modules_system = modules.get_modules_system() + self.environ_save = EnvironmentSnapshot() + self.modules_system.searchpath_add(TEST_MODULES) + + def tearDown(self): + self.environ_save.load() + + def test_searchpath(self): + self.assertIn(TEST_MODULES, self.modules_system.searchpath) + + self.modules_system.searchpath_remove(TEST_MODULES) + self.assertNotIn(TEST_MODULES, self.modules_system.searchpath) + + def test_module_load(self): + self.assertRaises(ModuleError, self.modules_system.load_module, 'foo') + self.assertFalse(self.modules_system.is_module_loaded('foo')) + self.assertNotIn('foo', self.modules_system.loaded_modules()) + + self.modules_system.load_module('testmod_foo') + self.assertTrue(self.modules_system.is_module_loaded('testmod_foo')) + self.assertIn('testmod_foo', self.modules_system.loaded_modules()) + self.assertIn('TESTMOD_FOO', os.environ) + + self.modules_system.unload_module('testmod_foo') + 
self.assertFalse(self.modules_system.is_module_loaded('testmod_foo')) + self.assertNotIn('testmod_foo', self.modules_system.loaded_modules()) + self.assertNotIn('TESTMOD_FOO', os.environ) + + def test_module_load_force(self): + self.modules_system.load_module('testmod_foo') + + unloaded = self.modules_system.load_module('testmod_foo', force=True) + self.assertEqual(0, len(unloaded)) + self.assertTrue(self.modules_system.is_module_loaded('testmod_foo')) + + unloaded = self.modules_system.load_module('testmod_bar', True) + self.assertTrue(self.modules_system.is_module_loaded('testmod_bar')) + self.assertFalse(self.modules_system.is_module_loaded('testmod_foo')) + self.assertIn('testmod_foo', unloaded) + self.assertIn('TESTMOD_BAR', os.environ) + + def test_module_unload_all(self): + self.modules_system.load_module('testmod_base') + self.modules_system.unload_all() + self.assertEqual(0, len(self.modules_system.loaded_modules())) + + def test_module_list(self): + self.modules_system.load_module('testmod_foo') + self.assertIn('testmod_foo', self.modules_system.loaded_modules()) + self.modules_system.unload_module('testmod_foo') + + def test_module_conflict_list(self): + conflict_list = self.modules_system.conflicted_modules('testmod_bar') + self.assertIn('testmod_foo', conflict_list) + self.assertIn('testmod_boo', conflict_list) + + +class TestTModModulesSystem(_TestModulesSystem): + def setUp(self): + try: + modules.init_modules_system('tmod') + except ReframeError: + self.skipTest('tmod not supported') + else: + super().setUp() + + +class TestNoModModulesSystem(_TestModulesSystem): + def setUp(self): + try: + modules.init_modules_system() + except ReframeError: + self.skipTest('nomod not supported') + else: + super().setUp() + + # Simply test that no exceptions are thrown + def test_searchpath(self): + self.modules_system.searchpath_remove(TEST_MODULES) + + def test_module_load(self): + self.modules_system.load_module('foo') + self.modules_system.unload_module('foo') 
+ + def test_module_load_force(self): + self.modules_system.load_module('foo', force=True) + + def test_module_unload_all(self): + self.modules_system.unload_all() + + def test_module_list(self): + self.assertEqual(0, len(self.modules_system.loaded_modules())) + + def test_module_conflict_list(self): + self.assertEqual(0, len(self.modules_system.conflicted_modules('foo'))) + + +class TestModule(unittest.TestCase): + def setUp(self): + self.module = modules.Module('foo/1.2') + + def test_name_version(self): + self.assertEqual(self.module.name, 'foo') + self.assertEqual(self.module.version, '1.2') + + def test_equal(self): + self.assertEqual(modules.Module('foo'), modules.Module('foo')) + self.assertEqual(modules.Module('foo/1.2'), modules.Module('foo/1.2')) + self.assertEqual(modules.Module('foo'), modules.Module('foo/1.2')) + self.assertEqual(hash(modules.Module('foo')), + hash(modules.Module('foo'))) + self.assertEqual(hash(modules.Module('foo/1.2')), + hash(modules.Module('foo/1.2'))) + self.assertEqual(hash(modules.Module('foo')), + hash(modules.Module('foo/1.2'))) + self.assertNotEqual(modules.Module('foo/1.2'), + modules.Module('foo/1.3')) + self.assertNotEqual(modules.Module('foo'), modules.Module('bar')) + self.assertNotEqual(modules.Module('foo'), modules.Module('foobar')) diff --git a/unittests/test_parsers.py b/unittests/test_parsers.py index 61bd8694a5..2cf279a7a5 100644 --- a/unittests/test_parsers.py +++ b/unittests/test_parsers.py @@ -12,7 +12,7 @@ from reframe.frontend.resources import ResourcesManager from reframe.utility.functions import standard_threshold from reframe.utility.parsers import * -from unittests.fixtures import get_setup_config +from unittests.fixtures import get_test_config class StatefulParserTest(unittest.TestCase): @@ -20,7 +20,7 @@ def setUp(self): # Ignore deprecation warnings warnings.simplefilter('ignore', ReframeDeprecationWarning) - self.system, self.partition, self.environ = get_setup_config() + self.system, self.partition, 
self.environ = get_test_config() self.resourcesdir = tempfile.mkdtemp(dir='unittests') self.resources = ResourcesManager(prefix=self.resourcesdir) self.test = RegressionTest('test_performance', diff --git a/unittests/test_pipeline.py b/unittests/test_pipeline.py index a45d6a80e0..7e97bd4bf8 100644 --- a/unittests/test_pipeline.py +++ b/unittests/test_pipeline.py @@ -7,28 +7,26 @@ import reframe.settings as settings import reframe.utility.sanity as sn +import unittests.fixtures as fixtures from reframe.core.exceptions import (ReframeDeprecationWarning, ReframeError, CompilationError) from reframe.core.pipeline import * -from reframe.core.modules import * +from reframe.core.modules import get_modules_system from reframe.frontend.loader import * from reframe.frontend.resources import ResourcesManager from reframe.utility.functions import standard_threshold -from unittests.fixtures import TEST_MODULES, get_setup_config -from unittests.fixtures import system_with_scheduler - -class TestRegression(unittest.TestCase): +class TestRegressionTest(unittest.TestCase): def setUp(self): # Ignore deprecation warnings warnings.simplefilter('ignore', ReframeDeprecationWarning) - module_path_add([TEST_MODULES]) + get_modules_system().searchpath_add(fixtures.TEST_MODULES) # Load a system configuration - self.system, self.partition, self.progenv = get_setup_config() + self.system, self.partition, self.progenv = fixtures.get_test_config() self.resourcesdir = tempfile.mkdtemp(dir='unittests') self.loader = RegressionCheckLoader(['unittests/resources']) self.resources = ResourcesManager(prefix=self.resourcesdir) @@ -38,7 +36,7 @@ def tearDown(self): warnings.simplefilter('default', ReframeDeprecationWarning) def setup_from_site(self): - self.partition = system_with_scheduler(None) + self.partition = fixtures.partition_with_scheduler(None) # pick the first environment of partition if self.partition.environs: @@ -76,7 +74,7 @@ def test_environ_setup(self): test.setup(self.partition, 
self.progenv) for m in test.modules: - self.assertTrue(module_present(m)) + self.assertTrue(get_modules_system().is_module_loaded(m)) for k, v in test.variables.items(): self.assertEqual(os.environ[k], v) @@ -96,7 +94,7 @@ def _run_test(self, test, compile_only=False, performance_result=True): for f in self.keep_files_list(test, compile_only): self.assertTrue(os.path.exists(f)) - @unittest.skipIf(not system_with_scheduler(None), + @unittest.skipIf(not fixtures.partition_with_scheduler(None), 'job submission not supported') def test_hellocheck(self): self.setup_from_site() @@ -109,7 +107,7 @@ def test_hellocheck(self): test.valid_prog_environs = [self.progenv.name] self._run_test(test) - @unittest.skipIf(not system_with_scheduler(None), + @unittest.skipIf(not fixtures.partition_with_scheduler(None), 'job submission not supported') def test_hellocheck_new_sanity(self): self.setup_from_site() @@ -127,7 +125,7 @@ def test_hellocheck_new_sanity(self): test.valid_prog_environs = [self.progenv.name] self._run_test(test) - @unittest.skipIf(not system_with_scheduler(None), + @unittest.skipIf(not fixtures.partition_with_scheduler(None), 'job submission not supported') def test_hellocheck_make(self): self.setup_from_site() @@ -315,7 +313,7 @@ def setUp(self): warnings.simplefilter('ignore', ReframeDeprecationWarning) # Load test site configuration - self.system, self.partition, self.progenv = get_setup_config() + self.system, self.partition, self.progenv = fixtures.get_test_config() # Set up RegressionTest instance self.resourcesdir = tempfile.mkdtemp(dir='unittests') @@ -870,7 +868,7 @@ def setUp(self): warnings.simplefilter('ignore', ReframeDeprecationWarning) # Load test site configuration - self.system, self.partition, self.progenv = get_setup_config() + self.system, self.partition, self.progenv = fixtures.get_test_config() # Set up RegressionTest instance self.resourcesdir = tempfile.mkdtemp(dir='unittests') diff --git a/unittests/test_sanity_functions.py 
b/unittests/test_sanity_functions.py index 978b6aba53..4bb6bdf80c 100644 --- a/unittests/test_sanity_functions.py +++ b/unittests/test_sanity_functions.py @@ -560,7 +560,7 @@ def test_safe_format(self): self.assertEqual(s, _format(s, 'bacon')) self.assertEqual('There is egg and spam.', _format(s, 'egg', 'spam')) self.assertEqual('There is egg and bacon.', - _format(s, 'egg', 'bacon', 'spam')) + _format(s, 'egg', 'bacon', 'spam')) s = 'no placeholders' self.assertEqual(s, _format(s)) diff --git a/unittests/test_schedulers.py b/unittests/test_schedulers.py index 18ef01d2cf..42c610c7cc 100644 --- a/unittests/test_schedulers.py +++ b/unittests/test_schedulers.py @@ -1,227 +1,140 @@ +import abc import os import re +import shutil +import tempfile +import time import unittest +import reframe.utility.os as os_ext from datetime import datetime -from tempfile import NamedTemporaryFile - from reframe.core.environments import Environment -from reframe.core.launchers import * -from reframe.core.modules import module_path_add -from reframe.core.schedulers import * +from reframe.core.exceptions import ReframeError +from reframe.core.launchers.local import LocalLauncher +from reframe.core.schedulers.registry import getscheduler from reframe.core.shell import BashScriptBuilder -from reframe.frontend.loader import autodetect_system, SiteConfiguration from reframe.settings import settings -from unittests.fixtures import ( - force_remove_file, system_with_scheduler, TEST_MODULES, TEST_RESOURCES -) +from unittests.fixtures import TEST_RESOURCES, partition_with_scheduler class _TestJob(unittest.TestCase): def setUp(self): - module_path_add([TEST_MODULES]) - self.site_config = SiteConfiguration() - self.site_config.load_from_dict(settings.site_configuration) - - self.stdout_f = NamedTemporaryFile( - dir='.', suffix='.out', delete=False) - self.stderr_f = NamedTemporaryFile( - dir='.', suffix='.err', delete=False) - self.script_f = NamedTemporaryFile(dir='.', suffix='.sh', delete=False) - 
- # Close all files and let whoever interested to open them. Otherwise a - # local job may fail with a 'Text file busy' error - self.stdout_f.close() - self.stderr_f.close() - self.script_f.close() - - def tearDown(self): - force_remove_file(self.stdout_f.name) - force_remove_file(self.stderr_f.name) - force_remove_file(self.script_f.name) - - def assertProcessDied(self, pid): - try: - os.kill(pid, 0) - self.fail('process %s is still alive' % pid) - except (ProcessLookupError, PermissionError): - pass - - -class TestSlurmJob(_TestJob): - def setUp(self): - super().setUp() - - self.num_tasks = 4 - self.num_tasks_per_node = 2 - self.testjob = SlurmJob( - job_name='testjob', - job_environ_list=[ - Environment(name='foo', modules=['testmod_foo']) - ], - job_script_builder=BashScriptBuilder(login=True), - script_filename=self.script_f.name, - num_tasks=self.num_tasks, - num_tasks_per_node=self.num_tasks_per_node, - stdout=self.stdout_f.name, - stderr=self.stderr_f.name, - launcher_type=NativeSlurmLauncher + self.workdir = tempfile.mkdtemp(dir='unittests') + self.testjob = self.job_type( + name='testjob', + command='hostname', + launcher=self.launcher, + environs=[Environment(name='foo', modules=['testmod_foo'])], + workdir=self.workdir, + script_filename=os_ext.mkstemp_path( + dir=self.workdir, suffix='.sh'), + stdout=os_ext.mkstemp_path(dir=self.workdir, suffix='.out'), + stderr=os_ext.mkstemp_path(dir=self.workdir, suffix='.err'), ) - self.testjob.pre_run = ['echo prerun', 'echo prerun'] + self.builder = BashScriptBuilder() + self.testjob.pre_run = ['echo prerun'] self.testjob.post_run = ['echo postrun'] - def setup_job(self, scheduler): - partition = system_with_scheduler(scheduler) - self.testjob.options += partition.access - - def _test_submission(self, ignore_lines=None): - self.testjob.submit('hostname') + def tearDown(self): + shutil.rmtree(self.workdir) + + @property + @abc.abstractmethod + def job_type(self): + """Return a concrete job class.""" + + @property 
+ @abc.abstractmethod + def launcher(self): + """Return a launcher to use for this test.""" + + @abc.abstractmethod + def assertScriptSanity(self, script_file): + """Assert the sanity of the produced script file.""" + with open(self.testjob.script_filename) as fp: + matches = re.findall(r'echo prerun|echo postrun|hostname', + fp.read()) + self.assertEqual(['echo prerun', 'hostname', 'echo postrun'], + matches) + + def test_prepare(self): + self.testjob.prepare(self.builder) + self.assertScriptSanity(self.testjob.script_filename) + + def test_submit(self): + self.testjob.prepare(self.builder) + self.testjob.submit() + self.assertIsNotNone(self.testjob.jobid) self.testjob.wait() - self.assertEqual(self.testjob.state, SLURM_JOB_COMPLETED) - self.assertEqual(self.testjob.exitcode, 0) - - # Check if job has run on the correct number of nodes - nodes = set() - num_tasks = 0 - num_prerun = 0 - num_postrun = 0 - before_run = True - with open(self.testjob.stdout) as f: - for line in f: - if ignore_lines and re.search(ignore_lines, line): - continue - - if before_run and re.search('^prerun', line): - num_prerun += 1 - elif not before_run and re.search('^postrun', line): - num_postrun += 1 - else: - # The rest of the lines must be from the job - nodes.add(line) - num_tasks += 1 - before_run = False - - self.assertEqual(2, num_prerun) - self.assertEqual(1, num_postrun) - self.assertEqual(num_tasks, self.num_tasks) - self.assertEqual(len(nodes), self.num_tasks / self.num_tasks_per_node) - - def _test_state_poll(self): - t_sleep = datetime.now() - self.testjob.submit('sleep 3') + self.assertEqual(0, self.testjob.exitcode) + + def test_submit_timelimit(self, check_elapsed_time=True): + self.testjob._command = 'sleep 10' + self.testjob._time_limit = (0, 0, 2) + self.testjob.prepare(self.builder) + t_job = datetime.now() + self.testjob.submit() + self.assertIsNotNone(self.testjob.jobid) self.testjob.wait() - t_sleep = datetime.now() - t_sleep + t_job = datetime.now() - t_job + if 
check_elapsed_time: + self.assertGreaterEqual(t_job.total_seconds(), 2) + self.assertLess(t_job.total_seconds(), 3) - self.assertEqual(self.testjob.state, SLURM_JOB_COMPLETED) - self.assertEqual(self.testjob.exitcode, 0) - self.assertGreaterEqual(t_sleep.total_seconds(), 3) + with open(self.testjob.stdout) as fp: + self.assertIsNone(re.search('postrun', fp.read())) - @unittest.skipIf(not system_with_scheduler(None), - 'job submission not supported') def test_cancel(self): - self.setup_job(None) - self.testjob.submit('sleep 5') + self.testjob._command = 'sleep 3' + self.testjob.prepare(self.builder) + t_job = datetime.now() + self.testjob.submit() self.testjob.cancel() - - # Cancel waits for job to finish + t_job = datetime.now() - t_job self.assertTrue(self.testjob.finished()) - self.assertEqual(self.testjob.state, SLURM_JOB_CANCELLED) + self.assertLess(t_job.total_seconds(), 3) def test_cancel_before_submit(self): - self.testjob.cancel() - - @unittest.skipIf(not system_with_scheduler('nativeslurm'), - 'native SLURM not supported') - def test_submit_slurm(self): - self.setup_job('nativeslurm') - self._test_submission() - - @unittest.skipIf(not system_with_scheduler('nativeslurm'), - 'native SLURM not supported') - def test_state_poll_slurm(self): - self.setup_job('nativeslurm') - self._test_state_poll() - - @unittest.skipIf(not system_with_scheduler('slurm+alps'), - 'SLURM+ALPS not supported') - def test_submit_alps(self): - from reframe.launchers import AlpsLauncher - - self.setup_job('slurm+alps') - self.testjob.launcher = AlpsLauncher(self.testjob) - self._test_submission(ignore_lines='^Application (\d+) resources\:') - - @unittest.skipIf(not system_with_scheduler('slurm+alps'), - 'SLURM+ALPS not supported') - def test_state_poll_alps(self): - from reframe.launchers import AlpsLauncher - - self.setup_job('slurm+alps') - self.testjob.launcher = AlpsLauncher(self.testjob) - self._test_state_poll() - - -class TestLocalJob(_TestJob): - def setUp(self): - 
super().setUp() - self.testjob = LocalJob(job_name='localjob', - job_environ_list=[], - job_script_builder=BashScriptBuilder(), - stdout=self.stdout_f.name, - stderr=self.stderr_f.name, - script_filename=self.script_f.name) - - def test_submission(self): - self.testjob.submit('sleep 1 && echo hello') - t_wait = datetime.now() + self.testjob._command = 'sleep 3' + self.testjob.prepare(self.builder) + self.assertRaises(ReframeError, self.testjob.cancel) + + def test_wait_before_submit(self): + self.testjob._command = 'sleep 3' + self.testjob.prepare(self.builder) + self.assertRaises(ReframeError, self.testjob.wait) + + def test_poll(self): + self.testjob._command = 'sleep 1' + self.testjob.prepare(self.builder) + self.testjob.submit() + self.assertFalse(self.testjob.finished()) self.testjob.wait() - t_wait = datetime.now() - t_wait - self.assertGreaterEqual(t_wait.total_seconds(), 1) - self.assertEqual(self.testjob.state, LOCAL_JOB_SUCCESS) - self.assertEqual(self.testjob.exitcode, 0) - with open(self.testjob.stdout) as f: - self.assertEqual(f.read(), 'hello\n') - # Double wait; job state must not change - self.testjob.wait() - self.assertEqual(self.testjob.state, LOCAL_JOB_SUCCESS) +class TestLocalJob(_TestJob): + def assertProcessDied(self, pid): + try: + os.kill(pid, 0) + self.fail('process %s is still alive' % pid) + except (ProcessLookupError, PermissionError): + pass - def test_submission_timelimit(self): - self.testjob._time_limit = (0, 0, 2) + @property + def job_type(self): + return getscheduler('local') - t_job = datetime.now() - self.testjob.submit('echo before && sleep 10 && echo after') - self.testjob.wait() - t_job = datetime.now() - t_job + @property + def launcher(self): + return LocalLauncher() - self.assertEqual(self.testjob.state, LOCAL_JOB_TIMEOUT) - self.assertNotEqual(self.testjob.exitcode, 0) - with open(self.testjob.stdout) as f: - self.assertEqual(f.read(), 'before\n') + def test_submit_timelimit(self): + from reframe.core.schedulers.local 
import LOCAL_JOB_TIMEOUT - self.assertGreaterEqual(t_job.total_seconds(), 2) - self.assertLess(t_job.total_seconds(), 10) - - # Double wait; job state must not change - self.testjob.wait() + super().test_submit_timelimit() self.assertEqual(self.testjob.state, LOCAL_JOB_TIMEOUT) - def test_cancel(self): - t_job = datetime.now() - self.testjob.submit('sleep 5') - self.testjob.cancel() - t_job = datetime.now() - t_job - - # Cancel waits for the job to finish - self.assertTrue(self.testjob.finished()) - self.assertLess(t_job.total_seconds(), 5) - self.assertEqual(self.testjob.state, LOCAL_JOB_FAILURE) - - def test_cancel_before_submit(self): - self.testjob.cancel() - def test_cancel_with_grace(self): # This test emulates a spawned process that ignores the SIGTERM signal # and also spawns another process: @@ -234,12 +147,16 @@ def test_cancel_with_grace(self): # killed immediately after the grace period of 2 seconds expires. # # We also check that the additional spawned process is also killed. + from reframe.core.schedulers.local import LOCAL_JOB_TIMEOUT + self.testjob._command = 'sleep 5 &' self.testjob._time_limit = (0, 1, 0) self.testjob.cancel_grace_period = 2 self.testjob.pre_run = ['trap -- "" TERM'] self.testjob.post_run = ['echo $!', 'wait'] - self.testjob.submit('sleep 5 &') + + self.testjob.prepare(self.builder) + self.testjob.submit() # Stall a bit here to let the the spawned process start and install its # signal handler for SIGTERM @@ -273,10 +190,15 @@ def test_cancel_term_ignore(self): # spawned sleep will ignore it. We need to make sure that our # implementation grants the sleep process a grace period and then # kills it. 
+ from reframe.core.schedulers.local import LOCAL_JOB_TIMEOUT - prog = os.path.join(TEST_RESOURCES, 'src', 'sleep_deeply.sh') + self.testjob.pre_run = [] + self.testjob.port_run = [] + self.testjob._command = os.path.join(TEST_RESOURCES, + 'src', 'sleep_deeply.sh') self.testjob.cancel_grace_period = 2 - self.testjob.submit(prog) + self.testjob.prepare(self.builder) + self.testjob.submit() # Stall a bit here to let the the spawned process start and install its # signal handler for SIGTERM @@ -296,3 +218,87 @@ def test_cancel_term_ignore(self): # Verify that the spawned sleep is killed, too self.assertProcessDied(sleep_pid) + + +class TestSlurmJob(_TestJob): + import reframe.core.schedulers.slurm as slurm + + @property + def job_type(self): + return getscheduler('slurm') + + @property + def launcher(self): + return LocalLauncher() + + def setup_from_sysconfig(self): + partition = partition_with_scheduler('slurm') + self.testjob.options += partition.access + + def test_prepare(self): + # Mock up a job submission + self.testjob._time_limit = (0, 5, 0) + self.testjob._num_tasks = 16 + self.testjob._num_tasks_per_node = 2 + self.testjob._num_tasks_per_core = 1 + self.testjob._num_tasks_per_socket = 1 + self.testjob._num_cpus_per_task = 18 + self.testjob._use_smt = True + self.testjob._sched_nodelist = 'nid000[00-17]' + self.testjob._sched_exclude_nodelist = 'nid00016' + self.testjob._sched_partition = 'foo' + self.testjob._sched_reservation = 'bar' + self.testjob._sched_account = 'spam' + self.testjob._sched_exclusive_access = True + super().test_prepare() + expected_directives = set([ + '#SBATCH --job-name="testjob"', + '#SBATCH --time=0:5:0', + '#SBATCH --output=%s' % self.testjob.stdout, + '#SBATCH --error=%s' % self.testjob.stderr, + '#SBATCH --ntasks=%s' % self.testjob.num_tasks, + '#SBATCH --ntasks-per-node=%s' % self.testjob.num_tasks_per_node, + '#SBATCH --ntasks-per-core=%s' % self.testjob.num_tasks_per_core, + ('#SBATCH --ntasks-per-socket=%s' % + 
self.testjob.num_tasks_per_socket), + '#SBATCH --cpus-per-task=%s' % self.testjob.num_cpus_per_task, + '#SBATCH --hint=multithread', + '#SBATCH --nodelist=%s' % self.testjob.sched_nodelist, + '#SBATCH --exclude=%s' % self.testjob.sched_exclude_nodelist, + '#SBATCH --partition=%s' % self.testjob.sched_partition, + '#SBATCH --reservation=%s' % self.testjob.sched_reservation, + '#SBATCH --account=%s' % self.testjob.sched_account, + '#SBATCH --exclusive' + ]) + with open(self.testjob.script_filename) as fp: + found_directives = set(re.findall(r'^\#SBATCH .*', fp.read(), + re.MULTILINE)) + + self.assertEqual(expected_directives, found_directives) + + @unittest.skipIf(not partition_with_scheduler('slurm'), + 'Slurm scheduler not supported') + def test_submit(self): + self.setup_from_sysconfig() + super().test_submit() + + @unittest.skipIf(not partition_with_scheduler('slurm'), + 'Slurm scheduler not supported') + def test_submit_timelimit(self): + # Skip this test for Slurm, since we the minimum time limit is 1min + self.skipTest("Slurm's minimum time limit is 60s") + + @unittest.skipIf(not partition_with_scheduler('slurm'), + 'Slurm scheduler not supported') + def test_cancel(self): + from reframe.core.schedulers.slurm import SLURM_JOB_CANCELLED + + self.setup_from_sysconfig() + super().test_cancel() + self.assertEqual(self.testjob.state, SLURM_JOB_CANCELLED) + + @unittest.skipIf(not partition_with_scheduler('slurm'), + 'Slurm scheduler not supported') + def test_poll(self): + self.setup_from_sysconfig() + super().test_poll() diff --git a/unittests/test_utility.py b/unittests/test_utility.py index ed3b1e2bf6..f9277c7d14 100644 --- a/unittests/test_utility.py +++ b/unittests/test_utility.py @@ -4,6 +4,7 @@ import tempfile import unittest +import reframe.core.debug as debug import reframe.utility.os as os_ext from reframe.core.environments import EnvironmentSnapshot @@ -239,3 +240,38 @@ def test_always_true(self): self.assertTrue(always_true(0, None)) 
self.assertTrue(always_true(230, 321.)) self.assertTrue(always_true('foo', 232, foo=12, bar='h')) + + +class TestDebugRepr(unittest.TestCase): + def test_builtin_types(self): + # builtin types must use the default repr() + self.assertEqual(repr(1), debug.repr(1)) + self.assertEqual(repr(1.2), debug.repr(1.2)) + self.assertEqual(repr([1, 2, 3]), debug.repr([1, 2, 3])) + self.assertEqual(repr({1, 2, 3}), debug.repr({1, 2, 3})) + self.assertEqual(repr({1, 2, 3}), debug.repr({1, 2, 3})) + self.assertEqual(repr({'a': 1, 'b': {2, 3}}), + debug.repr({'a': 1, 'b': {2, 3}})) + + def test_obj_repr(self): + class C: + def __repr__(self): + return debug.repr(self) + + class D: + def __repr__(self): + return debug.repr(self) + + c = C() + c._a = -1 + c.a = 1 + c.b = {1, 2, 3} + c.d = D() + c.d.a = 2 + c.d.b = 3 + + rep = repr(c) + self.assertIn('unittests.test_utility', rep) + self.assertIn('_a=%r' % c._a, rep) + self.assertIn('b=%r' % c.b, rep) + self.assertIn('D(...)', rep)