diff --git a/setup.py b/setup.py index b42639eea..30d3d1878 100644 --- a/setup.py +++ b/setup.py @@ -198,6 +198,7 @@ def run(self): 'types-pkg_resources', 'types-pyyaml', 'typing-extensions>=3.10', + 'urllib3<2.0', 'vcrpy>=4.1.1', 'yapf==0.31.0', ], diff --git a/superbench/runner/ansible.py b/superbench/runner/ansible.py index c012edc5c..fc71b7bd6 100644 --- a/superbench/runner/ansible.py +++ b/superbench/runner/ansible.py @@ -59,11 +59,12 @@ def __init__(self, config): self._config['cmdline'] += ' --ask-pass --ask-become-pass' logger.info(self._config) - def run(self, ansible_config, sudo=False): # pragma: no cover + def run(self, ansible_config, cancel_callback=None, sudo=False): # pragma: no cover """Run Ansible runner. Args: ansible_config (dict): Ansible config dict. + cancel_callback (Callable): Ansible runner cancel callback. sudo (bool): Run as sudo or not. Defaults to False. Returns: @@ -73,7 +74,7 @@ def run(self, ansible_config, sudo=False): # pragma: no cover logger.info('Run as sudo ...') ansible_config['cmdline'] += ' --become' with tempfile.TemporaryDirectory(prefix='ansible') as tmpdir: - r = ansible_runner.run(private_data_dir=tmpdir, **ansible_config) + r = ansible_runner.run(private_data_dir=tmpdir, cancel_callback=cancel_callback, **ansible_config) logger.debug(r.stats) if r.rc == 0: logger.info('Run succeed, return code {}.'.format(r.rc)) diff --git a/superbench/runner/runner.py b/superbench/runner/runner.py index 28b5c7186..d91020bfb 100644 --- a/superbench/runner/runner.py +++ b/superbench/runner/runner.py @@ -4,8 +4,10 @@ """SuperBench Runner.""" import os +import sys import json import random +import signal from pathlib import Path from pprint import pformat from collections import defaultdict @@ -233,6 +235,18 @@ def fetch_results(self): # pragma: no cover ) ) + def __signal_handler(self, signum, frame): + """Signal handler for runner. + + Args: + signum (int): Signal number. + frame (FrameType): Current stack frame when the signal was received. 
+ """ + if signum == signal.SIGINT or signum == signal.SIGTERM: + logger.info('Killed by %s, exiting ...', signal.Signals(signum).name) + self.cleanup() + sys.exit(128 + signum) + def __create_results_summary(self): # pragma: no cover """Create the result summary file of all nodes.""" all_results = list() @@ -438,12 +452,17 @@ def _run_proc(self, benchmark_name, mode, vars): # we do not expect timeout in ansible unless subprocess hangs ansible_runner_config['timeout'] = timeout + 60 - rc = self._ansible_client.run(ansible_runner_config, sudo=(not self._docker_config.skip)) + # overwrite ansible runner's default signal handler with main process's + rc = self._ansible_client.run( + ansible_runner_config, cancel_callback=lambda: None, sudo=(not self._docker_config.skip) + ) return rc def run(self): """Run the SuperBench benchmarks distributedly.""" self.check_env() + signal.signal(signal.SIGINT, self.__signal_handler) + signal.signal(signal.SIGTERM, self.__signal_handler) for benchmark_name in self._sb_benchmarks: if benchmark_name not in self._sb_enabled_benchmarks: continue