microsoft · yukirora · Jun 29, 2023 · May 23, 2023 · May 23, 2023 · May 23, 2023
@@ -165,6 +165,26 @@ Execute GPT2 model benchmark in default configuration:
 sb exec --config-override superbench.enable="['gpt2_models']"
 ```
 
+### `sb node info`
+Get system info on the local node.
+
+```bash title="SB CLI"
+sb node info [--output-dir]
+```
+
+#### Optional arguments
+
+| Name           | Default | Description                                                                 |
+|----------------|---------|-----------------------------------------------------------------------------|
+| `--output-dir` | `None`  | Path to output directory, outputs/{datetime} will be used if not specified. |
+
+#### Examples
+
+Get system info on the local node and save it into the `outputs` dir:
+```bash title="SB CLI"
+sb node info --output-dir outputs
+```
+
 ### `sb result diagnosis`
 
 Filter the defective machines automatically from benchmarking results according to rules defined in rule file.
@@ -335,6 +355,54 @@ sb run --no-docker --host-list localhost --config-override \
   superbench.enable=kernel-launch superbench.env.SB_MICRO_PATH=/path/to/superbenchmark
 ```
 
+### `sb run-info`
+
+```bash title="SB CLI"
+sb run-info [--docker-image]
+          [--docker-password]
+          [--docker-username]
+          [--host-file]
+          [--host-list]
+          [--host-password]
+          [--host-username]
+          [--no-image-pull]
+          [--output-dir]
+          [--private-key]
+```
+
+#### Optional arguments
+
+| Name                  | Default                 | Description                                                                       |
+|-----------------------|-------------------------|-----------------------------------------------------------------------------------|
+| `--docker-image` `-i` | `superbench/superbench` | Docker image URI, [here](./user-tutorial/container-images.mdx) listed all images. |
+| `--docker-password`   | `None`                  | Docker registry password if authentication is needed.                             |
+| `--docker-username`   | `None`                  | Docker registry username if authentication is needed.                             |
+| `--host-file` `-f`    | `None`                  | Path to Ansible inventory host file.                                              |
+| `--host-list` `-l`    | `None`                  | Comma separated host list.                                                        |
+| `--host-password`     | `None`                  | Host password or key passphrase if needed.                                        |
+| `--host-username`     | `None`                  | Host username if needed.                                                          |
+| `--no-image-pull`     | `False`                 | Skip pull and use local Docker image.                                             |
+| `--output-dir`        | `None`                  | Path to output directory, outputs/{datetime} will be used if not specified.       |
+| `--private-key`       | `None`                  | Path to private key if needed.                                                    |
+
+#### Global arguments
+
+| Name          | Default | Description        |
+|---------------|---------|--------------------|
+| `--help` `-h` | N/A     | Show help message. |
+
+#### Examples
+
+Collect system info on local GPU node:
+```bash title="SB CLI"
+sb run-info --host-list localhost
+```
+
+Collect system info on all nodes in `./host.ini`:
+```bash title="SB CLI"
+sb run-info --host-file ./host.ini
+```
+
 ### `sb version`
 
 Print the current SuperBench CLI version.

@@ -4,6 +4,8 @@ id: system-config
 
 # System Config Info
 
+This tool automatically collects system information from the tested GPU nodes, covering the following hardware categories:
+
 - [System](#system)
 - [Memory](#memory)
 - [CPU](#cpu)
@@ -12,7 +14,21 @@ id: system-config
 - [Accelerator](#accelerator)
 - [PCIe](#pcie)
 
-## Parameter amd Details
+## Usage
+
+1. [Install SuperBench](../getting-started/installation.mdx) on the local machine.
+
+2. Prepare the host file of the tested GPU nodes using [Ansible Inventory](../getting-started/configuration.md#ansible-inventory) on the local machine.
+
+3. After SuperBench is installed and the host file is ready, you can start collecting the system info automatically with the `sb run-info` command. Detailed command usage can be found in [SuperBench CLI](../cli.md).
+
+  ```
+  sb run-info -f host.ini --output-dir ${output_dir}
+  ```
+
+4. After the command finishes, you can find the system info JSON file `sys_info.json` of each node under `${output_dir}/nodes/${node_name}`.
+
+## Parameter and Details
 
 ### System
 

@@ -23,6 +23,7 @@ def load_command_table(self, args):
             g.command('deploy', 'deploy_command_handler')
             g.command('exec', 'exec_command_handler')
             g.command('run', 'run_command_handler')
+            g.command('run-info', 'run_info_command_handler')
         with CommandGroup(self, 'benchmark', 'superbench.cli._benchmark_handler#{}') as g:
             g.command('list', 'benchmark_list_command_handler')
             g.command('list-parameters', 'benchmark_list_params_command_handler')

@@ -319,3 +319,52 @@ def run_command_handler(
     runner.run()
     if runner.get_failure_count() != 0:
         sys.exit(runner.get_failure_count())
+
+
+def run_info_command_handler(
+    docker_image='superbench/superbench',
+    docker_username=None,
+    docker_password=None,
+    no_image_pull=False,
+    host_file=None,
+    host_list=None,
+    host_username=None,
+    host_password=None,
+    output_dir=None,
+    private_key=None
+):
+    """Gather the system info from every given node.
+
+    The arguments are handed to ``process_runner_arguments``; the configs it returns
+    are used to build a ``SuperBenchRunner`` whose ``run_sys_info`` is then invoked.
+    When the runner reports a non-zero failure count, the process exits with it.
+
+    Args:
+        docker_image (str, optional): Docker image URI. Defaults to superbench/superbench.
+        docker_username (str, optional): Docker registry username if authentication is needed. Defaults to None.
+        docker_password (str, optional): Docker registry password if authentication is needed. Defaults to None.
+        no_image_pull (bool, optional): Skip pull and use local Docker image. Defaults to False.
+        host_file (str, optional): Path to Ansible inventory host file. Defaults to None.
+        host_list (str, optional): Comma separated host list. Defaults to None.
+        host_username (str, optional): Host username if needed. Defaults to None.
+        host_password (str, optional): Host password or key passphrase if needed. Defaults to None.
+        output_dir (str, optional): Path to output directory. Defaults to None.
+        private_key (str, optional): Path to private key if needed. Defaults to None.
+
+    Raises:
+        CLIError: If input arguments are invalid.
+    """
+    # This command takes no no-docker option, so no_docker is always passed as False.
+    runner_args = dict(
+        docker_image=docker_image,
+        docker_username=docker_username,
+        docker_password=docker_password,
+        no_docker=False,
+        no_image_pull=no_image_pull,
+        host_file=host_file,
+        host_list=host_list,
+        host_username=host_username,
+        host_password=host_password,
+        output_dir=output_dir,
+        private_key=private_key,
+    )
+    docker_cfg, ansible_cfg, sb_cfg, result_dir = process_runner_arguments(**runner_args)
+
+    info_runner = SuperBenchRunner(sb_cfg, docker_cfg, ansible_cfg, result_dir)
+    info_runner.run_sys_info()
+    if info_runner.get_failure_count() == 0:
+        return
+    sys.exit(info_runner.get_failure_count())
@@ -65,6 +65,16 @@
             --config-override superbench.enable=kernel-launch superbench.env.SB_MICRO_PATH=/path/to/superbenchmark
 """.format(cli_name=CLI_NAME)
 
+# Help entry for the `run-info` command: a short summary plus two usage examples.
+helps['run-info'] = """
+    type: command
+    short-summary: Collect the system info distributedly.
+    examples:
+        - name: Collect system info on local GPU node
+          text: {cli_name} run-info --host-list localhost
+        - name: Collect system info on all nodes in ./host.ini
+          text: {cli_name} run-info --host-file ./host.ini
+""".format(cli_name=CLI_NAME)
+
 helps['benchmark'] = """
     type: group
     short-summary: Commands to manage benchmarks.

@@ -3,17 +3,24 @@
 
 """SuperBench CLI node subgroup command handler."""
 
+import json
+from pathlib import Path
+
 from superbench.tools import SystemInfo
+from superbench.common.utils import create_sb_output_dir
 
 
-def info_command_handler():
+def info_command_handler(output_dir=None):
     """Get node hardware info.
 
+    The collected info is also saved as JSON to ``sys_info.json`` inside the
+    directory returned by ``create_sb_output_dir``.
+
+    Args:
+        output_dir (str, optional): Output directory, passed to
+            ``create_sb_output_dir``. Defaults to None.
+
     Returns:
         dict: node info.
+
+    Raises:
+        RuntimeError: If collecting or saving the node info fails.
     """
     try:
+        output_dir = create_sb_output_dir(output_dir)
         info = SystemInfo().get_all()
+        output_dir_path = Path(output_dir)
+        with open(output_dir_path / 'sys_info.json', 'w') as f:
+            json.dump(info, f)
     except Exception as ex:
         raise RuntimeError('Failed to get node info.') from ex
+    # Still return the info: writing the file must not change what callers get back.
     return info
@@ -197,6 +197,24 @@ def deploy(self):    # pragma: no cover
             )
         self._ansible_client.run(self._ansible_client.get_playbook_config('deploy.yaml', extravars=extravars))
 
+    def run_sys_info(self):
+        """Collect system info by running ``sb node info`` through the Ansible client.
+
+        The command is wrapped in ``docker exec sb-workspace`` unless the Docker
+        config is marked as skipped, in which case it runs from ``$SB_WORKSPACE``.
+        ``cleanup`` is called when Ansible returns a non-zero code, and
+        ``fetch_results`` is called in either case.
+        """
+        self.check_env()
+
+        logger.info('Runner is going to run node info.')
+
+        skip_docker = self._docker_config.skip
+        node_cmd = 'sb node info --output-dir {}'.format(self._sb_output_dir)
+        if skip_docker:
+            wrapper = "bash -c 'cd $SB_WORKSPACE && {}'"
+        else:
+            wrapper = "docker exec sb-workspace bash -c '{}'"
+        shell_config = self._ansible_client.get_shell_config(wrapper.format(node_cmd))
+        # sudo is requested only when the command goes through Docker.
+        return_code = self._ansible_client.run(shell_config, sudo=(not skip_docker))
+
+        if return_code != 0:
+            self.cleanup()
+        self.fetch_results()
+
     def check_env(self):    # pragma: no cover
         """Check SuperBench environment."""
         logger.info('Checking SuperBench environment.')