From 981b1826e88ad868b441a00a3a0010fbba3cf981 Mon Sep 17 00:00:00 2001 From: yukirora Date: Mon, 28 Jun 2021 17:07:21 +0800 Subject: [PATCH 01/10] add example and benchmark file of rdma loopback --- .../benchmarks/rdma_loopback_performance.py | 23 ++ .../benchmarks/micro_benchmarks/__init__.py | 3 +- .../rdma_loopback_performance.py | 215 ++++++++++++++++++ superbench/common/utils/__init__.py | 2 +- superbench/common/utils/network.py | 19 ++ .../test_computation_communication_overlap.py | 3 +- .../test_rdma_loopback_performance.py | 0 .../micro_benchmarks/test_sharding_matmul.py | 3 +- tests/benchmarks/utils.py | 17 +- 9 files changed, 266 insertions(+), 19 deletions(-) create mode 100644 examples/benchmarks/rdma_loopback_performance.py create mode 100644 superbench/benchmarks/micro_benchmarks/rdma_loopback_performance.py create mode 100644 superbench/common/utils/network.py create mode 100644 tests/benchmarks/micro_benchmarks/test_rdma_loopback_performance.py diff --git a/examples/benchmarks/rdma_loopback_performance.py b/examples/benchmarks/rdma_loopback_performance.py new file mode 100644 index 000000000..e001b00b3 --- /dev/null +++ b/examples/benchmarks/rdma_loopback_performance.py @@ -0,0 +1,23 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +"""Model benchmark example for RDMA loopback performance. 
+ +Commands to run: + python3 examples/benchmarks/rdma_loopback_performance_performance.py +""" + +from superbench.benchmarks import BenchmarkRegistry +from superbench.common.utils import logger + +if __name__ == '__main__': + parameters = '--ib_index 0 --numa 1' + context = BenchmarkRegistry.create_benchmark_context('rdma-loopback', parameters=parameters) + + benchmark = BenchmarkRegistry.launch_benchmark(context) + if benchmark: + logger.info( + 'benchmark: {}, return code: {}, result: {}'.format( + benchmark.name, benchmark.return_code, benchmark.result + ) + ) diff --git a/superbench/benchmarks/micro_benchmarks/__init__.py b/superbench/benchmarks/micro_benchmarks/__init__.py index 3b1b820f0..660eb5a9e 100644 --- a/superbench/benchmarks/micro_benchmarks/__init__.py +++ b/superbench/benchmarks/micro_benchmarks/__init__.py @@ -10,8 +10,9 @@ from superbench.benchmarks.micro_benchmarks.cublas_function import CublasBenchmark from superbench.benchmarks.micro_benchmarks.cudnn_function import CudnnBenchmark from superbench.benchmarks.micro_benchmarks.gemm_flops_performance import GemmFlopsCuda +from superbench.benchmarks.micro_benchmarks.rdma_loopback_performance import RDMALoopback __all__ = [ 'MicroBenchmark', 'MicroBenchmarkWithInvoke', 'ShardingMatmul', 'ComputationCommunicationOverlap', 'KernelLaunch', - 'CublasBenchmark', 'CudnnBenchmark', 'GemmFlopsCuda' + 'CublasBenchmark', 'CudnnBenchmark', 'GemmFlopsCuda', 'RDMALoopback' ] diff --git a/superbench/benchmarks/micro_benchmarks/rdma_loopback_performance.py b/superbench/benchmarks/micro_benchmarks/rdma_loopback_performance.py new file mode 100644 index 000000000..c6eef8af0 --- /dev/null +++ b/superbench/benchmarks/micro_benchmarks/rdma_loopback_performance.py @@ -0,0 +1,215 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. 
+ +"""Module of the RDMA loopback benchmarks.""" + +import os +import subprocess + +from superbench.common.utils import logger +from superbench.common.utils import network +from superbench.benchmarks import BenchmarkRegistry, ReturnCode +from superbench.benchmarks.micro_benchmarks import MicroBenchmarkWithInvoke + + +class RDMALoopback(MicroBenchmarkWithInvoke): + """The RDMA loopback performance benchmark class.""" + def __init__(self, name, parameters=''): + """Constructor. + + Args: + name (str): benchmark name. + parameters (str): benchmark parameters. + """ + super().__init__(name, parameters) + + self._bin_name = 'run_perftest_loopback' + self.__support_ib_commands = ['ib_write_bw', 'ib_read_bw', 'ib_send_bw'] + self.__message_sizes = ['8388608', '4194304', '2097152', '1048576'] + + def add_parser_arguments(self): + """Add the specified arguments.""" + super().add_parser_arguments() + + self._parser.add_argument( + '--ib_index', + type=int, + default=0, + required=True, + help='The index of ib device.', + ) + self._parser.add_argument( + '--n', + type=int, + default=20000, + required=False, + help='The iterations of running ib command', + ) + self._parser.add_argument( + '--size', + type=int, + default=8388608, + required=False, + help='The message size of running ib command. E.g. {}.'.format(' '.join(self.__message_sizes)), + ) + self._parser.add_argument( + '--commands', + type=str, + nargs='+', + default='ib_write_bw', + help='The ib command used to run. E.g. {}.'.format(' '.join(self.__support_ib_commands)), + ) + self._parser.add_argument( + '--mode', + type=str, + default='AF', + help='The mode used to run ib command. 
Eg, AF(all message size) or S(single message size)', + ) + self._parser.add_argument( + '--numa', + type=int, + default=0, + required=True, + help='The index of numa node.', + ) + + def __get_ib_devices(self): + """Get available ordered IB devices in the system and filter ethernet devices.""" + # command = 'ls -l /sys/class/infiniband/* | awk \'{print $9}\' | sort | awk -F\'/\' \'{print $5}\'' + command = "ibv_devinfo | awk '$1 ~ /hca_id/||/link_layer:/ {print $1,$2}' | awk '{print $2}'" + output = subprocess.run( + command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True, check=False, universal_newlines=True + ) + lines = output.stdout.splitlines() + ib_devices = [] + for i in range(len(lines) - 1): + if 'InfiniBand' in lines[i + 1]: + ib_devices.append(lines[i]) + return ib_devices + + def __get_numa_cores(self, numa_index): + """Get the last two cores from different physical cpu core of NUMA. + + Args: + numa_index (int): the index of numa node. + + Return: + The last two cores from different physical cpu core of NUMA. + """ + command = 'numactl --hardware | grep "node {} cpus:"'.format(numa_index) + output = subprocess.run( + command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True, check=False, universal_newlines=True + ) + return output.stdout.splitlines()[0].split(' ') + + def _preprocess(self): + """Preprocess/preparation operations before the benchmarking. + + Return: + True if _preprocess() succeed. 
+ """ + if not super()._preprocess(): + return False + + if not isinstance(self._args.commands, list): + self._args.commands = [self._args.commands] + self._args.commands = [command.lower() for command in self._args.commands] + + self._args.mode = self._args.mode.upper() + + if str(self._args.size) not in self.__message_sizes: + self._result.set_return_code(ReturnCode.INVALID_ARGUMENT) + logger.error( + 'Unsupported message size - benchmark: {}, size: {}, expect: {}.'.format( + self._name, self._args.size, self.__message_sizes + ) + ) + return False + + command_mode = '' + if self._args.mode == 'AF': + command_mode = ' -a' + elif self._args.mode == 'S': + command_mode = ' -s ' + self._args.size + else: + self._result.set_return_code(ReturnCode.INVALID_ARGUMENT) + logger.error( + 'Unsupported args mode - benchmark: {}, mode: {}, expect: {}.'.format( + self._name, self._args.mode, 'AF or S' + ) + ) + return False + + for ib_command in self._args.commands: + if ib_command not in self.__support_ib_commands: + self._result.set_return_code(ReturnCode.INVALID_ARGUMENT) + logger.error( + 'Unsupported ib command - benchmark: {}, command: {}, expected: {}.'.format( + self._name, ib_command, self.__support_ib_commands + ) + ) + return False + else: + command = os.path.join(self._args.bin_dir, self._bin_name) + numa_cores = self.__get_numa_cores(self._args.numa) + server_core = int(numa_cores[-1]) + client_core = int(numa_cores[-3]) + command += ' ' + str(server_core) + ' ' + str(client_core) + command += ' ' + ib_command + command += command_mode + ' -F' + command += ' --iters=' + str(self._args.n) + command += ' -d ' + self.__get_ib_devices()[self._args.ib_index] + command += ' -p ' + str(network.get_free_port()) + self._commands.append(command) + + return True + + def _process_raw_result(self, cmd_idx, raw_output): + """Function to parse raw results and save the summarized results. 
+ + self._result.add_raw_data() and self._result.add_result() need to be called to save the results. + + Args: + cmd_idx (int): the index of command corresponding with the raw_output. + raw_output (str): raw output string of the micro-benchmark. + + Return: + True if the raw output string is valid and result can be extracted. + """ + ib_command = self._args.commands[cmd_idx] + self._result.add_raw_data( + 'raw_output_' + str(self._args.ib_index) + '_' + ib_command + '_' + self._args.mode, raw_output + ) + + valid = False + content = raw_output.splitlines() + try: + metric_set = set() + for line in content: + for i in range(len(self.__message_sizes)): + if self.__message_sizes[i] in line: + values = list(filter(None, line.split(' '))) + avg_bw = float(values[-2]) + metric = 'RDMA_{}_{}_{}_{}_{}_avg'.format( + str(self._args.ib_index), self._args.mode, self.__message_sizes[i], str(self._args.n), + ib_command + ) + if metric not in metric_set: + metric_set.add(metric) + self._result.add_result(metric, avg_bw) + valid = True + except BaseException: + valid = False + finally: + if valid is False: + logger.error( + 'The result format is invalid - round: {}, benchmark: {}, raw output: {}.'.format( + self._curr_run_index, self._name, raw_output + ) + ) + return False + + return True + + +BenchmarkRegistry.register_benchmark('rdma-loopback', RDMALoopback, parameters='--ib_index 0 --numa 1') diff --git a/superbench/common/utils/__init__.py b/superbench/common/utils/__init__.py index 708eed456..a339d41fd 100644 --- a/superbench/common/utils/__init__.py +++ b/superbench/common/utils/__init__.py @@ -9,4 +9,4 @@ nv_helper = LazyImport('superbench.common.utils.nvidia_helper') -__all__ = ['SuperBenchLogger', 'logger', 'create_output_dir', 'get_sb_config', 'LazyImport', 'nv_helper'] +__all__ = ['SuperBenchLogger', 'logger', 'create_output_dir', 'get_sb_config', 'LazyImport', 'nv_helper', 'network'] diff --git a/superbench/common/utils/network.py b/superbench/common/utils/network.py 
new file mode 100644 index 000000000..4e96f24b4 --- /dev/null +++ b/superbench/common/utils/network.py @@ -0,0 +1,19 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""Network Utility.""" + +import socket +from contextlib import closing + + +def get_free_port(): + """Get a free port in current system. + + Return: + port (int): a free port in current system. + """ + with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: + s.bind(('', 0)) + s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + return s.getsockname()[1] diff --git a/tests/benchmarks/micro_benchmarks/test_computation_communication_overlap.py b/tests/benchmarks/micro_benchmarks/test_computation_communication_overlap.py index 381dec3ac..598bdab09 100644 --- a/tests/benchmarks/micro_benchmarks/test_computation_communication_overlap.py +++ b/tests/benchmarks/micro_benchmarks/test_computation_communication_overlap.py @@ -7,6 +7,7 @@ from tests.helper import decorator import tests.benchmarks.utils as utils +from superbench.common.utils import network from superbench.benchmarks import BenchmarkRegistry, Framework, BenchmarkType, ReturnCode from superbench.benchmarks.micro_benchmarks.computation_communication_overlap \ import ComputationCommunicationOverlap, ComputationKernelType @@ -56,7 +57,7 @@ def test_pytorch_computation_communication_overlap_fake_distributed(): parameters='--num_warmup 5 --num_steps 10 --ratio 5', framework=Framework.PYTORCH ) - utils.setup_simulated_ddp_distributed_env(1, 0, utils.get_free_port()) + utils.setup_simulated_ddp_distributed_env(1, 0, network.get_free_port()) benchmark = BenchmarkRegistry.launch_benchmark(context) # Check basic information. 
diff --git a/tests/benchmarks/micro_benchmarks/test_rdma_loopback_performance.py b/tests/benchmarks/micro_benchmarks/test_rdma_loopback_performance.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/benchmarks/micro_benchmarks/test_sharding_matmul.py b/tests/benchmarks/micro_benchmarks/test_sharding_matmul.py index c828d7e18..03428d3b1 100644 --- a/tests/benchmarks/micro_benchmarks/test_sharding_matmul.py +++ b/tests/benchmarks/micro_benchmarks/test_sharding_matmul.py @@ -5,6 +5,7 @@ import tests.benchmarks.utils as utils from tests.helper import decorator +from superbench.common.utils import network from superbench.benchmarks import BenchmarkRegistry, Platform, Framework, BenchmarkType, ReturnCode from superbench.benchmarks.micro_benchmarks.sharding_matmul import ShardingMatmul, ShardingMode @@ -22,7 +23,7 @@ def test_pytorch_sharding_matmul(): assert (BenchmarkRegistry.is_benchmark_context_valid(context)) - utils.setup_simulated_ddp_distributed_env(1, 0, utils.get_free_port()) + utils.setup_simulated_ddp_distributed_env(1, 0, network.get_free_port()) benchmark = BenchmarkRegistry.launch_benchmark(context) # Check basic information. diff --git a/tests/benchmarks/utils.py b/tests/benchmarks/utils.py index 3ef897c5e..87c46b1d8 100644 --- a/tests/benchmarks/utils.py +++ b/tests/benchmarks/utils.py @@ -4,12 +4,11 @@ """Utilities for benchmark tests.""" import os -import socket -from contextlib import closing import multiprocessing as multiprocessing from multiprocessing import Process from superbench.benchmarks import BenchmarkRegistry +from superbench.common.utils import network def clean_simulated_ddp_distributed_env(): @@ -21,18 +20,6 @@ def clean_simulated_ddp_distributed_env(): os.environ.pop('MASTER_PORT') -def get_free_port(): - """Get a free port in current system. - - Return: - port (int): a free port in current system. 
- """ - with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: - s.bind(('', 0)) - s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - return s.getsockname()[1] - - def setup_simulated_ddp_distributed_env(world_size, local_rank, port): """Function to setup the simulated DDP distributed envionment variables.""" os.environ['WORLD_SIZE'] = str(world_size) @@ -58,7 +45,7 @@ def simulated_ddp_distributed_benchmark(context, world_size): Return: results (list): list of benchmark results from #world_size number of processes. """ - port = get_free_port() + port = network.get_free_port() process_list = [] multiprocessing.set_start_method('spawn') From e1e5fb85ccf01a2d19f4802a622ba9d075487b99 Mon Sep 17 00:00:00 2001 From: yukirora Date: Mon, 28 Jun 2021 20:56:12 +0800 Subject: [PATCH 02/10] add test --- dockerfile/cuda11.1.1.dockerfile | 1 + .../rdma_loopback_performance.py | 46 ++--- superbench/benchmarks/return_code.py | 1 + superbench/common/utils/network.py | 16 ++ .../test_rdma_loopback_performance.py | 161 ++++++++++++++++++ 5 files changed, 196 insertions(+), 29 deletions(-) diff --git a/dockerfile/cuda11.1.1.dockerfile b/dockerfile/cuda11.1.1.dockerfile index 5beb60b43..833a9d6de 100644 --- a/dockerfile/cuda11.1.1.dockerfile +++ b/dockerfile/cuda11.1.1.dockerfile @@ -36,6 +36,7 @@ RUN apt-get update && \ util-linux \ vim \ wget \ + numactl \ && \ apt-get autoremove && \ apt-get clean && \ diff --git a/superbench/benchmarks/micro_benchmarks/rdma_loopback_performance.py b/superbench/benchmarks/micro_benchmarks/rdma_loopback_performance.py index c6eef8af0..106546a31 100644 --- a/superbench/benchmarks/micro_benchmarks/rdma_loopback_performance.py +++ b/superbench/benchmarks/micro_benchmarks/rdma_loopback_performance.py @@ -73,20 +73,6 @@ def add_parser_arguments(self): help='The index of numa node.', ) - def __get_ib_devices(self): - """Get available ordered IB devices in the system and filter ethernet devices.""" - # command = 'ls -l 
/sys/class/infiniband/* | awk \'{print $9}\' | sort | awk -F\'/\' \'{print $5}\'' - command = "ibv_devinfo | awk '$1 ~ /hca_id/||/link_layer:/ {print $1,$2}' | awk '{print $2}'" - output = subprocess.run( - command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True, check=False, universal_newlines=True - ) - lines = output.stdout.splitlines() - ib_devices = [] - for i in range(len(lines) - 1): - if 'InfiniBand' in lines[i + 1]: - ib_devices.append(lines[i]) - return ib_devices - def __get_numa_cores(self, numa_index): """Get the last two cores from different physical cpu core of NUMA. @@ -150,18 +136,22 @@ def _preprocess(self): ) return False else: - command = os.path.join(self._args.bin_dir, self._bin_name) - numa_cores = self.__get_numa_cores(self._args.numa) - server_core = int(numa_cores[-1]) - client_core = int(numa_cores[-3]) - command += ' ' + str(server_core) + ' ' + str(client_core) - command += ' ' + ib_command - command += command_mode + ' -F' - command += ' --iters=' + str(self._args.n) - command += ' -d ' + self.__get_ib_devices()[self._args.ib_index] - command += ' -p ' + str(network.get_free_port()) - self._commands.append(command) - + try: + command = os.path.join(self._args.bin_dir, self._bin_name) + numa_cores = self.__get_numa_cores(self._args.numa) + server_core = int(numa_cores[-1]) + client_core = int(numa_cores[-3]) + command += ' ' + str(server_core) + ' ' + str(client_core) + command += ' ' + ib_command + command += command_mode + ' -F' + command += ' --iters=' + str(self._args.n) + command += ' -d ' + network.get_ib_devices()[self._args.ib_index] + command += ' -p ' + str(network.get_free_port()) + self._commands.append(command) + except BaseException: + self._result.set_return_code(ReturnCode.MICROBENCHMARK_DEVICE_GETTING_FAILURE) + logger.error('Getting devices failure - benchmark: {}.'.format(self._name)) + return False return True def _process_raw_result(self, cmd_idx, raw_output): @@ -177,9 +167,7 @@ def 
_process_raw_result(self, cmd_idx, raw_output): True if the raw output string is valid and result can be extracted. """ ib_command = self._args.commands[cmd_idx] - self._result.add_raw_data( - 'raw_output_' + str(self._args.ib_index) + '_' + ib_command + '_' + self._args.mode, raw_output - ) + self._result.add_raw_data('raw_output_' + str(cmd_idx) + '_IB' + str(self._args.ib_index), raw_output) valid = False content = raw_output.splitlines() diff --git a/superbench/benchmarks/return_code.py b/superbench/benchmarks/return_code.py index 0991ddb22..da207d01a 100644 --- a/superbench/benchmarks/return_code.py +++ b/superbench/benchmarks/return_code.py @@ -28,3 +28,4 @@ class ReturnCode(Enum): MICROBENCHMARK_EXECUTION_FAILURE = 32 MICROBENCHMARK_RESULT_PARSING_FAILURE = 33 MICROBENCHMARK_UNSUPPORTED_ARCHITECTURE = 34 + MICROBENCHMARK_DEVICE_GETTING_FAILURE = 35 diff --git a/superbench/common/utils/network.py b/superbench/common/utils/network.py index 4e96f24b4..0ea767491 100644 --- a/superbench/common/utils/network.py +++ b/superbench/common/utils/network.py @@ -4,6 +4,7 @@ """Network Utility.""" import socket +import subprocess from contextlib import closing @@ -17,3 +18,18 @@ def get_free_port(): s.bind(('', 0)) s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) return s.getsockname()[1] + + +def get_ib_devices(): + """Get available ordered IB devices in the system and filter ethernet devices.""" + # command = 'ls -l /sys/class/infiniband/* | awk \'{print $9}\' | sort | awk -F\'/\' \'{print $5}\'' + command = "ibv_devinfo | awk '$1 ~ /hca_id/||/link_layer:/ {print $1,$2}' | awk '{print $2}'" + output = subprocess.run( + command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True, check=False, universal_newlines=True + ) + lines = output.stdout.splitlines() + ib_devices = [] + for i in range(len(lines) - 1): + if 'InfiniBand' in lines[i + 1]: + ib_devices.append(lines[i]) + return ib_devices diff --git 
a/tests/benchmarks/micro_benchmarks/test_rdma_loopback_performance.py b/tests/benchmarks/micro_benchmarks/test_rdma_loopback_performance.py index e69de29bb..4ac69c02b 100644 --- a/tests/benchmarks/micro_benchmarks/test_rdma_loopback_performance.py +++ b/tests/benchmarks/micro_benchmarks/test_rdma_loopback_performance.py @@ -0,0 +1,161 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""Tests for rdma-loopback benchmark.""" + +import os +import numbers +import unittest +from pathlib import Path + +from superbench.benchmarks import BenchmarkRegistry, ReturnCode, Platform, BenchmarkType +from superbench.common.utils import network + + +class RDMALoopbackTest(unittest.TestCase): + """Tests for RDMALoopback benchmark.""" + def create_fake_bin(self): + """Method called to prepare the test fixture.""" + # Create fake binary file just for testing. + os.environ['SB_MICRO_PATH'] = '/tmp/superbench/' + binary_path = os.path.join(os.getenv('SB_MICRO_PATH'), 'bin') + Path(binary_path).mkdir(parents=True, exist_ok=True) + self.__binary_file = Path(os.path.join(binary_path, 'run_perftest_loopback')) + self.__binary_file.touch(mode=0o755, exist_ok=True) + + def test_rdma_loopback_performance(self): + """Test rdma-loopback benchmark.""" + # Condition without RDMA devices + if (len(network.get_ib_devices()) < 1): + # Test for registry. + benchmark_name = 'rdma-loopback' + (benchmark_class, + predefine_params) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(benchmark_name, Platform.CPU) + assert (benchmark_class) + + # Test for preprocess + parameters = '--ib_index 0 --numa 0 --n 2000' + benchmark = benchmark_class(benchmark_name, parameters=parameters) + self.create_fake_bin() + ret = benchmark._preprocess() + assert (ret is False) + assert (benchmark.return_code is ReturnCode.MICROBENCHMARK_DEVICE_GETTING_FAILURE) + + raw_output_AF = """ + ************************************ + * Waiting for client to connect... 
* + ************************************ + --------------------------------------------------------------------------------------- + RDMA_Write BW Test + Dual-port : OFF Device : ibP257p0s0 + Number of qps : 1 Transport type : IB + Connection type : RC Using SRQ : OFF + PCIe relax order: ON + --------------------------------------------------------------------------------------- + RDMA_Write BW Test + Dual-port : OFF Device : ibP257p0s0 + Number of qps : 1 Transport type : IB + Connection type : RC Using SRQ : OFF + PCIe relax order: ON + ibv_wr* API : ON + TX depth : 128 + CQ Moderation : 100 + Mtu : 4096[B] + Link type : IB + Max inline data : 0[B] + rdma_cm QPs : OFF + Data ex. method : Ethernet + --------------------------------------------------------------------------------------- + ibv_wr* API : ON + CQ Moderation : 100 + Mtu : 4096[B] + Link type : IB + Max inline data : 0[B] + rdma_cm QPs : OFF + Data ex. method : Ethernet + --------------------------------------------------------------------------------------- + local address: LID 0xd06 QPN 0x092f PSN 0x3ff1bc RKey 0x080329 VAddr 0x007fc97ff50000 + local address: LID 0xd06 QPN 0x092e PSN 0x3eb82d RKey 0x080228 VAddr 0x007f19adcbf000 + remote address: LID 0xd06 QPN 0x092e PSN 0x3eb82d RKey 0x080228 VAddr 0x007f19adcbf000 + remote address: LID 0xd06 QPN 0x092f PSN 0x3ff1bc RKey 0x080329 VAddr 0x007fc97ff50000 + --------------------------------------------------------------------------------------- + --------------------------------------------------------------------------------------- + #bytes #iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps] + #bytes #iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps] + 2 2000 5.32 5.30 2.778732 + 4 2000 10.65 10.64 2.788833 + 8 2000 21.30 21.27 2.787609 + 16 2000 42.60 42.55 2.788268 + 32 2000 84.90 82.82 2.713896 + 64 2000 173.55 171.66 2.812504 + 128 2000 362.27 353.83 2.898535 + 256 2000 687.82 679.37 2.782698 + 512 2000 1337.12 1311.59 2.686135 + 
1024 2000 2674.25 2649.39 2.712980 + 2048 2000 5248.56 5118.18 2.620509 + 4096 2000 10034.02 9948.41 2.546793 + 8192 2000 18620.51 12782.56 1.636168 + 16384 2000 23115.27 16782.50 1.074080 + 32768 2000 22927.94 18586.03 0.594753 + 65536 2000 23330.56 21167.79 0.338685 + 131072 2000 22750.35 21443.14 0.171545 + 262144 2000 22673.63 22411.35 0.089645 + 524288 2000 22679.02 22678.86 0.045358 + 1048576 2000 22817.06 22816.86 0.022817 + 2097152 2000 22919.37 22919.27 0.011460 + 4194304 2000 23277.93 23277.91 0.005819 + 8388608 2000 23240.68 23240.68 0.002905 + --------------------------------------------------------------------------------------- + 8388608 2000 23240.68 23240.68 0.002905 + --------------------------------------------------------------------------------------- + """ + assert (benchmark._process_raw_result(0, raw_output_AF)) + + self.__binary_file.unlink() + + # Condition with RDMA devices + else: + # Test for registry, preprocess and run. + parameters = '--ib_index 0 --numa 0 --n 2000' + context = BenchmarkRegistry.create_benchmark_context('rdma-loopback', parameters=parameters) + + assert (BenchmarkRegistry.is_benchmark_context_valid(context)) + benchmark = BenchmarkRegistry.launch_benchmark(context) + + # Check results and metrics. + assert (benchmark.run_count == 1) + assert (benchmark.return_code == ReturnCode.SUCCESS) + assert ('raw_output_0_IB0' in benchmark.raw_data) + assert (len(benchmark.raw_data['raw_output_0_IB0']) == 1) + assert (isinstance(benchmark.raw_data['raw_output_0_IB0'][0], str)) + + # Test for process_raw_data. + # Positive case - valid raw output. 
+ metric_list = [] + message_sizes = ['8388608', '4194304', '2097152', '1048576'] + for ib_command in benchmark._args.commands: + for size in message_sizes: + metric = 'RDMA_{}_{}_{}_{}_{}_avg'.format( + str(benchmark._args.ib_index), benchmark._args.mode, size, str(benchmark._args.n), ib_command + ) + metric_list.append(metric) + for metric in metric_list: + assert (metric in benchmark.result) + assert (len(benchmark.result[metric]) == 1) + assert (isinstance(benchmark.result[metric][0], numbers.Number)) + + # Negative case - Add invalid raw output. + assert (benchmark._process_raw_result(0, 'Invalid raw output') is False) + + # Check basic information. + assert (benchmark.name == 'rdma-loopback') + assert (benchmark.type == BenchmarkType.MICRO) + assert (benchmark._bin_name == 'run_perftest_loopback') + + # Check parameters specified in BenchmarkContext. + assert (benchmark._args.ib_index == 0) + assert (benchmark._args.numa == 0) + assert (benchmark._args.n == 2000) + assert (benchmark._args.size == 8388608) + assert (benchmark._args.commands == ['ib_write_bw']) + assert (benchmark._args.mode == 'AF') From cafcc3fffc62bd07a8cfb828a0876b733e6d58c1 Mon Sep 17 00:00:00 2001 From: yukirora Date: Tue, 29 Jun 2021 19:40:27 +0800 Subject: [PATCH 03/10] revise get_ib_devices to consider multi port condition --- superbench/common/utils/network.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/superbench/common/utils/network.py b/superbench/common/utils/network.py index 0ea767491..f0b03695e 100644 --- a/superbench/common/utils/network.py +++ b/superbench/common/utils/network.py @@ -22,7 +22,6 @@ def get_free_port(): def get_ib_devices(): """Get available ordered IB devices in the system and filter ethernet devices.""" - # command = 'ls -l /sys/class/infiniband/* | awk \'{print $9}\' | sort | awk -F\'/\' \'{print $5}\'' command = "ibv_devinfo | awk '$1 ~ /hca_id/||/link_layer:/ {print $1,$2}' | awk '{print $2}'" output = subprocess.run( command, 
stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True, check=False, universal_newlines=True @@ -30,6 +29,6 @@ def get_ib_devices(): lines = output.stdout.splitlines() ib_devices = [] for i in range(len(lines) - 1): - if 'InfiniBand' in lines[i + 1]: + if 'InfiniBand' in lines[i + 1] and 'InfiniBand' not in lines[i]: ib_devices.append(lines[i]) return ib_devices From befeb3ebc4d01fe80ed28801e6db761f4b16c881 Mon Sep 17 00:00:00 2001 From: root Date: Wed, 30 Jun 2021 09:06:47 +0000 Subject: [PATCH 04/10] add support for runner to parallel running --- .../benchmarks/rdma_loopback_performance.py | 7 +- .../rdma_loopback_performance.py | 31 +- superbench/config/default.yaml | 11 + .../test_rdma_loopback_performance.py | 317 ++++++++++-------- 4 files changed, 221 insertions(+), 145 deletions(-) diff --git a/examples/benchmarks/rdma_loopback_performance.py b/examples/benchmarks/rdma_loopback_performance.py index e001b00b3..014089a8f 100644 --- a/examples/benchmarks/rdma_loopback_performance.py +++ b/examples/benchmarks/rdma_loopback_performance.py @@ -1,18 +1,17 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. -"""Model benchmark example for RDMA loopback performance. +"""Micro benchmark example for RDMA loopback performance. 
Commands to run: - python3 examples/benchmarks/rdma_loopback_performance_performance.py + python examples/benchmarks/rdma_loopback_performance_performance.py """ from superbench.benchmarks import BenchmarkRegistry from superbench.common.utils import logger if __name__ == '__main__': - parameters = '--ib_index 0 --numa 1' - context = BenchmarkRegistry.create_benchmark_context('rdma-loopback', parameters=parameters) + context = BenchmarkRegistry.create_benchmark_context('rdma-loopback') benchmark = BenchmarkRegistry.launch_benchmark(context) if benchmark: diff --git a/superbench/benchmarks/micro_benchmarks/rdma_loopback_performance.py b/superbench/benchmarks/micro_benchmarks/rdma_loopback_performance.py index 106546a31..a3d198c1a 100644 --- a/superbench/benchmarks/micro_benchmarks/rdma_loopback_performance.py +++ b/superbench/benchmarks/micro_benchmarks/rdma_loopback_performance.py @@ -35,7 +35,7 @@ def add_parser_arguments(self): '--ib_index', type=int, default=0, - required=True, + required=False, help='The index of ib device.', ) self._parser.add_argument( @@ -69,7 +69,7 @@ def add_parser_arguments(self): '--numa', type=int, default=0, - required=True, + required=False, help='The index of numa node.', ) @@ -88,6 +88,19 @@ def __get_numa_cores(self, numa_index): ) return output.stdout.splitlines()[0].split(' ') + def __get_arguments_from_env(self): + """Read environment variables from runner used for parallel and fill in ib_index and numa_node_index. 
+ + Get 'PROC_RANK'(rank of current process) 'IB_DEVICES' 'NUMA_NODES' environment variables + Get ib_index and numa_node_index according to 'NUMA_NODES'['PROC_RANK'] and 'IB_DEVICES'['PROC_RANK'] + """ + if os.getenv('PROC_RANK'): + rank = int(os.getenv('PROC_RANK')) + if os.getenv('IB_DEVICES'): + self._args.ib_index = int(os.getenv('IB_DEVICES').split(',')[rank]) + if os.getenv('NUMA_NODES'): + self._args.numa = int(os.getenv('NUMA_NODES').split(',')[rank]) + def _preprocess(self): """Preprocess/preparation operations before the benchmarking. @@ -97,12 +110,15 @@ def _preprocess(self): if not super()._preprocess(): return False + self.__get_arguments_from_env() + + # Format the arguments if not isinstance(self._args.commands, list): self._args.commands = [self._args.commands] self._args.commands = [command.lower() for command in self._args.commands] - self._args.mode = self._args.mode.upper() + # Check whether arguments are valid if str(self._args.size) not in self.__message_sizes: self._result.set_return_code(ReturnCode.INVALID_ARGUMENT) logger.error( @@ -111,12 +127,11 @@ def _preprocess(self): ) ) return False - command_mode = '' if self._args.mode == 'AF': command_mode = ' -a' elif self._args.mode == 'S': - command_mode = ' -s ' + self._args.size + command_mode = ' -s ' + str(self._args.size) else: self._result.set_return_code(ReturnCode.INVALID_ARGUMENT) logger.error( @@ -148,9 +163,9 @@ def _preprocess(self): command += ' -d ' + network.get_ib_devices()[self._args.ib_index] command += ' -p ' + str(network.get_free_port()) self._commands.append(command) - except BaseException: + except BaseException as e: self._result.set_return_code(ReturnCode.MICROBENCHMARK_DEVICE_GETTING_FAILURE) - logger.error('Getting devices failure - benchmark: {}.'.format(self._name)) + logger.error('Getting devices failure - benchmark: {}, message: {}.'.format(self._name, str(e))) return False return True @@ -200,4 +215,4 @@ def _process_raw_result(self, cmd_idx, raw_output): 
return True -BenchmarkRegistry.register_benchmark('rdma-loopback', RDMALoopback, parameters='--ib_index 0 --numa 1') +BenchmarkRegistry.register_benchmark('rdma-loopback', RDMALoopback) diff --git a/superbench/config/default.yaml b/superbench/config/default.yaml index 491cad2f2..5e43dd485 100644 --- a/superbench/config/default.yaml +++ b/superbench/config/default.yaml @@ -27,6 +27,17 @@ superbench: model_action: - train benchmarks: + rdma-loopback: + enable: true + modes: + - name: local + proc_num: 4 + prefix: PROC_RANK={proc_rank} IB_DEVICES=0,2,4,6 NUMA_NODES=1,0,3,2 + parallel: yes + - name: local + proc_num: 4 + prefix: PROC_RANK={proc_rank} IB_DEVICES=1,3,5,7 NUMA_NODES=1,0,3,2 + parallel: yes kernel-launch: <<: *default_local_mode gemm-flops: diff --git a/tests/benchmarks/micro_benchmarks/test_rdma_loopback_performance.py b/tests/benchmarks/micro_benchmarks/test_rdma_loopback_performance.py index 4ac69c02b..dbd6aba4f 100644 --- a/tests/benchmarks/micro_benchmarks/test_rdma_loopback_performance.py +++ b/tests/benchmarks/micro_benchmarks/test_rdma_loopback_performance.py @@ -25,137 +25,188 @@ def create_fake_bin(self): def test_rdma_loopback_performance(self): """Test rdma-loopback benchmark.""" - # Condition without RDMA devices - if (len(network.get_ib_devices()) < 1): - # Test for registry. - benchmark_name = 'rdma-loopback' - (benchmark_class, - predefine_params) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(benchmark_name, Platform.CPU) - assert (benchmark_class) - - # Test for preprocess - parameters = '--ib_index 0 --numa 0 --n 2000' - benchmark = benchmark_class(benchmark_name, parameters=parameters) - self.create_fake_bin() - ret = benchmark._preprocess() - assert (ret is False) - assert (benchmark.return_code is ReturnCode.MICROBENCHMARK_DEVICE_GETTING_FAILURE) - - raw_output_AF = """ - ************************************ - * Waiting for client to connect... 
* - ************************************ - --------------------------------------------------------------------------------------- - RDMA_Write BW Test - Dual-port : OFF Device : ibP257p0s0 - Number of qps : 1 Transport type : IB - Connection type : RC Using SRQ : OFF - PCIe relax order: ON - --------------------------------------------------------------------------------------- - RDMA_Write BW Test - Dual-port : OFF Device : ibP257p0s0 - Number of qps : 1 Transport type : IB - Connection type : RC Using SRQ : OFF - PCIe relax order: ON - ibv_wr* API : ON - TX depth : 128 - CQ Moderation : 100 - Mtu : 4096[B] - Link type : IB - Max inline data : 0[B] - rdma_cm QPs : OFF - Data ex. method : Ethernet - --------------------------------------------------------------------------------------- - ibv_wr* API : ON - CQ Moderation : 100 - Mtu : 4096[B] - Link type : IB - Max inline data : 0[B] - rdma_cm QPs : OFF - Data ex. method : Ethernet - --------------------------------------------------------------------------------------- - local address: LID 0xd06 QPN 0x092f PSN 0x3ff1bc RKey 0x080329 VAddr 0x007fc97ff50000 - local address: LID 0xd06 QPN 0x092e PSN 0x3eb82d RKey 0x080228 VAddr 0x007f19adcbf000 - remote address: LID 0xd06 QPN 0x092e PSN 0x3eb82d RKey 0x080228 VAddr 0x007f19adcbf000 - remote address: LID 0xd06 QPN 0x092f PSN 0x3ff1bc RKey 0x080329 VAddr 0x007fc97ff50000 - --------------------------------------------------------------------------------------- - --------------------------------------------------------------------------------------- - #bytes #iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps] - #bytes #iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps] - 2 2000 5.32 5.30 2.778732 - 4 2000 10.65 10.64 2.788833 - 8 2000 21.30 21.27 2.787609 - 16 2000 42.60 42.55 2.788268 - 32 2000 84.90 82.82 2.713896 - 64 2000 173.55 171.66 2.812504 - 128 2000 362.27 353.83 2.898535 - 256 2000 687.82 679.37 2.782698 - 512 2000 1337.12 1311.59 2.686135 - 
1024 2000 2674.25 2649.39 2.712980 - 2048 2000 5248.56 5118.18 2.620509 - 4096 2000 10034.02 9948.41 2.546793 - 8192 2000 18620.51 12782.56 1.636168 - 16384 2000 23115.27 16782.50 1.074080 - 32768 2000 22927.94 18586.03 0.594753 - 65536 2000 23330.56 21167.79 0.338685 - 131072 2000 22750.35 21443.14 0.171545 - 262144 2000 22673.63 22411.35 0.089645 - 524288 2000 22679.02 22678.86 0.045358 - 1048576 2000 22817.06 22816.86 0.022817 - 2097152 2000 22919.37 22919.27 0.011460 - 4194304 2000 23277.93 23277.91 0.005819 - 8388608 2000 23240.68 23240.68 0.002905 - --------------------------------------------------------------------------------------- - 8388608 2000 23240.68 23240.68 0.002905 - --------------------------------------------------------------------------------------- + raw_output = {} + raw_output['AF'] = """ +************************************ +* Waiting for client to connect... * +************************************ +--------------------------------------------------------------------------------------- + RDMA_Write BW Test +Dual-port : OFF Device : ibP257p0s0 +Number of qps : 1 Transport type : IB +Connection type : RC Using SRQ : OFF +PCIe relax order: ON +--------------------------------------------------------------------------------------- + RDMA_Write BW Test +Dual-port : OFF Device : ibP257p0s0 +Number of qps : 1 Transport type : IB +Connection type : RC Using SRQ : OFF +PCIe relax order: ON +ibv_wr* API : ON +TX depth : 128 +CQ Moderation : 100 +Mtu : 4096[B] +Link type : IB +Max inline data : 0[B] +rdma_cm QPs : OFF +Data ex. method : Ethernet +--------------------------------------------------------------------------------------- +ibv_wr* API : ON +CQ Moderation : 100 +Mtu : 4096[B] +Link type : IB +Max inline data : 0[B] +rdma_cm QPs : OFF +Data ex. 
method : Ethernet +--------------------------------------------------------------------------------------- +local address: LID 0xd06 QPN 0x092f PSN 0x3ff1bc RKey 0x080329 VAddr 0x007fc97ff50000 +local address: LID 0xd06 QPN 0x092e PSN 0x3eb82d RKey 0x080228 VAddr 0x007f19adcbf000 +remote address: LID 0xd06 QPN 0x092e PSN 0x3eb82d RKey 0x080228 VAddr 0x007f19adcbf000 +remote address: LID 0xd06 QPN 0x092f PSN 0x3ff1bc RKey 0x080329 VAddr 0x007fc97ff50000 +--------------------------------------------------------------------------------------- +--------------------------------------------------------------------------------------- +#bytes #iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps] +#bytes #iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps] +2 2000 5.32 5.30 2.778732 +4 2000 10.65 10.64 2.788833 +8 2000 21.30 21.27 2.787609 +16 2000 42.60 42.55 2.788268 +32 2000 84.90 82.82 2.713896 +64 2000 173.55 171.66 2.812504 +128 2000 362.27 353.83 2.898535 +256 2000 687.82 679.37 2.782698 +512 2000 1337.12 1311.59 2.686135 +1024 2000 2674.25 2649.39 2.712980 +2048 2000 5248.56 5118.18 2.620509 +4096 2000 10034.02 9948.41 2.546793 +8192 2000 18620.51 12782.56 1.636168 +16384 2000 23115.27 16782.50 1.074080 +32768 2000 22927.94 18586.03 0.594753 +65536 2000 23330.56 21167.79 0.338685 +131072 2000 22750.35 21443.14 0.171545 +262144 2000 22673.63 22411.35 0.089645 +524288 2000 22679.02 22678.86 0.045358 +1048576 2000 22817.06 22816.86 0.022817 +2097152 2000 22919.37 22919.27 0.011460 +4194304 2000 23277.93 23277.91 0.005819 +8388608 2000 23240.68 23240.68 0.002905 +--------------------------------------------------------------------------------------- +8388608 2000 23240.68 23240.68 0.002905 +--------------------------------------------------------------------------------------- """ - assert (benchmark._process_raw_result(0, raw_output_AF)) - - self.__binary_file.unlink() - - # Condition with RDMA devices - else: - # Test for registry, preprocess and run. 
- parameters = '--ib_index 0 --numa 0 --n 2000' - context = BenchmarkRegistry.create_benchmark_context('rdma-loopback', parameters=parameters) - - assert (BenchmarkRegistry.is_benchmark_context_valid(context)) - benchmark = BenchmarkRegistry.launch_benchmark(context) - - # Check results and metrics. - assert (benchmark.run_count == 1) - assert (benchmark.return_code == ReturnCode.SUCCESS) - assert ('raw_output_0_IB0' in benchmark.raw_data) - assert (len(benchmark.raw_data['raw_output_0_IB0']) == 1) - assert (isinstance(benchmark.raw_data['raw_output_0_IB0'][0], str)) - - # Test for process_raw_data. - # Positive case - valid raw output. - metric_list = [] - message_sizes = ['8388608', '4194304', '2097152', '1048576'] - for ib_command in benchmark._args.commands: - for size in message_sizes: - metric = 'RDMA_{}_{}_{}_{}_{}_avg'.format( - str(benchmark._args.ib_index), benchmark._args.mode, size, str(benchmark._args.n), ib_command - ) - metric_list.append(metric) - for metric in metric_list: - assert (metric in benchmark.result) - assert (len(benchmark.result[metric]) == 1) - assert (isinstance(benchmark.result[metric][0], numbers.Number)) - - # Negative case - Add invalid raw output. - assert (benchmark._process_raw_result(0, 'Invalid raw output') is False) - - # Check basic information. - assert (benchmark.name == 'rdma-loopback') - assert (benchmark.type == BenchmarkType.MICRO) - assert (benchmark._bin_name == 'run_perftest_loopback') - - # Check parameters specified in BenchmarkContext. 
- assert (benchmark._args.ib_index == 0) - assert (benchmark._args.numa == 0) - assert (benchmark._args.n == 2000) - assert (benchmark._args.size == 8388608) - assert (benchmark._args.commands == ['ib_write_bw']) - assert (benchmark._args.mode == 'AF') + raw_output['S'] = """ + RDMA_Write BW Test + Dual-port : OFF Device : ibP257p0s0 + Number of qps : 1 Transport type : IB + Connection type : RC Using SRQ : OFF + PCIe relax order: ON + TX depth : 128 + CQ Moderation : 1 + Mtu : 4096[B] + Link type : IB + Max inline data : 0[B] + rdma_cm QPs : OFF + Data ex. method : Ethernet +--------------------------------------------------------------------------------------- + local address: LID 0xd06 QPN 0x095f PSN 0x3c9e82 RKey 0x080359 VAddr 0x007f9fc479c000 + remote address: LID 0xd06 QPN 0x095e PSN 0xbd024b RKey 0x080258 VAddr 0x007fe62504b000 +--------------------------------------------------------------------------------------- + #bytes #iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps] + 8388608 20000 24056.74 24056.72 0.003007 +************************************ +* Waiting for client to connect... * +************************************ +--------------------------------------------------------------------------------------- + RDMA_Write BW Test + Dual-port : OFF Device : ibP257p0s0 + Number of qps : 1 Transport type : IB + Connection type : RC Using SRQ : OFF + PCIe relax order: ON + CQ Moderation : 1 + Mtu : 4096[B] + Link type : IB + Max inline data : 0[B] + rdma_cm QPs : OFF + Data ex. 
method : Ethernet +--------------------------------------------------------------------------------------- + local address: LID 0xd06 QPN 0x095e PSN 0xbd024b RKey 0x080258 VAddr 0x007fe62504b000 + remote address: LID 0xd06 QPN 0x095f PSN 0x3c9e82 RKey 0x080359 VAddr 0x007f9fc479c000 +--------------------------------------------------------------------------------------- + #bytes #iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps] + 8388608 20000 24056.74 24056.72 0.003007 +--------------------------------------------------------------------------------------- + +--------------------------------------------------------------------------------------- +--------------------------------------------------------------------------------------- +""" + for mode in ['AF', 'S']: + # Test without RDMA devices + if (len(network.get_ib_devices()) < 1): + self.create_fake_bin() + # Check registry. + benchmark_name = 'rdma-loopback' + (benchmark_class, predefine_params + ) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(benchmark_name, Platform.CPU) + assert (benchmark_class) + + # Check preprocess + parameters = '--ib_index 0 --numa 0 --n 2000 --mode ' + mode + benchmark = benchmark_class(benchmark_name, parameters=parameters) + ret = benchmark._preprocess() + assert (ret is False) + assert (benchmark.return_code is ReturnCode.MICROBENCHMARK_DEVICE_GETTING_FAILURE) + + assert (benchmark._process_raw_result(0, raw_output[mode])) + + self.__binary_file.unlink() + + # Test with RDMA devices + else: + # Check registry, preprocess and run. + parameters = '--ib_index 0 --numa 0 --n 2000 --mode ' + mode + context = BenchmarkRegistry.create_benchmark_context('rdma-loopback', parameters=parameters) + + assert (BenchmarkRegistry.is_benchmark_context_valid(context)) + benchmark = BenchmarkRegistry.launch_benchmark(context) + + # Check raw_data. 
+ assert (benchmark.run_count == 1) + assert (benchmark.return_code == ReturnCode.SUCCESS) + assert ('raw_output_0_IB0' in benchmark.raw_data) + assert (len(benchmark.raw_data['raw_output_0_IB0']) == 1) + assert (isinstance(benchmark.raw_data['raw_output_0_IB0'][0], str)) + + # Check function process_raw_data. + # Positive case - valid raw output. + metric_list = [] + message_sizes = [] + if mode == 'AF': + message_sizes = ['8388608', '4194304', '2097152', '1048576'] + elif mode == 'S': + message_sizes = [benchmark._args.size] + for ib_command in benchmark._args.commands: + for size in message_sizes: + metric = 'RDMA_{}_{}_{}_{}_{}_avg'.format( + str(benchmark._args.ib_index), benchmark._args.mode, size, str(benchmark._args.n), ib_command + ) + metric_list.append(metric) + for metric in metric_list: + assert (metric in benchmark.result) + assert (len(benchmark.result[metric]) == 1) + assert (isinstance(benchmark.result[metric][0], numbers.Number)) + + # Negative case - Add invalid raw output. + assert (benchmark._process_raw_result(0, 'Invalid raw output') is False) + + # Check basic information. + assert (benchmark.name == 'rdma-loopback') + assert (benchmark.type == BenchmarkType.MICRO) + assert (benchmark._bin_name == 'run_perftest_loopback') + + # Check parameters specified in BenchmarkContext. 
+ assert (benchmark._args.ib_index == 0) + assert (benchmark._args.numa == 0) + assert (benchmark._args.n == 2000) + assert (benchmark._args.size == 8388608) + assert (benchmark._args.commands == ['ib_write_bw']) + assert (benchmark._args.mode == mode) From d1480d92a09e4176db1eb344f446306732ec03f5 Mon Sep 17 00:00:00 2001 From: yukirora Date: Thu, 1 Jul 2021 09:08:59 +0800 Subject: [PATCH 05/10] revise test a little bit --- .../test_rdma_loopback_performance.py | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/tests/benchmarks/micro_benchmarks/test_rdma_loopback_performance.py b/tests/benchmarks/micro_benchmarks/test_rdma_loopback_performance.py index dbd6aba4f..8314192af 100644 --- a/tests/benchmarks/micro_benchmarks/test_rdma_loopback_performance.py +++ b/tests/benchmarks/micro_benchmarks/test_rdma_loopback_performance.py @@ -14,14 +14,20 @@ class RDMALoopbackTest(unittest.TestCase): """Tests for RDMALoopback benchmark.""" - def create_fake_bin(self): + def setUp(self): """Method called to prepare the test fixture.""" - # Create fake binary file just for testing. - os.environ['SB_MICRO_PATH'] = '/tmp/superbench/' - binary_path = os.path.join(os.getenv('SB_MICRO_PATH'), 'bin') - Path(binary_path).mkdir(parents=True, exist_ok=True) - self.__binary_file = Path(os.path.join(binary_path, 'run_perftest_loopback')) - self.__binary_file.touch(mode=0o755, exist_ok=True) + if (len(network.get_ib_devices()) < 1): + # Create fake binary file just for testing. 
+ os.environ['SB_MICRO_PATH'] = '/tmp/superbench/' + binary_path = os.path.join(os.getenv('SB_MICRO_PATH'), 'bin') + Path(binary_path).mkdir(parents=True, exist_ok=True) + self.__binary_file = Path(os.path.join(binary_path, 'run_perftest_loopback')) + self.__binary_file.touch(mode=0o755, exist_ok=True) + + def tearDown(self): + """Method called after the test method has been called and the result recorded.""" + if (len(network.get_ib_devices()) < 1): + self.__binary_file.unlink() def test_rdma_loopback_performance(self): """Test rdma-loopback benchmark.""" @@ -142,7 +148,6 @@ def test_rdma_loopback_performance(self): for mode in ['AF', 'S']: # Test without RDMA devices if (len(network.get_ib_devices()) < 1): - self.create_fake_bin() # Check registry. benchmark_name = 'rdma-loopback' (benchmark_class, predefine_params @@ -158,8 +163,6 @@ def test_rdma_loopback_performance(self): assert (benchmark._process_raw_result(0, raw_output[mode])) - self.__binary_file.unlink() - # Test with RDMA devices else: # Check registry, preprocess and run. From 18b935181d8733495e29a43d2fd0ca858fd645f2 Mon Sep 17 00:00:00 2001 From: root Date: Wed, 7 Jul 2021 07:37:17 +0000 Subject: [PATCH 06/10] add gitsubmodule for perftest --- .gitmodules | 3 +++ third_party/Makefile | 12 +++++++++--- third_party/perftest | 1 + 3 files changed, 13 insertions(+), 3 deletions(-) create mode 160000 third_party/perftest diff --git a/.gitmodules b/.gitmodules index 07f7559ce..4f3732b45 100644 --- a/.gitmodules +++ b/.gitmodules @@ -2,3 +2,6 @@ path = third_party/cutlass url = https://github.com/NVIDIA/cutlass.git branch = v2.4.0 +[submodule "third_party/perftest"] + path = third_party/perftest + url = https://github.com/linux-rdma/perftest.git diff --git a/third_party/Makefile b/third_party/Makefile index 64f3e6d8d..9839e0788 100755 --- a/third_party/Makefile +++ b/third_party/Makefile @@ -4,15 +4,21 @@ SB_MICRO_PATH ?= "/usr/local" -.PHONY: all cutlass +.PHONY: all cutlass perftest # Build all targets. 
-all: cutlass +all: cutlass perftest # Build cutlass. -cutlass: +cutlass: ifneq (,$(wildcard cutlass/CMakeLists.txt)) cmake -DCMAKE_INSTALL_BINDIR=$(SB_MICRO_PATH)/bin -DCMAKE_INSTALL_LIBDIR=$(SB_MICRO_PATH)/lib -DCMAKE_BUILD_TYPE=Release \ -DCUTLASS_NVCC_ARCHS='70;80' -DCUTLASS_ENABLE_EXAMPLES=OFF -DCUTLASS_ENABLE_TESTS=OFF -S ./cutlass -B ./cutlass/build cmake --build ./cutlass/build -j 8 --target install endif +# Build perftest. +perftest: +ifneq (,$(wildcard perftest/autogen.sh)) + cd perftest && ./autogen.sh && ./configure CUDA_H_PATH=/usr/local/cuda/include/cuda.h --prefix=$(SB_MICRO_PATH) && make -j && make install +endif + diff --git a/third_party/perftest b/third_party/perftest new file mode 160000 index 000000000..7504ce48a --- /dev/null +++ b/third_party/perftest @@ -0,0 +1 @@ +Subproject commit 7504ce48ac396a02f4d00de359257b2cb8458f06 From d688ed80cc0935a470f284da47a6776c72b97e2c Mon Sep 17 00:00:00 2001 From: yukirora Date: Wed, 7 Jul 2021 09:34:54 +0000 Subject: [PATCH 07/10] remove util related code --- superbench/common/utils/__init__.py | 2 +- superbench/common/utils/network.py | 34 ------------------- .../test_computation_communication_overlap.py | 3 +- .../micro_benchmarks/test_sharding_matmul.py | 3 +- tests/benchmarks/utils.py | 17 ++++++++-- 5 files changed, 18 insertions(+), 41 deletions(-) delete mode 100644 superbench/common/utils/network.py diff --git a/superbench/common/utils/__init__.py b/superbench/common/utils/__init__.py index a339d41fd..708eed456 100644 --- a/superbench/common/utils/__init__.py +++ b/superbench/common/utils/__init__.py @@ -9,4 +9,4 @@ nv_helper = LazyImport('superbench.common.utils.nvidia_helper') -__all__ = ['SuperBenchLogger', 'logger', 'create_output_dir', 'get_sb_config', 'LazyImport', 'nv_helper', 'network'] +__all__ = ['SuperBenchLogger', 'logger', 'create_output_dir', 'get_sb_config', 'LazyImport', 'nv_helper'] diff --git a/superbench/common/utils/network.py b/superbench/common/utils/network.py deleted file 
mode 100644 index f0b03695e..000000000 --- a/superbench/common/utils/network.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -"""Network Utility.""" - -import socket -import subprocess -from contextlib import closing - - -def get_free_port(): - """Get a free port in current system. - - Return: - port (int): a free port in current system. - """ - with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: - s.bind(('', 0)) - s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - return s.getsockname()[1] - - -def get_ib_devices(): - """Get available ordered IB devices in the system and filter ethernet devices.""" - command = "ibv_devinfo | awk '$1 ~ /hca_id/||/link_layer:/ {print $1,$2}' | awk '{print $2}'" - output = subprocess.run( - command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True, check=False, universal_newlines=True - ) - lines = output.stdout.splitlines() - ib_devices = [] - for i in range(len(lines) - 1): - if 'InfiniBand' in lines[i + 1] and 'InfiniBand' not in lines[i]: - ib_devices.append(lines[i]) - return ib_devices diff --git a/tests/benchmarks/micro_benchmarks/test_computation_communication_overlap.py b/tests/benchmarks/micro_benchmarks/test_computation_communication_overlap.py index 598bdab09..381dec3ac 100644 --- a/tests/benchmarks/micro_benchmarks/test_computation_communication_overlap.py +++ b/tests/benchmarks/micro_benchmarks/test_computation_communication_overlap.py @@ -7,7 +7,6 @@ from tests.helper import decorator import tests.benchmarks.utils as utils -from superbench.common.utils import network from superbench.benchmarks import BenchmarkRegistry, Framework, BenchmarkType, ReturnCode from superbench.benchmarks.micro_benchmarks.computation_communication_overlap \ import ComputationCommunicationOverlap, ComputationKernelType @@ -57,7 +56,7 @@ def test_pytorch_computation_communication_overlap_fake_distributed(): parameters='--num_warmup 5 --num_steps 
10 --ratio 5', framework=Framework.PYTORCH ) - utils.setup_simulated_ddp_distributed_env(1, 0, network.get_free_port()) + utils.setup_simulated_ddp_distributed_env(1, 0, utils.get_free_port()) benchmark = BenchmarkRegistry.launch_benchmark(context) # Check basic information. diff --git a/tests/benchmarks/micro_benchmarks/test_sharding_matmul.py b/tests/benchmarks/micro_benchmarks/test_sharding_matmul.py index 03428d3b1..c828d7e18 100644 --- a/tests/benchmarks/micro_benchmarks/test_sharding_matmul.py +++ b/tests/benchmarks/micro_benchmarks/test_sharding_matmul.py @@ -5,7 +5,6 @@ import tests.benchmarks.utils as utils from tests.helper import decorator -from superbench.common.utils import network from superbench.benchmarks import BenchmarkRegistry, Platform, Framework, BenchmarkType, ReturnCode from superbench.benchmarks.micro_benchmarks.sharding_matmul import ShardingMatmul, ShardingMode @@ -23,7 +22,7 @@ def test_pytorch_sharding_matmul(): assert (BenchmarkRegistry.is_benchmark_context_valid(context)) - utils.setup_simulated_ddp_distributed_env(1, 0, network.get_free_port()) + utils.setup_simulated_ddp_distributed_env(1, 0, utils.get_free_port()) benchmark = BenchmarkRegistry.launch_benchmark(context) # Check basic information. diff --git a/tests/benchmarks/utils.py b/tests/benchmarks/utils.py index 87c46b1d8..3ef897c5e 100644 --- a/tests/benchmarks/utils.py +++ b/tests/benchmarks/utils.py @@ -4,11 +4,12 @@ """Utilities for benchmark tests.""" import os +import socket +from contextlib import closing import multiprocessing as multiprocessing from multiprocessing import Process from superbench.benchmarks import BenchmarkRegistry -from superbench.common.utils import network def clean_simulated_ddp_distributed_env(): @@ -20,6 +21,18 @@ def clean_simulated_ddp_distributed_env(): os.environ.pop('MASTER_PORT') +def get_free_port(): + """Get a free port in current system. + + Return: + port (int): a free port in current system. 
+ """ + with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: + s.bind(('', 0)) + s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + return s.getsockname()[1] + + def setup_simulated_ddp_distributed_env(world_size, local_rank, port): """Function to setup the simulated DDP distributed envionment variables.""" os.environ['WORLD_SIZE'] = str(world_size) @@ -45,7 +58,7 @@ def simulated_ddp_distributed_benchmark(context, world_size): Return: results (list): list of benchmark results from #world_size number of processes. """ - port = network.get_free_port() + port = get_free_port() process_list = [] multiprocessing.set_start_method('spawn') From ebf068a2d7e915d80020aa51339b3d57edcb45e4 Mon Sep 17 00:00:00 2001 From: yukirora Date: Tue, 13 Jul 2021 16:23:36 +0800 Subject: [PATCH 08/10] rename from RDMA to IB --- ...formance.py => ib_loopback_performance.py} | 6 +++--- .../benchmarks/micro_benchmarks/__init__.py | 4 ++-- ...formance.py => ib_loopback_performance.py} | 10 +++++----- superbench/config/default.yaml | 2 +- ...nce.py => test_ib_loopback_performance.py} | 20 +++++++++---------- 5 files changed, 21 insertions(+), 21 deletions(-) rename examples/benchmarks/{rdma_loopback_performance.py => ib_loopback_performance.py} (70%) rename superbench/benchmarks/micro_benchmarks/{rdma_loopback_performance.py => ib_loopback_performance.py} (96%) rename tests/benchmarks/micro_benchmarks/{test_rdma_loopback_performance.py => test_ib_loopback_performance.py} (95%) diff --git a/examples/benchmarks/rdma_loopback_performance.py b/examples/benchmarks/ib_loopback_performance.py similarity index 70% rename from examples/benchmarks/rdma_loopback_performance.py rename to examples/benchmarks/ib_loopback_performance.py index 014089a8f..0d3b8433b 100644 --- a/examples/benchmarks/rdma_loopback_performance.py +++ b/examples/benchmarks/ib_loopback_performance.py @@ -1,17 +1,17 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. 
-"""Micro benchmark example for RDMA loopback performance. +"""Micro benchmark example for IB loopback performance. Commands to run: - python examples/benchmarks/rdma_loopback_performance_performance.py + python examples/benchmarks/ib_loopback_performance.py """ from superbench.benchmarks import BenchmarkRegistry from superbench.common.utils import logger if __name__ == '__main__': - context = BenchmarkRegistry.create_benchmark_context('rdma-loopback') + context = BenchmarkRegistry.create_benchmark_context('ib-loopback') benchmark = BenchmarkRegistry.launch_benchmark(context) if benchmark: diff --git a/superbench/benchmarks/micro_benchmarks/__init__.py b/superbench/benchmarks/micro_benchmarks/__init__.py index 660eb5a9e..442d8e67d 100644 --- a/superbench/benchmarks/micro_benchmarks/__init__.py +++ b/superbench/benchmarks/micro_benchmarks/__init__.py @@ -10,9 +10,9 @@ from superbench.benchmarks.micro_benchmarks.cublas_function import CublasBenchmark from superbench.benchmarks.micro_benchmarks.cudnn_function import CudnnBenchmark from superbench.benchmarks.micro_benchmarks.gemm_flops_performance import GemmFlopsCuda -from superbench.benchmarks.micro_benchmarks.rdma_loopback_performance import RDMALoopback +from superbench.benchmarks.micro_benchmarks.ib_loopback_performance import IBLoopback __all__ = [ 'MicroBenchmark', 'MicroBenchmarkWithInvoke', 'ShardingMatmul', 'ComputationCommunicationOverlap', 'KernelLaunch', - 'CublasBenchmark', 'CudnnBenchmark', 'GemmFlopsCuda', 'RDMALoopback' + 'CublasBenchmark', 'CudnnBenchmark', 'GemmFlopsCuda', 'IBLoopback' ] diff --git a/superbench/benchmarks/micro_benchmarks/rdma_loopback_performance.py b/superbench/benchmarks/micro_benchmarks/ib_loopback_performance.py similarity index 96% rename from superbench/benchmarks/micro_benchmarks/rdma_loopback_performance.py rename to superbench/benchmarks/micro_benchmarks/ib_loopback_performance.py index a3d198c1a..cc23f0582 100644 ---
a/superbench/benchmarks/micro_benchmarks/rdma_loopback_performance.py +++ b/superbench/benchmarks/micro_benchmarks/ib_loopback_performance.py @@ -1,7 +1,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. -"""Module of the RDMA loopback benchmarks.""" +"""Module of the IB loopback benchmarks.""" import os import subprocess @@ -12,8 +12,8 @@ from superbench.benchmarks.micro_benchmarks import MicroBenchmarkWithInvoke -class RDMALoopback(MicroBenchmarkWithInvoke): - """The RDMA loopback performance benchmark class.""" +class IBLoopback(MicroBenchmarkWithInvoke): + """The IB loopback performance benchmark class.""" def __init__(self, name, parameters=''): """Constructor. @@ -193,7 +193,7 @@ def _process_raw_result(self, cmd_idx, raw_output): if self.__message_sizes[i] in line: values = list(filter(None, line.split(' '))) avg_bw = float(values[-2]) - metric = 'RDMA_{}_{}_{}_{}_{}_avg'.format( + metric = 'IB_{}_{}_{}_{}_{}_avg'.format( str(self._args.ib_index), self._args.mode, self.__message_sizes[i], str(self._args.n), ib_command ) @@ -215,4 +215,4 @@ def _process_raw_result(self, cmd_idx, raw_output): return True -BenchmarkRegistry.register_benchmark('rdma-loopback', RDMALoopback) +BenchmarkRegistry.register_benchmark('ib-loopback', IBLoopback) diff --git a/superbench/config/default.yaml b/superbench/config/default.yaml index 5e43dd485..be6ce4777 100644 --- a/superbench/config/default.yaml +++ b/superbench/config/default.yaml @@ -27,7 +27,7 @@ superbench: model_action: - train benchmarks: - rdma-loopback: + ib-loopback: enable: true modes: - name: local diff --git a/tests/benchmarks/micro_benchmarks/test_rdma_loopback_performance.py b/tests/benchmarks/micro_benchmarks/test_ib_loopback_performance.py similarity index 95% rename from tests/benchmarks/micro_benchmarks/test_rdma_loopback_performance.py rename to tests/benchmarks/micro_benchmarks/test_ib_loopback_performance.py index 8314192af..37882327d 100644 --- 
a/tests/benchmarks/micro_benchmarks/test_rdma_loopback_performance.py +++ b/tests/benchmarks/micro_benchmarks/test_ib_loopback_performance.py @@ -1,7 +1,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -"""Tests for rdma-loopback benchmark.""" +"""Tests for ib-loopback benchmark.""" import os import numbers @@ -12,8 +12,8 @@ from superbench.common.utils import network -class RDMALoopbackTest(unittest.TestCase): - """Tests for RDMALoopback benchmark.""" +class IBLoopbackTest(unittest.TestCase): + """Tests for IBLoopback benchmark.""" def setUp(self): """Method called to prepare the test fixture.""" if (len(network.get_ib_devices()) < 1): @@ -29,8 +29,8 @@ def tearDown(self): if (len(network.get_ib_devices()) < 1): self.__binary_file.unlink() - def test_rdma_loopback_performance(self): - """Test rdma-loopback benchmark.""" + def test_ib_loopback_performance(self): + """Test ib-loopback benchmark.""" raw_output = {} raw_output['AF'] = """ ************************************ @@ -146,10 +146,10 @@ def test_rdma_loopback_performance(self): --------------------------------------------------------------------------------------- """ for mode in ['AF', 'S']: - # Test without RDMA devices + # Test without ib devices if (len(network.get_ib_devices()) < 1): # Check registry. - benchmark_name = 'rdma-loopback' + benchmark_name = 'ib-loopback' (benchmark_class, predefine_params ) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(benchmark_name, Platform.CPU) assert (benchmark_class) @@ -163,11 +163,11 @@ def test_rdma_loopback_performance(self): assert (benchmark._process_raw_result(0, raw_output[mode])) - # Test with RDMA devices + # Test with ib devices else: # Check registry, preprocess and run. 
parameters = '--ib_index 0 --numa 0 --n 2000 --mode ' + mode - context = BenchmarkRegistry.create_benchmark_context('rdma-loopback', parameters=parameters) + context = BenchmarkRegistry.create_benchmark_context('ib-loopback', parameters=parameters) assert (BenchmarkRegistry.is_benchmark_context_valid(context)) benchmark = BenchmarkRegistry.launch_benchmark(context) @@ -202,7 +202,7 @@ def test_rdma_loopback_performance(self): assert (benchmark._process_raw_result(0, 'Invalid raw output') is False) # Check basic information. - assert (benchmark.name == 'rdma-loopback') + assert (benchmark.name == 'ib-loopback') assert (benchmark.type == BenchmarkType.MICRO) assert (benchmark._bin_name == 'run_perftest_loopback') From 16e7c172b3dda5941a3e737f403e6cf4a6d853ad Mon Sep 17 00:00:00 2001 From: yukirora Date: Tue, 13 Jul 2021 17:50:52 +0800 Subject: [PATCH 09/10] fix test issue and rename metric --- .../benchmarks/micro_benchmarks/ib_loopback_performance.py | 5 ++--- .../micro_benchmarks/test_ib_loopback_performance.py | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/superbench/benchmarks/micro_benchmarks/ib_loopback_performance.py b/superbench/benchmarks/micro_benchmarks/ib_loopback_performance.py index cc23f0582..5e0288c90 100644 --- a/superbench/benchmarks/micro_benchmarks/ib_loopback_performance.py +++ b/superbench/benchmarks/micro_benchmarks/ib_loopback_performance.py @@ -193,9 +193,8 @@ def _process_raw_result(self, cmd_idx, raw_output): if self.__message_sizes[i] in line: values = list(filter(None, line.split(' '))) avg_bw = float(values[-2]) - metric = 'IB_{}_{}_{}_{}_{}_avg'.format( - str(self._args.ib_index), self._args.mode, self.__message_sizes[i], str(self._args.n), - ib_command + metric = 'IB_Avg_{}'.format( + str(self._args.ib_index) ) if metric not in metric_set: metric_set.add(metric) diff --git a/tests/benchmarks/micro_benchmarks/test_ib_loopback_performance.py b/tests/benchmarks/micro_benchmarks/test_ib_loopback_performance.py index 
37882327d..147422ce7 100644 --- a/tests/benchmarks/micro_benchmarks/test_ib_loopback_performance.py +++ b/tests/benchmarks/micro_benchmarks/test_ib_loopback_performance.py @@ -189,8 +189,8 @@ def test_ib_loopback_performance(self): message_sizes = [benchmark._args.size] for ib_command in benchmark._args.commands: for size in message_sizes: - metric = 'RDMA_{}_{}_{}_{}_{}_avg'.format( - str(benchmark._args.ib_index), benchmark._args.mode, size, str(benchmark._args.n), ib_command + metric = 'IB_Avg_{}'.format( + str(benchmark._args.ib_index) ) metric_list.append(metric) for metric in metric_list: From 31779b76a57cbdac53df93cd983ce95f8df8f85a Mon Sep 17 00:00:00 2001 From: yukirora Date: Tue, 13 Jul 2021 17:53:09 +0800 Subject: [PATCH 10/10] format and lint --- .gitmodules | 3 --- .../benchmarks/micro_benchmarks/ib_loopback_performance.py | 5 +---- .../micro_benchmarks/test_ib_loopback_performance.py | 4 +--- third_party/Makefile | 1 - 4 files changed, 2 insertions(+), 11 deletions(-) diff --git a/.gitmodules b/.gitmodules index f6d5d46c2..4f3732b45 100644 --- a/.gitmodules +++ b/.gitmodules @@ -2,9 +2,6 @@ path = third_party/cutlass url = https://github.com/NVIDIA/cutlass.git branch = v2.4.0 -[submodule "third_party/cuda-samples"] - path = third_party/cuda-samples - url = https://github.com/NVIDIA/cuda-samples.git [submodule "third_party/perftest"] path = third_party/perftest url = https://github.com/linux-rdma/perftest.git diff --git a/superbench/benchmarks/micro_benchmarks/ib_loopback_performance.py b/superbench/benchmarks/micro_benchmarks/ib_loopback_performance.py index 5e0288c90..39fde4748 100644 --- a/superbench/benchmarks/micro_benchmarks/ib_loopback_performance.py +++ b/superbench/benchmarks/micro_benchmarks/ib_loopback_performance.py @@ -181,7 +181,6 @@ def _process_raw_result(self, cmd_idx, raw_output): Return: True if the raw output string is valid and result can be extracted. 
""" - ib_command = self._args.commands[cmd_idx] self._result.add_raw_data('raw_output_' + str(cmd_idx) + '_IB' + str(self._args.ib_index), raw_output) valid = False @@ -193,9 +192,7 @@ def _process_raw_result(self, cmd_idx, raw_output): if self.__message_sizes[i] in line: values = list(filter(None, line.split(' '))) avg_bw = float(values[-2]) - metric = 'IB_Avg_{}'.format( - str(self._args.ib_index) - ) + metric = 'IB_Avg_{}'.format(str(self._args.ib_index)) if metric not in metric_set: metric_set.add(metric) self._result.add_result(metric, avg_bw) diff --git a/tests/benchmarks/micro_benchmarks/test_ib_loopback_performance.py b/tests/benchmarks/micro_benchmarks/test_ib_loopback_performance.py index 147422ce7..093f41140 100644 --- a/tests/benchmarks/micro_benchmarks/test_ib_loopback_performance.py +++ b/tests/benchmarks/micro_benchmarks/test_ib_loopback_performance.py @@ -189,9 +189,7 @@ def test_ib_loopback_performance(self): message_sizes = [benchmark._args.size] for ib_command in benchmark._args.commands: for size in message_sizes: - metric = 'IB_Avg_{}'.format( - str(benchmark._args.ib_index) - ) + metric = 'IB_Avg_{}'.format(str(benchmark._args.ib_index)) metric_list.append(metric) for metric in metric_list: assert (metric in benchmark.result) diff --git a/third_party/Makefile b/third_party/Makefile index 2282b400c..0a96030bc 100755 --- a/third_party/Makefile +++ b/third_party/Makefile @@ -34,6 +34,5 @@ endif # The version we use is v4.5-0.2, which is the latest release tag of perftest perftest: ifneq (,$(wildcard perftest/autogen.sh)) - cd perftest && git checkout v4.5-0.2 cd perftest && ./autogen.sh && ./configure CUDA_H_PATH=/usr/local/cuda/include/cuda.h --prefix=$(SB_MICRO_PATH) && make -j && make install endif \ No newline at end of file