diff --git a/dockerfile/cuda11.1.1.dockerfile b/dockerfile/cuda11.1.1.dockerfile index d8dcf5e8b..ae8e695e8 100644 --- a/dockerfile/cuda11.1.1.dockerfile +++ b/dockerfile/cuda11.1.1.dockerfile @@ -36,6 +36,7 @@ RUN apt-get update && \ util-linux \ vim \ wget \ + numactl \ && \ apt-get autoremove && \ apt-get clean && \ diff --git a/examples/benchmarks/ib_loopback_performance.py b/examples/benchmarks/ib_loopback_performance.py new file mode 100644 index 000000000..0d3b8433b --- /dev/null +++ b/examples/benchmarks/ib_loopback_performance.py @@ -0,0 +1,22 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +"""Micro benchmark example for IB loopback performance. + +Commands to run: + python examples/benchmarks/ib_loopback_performance.py +""" + +from superbench.benchmarks import BenchmarkRegistry +from superbench.common.utils import logger + +if __name__ == '__main__': + context = BenchmarkRegistry.create_benchmark_context('ib-loopback') + + benchmark = BenchmarkRegistry.launch_benchmark(context) + if benchmark: + logger.info( + 'benchmark: {}, return code: {}, result: {}'.format( + benchmark.name, benchmark.return_code, benchmark.result + ) + ) diff --git a/superbench/benchmarks/micro_benchmarks/__init__.py b/superbench/benchmarks/micro_benchmarks/__init__.py index 5f3b76ac8..a257cba36 100644 --- a/superbench/benchmarks/micro_benchmarks/__init__.py +++ b/superbench/benchmarks/micro_benchmarks/__init__.py @@ -11,8 +11,9 @@ from superbench.benchmarks.micro_benchmarks.cudnn_function import CudnnBenchmark from superbench.benchmarks.micro_benchmarks.gemm_flops_performance import GemmFlopsCuda from superbench.benchmarks.micro_benchmarks.cuda_memory_bw_performance import CudaMemBwBenchmark +from superbench.benchmarks.micro_benchmarks.ib_loopback_performance import IBLoopback __all__ = [ 'MicroBenchmark', 'MicroBenchmarkWithInvoke', 'ShardingMatmul', 'ComputationCommunicationOverlap', 'KernelLaunch', - 'CublasBenchmark', 
'CudnnBenchmark', 'GemmFlopsCuda', 'CudaMemBwBenchmark' + 'CublasBenchmark', 'CudnnBenchmark', 'GemmFlopsCuda', 'CudaMemBwBenchmark', 'IBLoopback' ] diff --git a/superbench/benchmarks/micro_benchmarks/ib_loopback_performance.py b/superbench/benchmarks/micro_benchmarks/ib_loopback_performance.py new file mode 100644 index 000000000..39fde4748 --- /dev/null +++ b/superbench/benchmarks/micro_benchmarks/ib_loopback_performance.py @@ -0,0 +1,214 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +"""Module of the IB loopback benchmarks.""" + +import os +import subprocess + +from superbench.common.utils import logger +from superbench.common.utils import network +from superbench.benchmarks import BenchmarkRegistry, ReturnCode +from superbench.benchmarks.micro_benchmarks import MicroBenchmarkWithInvoke + + +class IBLoopback(MicroBenchmarkWithInvoke): + """The IB loopback performance benchmark class.""" + def __init__(self, name, parameters=''): + """Constructor. + + Args: + name (str): benchmark name. + parameters (str): benchmark parameters. + """ + super().__init__(name, parameters) + + self._bin_name = 'run_perftest_loopback' + self.__support_ib_commands = ['ib_write_bw', 'ib_read_bw', 'ib_send_bw'] + self.__message_sizes = ['8388608', '4194304', '2097152', '1048576'] + + def add_parser_arguments(self): + """Add the specified arguments.""" + super().add_parser_arguments() + + self._parser.add_argument( + '--ib_index', + type=int, + default=0, + required=False, + help='The index of ib device.', + ) + self._parser.add_argument( + '--n', + type=int, + default=20000, + required=False, + help='The iterations of running ib command', + ) + self._parser.add_argument( + '--size', + type=int, + default=8388608, + required=False, + help='The message size of running ib command. E.g. 
{}.'.format(' '.join(self.__message_sizes)), + ) + self._parser.add_argument( + '--commands', + type=str, + nargs='+', + default='ib_write_bw', + help='The ib command used to run. E.g. {}.'.format(' '.join(self.__support_ib_commands)), + ) + self._parser.add_argument( + '--mode', + type=str, + default='AF', + help='The mode used to run ib command. Eg, AF(all message size) or S(single message size)', + ) + self._parser.add_argument( + '--numa', + type=int, + default=0, + required=False, + help='The index of numa node.', + ) + + def __get_numa_cores(self, numa_index): + """Get the last two cores from different physical cpu core of NUMA. + + Args: + numa_index (int): the index of numa node. + + Return: + The last two cores from different physical cpu core of NUMA. + """ + command = 'numactl --hardware | grep "node {} cpus:"'.format(numa_index) + output = subprocess.run( + command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True, check=False, universal_newlines=True + ) + return output.stdout.splitlines()[0].split(' ') + + def __get_arguments_from_env(self): + """Read environment variables from runner used for parallel and fill in ib_index and numa_node_index. + + Get 'PROC_RANK'(rank of current process) 'IB_DEVICES' 'NUMA_NODES' environment variables + Get ib_index and numa_node_index according to 'NUMA_NODES'['PROC_RANK'] and 'IB_DEVICES'['PROC_RANK'] + """ + if os.getenv('PROC_RANK'): + rank = int(os.getenv('PROC_RANK')) + if os.getenv('IB_DEVICES'): + self._args.ib_index = int(os.getenv('IB_DEVICES').split(',')[rank]) + if os.getenv('NUMA_NODES'): + self._args.numa = int(os.getenv('NUMA_NODES').split(',')[rank]) + + def _preprocess(self): + """Preprocess/preparation operations before the benchmarking. + + Return: + True if _preprocess() succeed. 
+ """ + if not super()._preprocess(): + return False + + self.__get_arguments_from_env() + + # Format the arguments + if not isinstance(self._args.commands, list): + self._args.commands = [self._args.commands] + self._args.commands = [command.lower() for command in self._args.commands] + self._args.mode = self._args.mode.upper() + + # Check whether arguments are valid + if str(self._args.size) not in self.__message_sizes: + self._result.set_return_code(ReturnCode.INVALID_ARGUMENT) + logger.error( + 'Unsupported message size - benchmark: {}, size: {}, expect: {}.'.format( + self._name, self._args.size, self.__message_sizes + ) + ) + return False + command_mode = '' + if self._args.mode == 'AF': + command_mode = ' -a' + elif self._args.mode == 'S': + command_mode = ' -s ' + str(self._args.size) + else: + self._result.set_return_code(ReturnCode.INVALID_ARGUMENT) + logger.error( + 'Unsupported args mode - benchmark: {}, mode: {}, expect: {}.'.format( + self._name, self._args.mode, 'AF or S' + ) + ) + return False + + for ib_command in self._args.commands: + if ib_command not in self.__support_ib_commands: + self._result.set_return_code(ReturnCode.INVALID_ARGUMENT) + logger.error( + 'Unsupported ib command - benchmark: {}, command: {}, expected: {}.'.format( + self._name, ib_command, self.__support_ib_commands + ) + ) + return False + else: + try: + command = os.path.join(self._args.bin_dir, self._bin_name) + numa_cores = self.__get_numa_cores(self._args.numa) + server_core = int(numa_cores[-1]) + client_core = int(numa_cores[-3]) + command += ' ' + str(server_core) + ' ' + str(client_core) + command += ' ' + ib_command + command += command_mode + ' -F' + command += ' --iters=' + str(self._args.n) + command += ' -d ' + network.get_ib_devices()[self._args.ib_index] + command += ' -p ' + str(network.get_free_port()) + self._commands.append(command) + except BaseException as e: + self._result.set_return_code(ReturnCode.MICROBENCHMARK_DEVICE_GETTING_FAILURE) + 
logger.error('Getting devices failure - benchmark: {}, message: {}.'.format(self._name, str(e))) + return False + return True + + def _process_raw_result(self, cmd_idx, raw_output): + """Function to parse raw results and save the summarized results. + + self._result.add_raw_data() and self._result.add_result() need to be called to save the results. + + Args: + cmd_idx (int): the index of command corresponding with the raw_output. + raw_output (str): raw output string of the micro-benchmark. + + Return: + True if the raw output string is valid and result can be extracted. + """ + self._result.add_raw_data('raw_output_' + str(cmd_idx) + '_IB' + str(self._args.ib_index), raw_output) + + valid = False + content = raw_output.splitlines() + try: + metric_set = set() + for line in content: + for i in range(len(self.__message_sizes)): + if self.__message_sizes[i] in line: + values = list(filter(None, line.split(' '))) + avg_bw = float(values[-2]) + metric = 'IB_Avg_{}'.format(str(self._args.ib_index)) + if metric not in metric_set: + metric_set.add(metric) + self._result.add_result(metric, avg_bw) + valid = True + except BaseException: + valid = False + finally: + if valid is False: + logger.error( + 'The result format is invalid - round: {}, benchmark: {}, raw output: {}.'.format( + self._curr_run_index, self._name, raw_output + ) + ) + return False + + return True + + +BenchmarkRegistry.register_benchmark('ib-loopback', IBLoopback) diff --git a/superbench/benchmarks/return_code.py b/superbench/benchmarks/return_code.py index 0991ddb22..da207d01a 100644 --- a/superbench/benchmarks/return_code.py +++ b/superbench/benchmarks/return_code.py @@ -28,3 +28,4 @@ class ReturnCode(Enum): MICROBENCHMARK_EXECUTION_FAILURE = 32 MICROBENCHMARK_RESULT_PARSING_FAILURE = 33 MICROBENCHMARK_UNSUPPORTED_ARCHITECTURE = 34 + MICROBENCHMARK_DEVICE_GETTING_FAILURE = 35 diff --git a/superbench/config/default.yaml b/superbench/config/default.yaml index 1b15f7c2a..a3db4b6ab 100644 --- 
a/superbench/config/default.yaml +++ b/superbench/config/default.yaml @@ -28,6 +28,16 @@ superbench: model_action: - train benchmarks: + ib-loopback: + enable: true + modes: + - name: local + proc_num: 4 + prefix: PROC_RANK={proc_rank} IB_DEVICES=0,2,4,6 NUMA_NODES=1,0,3,2 + parallel: yes + - name: local + proc_num: 4 + prefix: PROC_RANK={proc_rank} IB_DEVICES=1,3,5,7 NUMA_NODES=1,0,3,2 mem-bw: enable: true modes: diff --git a/tests/benchmarks/micro_benchmarks/test_ib_loopback_performance.py b/tests/benchmarks/micro_benchmarks/test_ib_loopback_performance.py new file mode 100644 index 000000000..093f41140 --- /dev/null +++ b/tests/benchmarks/micro_benchmarks/test_ib_loopback_performance.py @@ -0,0 +1,213 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""Tests for ib-loopback benchmark.""" + +import os +import numbers +import unittest +from pathlib import Path + +from superbench.benchmarks import BenchmarkRegistry, ReturnCode, Platform, BenchmarkType +from superbench.common.utils import network + + +class IBLoopbackTest(unittest.TestCase): + """Tests for IBLoopback benchmark.""" + def setUp(self): + """Method called to prepare the test fixture.""" + if (len(network.get_ib_devices()) < 1): + # Create fake binary file just for testing. + os.environ['SB_MICRO_PATH'] = '/tmp/superbench/' + binary_path = os.path.join(os.getenv('SB_MICRO_PATH'), 'bin') + Path(binary_path).mkdir(parents=True, exist_ok=True) + self.__binary_file = Path(os.path.join(binary_path, 'run_perftest_loopback')) + self.__binary_file.touch(mode=0o755, exist_ok=True) + + def tearDown(self): + """Method called after the test method has been called and the result recorded.""" + if (len(network.get_ib_devices()) < 1): + self.__binary_file.unlink() + + def test_ib_loopback_performance(self): + """Test ib-loopback benchmark.""" + raw_output = {} + raw_output['AF'] = """ +************************************ +* Waiting for client to connect... 
* +************************************ +--------------------------------------------------------------------------------------- + RDMA_Write BW Test +Dual-port : OFF Device : ibP257p0s0 +Number of qps : 1 Transport type : IB +Connection type : RC Using SRQ : OFF +PCIe relax order: ON +--------------------------------------------------------------------------------------- + RDMA_Write BW Test +Dual-port : OFF Device : ibP257p0s0 +Number of qps : 1 Transport type : IB +Connection type : RC Using SRQ : OFF +PCIe relax order: ON +ibv_wr* API : ON +TX depth : 128 +CQ Moderation : 100 +Mtu : 4096[B] +Link type : IB +Max inline data : 0[B] +rdma_cm QPs : OFF +Data ex. method : Ethernet +--------------------------------------------------------------------------------------- +ibv_wr* API : ON +CQ Moderation : 100 +Mtu : 4096[B] +Link type : IB +Max inline data : 0[B] +rdma_cm QPs : OFF +Data ex. method : Ethernet +--------------------------------------------------------------------------------------- +local address: LID 0xd06 QPN 0x092f PSN 0x3ff1bc RKey 0x080329 VAddr 0x007fc97ff50000 +local address: LID 0xd06 QPN 0x092e PSN 0x3eb82d RKey 0x080228 VAddr 0x007f19adcbf000 +remote address: LID 0xd06 QPN 0x092e PSN 0x3eb82d RKey 0x080228 VAddr 0x007f19adcbf000 +remote address: LID 0xd06 QPN 0x092f PSN 0x3ff1bc RKey 0x080329 VAddr 0x007fc97ff50000 +--------------------------------------------------------------------------------------- +--------------------------------------------------------------------------------------- +#bytes #iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps] +#bytes #iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps] +2 2000 5.32 5.30 2.778732 +4 2000 10.65 10.64 2.788833 +8 2000 21.30 21.27 2.787609 +16 2000 42.60 42.55 2.788268 +32 2000 84.90 82.82 2.713896 +64 2000 173.55 171.66 2.812504 +128 2000 362.27 353.83 2.898535 +256 2000 687.82 679.37 2.782698 +512 2000 1337.12 1311.59 2.686135 +1024 2000 2674.25 2649.39 2.712980 +2048 2000 
5248.56 5118.18 2.620509 +4096 2000 10034.02 9948.41 2.546793 +8192 2000 18620.51 12782.56 1.636168 +16384 2000 23115.27 16782.50 1.074080 +32768 2000 22927.94 18586.03 0.594753 +65536 2000 23330.56 21167.79 0.338685 +131072 2000 22750.35 21443.14 0.171545 +262144 2000 22673.63 22411.35 0.089645 +524288 2000 22679.02 22678.86 0.045358 +1048576 2000 22817.06 22816.86 0.022817 +2097152 2000 22919.37 22919.27 0.011460 +4194304 2000 23277.93 23277.91 0.005819 +8388608 2000 23240.68 23240.68 0.002905 +--------------------------------------------------------------------------------------- +8388608 2000 23240.68 23240.68 0.002905 +--------------------------------------------------------------------------------------- + """ + raw_output['S'] = """ + RDMA_Write BW Test + Dual-port : OFF Device : ibP257p0s0 + Number of qps : 1 Transport type : IB + Connection type : RC Using SRQ : OFF + PCIe relax order: ON + TX depth : 128 + CQ Moderation : 1 + Mtu : 4096[B] + Link type : IB + Max inline data : 0[B] + rdma_cm QPs : OFF + Data ex. method : Ethernet +--------------------------------------------------------------------------------------- + local address: LID 0xd06 QPN 0x095f PSN 0x3c9e82 RKey 0x080359 VAddr 0x007f9fc479c000 + remote address: LID 0xd06 QPN 0x095e PSN 0xbd024b RKey 0x080258 VAddr 0x007fe62504b000 +--------------------------------------------------------------------------------------- + #bytes #iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps] + 8388608 20000 24056.74 24056.72 0.003007 +************************************ +* Waiting for client to connect... * +************************************ +--------------------------------------------------------------------------------------- + RDMA_Write BW Test + Dual-port : OFF Device : ibP257p0s0 + Number of qps : 1 Transport type : IB + Connection type : RC Using SRQ : OFF + PCIe relax order: ON + CQ Moderation : 1 + Mtu : 4096[B] + Link type : IB + Max inline data : 0[B] + rdma_cm QPs : OFF + Data ex. 
method : Ethernet +--------------------------------------------------------------------------------------- + local address: LID 0xd06 QPN 0x095e PSN 0xbd024b RKey 0x080258 VAddr 0x007fe62504b000 + remote address: LID 0xd06 QPN 0x095f PSN 0x3c9e82 RKey 0x080359 VAddr 0x007f9fc479c000 +--------------------------------------------------------------------------------------- + #bytes #iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps] + 8388608 20000 24056.74 24056.72 0.003007 +--------------------------------------------------------------------------------------- + +--------------------------------------------------------------------------------------- +--------------------------------------------------------------------------------------- +""" + for mode in ['AF', 'S']: + # Test without ib devices + if (len(network.get_ib_devices()) < 1): + # Check registry. + benchmark_name = 'ib-loopback' + (benchmark_class, predefine_params + ) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(benchmark_name, Platform.CPU) + assert (benchmark_class) + + # Check preprocess + parameters = '--ib_index 0 --numa 0 --n 2000 --mode ' + mode + benchmark = benchmark_class(benchmark_name, parameters=parameters) + ret = benchmark._preprocess() + assert (ret is False) + assert (benchmark.return_code is ReturnCode.MICROBENCHMARK_DEVICE_GETTING_FAILURE) + + assert (benchmark._process_raw_result(0, raw_output[mode])) + + # Test with ib devices + else: + # Check registry, preprocess and run. + parameters = '--ib_index 0 --numa 0 --n 2000 --mode ' + mode + context = BenchmarkRegistry.create_benchmark_context('ib-loopback', parameters=parameters) + + assert (BenchmarkRegistry.is_benchmark_context_valid(context)) + benchmark = BenchmarkRegistry.launch_benchmark(context) + + # Check raw_data. 
+ assert (benchmark.run_count == 1) + assert (benchmark.return_code == ReturnCode.SUCCESS) + assert ('raw_output_0_IB0' in benchmark.raw_data) + assert (len(benchmark.raw_data['raw_output_0_IB0']) == 1) + assert (isinstance(benchmark.raw_data['raw_output_0_IB0'][0], str)) + + # Check function process_raw_data. + # Positive case - valid raw output. + metric_list = [] + message_sizes = [] + if mode == 'AF': + message_sizes = ['8388608', '4194304', '2097152', '1048576'] + elif mode == 'S': + message_sizes = [benchmark._args.size] + for ib_command in benchmark._args.commands: + for size in message_sizes: + metric = 'IB_Avg_{}'.format(str(benchmark._args.ib_index)) + metric_list.append(metric) + for metric in metric_list: + assert (metric in benchmark.result) + assert (len(benchmark.result[metric]) == 1) + assert (isinstance(benchmark.result[metric][0], numbers.Number)) + + # Negative case - Add invalid raw output. + assert (benchmark._process_raw_result(0, 'Invalid raw output') is False) + + # Check basic information. + assert (benchmark.name == 'ib-loopback') + assert (benchmark.type == BenchmarkType.MICRO) + assert (benchmark._bin_name == 'run_perftest_loopback') + + # Check parameters specified in BenchmarkContext. + assert (benchmark._args.ib_index == 0) + assert (benchmark._args.numa == 0) + assert (benchmark._args.n == 2000) + assert (benchmark._args.size == 8388608) + assert (benchmark._args.commands == ['ib_write_bw']) + assert (benchmark._args.mode == mode)