
Commit

add ib loopback benchmark
yukirora committed Jul 16, 2021
1 parent 419dea2 commit 363d365
Showing 7 changed files with 455 additions and 1 deletion.
1 change: 1 addition & 0 deletions dockerfile/cuda11.1.1.dockerfile
@@ -36,6 +36,7 @@ RUN apt-get update && \
util-linux \
vim \
wget \
numactl \
&& \
apt-get autoremove && \
apt-get clean && \
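Note: numactl is added here because the new IB loopback benchmark shells out to numactl --hardware to discover the CPU cores of each NUMA node (see __get_numa_cores in superbench/benchmarks/micro_benchmarks/ib_loopback_performance.py below).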
22 changes: 22 additions & 0 deletions examples/benchmarks/ib_loopback_performance.py
@@ -0,0 +1,22 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""Micro benchmark example for IB loopback performance.
Commands to run:
python examples/benchmarks/ib_loopback_performance.py
"""

from superbench.benchmarks import BenchmarkRegistry
from superbench.common.utils import logger

if __name__ == '__main__':
context = BenchmarkRegistry.create_benchmark_context('ib-loopback')

benchmark = BenchmarkRegistry.launch_benchmark(context)
if benchmark:
logger.info(
'benchmark: {}, return code: {}, result: {}'.format(
benchmark.name, benchmark.return_code, benchmark.result
)
)
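As a companion to the example above, a minimal sketch of running the same benchmark with explicit parameters; the flag names come from the argument parser added below, and it assumes create_benchmark_context forwards a parameters string to that parser, as other superbench examples do:

from superbench.benchmarks import BenchmarkRegistry
from superbench.common.utils import logger

if __name__ == '__main__':
    # Illustrative values: bind to IB device 0 on NUMA node 1, single 4 MiB message size.
    context = BenchmarkRegistry.create_benchmark_context(
        'ib-loopback', parameters='--ib_index 0 --numa 1 --commands ib_write_bw --mode S --size 4194304'
    )
    benchmark = BenchmarkRegistry.launch_benchmark(context)
    if benchmark:
        logger.info('benchmark: {}, return code: {}'.format(benchmark.name, benchmark.return_code))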
3 changes: 2 additions & 1 deletion superbench/benchmarks/micro_benchmarks/__init__.py
@@ -11,8 +11,9 @@
from superbench.benchmarks.micro_benchmarks.cudnn_function import CudnnBenchmark
from superbench.benchmarks.micro_benchmarks.gemm_flops_performance import GemmFlopsCuda
from superbench.benchmarks.micro_benchmarks.cuda_memory_bw_performance import CudaMemBwBenchmark
from superbench.benchmarks.micro_benchmarks.ib_loopback_performance import IBLoopback

__all__ = [
'MicroBenchmark', 'MicroBenchmarkWithInvoke', 'ShardingMatmul', 'ComputationCommunicationOverlap', 'KernelLaunch',
- 'CublasBenchmark', 'CudnnBenchmark', 'GemmFlopsCuda', 'CudaMemBwBenchmark'
+ 'CublasBenchmark', 'CudnnBenchmark', 'GemmFlopsCuda', 'CudaMemBwBenchmark', 'IBLoopback'
]
206 changes: 206 additions & 0 deletions superbench/benchmarks/micro_benchmarks/ib_loopback_performance.py
@@ -0,0 +1,206 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""Module of the IB loopback benchmarks."""

import os
import subprocess

from superbench.common.utils import logger
from superbench.common.utils import network
from superbench.benchmarks import BenchmarkRegistry, ReturnCode
from superbench.benchmarks.micro_benchmarks import MicroBenchmarkWithInvoke


class IBLoopback(MicroBenchmarkWithInvoke):
"""The IB loopback performance benchmark class."""
def __init__(self, name, parameters=''):
"""Constructor.
Args:
name (str): benchmark name.
parameters (str): benchmark parameters.
"""
super().__init__(name, parameters)

self._bin_name = 'run_perftest_loopback'
self.__support_ib_commands = ['ib_write_bw', 'ib_read_bw', 'ib_send_bw']
self.__message_sizes = ['8388608', '4194304', '2097152', '1048576']

def add_parser_arguments(self):
"""Add the specified arguments."""
super().add_parser_arguments()

self._parser.add_argument(
'--ib_index',
type=int,
default=0,
required=False,
help='The index of ib device.',
)
self._parser.add_argument(
'--n',
type=int,
default=20000,
required=False,
help='The number of iterations to run the ib command.',
)
self._parser.add_argument(
'--size',
type=int,
default=8388608,
required=False,
help='The message size of running ib command. E.g. {}.'.format(' '.join(self.__message_sizes)),
)
self._parser.add_argument(
'--commands',
type=str,
nargs='+',
default='ib_write_bw',
help='The ib command(s) to run. E.g. {}.'.format(' '.join(self.__support_ib_commands)),
)
self._parser.add_argument(
'--mode',
type=str,
default='AF',
help='The mode used to run the ib command. E.g., AF (all message sizes) or S (single message size).',
)
self._parser.add_argument(
'--numa',
type=int,
default=0,
required=False,
help='The index of numa node.',
)
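# Illustrative parameters string accepted by the parser above (values are examples only);
# --commands takes one or more of the supported ib commands, and --mode S expects --size
# to be one of the listed message sizes:
#   --ib_index 0 --numa 1 --n 20000 --commands ib_write_bw ib_read_bw --mode S --size 4194304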

def __get_numa_cores(self, numa_index):
"""Get the last two cores from different physical cpu core of NUMA<numa_index>.
Args:
numa_index (int): the index of numa node.
Return:
The last two cores from different physical cpu core of NUMA<numa_index>.
"""
command = 'numactl --hardware | grep "node {} cpus:"'.format(numa_index)
output = subprocess.run(
command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True, check=False, universal_newlines=True
)
return output.stdout.splitlines()[0].split(' ')
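# Illustrative parsing of the numactl output (the exact core numbering is machine specific):
#   "node 0 cpus: 0 1 2 3 24 25 26 27" -> ['node', '0', 'cpus:', '0', '1', '2', '3', '24', '25', '26', '27']
# The trailing tokens are the core ids; _preprocess() below uses numa_cores[-1] and numa_cores[-3]
# as the server and client cores, skipping one entry so the two cores fall on different physical
# cores when hyperthread siblings are listed adjacently (an assumption about the numbering scheme).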

def __get_arguments_from_env(self):
"""Read environment variables from runner used for parallel and fill in ib_index and numa_node_index.
Get 'PROC_RANK'(rank of current process) 'IB_DEVICES' 'NUMA_NODES' environment variables
Get ib_index and numa_node_index according to 'NUMA_NODES'['PROC_RANK'] and 'IB_DEVICES'['PROC_RANK']
"""
if os.getenv('PROC_RANK'):
rank = int(os.getenv('PROC_RANK'))
if os.getenv('IB_DEVICES'):
self._args.ib_index = int(os.getenv('IB_DEVICES').split(',')[rank])
if os.getenv('NUMA_NODES'):
self._args.numa = int(os.getenv('NUMA_NODES').split(',')[rank])
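# Example, matching the default.yaml entry added in this commit: with
#   PROC_RANK=1 IB_DEVICES=0,2,4,6 NUMA_NODES=1,0,3,2
# rank 1 runs against IB device 2 pinned to NUMA node 0.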

def _preprocess(self):
"""Preprocess/preparation operations before the benchmarking.
Return:
True if _preprocess() succeeds.
"""
if not super()._preprocess():
return False

self.__get_arguments_from_env()

# Format the arguments
if not isinstance(self._args.commands, list):
self._args.commands = [self._args.commands]
self._args.commands = [command.lower() for command in self._args.commands]
self._args.mode = self._args.mode.upper()

# Check whether arguments are valid
if str(self._args.size) not in self.__message_sizes:
self._result.set_return_code(ReturnCode.INVALID_ARGUMENT)
logger.error(
'Unsupported message size - benchmark: {}, size: {}, expect: {}.'.format(
self._name, self._args.size, self.__message_sizes
)
)
return False
command_mode = ''
if self._args.mode == 'AF':
command_mode = ' -a'
elif self._args.mode == 'S':
command_mode = ' -s ' + str(self._args.size)
else:
self._result.set_return_code(ReturnCode.INVALID_ARGUMENT)
logger.error(
'Unsupported args mode - benchmark: {}, mode: {}, expect: {}.'.format(
self._name, self._args.mode, 'AF or S'
)
)
return False

for ib_command in self._args.commands:
if ib_command not in self.__support_ib_commands:
self._result.set_return_code(ReturnCode.INVALID_ARGUMENT)
logger.error(
'Unsupported ib command - benchmark: {}, command: {}, expected: {}.'.format(
self._name, ib_command, self.__support_ib_commands
)
)
return False
else:
try:
command = os.path.join(self._args.bin_dir, self._bin_name)
numa_cores = self.__get_numa_cores(self._args.numa)
server_core = int(numa_cores[-1])
client_core = int(numa_cores[-3])
command += ' ' + str(server_core) + ' ' + str(client_core)
command += ' ' + ib_command
command += command_mode + ' -F'
command += ' --iters=' + str(self._args.n)
command += ' -d ' + network.get_ib_devices()[self._args.ib_index]
command += ' -p ' + str(network.get_free_port())
self._commands.append(command)
except BaseException as e:
self._result.set_return_code(ReturnCode.MICROBENCHMARK_DEVICE_GETTING_FAILURE)
logger.error('Getting devices failure - benchmark: {}, message: {}.'.format(self._name, str(e)))
return False
return True
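# Example of a generated command for the settings above (device name, port and core ids are
# illustrative values only):
#   run_perftest_loopback 27 25 ib_write_bw -a -F --iters=20000 -d mlx5_0 -p 18515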

def _process_raw_result(self, cmd_idx, raw_output):
"""Function to parse raw results and save the summarized results.
self._result.add_raw_data() and self._result.add_result() need to be called to save the results.
Args:
cmd_idx (int): the index of command corresponding with the raw_output.
raw_output (str): raw output string of the micro-benchmark.
Return:
True if the raw output string is valid and result can be extracted.
"""
self._result.add_raw_data('raw_output_' + str(cmd_idx) + '_IB' + str(self._args.ib_index), raw_output)

valid = False
content = raw_output.splitlines()
try:
metric_set = set()
for line in content:
for i in range(len(self.__message_sizes)):
if self.__message_sizes[i] in line:
values = list(filter(None, line.split(' ')))
avg_bw = float(values[-2])
metric = 'IB_Avg_{}'.format(str(self._args.ib_index))
if metric not in metric_set:
metric_set.add(metric)
self._result.add_result(metric, avg_bw)
valid = True
except BaseException:
valid = False
finally:
if valid is False:
logger.error(
'The result format is invalid - round: {}, benchmark: {}, raw output: {}.'.format(
self._curr_run_index, self._name, raw_output
)
)
return False

return True
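# Illustrative ib_write_bw result line (columns: #bytes, #iterations, BW peak[MB/sec],
# BW average[MB/sec], MsgRate[Mpps]; numbers are made up):
#   8388608    20000    24056.74    24000.31    0.002861
# values[-2] selects the average-bandwidth column, reported under the metric 'IB_Avg_<ib_index>'.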


BenchmarkRegistry.register_benchmark('ib-loopback', IBLoopback)
1 change: 1 addition & 0 deletions superbench/benchmarks/return_code.py
@@ -28,3 +28,4 @@ class ReturnCode(Enum):
MICROBENCHMARK_EXECUTION_FAILURE = 32
MICROBENCHMARK_RESULT_PARSING_FAILURE = 33
MICROBENCHMARK_UNSUPPORTED_ARCHITECTURE = 34
MICROBENCHMARK_DEVICE_GETTING_FAILURE = 35
10 changes: 10 additions & 0 deletions superbench/config/default.yaml
@@ -28,6 +28,16 @@ superbench:
model_action:
- train
benchmarks:
ib-loopback:
enable: true
modes:
- name: local
proc_num: 4
prefix: PROC_RANK={proc_rank} IB_DEVICES=0,2,4,6 NUMA_NODES=1,0,3,2
parallel: yes
- name: local
proc_num: 4
prefix: PROC_RANK={proc_rank} IB_DEVICES=1,3,5,7 NUMA_NODES=1,0,3,2
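# The two local modes together cover IB devices 0-7: each of the 4 processes per mode uses its
# PROC_RANK to pick one entry from IB_DEVICES and NUMA_NODES (see __get_arguments_from_env above).
# The device-to-NUMA mapping shown here is an example topology and should match the target machine.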
mem-bw:
enable: true
modes:
