Skip to content

Commit

Permalink
Added IB detector in tools.setup_helpers
Browse files Browse the repository at this point in the history
  • Loading branch information
teng-li committed Jan 24, 2018
1 parent 40b9fa8 commit b5a4201
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 1 deletion.
5 changes: 4 additions & 1 deletion setup.py
Expand Up @@ -25,6 +25,7 @@
from tools.setup_helpers.split_types import split_types
from tools.setup_helpers.generate_code import generate_code
from tools.setup_helpers.ninja_builder import NinjaBuilder, ninja_build_ext
from tools.setup_helpers.ib_detect import WITH_IB_DEVICES

DEBUG = check_env_flag('DEBUG')

Expand Down Expand Up @@ -137,7 +138,9 @@ def build_libs(libs):
my_env["CUDNN_LIB_DIR"] = CUDNN_LIB_DIR
my_env["CUDNN_LIBRARY"] = CUDNN_LIBRARY
my_env["CUDNN_INCLUDE_DIR"] = CUDNN_INCLUDE_DIR
if WITH_GLOO_IBVERBS:

if WITH_DISTRIBUTED and (WITH_IB_DEVICES or
check_env_flag("WITH_GLOO_IBVERBS")):
build_libs_cmd += ['--with-gloo-ibverbs']

if subprocess.call(build_libs_cmd + libs, env=my_env) != 0:
Expand Down
76 changes: 76 additions & 0 deletions tools/setup_helpers/ib_detect.py
@@ -0,0 +1,76 @@
import os
import subprocess
import re


# Whether Infiniband (IB) devices were found on this host. Starts out False
# and is flipped to True at the bottom of this module when the probe reports
# at least one device.
WITH_IB_DEVICES = False
# Name of the command-line tool that is looked up on PATH and run with
# "--list" to enumerate the IB devices.
IB_DEVINFO_CMD = "ibv_devinfo"


def get_command_path(command):
    """
    Helper function that gets the full path of a given linux command.

    Every directory listed in the PATH environment variable is searched in
    order for a regular file named ``command`` that the current user is
    allowed to execute.

    Args:
        command: name of the executable to look for (e.g. "ibv_devinfo").

    Returns:
        The path of the first match (directory joined with ``command``), or
        None when nothing matches or PATH is not set at all.
    """
    def executable(command_path):
        # A directory also passes the X_OK check, so require a regular file.
        return os.path.isfile(command_path) and os.access(command_path, os.X_OK)

    # PATH can be absent from the environment (e.g. in a stripped-down build
    # sandbox). Report "not found" in that case instead of raising KeyError.
    search_path = os.environ.get("PATH")
    if search_path is None:
        return None

    for path in search_path.split(os.pathsep):
        command_path = os.path.join(path, command)
        if executable(command_path):
            return command_path

    return None


def detect_ib_devices():
    """
    Helper function that detects if there are Infiniband devices on the host.

    Locates the IB_DEVINFO_CMD tool on PATH, runs it with "--list" and parses
    the device count out of the first line of its output.

    Returns:
        The number of IB devices detected (an int, possibly 0), or None for
        failure to detect: the tool is not installed, it exits with an
        error, or its output cannot be parsed.

    This function never raises: every exception is reported on stdout and
    turned into a None result so that a detection problem cannot break the
    build.
    """
    try:
        full_cmd_path = get_command_path(IB_DEVINFO_CMD)
        if not full_cmd_path:
            return None
        out = subprocess.check_output([full_cmd_path, "--list"])
        # Find the first line of the output.
        # The output should be either:
        #
        # > ibv_devinfo --list
        # 0 HCAs found:
        #
        # or
        #
        # > ibv_devinfo --list
        # 4 HCAs found:
        #     mlx5_3
        #     mlx5_2
        #     mlx5_1
        #     mlx5_0
        first_line = out.decode().split('\n')[0]
        # Raw string: "\d" in a normal string literal is an invalid escape
        # sequence and triggers a warning on recent Python versions.
        res = re.findall(r"\d+", first_line)
        # Exactly one number (the device count) is expected on that line;
        # anything else means the output format is not the one we know.
        if len(res) != 1:
            raise Exception("-- IB_detect: unexpected parsing error while "
                            "trying to find the number of available devices.")
        return int(res[0])

    except Exception as ex:
        # We just take all the exceptions here without affecting the build
        print("-- IB_detect: encountered an exception: {}".format(str(ex)))
        return None


# Probe the host once, at import time. The outcome is published through the
# module-level WITH_IB_DEVICES flag, which stays False unless at least one
# device is reported.
num_ib_devices = detect_ib_devices()

if num_ib_devices is not None and num_ib_devices > 0:
    # One or more devices were listed: enable IB support.
    print("-- IB_detect: {} IB devices detected, compiling with IB support."
          .format(num_ib_devices))
    WITH_IB_DEVICES = True

elif num_ib_devices is None:
    # The probe itself failed (tool missing, error, unparsable output).
    print("-- IB_detect: unable to detect IB devices, "
          "compiling with no IB support by default unless overridden "
          "by WITH_GLOO_IBVERBS")

else:
    # The probe worked and reported zero devices.
    print("-- IB_detect: no IB device detected, compiling with no IB support "
          "by default unless overridden by WITH_GLOO_IBVERBS")

0 comments on commit b5a4201

Please sign in to comment.