Skip to content

Commit

Permalink
PR #12948: [ROCm] Provide run_xla script to facilitate running XLA unit tests

Browse files Browse the repository at this point in the history

Imported from GitHub PR openxla/xla#12948

This is first step in enabling CI runs on AMD hardware. Planning to use this repository to house ROCm related scripts.
Copybara import of the project:

--
5465e8b4b83302dabf6ceb64552fd841fb29f2b0 by Harsha HS <harsha.havanurshamsundara@amd.com>:

[ROCm] Provide run_xla script to facilitate running XLA unit tests

--
d5a1217b452539607571ca4c8d76907722fd05bc by Harsha H S <hsharsha@users.noreply.github.com>:

Update run_xla.sh

Merging this change closes #12948

FUTURE_COPYBARA_INTEGRATE_REVIEW=openxla/xla#12948 from ROCm:ci_add_run_xla_script_20240522 d5a1217b452539607571ca4c8d76907722fd05bc
PiperOrigin-RevId: 636886269
  • Loading branch information
hsharsha authored and tensorflower-gardener committed May 24, 2024
1 parent b08e206 commit e5cc1dc
Showing 1 changed file with 98 additions and 0 deletions.
98 changes: 98 additions & 0 deletions third_party/xla/build_tools/rocm/run_xla.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#!/usr/bin/env bash
# Copyright 2024 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ==============================================================================

set -e
set -x

# Number of parallel build jobs = number of logical CPUs.
N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)

# If rocm-smi exists locally (it should) use it to find
# out how many GPUs we have to test with.
#
# NOTE: the command must be tested directly in the `if` condition. The
# previous `rocm-smi -i; STATUS=$?` pattern was dead code under `set -e`:
# a failing (or missing) rocm-smi aborted the script before the status
# check, so the TF_GPU_COUNT=1 fallback could never run.
if rocm-smi -i; then
  TF_GPU_COUNT=$(rocm-smi -i | grep 'Device ID' | grep -c 'GPU')
else
  TF_GPU_COUNT=1
fi
TF_TESTS_PER_GPU=1
# Total concurrent test jobs = GPUs * tests-per-GPU (arithmetic expansion
# instead of the external `expr` utility).
N_TEST_JOBS=$(( TF_GPU_COUNT * TF_TESTS_PER_GPU ))

echo ""
echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
echo ""

# ROCm install location: first positional argument wins, then the
# pre-existing $ROCM_PATH environment variable, then the default prefix.
if [[ -n "${1:-}" ]]; then
  ROCM_INSTALL_DIR="$1"
elif [[ -n "${ROCM_PATH:-}" ]]; then
  ROCM_INSTALL_DIR="${ROCM_PATH}"
else
  ROCM_INSTALL_DIR=/opt/rocm-6.1.0
fi

# Locate the python3 interpreter (command -v instead of `which`, $() instead
# of backticks; assignment split from export so a failure isn't masked).
PYTHON_BIN_PATH=$(command -v python3)
export PYTHON_BIN_PATH
PYTHON_VERSION=$(python3 -c "import sys;print(f'{sys.version_info.major}.{sys.version_info.minor}')")
export TF_PYTHON_VERSION="$PYTHON_VERSION"
export TF_NEED_ROCM=1
export ROCM_PATH="$ROCM_INSTALL_DIR"

# Base tag filter: run AMD GPU tests, exclude NVIDIA-only and OSS-excluded
# targets.
TAGS_FILTER="gpu,requires-gpu-amd,-requires-gpu-nvidia,-no_oss,-oss_excluded,-oss_serial,-no_gpu,-no_rocm"
# Brace expansion generates every NVIDIA compute-capability tag
# (-requires-gpu-sm60, -requires-gpu-sm60-only, ...); echo joins them with
# spaces, which are then turned into commas below.
UNSUPPORTED_GPU_TAGS="$(echo -requires-gpu-sm{60,70,80,86,89,90}{,-only})"
TAGS_FILTER="${TAGS_FILTER},${UNSUPPORTED_GPU_TAGS// /,}"
# Both XLA flags must be passed in a SINGLE --action_env: when bazel sees
# --action_env repeated for the same variable name, the last occurrence
# wins, so the previous pair of --action_env=XLA_FLAGS=... lines silently
# dropped --xla_gpu_force_compilation_parallelism=16.
XLA_FLAGS_VALUE="--xla_gpu_force_compilation_parallelism=16 --xla_gpu_enable_llvm_module_compilation_parallelism=true"

if [ -f /usertools/rocm.bazelrc ]; then
  # Use the bazelrc files in /usertools if available
  if [ ! -d /tf ]; then
    # The bazelrc files in /usertools expect /tf to exist
    mkdir /tf
  fi

  bazel \
    --bazelrc=/usertools/rocm.bazelrc \
    test \
    --config=sigbuild_local_cache \
    --config=rocm \
    --config=xla_cpp \
    --build_tag_filters=${TAGS_FILTER} \
    --test_tag_filters=${TAGS_FILTER} \
    --keep_going \
    --test_output=errors \
    --local_test_jobs=${N_TEST_JOBS} \
    --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
    --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
    --repo_env=HERMETIC_PYTHON_VERSION=3.11 \
    --action_env=XLA_FLAGS="$XLA_FLAGS_VALUE" \
    --run_under=//tools/ci_build/gpu_build:parallel_gpu_execute \
    -- //xla/...
else
  # No /usertools bazelrc: configure the source tree interactively with all
  # defaults, then run the same test set with inline tag filters.
  yes "" | $PYTHON_BIN_PATH configure.py
  bazel \
    test \
    -k \
    --test_tag_filters=-no_oss,-oss_excluded,-oss_serial,gpu,requires-gpu,-no_gpu,-no_rocm --keep_going \
    --build_tag_filters=-no_oss,-oss_excluded,-oss_serial,gpu,requires-gpu,-no_gpu,-no_rocm \
    --config=rocm \
    --test_output=errors \
    --local_test_jobs=${N_TEST_JOBS} \
    --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
    --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
    --repo_env=HERMETIC_PYTHON_VERSION=3.11 \
    --action_env=XLA_FLAGS="$XLA_FLAGS_VALUE" \
    --run_under=//tools/ci_build/gpu_build:parallel_gpu_execute \
    -- //xla/...
fi

0 comments on commit e5cc1dc

Please sign in to comment.