13 changes: 0 additions & 13 deletions .bazelrc
@@ -79,18 +79,6 @@ build:native_arch_posix --host_copt=-march=native

build:mkl_open_source_only --define=tensorflow_mkldnn_contraction_kernel=1

-build:cuda --repo_env TF_NEED_CUDA=1
-# "sm" means we emit only cubin, which is forward compatible within a GPU generation.
-# "compute" means we emit both cubin and PTX, which is larger but also forward compatible to future GPU generations.
-build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
-build:cuda --@local_config_cuda//:enable_cuda
-build:cuda --define=xla_python_enable_gpu=true
-build:cuda --cxxopt=-DXLA_CUDA=1

-# Coverage with cuda/gcc/nvcc requires manually setting coverage flags.
-coverage:cuda --per_file_copt=third_party/.*,torch_xla/.*@--coverage
-coverage:cuda --linkopt=-lgcov

build:acl --define==build_with_acl=true

build:nonccl --define=no_nccl_support=true
@@ -105,7 +93,6 @@ build:tpu --define=with_tpu_support=true

# Run tests serially with TPU and GPU (only 1 device is available).
test:tpu --local_test_jobs=1
-test:cuda --local_test_jobs=1

#########################################################################
# RBE config options below.
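The build:cuda group deleted above is what the XLA_CUDA=1 environment flag used to select (see the build_util.py hunk later in this diff). A minimal sketch of the old CUDA build invocation, assembled from flags that appear elsewhere in this diff; the exact combination is illustrative, not a documented recipe:

# Sketch of the pre-removal CUDA build flow (flags taken from this diff; illustrative only).
export XLA_CUDA=1                                                   # build_util.py used to map this to --config=cuda
export TF_CUDA_COMPUTE_CAPABILITIES="sm_50,sm_70,sm_75,compute_80"  # same format as .circleci/build.sh below
# sm_XX      -> emit cubin only: forward compatible within one GPU generation
# compute_XX -> emit cubin + PTX: larger, but forward compatible with future generations
python setup.py develop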
1 change: 0 additions & 1 deletion .circleci/build.sh
@@ -50,7 +50,6 @@ source $XLA_DIR/xla_env
export GCLOUD_SERVICE_KEY_FILE="$XLA_DIR/default_credentials.json"
export SILO_NAME='cache-silo-ci-dev-3.8_cuda_12.1' # cache bucket for CI
export BUILD_CPP_TESTS='1'
-export TF_CUDA_COMPUTE_CAPABILITIES="sm_50,sm_70,sm_75,compute_80,$TF_CUDA_COMPUTE_CAPABILITIES"
build_torch_xla $XLA_DIR

popd
5 changes: 0 additions & 5 deletions .github/upstream/Dockerfile
@@ -15,11 +15,6 @@ ARG tpuvm=""
# Disable CUDA for PyTorch
ENV USE_CUDA "0"

-# Enable CUDA for XLA
-ENV XLA_CUDA "${cuda}"
-ENV TF_CUDA_COMPUTE_CAPABILITIES "${cuda_compute}"
-ENV TF_CUDA_PATHS "/usr/local/cuda,/usr/include,/usr"

# CUDA build guidance
ENV NVIDIA_VISIBLE_DEVICES all
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
2 changes: 1 addition & 1 deletion benchmarks/nightly.sh
@@ -99,7 +99,7 @@ if [[ ${IS_FRESH_RUN?} ]]; then
# Query local compute capability. If that fails, assign a sane default.
LOCAL_CAP=compute_$(nvidia-smi --query-gpu=compute_cap --format=csv | \
tail -1 | sed 's/\.//g' | grep -E '^[0-9]{2}$' || echo '80')
-XLA_CUDA=1 TF_CUDA_COMPUTE_CAPABILITIES=${LOCAL_CAP:?} python setup.py develop
+python setup.py develop
cd ../..

# Set up torchbench deps.
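The compute-capability probe kept above can be sanity-checked on its own. A brief sketch of what the pipeline does; the 8.0 sample value is an assumption (an A100-class GPU), not something this diff states:

nvidia-smi --query-gpu=compute_cap --format=csv
# Typical output (assumed):
#   compute_cap
#   8.0
# tail -1 keeps "8.0", sed 's/\.//g' strips the dot to "80", and grep -E '^[0-9]{2}$'
# accepts only exactly two digits; if any step fails, `|| echo '80'` supplies the
# sane default, so LOCAL_CAP becomes compute_80 either way.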
2 changes: 0 additions & 2 deletions build_util.py
@@ -43,8 +43,6 @@ def bazel_options_from_env() -> Iterable[str]:
# Build configuration.
if check_env_flag('BAZEL_VERBOSE'):
bazel_flags.append('-s')
-if check_env_flag('XLA_CUDA'):
-bazel_flags.append('--config=cuda')
if check_env_flag('XLA_CPU_USE_ACL'):
bazel_flags.append('--config=acl')

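With the XLA_CUDA branch gone, bazel_options_from_env no longer emits --config=cuda; of the checks visible in this hunk, BAZEL_VERBOSE and XLA_CPU_USE_ACL still translate into Bazel options. A small sketch under the assumption that python setup.py develop (used in benchmarks/nightly.sh above) is the build entry point:

BAZEL_VERBOSE=1 XLA_CPU_USE_ACL=1 python setup.py develop   # still adds -s and --config=acl via bazel_options_from_env
XLA_CUDA=1 python setup.py develop                          # after this change, no --config=cuda is appended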
7 changes: 1 addition & 6 deletions configuration.yaml
@@ -4,7 +4,7 @@ variables:
PJRT_DEVICE:
description:
- Indicates which device is being used with PJRT. It can be either CPU,
-TPU, or CUDA
+or TPU
type: string
PJRT_SELECT_DEFAULT_DEVICE:
description:
@@ -36,11 +36,6 @@ variables:
- Verbosity level for GRPC, e.g. INFO, ERROR, etc.
type: string
default_value: "ERROR"
-XLA_CUDA:
-description:
-- Build the xla client with CUDA enabled.
-type: bool
-default_value: false
GIT_VERSIONED_XLA_BUILD:
description:
- Creates a versioned build. In particular, appends a git sha to the
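A minimal usage sketch for the two PJRT_DEVICE values now documented above; the test paths assume $_TEST_DIR resolves to test/ and are illustrative only:

PJRT_DEVICE=CPU python test/pjrt/test_ddp.py   # run a PJRT test on CPU
PJRT_DEVICE=TPU python test/test_ops.py        # or on a TPU VM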
4 changes: 0 additions & 4 deletions docker/Dockerfile
@@ -29,10 +29,6 @@ RUN git clone https://github.com/pytorch/pytorch
ENV USE_CUDA "0"
ENV USE_MPI "0"

-# Enable CUDA for XLA
-ENV XLA_CUDA "${cuda}"
-ENV TF_CUDA_COMPUTE_CAPABILITIES "${cuda_compute}"

# Whether to build for TPUVM mode
ENV TPUVM_MODE "${tpuvm}"
ENV BUNDLE_LIBTPU "${tpuvm}"
8 changes: 0 additions & 8 deletions infra/ansible/config/env.yaml
@@ -13,10 +13,6 @@ release_env:
ACCELERATOR: tpu
TPUVM_MODE: 1

-cuda:
-TF_CUDA_COMPUTE_CAPABILITIES: "{{ cuda_compute_capabilities }}"
-XLA_CUDA: 1

# Variables that will be passed to shell environment only for building PyTorch and XLA libs.
build_env:
common:
@@ -41,10 +37,6 @@ build_env:

aarch64:

-cuda:
-TF_CUDA_COMPUTE_CAPABILITIES: "{{ cuda_compute_capabilities }}"
-XLA_CUDA: 1

tpu:
ACCELERATOR: tpu
TPUVM_MODE: 1
23 changes: 0 additions & 23 deletions scripts/build_torch_wheels.sh
@@ -56,28 +56,6 @@ function install_cudnn {
rm -f "$CUDNN_FILE"
}

-function maybe_install_cuda {
-if [ "$XLA_CUDA" == "1" ]; then
-if [ ! -d "/usr/local/cuda" ]; then
-local CUDA_VER="10.2"
-local CUDA_SUBVER="89_440.33.01"
-local CUDA_FILE="cuda_${CUDA_VER}.${CUDA_SUBVER}_linux.run"
-wget "http://developer.download.nvidia.com/compute/cuda/${CUDA_VER}/Prod/local_installers/${CUDA_FILE}"
-sudo sh "${CUDA_FILE}" --silent --toolkit
-rm -f "${CUDA_FILE}"
-fi
-if [ ! -f "/usr/local/cuda/include/cudnn.h" ] && [ ! -f "/usr/include/cudnn.h" ]; then
-install_cudnn
-fi
-export TF_CUDA_PATHS="/usr/local/cuda,/usr/include,/usr"
-maybe_append 'export TF_CUDA_PATHS="/usr/local/cuda,/usr/include,/usr"' ~/.bashrc
-if [ "$TF_CUDA_COMPUTE_CAPABILITIES" == "" ]; then
-export TF_CUDA_COMPUTE_CAPABILITIES="7.0"
-fi
-maybe_append "export TF_CUDA_COMPUTE_CAPABILITIES=\"$TF_CUDA_COMPUTE_CAPABILITIES\"" ~/.bashrc
-fi
-}

function maybe_install_sources {
if [[ $(uname -m) == "aarch64" && ! -d "$HOME/ComputeLibrary" ]]; then
# install arm compute library
@@ -148,7 +126,6 @@ function install_gcc() {

function install_req_packages() {
sudo apt-get -y install python3-pip git curl libopenblas-dev vim apt-transport-https ca-certificates wget procps
-maybe_install_cuda
install_bazel
install_ninja
}
3 changes: 0 additions & 3 deletions setup.py
@@ -19,9 +19,6 @@
# BAZEL_VERBOSE=0
# turn on verbose messages during the bazel build of the xla/xrt client
#
-# XLA_CUDA=0
-# build the xla/xrt client with CUDA enabled
-#
# XLA_CPU_USE_ACL=0
# whether to use ACL
#
3 changes: 0 additions & 3 deletions test/cpp/run_tests.sh
@@ -78,9 +78,6 @@ if [[ "$BAZEL_REMOTE_CACHE" == "1" ]]; then
EXTRA_FLAGS="$EXTRA_FLAGS --remote_default_exec_properties=cache-silo-key=$SILO_NAME"
fi
fi
if [[ "$XLA_CUDA" == "1" ]]; then
EXTRA_FLAGS="$EXTRA_FLAGS --config=cuda"
fi
if [[ "$BAZEL_VERB" == "coverage" ]]; then
EXTRA_FLAGS="$EXTRA_FLAGS --remote_download_outputs=all" # for lcov symlink
fi
8 changes: 4 additions & 4 deletions test/run_tests.sh
@@ -164,8 +164,8 @@ function run_xla_op_tests1 {
run_test "$_TEST_DIR/pjrt/test_runtime_multi_cpu.py"
run_test "$_TEST_DIR/pjrt/test_internal_tpu.py"

-PJRT_DEVICE=CPU XLA_CUDA=0 run_test "$_TEST_DIR/pjrt/test_ddp.py"
-PJRT_DEVICE=CPU XLA_CUDA=0 run_test "$_TEST_DIR/pjrt/test_mesh_service.py"
+PJRT_DEVICE=CPU run_test "$_TEST_DIR/pjrt/test_ddp.py"
+PJRT_DEVICE=CPU run_test "$_TEST_DIR/pjrt/test_mesh_service.py"

run_test "$_TEST_DIR/test_python_ops.py"
run_test "$_TEST_DIR/test_ops.py"
@@ -199,7 +199,7 @@ function run_xla_op_tests2 {
run_test "$_TEST_DIR/eager/test_eager_with_xla_compile.py"
run_test "$_TEST_DIR/eager/test_eager_with_torch_compile.py"

-PJRT_DEVICE=CPU XLA_CUDA=0 run_test "$_TEST_DIR/eager/test_eager_all_reduce_in_place.py"
+PJRT_DEVICE=CPU run_test "$_TEST_DIR/eager/test_eager_all_reduce_in_place.py"

run_test "$_TEST_DIR/eager/test_eager_spmd.py"
run_test "$_TEST_DIR/test_callback.py"
@@ -332,7 +332,7 @@ function run_tests {
elif [[ "$RUN_TORCH_MP_OP_TESTS" == "torch_mp_op" ]]; then
echo "Running torch op tests..."

-PJRT_DEVICE=CPU XLA_CUDA=0 run_mp_op_tests
+PJRT_DEVICE=CPU run_mp_op_tests
else
# Run full tests without sharding, respects XLA_SKIP_*
if [[ "$XLA_SKIP_XLA_OP_TESTS" != "1" ]]; then