Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .circleci/docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/
# Install base system packages
RUN apt-get clean && apt-get update
RUN apt-get upgrade -y
RUN apt-get install --fix-missing -y python-pip python3-pip git curl libopenblas-dev vim jq \
RUN apt-get install --fix-missing -y python3-pip git curl libopenblas-dev vim jq \
apt-transport-https ca-certificates procps openssl sudo wget libssl-dev libc6-dbg

# Install clang & llvm
Expand Down
Empty file added .kokoro/README.md
Empty file.
49 changes: 49 additions & 0 deletions .kokoro/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#!/bin/bash
# Kokoro CI entry point: clone pytorch, graft the xla checkout into it,
# install system/Python dependencies and nightly wheels, then run the
# torch_xla test suite via run_torch_xla_tests (from .kokoro/common.sh).

set -exuo pipefail

# Must be exported: a plain assignment is shell-local and the apt-get child
# processes below would never see it.
export DEBIAN_FRONTEND=noninteractive

PYTORCH_DIR="${KOKORO_ARTIFACTS_DIR}/github/pytorch"
XLA_DIR="${PYTORCH_DIR}/xla"
git clone --quiet https://github.com/pytorch/pytorch.git "$PYTORCH_DIR"
cp -r "${KOKORO_ARTIFACTS_DIR}/github/xla" "$XLA_DIR"
source "${XLA_DIR}/.kokoro/common.sh"

# torchvision commit pinned by the pytorch checkout.
TORCHVISION_COMMIT="$(cat "$PYTORCH_DIR/.github/ci_commit_pins/vision.txt")"

apt-get clean && apt-get update
apt-get upgrade -y
apt-get install --fix-missing -y python3-pip git curl libopenblas-dev vim jq \
  apt-transport-https ca-certificates procps openssl sudo wget libssl-dev libc6-dbg

# Install bazelisk as `bazel`. -f makes curl fail on HTTP errors instead of
# silently saving an error page as the "binary".
curl -fLO https://github.com/bazelbuild/bazelisk/releases/download/v1.11.0/bazelisk-linux-amd64
mv bazelisk-linux-amd64 /usr/local/bin/bazel
chmod +x /usr/local/bin/bazel

pip install mkl mkl-include setuptools typing_extensions cmake requests
# MKL ships .so.2 libraries but the prebuilt wheels link against .so.1.
sudo ln -s /usr/local/lib/libmkl_intel_lp64.so.2 /usr/local/lib/libmkl_intel_lp64.so.1
sudo ln -s /usr/local/lib/libmkl_intel_thread.so.2 /usr/local/lib/libmkl_intel_thread.so.1
sudo ln -s /usr/local/lib/libmkl_core.so.2 /usr/local/lib/libmkl_core.so.1
# ${LD_LIBRARY_PATH:-} keeps this safe under `set -u` when the var is unset.
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:-}:/usr/local/lib/"

# Add the google-cloud-sdk apt repo and its signing key.
echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" >> /etc/apt/sources.list.d/google-cloud-sdk.list
curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add -

# TODO(yeounoh) fix `GoogleCredentials` import error
apt-get update
apt-get -y install google-cloud-cli
pip install --upgrade google-api-python-client oauth2client google-cloud-storage
pip install lark-parser cloud-tpu-client

# Nightly TPU-VM wheels for torch / torchvision / torch_xla.
pip install --user https://storage.googleapis.com/tpu-pytorch/wheels/tpuvm/torch-nightly-cp38-cp38-linux_x86_64.whl \
  https://storage.googleapis.com/tpu-pytorch/wheels/tpuvm/torchvision-nightly-cp38-cp38-linux_x86_64.whl \
  https://storage.googleapis.com/tpu-pytorch/wheels/tpuvm/torch_xla-nightly-cp38-cp38-linux_x86_64.whl
# Quoted so the [tpuvm] extra is never treated as a glob pattern.
pip install 'torch_xla[tpuvm]' --user

run_torch_xla_tests "$PYTORCH_DIR" "$XLA_DIR"


70 changes: 70 additions & 0 deletions .kokoro/common.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#!/bin/bash
# Shared helper functions for the Kokoro CI scripts; meant to be `source`d.

set -ex

# Must be exported so apt-get invocations in callers run non-interactively;
# a plain assignment would not reach child processes.
export DEBIAN_FRONTEND=noninteractive

function apply_patches() {
  # Apply torch_xla's patch set onto the PyTorch tree.
  # NOTE(review): assumes the current directory is the pytorch checkout root.
  local patch_script="./xla/scripts/apply_patches.sh"
  "$patch_script"
}

function checkout_torch_pin_if_available() {
  # If the xla checkout pins a specific PyTorch commit (xla/.torch_pin),
  # check that commit out; always sync submodules afterwards.
  # Assumes the current directory is the pytorch repo root.
  local commit_file="xla/.torch_pin"
  if [[ -e "$commit_file" ]]; then
    # Quoted so the pin's content is passed as a single argument
    # (no word-splitting/globbing on the command substitution).
    git checkout "$(cat "$commit_file")"
  fi
  git submodule update --init --recursive
}

function install_deps_pytorch_xla() {
  # Install Python-level build/test dependencies for a pytorch/xla build.
  # Arguments:
  #   $1 - xla checkout dir (currently unused; kept for call-site parity)
  # Declared local so it no longer clobbers the global XLA_DIR used by the
  # sourcing script.
  local xla_dir=$1

  # pytorch build deps (sympy), ninja to speed up the build, plus libraries
  # required by some PyTorch test suites — one resolver run instead of five.
  pip install sympy ninja hypothesis cloud-tpu-client absl-py
  pip install --upgrade "numpy>=1.18.5" numba

  # The Ninja CMake generator requires CMake >= 3.13.
  pip install --upgrade "cmake>=3.13"

  sudo apt-get -qq update
}

function build_torch_xla() {
  # Build and install torch_xla (CUDA disabled) from the source tree at $1.
  # Declared local so it no longer clobbers the global XLA_DIR used by the
  # sourcing script.
  local xla_dir=$1
  pushd "$xla_dir"
  XLA_CUDA=0 python setup.py install
  popd
}

function pip_install() {
  # `pip install` with retries: three attempts with --progress-bar off
  # (quieter CI logs), then three plain attempts for old pip versions that
  # lack the flag. Returns 0 on the first success, 1 if all six fail.
  local attempt
  for attempt in 1 2 3; do
    if pip install --progress-bar off "$@"; then
      return 0
    fi
  done
  for attempt in 1 2 3; do
    if pip install "$@"; then
      return 0
    fi
  done
  return 1
}

function install_torchvision() {
  # Build/install torchvision from source at the commit pinned in
  # $TORCHVISION_COMMIT, using the retrying pip_install helper.
  local vision_url="git+https://github.com/pytorch/vision.git@${TORCHVISION_COMMIT}"
  pip_install --user --no-use-pep517 "$vision_url"
}

function run_torch_xla_tests() {
  # Run the torch_xla integration test suite on CPU.
  # Arguments:
  #   $1 - pytorch checkout dir (currently unused here; kept for call parity)
  #   $2 - torch_xla checkout dir
  # Declared local so they no longer clobber the script-level PYTORCH_DIR /
  # XLA_DIR globals; $2 is quoted so paths with spaces don't word-split.
  local pytorch_dir=$1
  local xla_dir=$2

  pushd "$xla_dir"
  echo "Running integration tests..."
  # CPU-only run: CUDA off, coverage off, torch-op and XRT suites skipped;
  # CONTINUE_ON_ERROR=1 keeps going past individual failures so all results
  # are reported.
  # TODO(yeounoh) use custom GCP project for TPU
  XLA_CUDA=0 USE_COVERAGE=0 XLA_SKIP_TORCH_OP_TESTS=1 XLA_SKIP_XRT_TESTS=1 CONTINUE_ON_ERROR=1 ./test/run_tests.sh
  popd
}
6 changes: 6 additions & 0 deletions .kokoro/continuous.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# -*- protobuffer -*-
# proto-file: google3/devtools/kokoro/config/proto/build.proto
# proto-message: BuildConfig

build_file: "xla/.kokoro/build.sh"
timeout_mins: 360
28 changes: 19 additions & 9 deletions test/run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,7 @@ VERBOSITY=2
# Set the `CONTINUE_ON_ERROR` flag to `true` to make the CircleCI tests continue on error.
# This will allow you to see all the failures on your PR, not stopping with the first
# test failure like the default behavior.
#
# This flag should be set to `false`` by default. After testing your changes, make sure
# to set this flag back to `false`` before you merge your PR.
CONTINUE_ON_ERROR=false
CONTINUE_ON_ERROR="${CONTINUE_ON_ERROR:-0}"
if [[ "$CONTINUE_ON_ERROR" == "1" ]]; then
set +e
fi
Expand Down Expand Up @@ -58,7 +55,7 @@ function run_coverage {

function run_test {
echo "Running in PjRt runtime: $@"
if [ -x "$(command -v nvidia-smi)" ]; then
if [ -x "$(command -v nvidia-smi)" ] && [ "$XLA_CUDA" != "0" ]; then
PJRT_DEVICE=GPU run_coverage "$@"
else
# TODO(darisoy): run these tests with multiple CPU devices, this fails due to TF issue.
Expand Down Expand Up @@ -112,7 +109,7 @@ function run_xla_backend_mp {
}

function run_xrt {
if [ -x "$(command -v nvidia-smi)" ]; then
if [ -x "$(command -v nvidia-smi)" ] && [ "$XLA_CUDA" != "0" ]; then
GPU_NUM_DEVICES=2 run_coverage "$@"
else
XRT_DEVICE_MAP="CPU:0;/job:localservice/replica:0/task:0/device:XLA_CPU:0" XRT_WORKERS="localservice:0;grpc://localhost:$(shuf -i 40701-40999 -n 1)" run_coverage "$@"
Expand Down Expand Up @@ -146,7 +143,7 @@ function run_xrt_tests {
run_torchrun "$CDIR/test_allreduce_torchrun.py"
}

function run_op_tests {
function run_torch_op_tests {
run_dynamic "$CDIR/../../test/test_view_ops.py" "$@" -v TestViewOpsXLA
run_test "$CDIR/../../test/test_torch.py" "$@" -v TestTorchDeviceTypeXLA
run_dynamic "$CDIR/../../test/test_torch.py" "$@" -v TestDevicePrecisionXLA
Expand All @@ -160,6 +157,9 @@ function run_op_tests {
run_dynamic "$CDIR/../../test/nn/test_convolution.py" "$@" -v TestConvolutionNNDeviceTypeXLA
run_dynamic "$CDIR/../../test/nn/test_multihead_attention.py" "$@" -v TestMultiheadAttentionNNDeviceTypeXLA
run_dynamic "$CDIR/../../test/test_type_promotion.py" "$@" -v TestTypePromotionXLA
}

function run_xla_op_tests {
run_dynamic "$CDIR/test_operations.py" "$@" --verbosity=$VERBOSITY
run_dynamic "$CDIR/test_dynamic_shapes.py"
run_dynamic "$CDIR/test_dynamic_shape_models.py" "$@" --verbosity=$VERBOSITY
Expand Down Expand Up @@ -193,6 +193,11 @@ function run_op_tests {
run_test "$CDIR/test_torch_distributed_xla_backend.py"
}

# Run the complete op-test suite: the upstream torch op tests followed by
# the xla-specific op tests.
function run_op_tests {
  run_torch_op_tests
  run_xla_op_tests
}

function run_mp_op_tests {
run_test "$CDIR/test_mp_replication.py"
run_test "$CDIR/test_mp_all_to_all.py"
Expand All @@ -213,11 +218,16 @@ function run_mp_op_tests {
}

# Run every test group. Individual groups can be disabled by exporting the
# matching flag before invoking this script:
#   XLA_SKIP_TORCH_OP_TESTS=1, XLA_SKIP_MP_OP_TESTS=1, XLA_SKIP_XRT_TESTS=1
# (The span this replaces contained merged-diff artifacts — both the removed
# and the added lines; this is the post-merge version of the function.)
function run_tests {
  run_xla_op_tests
  if [[ "$XLA_SKIP_TORCH_OP_TESTS" != "1" ]]; then
    run_torch_op_tests
  fi
  if [[ "$XLA_SKIP_MP_OP_TESTS" != "1" ]]; then
    run_mp_op_tests
  fi
  if [[ "$XLA_SKIP_XRT_TESTS" != "1" ]]; then
    run_xrt_tests
  fi
}

if [ "$LOGFILE" != "" ]; then
Expand Down