diff --git a/.circleci/common.sh b/.circleci/common.sh
index 04d47ff8cf09..445c4b841550 100755
--- a/.circleci/common.sh
+++ b/.circleci/common.sh
@@ -111,12 +111,16 @@ function run_torch_xla_tests() {
     export GPU_NUM_DEVICES=2
   else
     export XRT_DEVICE_MAP="CPU:0;/job:localservice/replica:0/task:0/device:XLA_CPU:0"
-    XLA_PORT=$(shuf -i 40701-40999 -n 1)
+    export XLA_PORT=$(shuf -i 40701-40999 -n 1)
     export XRT_WORKERS="localservice:0;grpc://localhost:$XLA_PORT"
   fi
   export PYTORCH_TESTING_DEVICE_ONLY_FOR="xla"
 
   pushd $XLA_DIR
+  if [[ ! -z "${XLA_PORT}" ]]; then
+    echo "Starting GRPC server"
+    python torch_xla/core/xrt_run_server.py --port $XLA_PORT --restart
+  fi
   echo "Running Python Tests"
   ./test/run_tests.sh
   # only run test_autocast for cpu and gpu on circleCI.
@@ -141,6 +145,11 @@ function run_torch_xla_tests() {
     fi
   fi
 
+  # clear the XRT server before cpp test since CPP test won't run torch_xla's
+  # __init__.py hence will force a in process server. Note that we can not use
+  # -m here since we are in the XLA dir. Trying to run the torch_xla module
+  # from this dir will result in a `version.py` missing error.
+  python torch_xla/core/xrt_run_server.py --stop
   pushd test/cpp
   echo "Running C++ Tests"
   ./run_tests.sh
diff --git a/test/run_tests.sh b/test/run_tests.sh
index e0c03d67860f..b88d44a2c5d9 100755
--- a/test/run_tests.sh
+++ b/test/run_tests.sh
@@ -101,7 +101,6 @@ function run_all_tests {
   # TODO: enable this test after tf update, currently optimization_barrier does not
   # work on CPU.
   # run_test python3 "$CDIR/test_checkpoint.py"
-  run_pjrt python3 "$CDIR/test_operations.py" "$@" --verbosity=$VERBOSITY
   run_test python3 "$CDIR/test_mp_replication.py"
   run_test python3 "$CDIR/test_mp_all_to_all.py"
   run_test python3 "$CDIR/test_mp_collective_permute.py"