Skip to content

Commit

Permalink
[Ray] Optimize Ray CI execution time and stability (#3102)
Browse files (browse the repository at this point in the history)
  • Loading branch information
chaokunyang committed Jun 1, 2022
1 parent 994aec1 commit b685973
Show file tree
Hide file tree
Showing 8 changed files with 358 additions and 312 deletions.
32 changes: 27 additions & 5 deletions .github/workflows/platform-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest]
python-version: [3.8-kubernetes, 3.8-hadoop, 3.8-ray, 3.8-ray-dag, 3.8-vineyard, 3.8-dask]
python-version: [3.8-kubernetes, 3.8-hadoop, 3.8-ray, 3.8-ray-deploy, 3.8-ray-dag, 3.8-vineyard, 3.8-dask]
include:
- { os: ubuntu-latest, python-version: 3.8-kubernetes, no-common-tests: 1,
no-deploy: 1, with-kubernetes: "with Kubernetes" }
Expand All @@ -28,6 +28,8 @@ jobs:
no-deploy: 1, with-vineyard: "with vineyard" }
- { os: ubuntu-latest, python-version: 3.8-ray, no-common-tests: 1,
no-deploy: 1, with-ray: "with ray" }
- { os: ubuntu-latest, python-version: 3.8-ray-deploy, no-common-tests: 1,
no-deploy: 1, with-ray-deploy: "with ray deploy" }
- { os: ubuntu-latest, python-version: 3.8-ray-dag, no-common-tests: 1,
no-deploy: 1, with-ray-dag: "with ray dag" }
- { os: ubuntu-latest, python-version: 3.8-dask, no-common-tests: 1,
Expand All @@ -53,6 +55,7 @@ jobs:
WITH_KUBERNETES: ${{ matrix.with-kubernetes }}
WITH_VINEYARD: ${{ matrix.with-vineyard }}
WITH_RAY: ${{ matrix.with-ray }}
WITH_RAY_DEPLOY: ${{ matrix.with-ray-deploy }}
WITH_RAY_DAG: ${{ matrix.with-ray-dag }}
RUN_DASK: ${{ matrix.run-dask }}
NO_COMMON_TESTS: ${{ matrix.no-common-tests }}
Expand Down Expand Up @@ -93,7 +96,7 @@ jobs:
sudo mv /tmp/etcd-download-test/etcdctl /usr/local/bin/
rm -fr /tmp/etcd-$ETCD_VER-linux-amd64.tar.gz /tmp/etcd-download-test
fi
if [ -n "$WITH_RAY" ] || [ -n "$WITH_RAY_DAG" ]; then
if [ -n "$WITH_RAY" ] || [ -n "$WITH_RAY_DAG" ] || [ -n "$WITH_RAY_DEPLOY" ]; then
pip install ray[default]==1.9.2 "protobuf<4"
pip install "xgboost_ray==0.1.5" "xgboost<1.6.0"
fi
Expand All @@ -110,6 +113,7 @@ jobs:
WITH_CYTHON: ${{ matrix.with-cython }}
WITH_VINEYARD: ${{ matrix.with-vineyard }}
WITH_RAY: ${{ matrix.with-ray }}
WITH_RAY_DEPLOY: ${{ matrix.with-ray-deploy }}
WITH_RAY_DAG: ${{ matrix.with-ray-dag }}
RUN_DASK: ${{ matrix.run-dask }}
NO_COMMON_TESTS: ${{ matrix.no-common-tests }}
Expand Down Expand Up @@ -144,13 +148,31 @@ jobs:
coverage combine build/ && coverage report
fi
if [ -n "$WITH_RAY" ]; then
pytest $PYTEST_CONFIG --durations=0 --timeout=200 -v -s -m ray
pytest $PYTEST_CONFIG --durations=0 --timeout=200 -v -s --ignore=mars/deploy/oscar/ -m ray
coverage report
fi
if [ -n "$WITH_RAY_DEPLOY" ]; then
  # Run each Ray deploy test module in its own pytest invocation, in the
  # listed order, and move the .coverage file it leaves behind into build/
  # under a per-module name.
  for ray_deploy_test in test_ray test_ray_client test_ray_fault_injection test_ray_scheduling; do
    pytest $PYTEST_CONFIG --durations=0 --timeout=200 -v -s "mars/deploy/oscar/tests/${ray_deploy_test}.py" -m ray
    mv .coverage "build/.coverage.${ray_deploy_test}.file"
  done
  # Combine the per-module coverage data collected in build/, then report.
  coverage combine build/ && coverage report
fi
if [ -n "$WITH_RAY_DAG" ]; then
export MARS_CI_BACKEND=ray
pytest $PYTEST_CONFIG --durations=0 --timeout=600 -v -s -m ray_dag
coverage report
pytest $PYTEST_CONFIG --durations=0 --timeout=200 -v -s -m ray_dag
mv .coverage build/.coverage.ray_dag.file
pytest $PYTEST_CONFIG --durations=0 --timeout=200 -v -s mars/deploy/oscar/tests/test_ray_dag.py
mv .coverage build/.coverage.test_ray_dag.file
pytest $PYTEST_CONFIG --durations=0 --timeout=200 -v -s mars/deploy/oscar/tests/test_ray_dag_failover.py
mv .coverage build/.coverage.test_ray_dag_failover.file
coverage combine build/ && coverage report
fi
if [ -n "$RUN_DASK" ]; then
pytest $PYTEST_CONFIG mars/contrib/dask/tests/test_dask.py
Expand Down
1 change: 0 additions & 1 deletion mars/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,6 @@ async def ray_create_mars_cluster(request, check_router_cleaned):
worker_mem = param.get("worker_mem", 256 * 1024**2)
ray_config.update(param.get("config", {}))
client = await new_cluster(
"test_cluster",
supervisor_mem=supervisor_mem,
worker_num=worker_num,
worker_cpu=worker_cpu,
Expand Down

0 comments on commit b685973

Please sign in to comment.