Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into py-gcs-client-error-refactor
Browse files Browse the repository at this point in the history

Signed-off-by: Ruiyang Wang <rywang014@gmail.com>
  • Loading branch information
rynewang committed Jun 21, 2024
2 parents 3f6c0ff + 9d66167 commit 6141b7b
Show file tree
Hide file tree
Showing 572 changed files with 13,438 additions and 7,445 deletions.
3 changes: 0 additions & 3 deletions .bazelrc
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,6 @@ build:ci --progress_report_interval=100
build:ci --show_progress_rate_limit=15
build:ci --ui_actions_shown=1024
build:ci --show_timestamps
build:ci-travis --disk_cache=~/ray-bazel-cache
build:ci-github --experimental_repository_cache_hardlinks # GitHub Actions has low disk space, so prefer hardlinks there.
build:ci-github --disk_cache=~/ray-bazel-cache
test:ci --flaky_test_attempts=3
# Disable test result caching because py_test under Bazel can import from outside of sandbox, but Bazel only looks at
# declared dependencies to determine if a result should be cached. More details at:
Expand Down
12 changes: 6 additions & 6 deletions .buildkite/_forge.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,15 @@ steps:
- "3.10"
- "3.11"
cuda:
- "11.5.2"
- "11.6.2"
- "11.7.1"
- "11.8.0"
- "12.1.1"
- "11.7.1-cudnn8"
- "11.8.0-cudnn8"
- "12.1.1-cudnn8"
- "12.3.2-cudnn9"
env:
PYTHON_VERSION: "{{matrix.python}}"
CUDA_VERSION: "{{matrix.cuda}}"


- name: raycpubase
label: "wanda: ray.py{{matrix}}.cpu.base"
tags:
Expand Down Expand Up @@ -58,7 +58,7 @@ steps:
- "3.10"
- "3.11"
cuda:
- "11.8.0"
- "11.8.0-cudnn8"
env:
PYTHON_VERSION: "{{matrix.python}}"
CUDA_VERSION: "{{matrix.cuda}}"
Expand Down
10 changes: 5 additions & 5 deletions .buildkite/build.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,10 @@ steps:
instance_type: medium
commands:
- bazel run //ci/ray_ci:build_in_docker -- docker --python-version {{matrix}}
--platform cu11.5.2 --platform cu11.6.2 --platform cu11.7.1
--platform cu11.8.0 --platform cu12.1.1 --platform cpu
--image-type ray
--upload
--platform cu11.7.1-cudnn8 --platform cu11.8.0-cudnn8
--platform cu12.1.1-cudnn8 --platform cu12.3.2-cudnn9
--platform cpu
--image-type ray --upload
depends_on:
- manylinux
- forge
Expand All @@ -79,7 +79,7 @@ steps:
instance_type: medium
commands:
- bazel run //ci/ray_ci:build_in_docker -- docker --python-version {{matrix}}
--platform cu11.8.0 --platform cpu --image-type ray-ml
--platform cu11.8.0-cudnn8 --platform cpu --image-type ray-ml
--upload
depends_on:
- manylinux
Expand Down
18 changes: 18 additions & 0 deletions .buildkite/core.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,7 @@ steps:
--parallelism-per-worker 2

- label: ":ray: core: flaky tests"
key: core_flaky_tests
tags:
- python
- skip-on-premerge
Expand All @@ -286,6 +287,23 @@ steps:
commands:
- bazel run //ci/ray_ci:test_in_docker -- //... core
--run-flaky-tests --build-type clang
--except-tags multi_gpu

- label: ":ray: core: flaky gpu tests"
key: core_flaky_gpu_tests
tags:
- gpu
- python
- skip-on-premerge
instance_type: gpu-large
soft_fail: true
commands:
- bazel run //ci/ray_ci:test_in_docker -- //... core
--run-flaky-tests --build-type clang
--parallelism-per-worker 2 --gpus 2
--build-name coregpubuild
--only-tags multi_gpu
depends_on: coregpubuild

- label: ":ray: core: cpp worker tests"
tags: core_cpp
Expand Down
2 changes: 2 additions & 0 deletions .buildkite/data.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ steps:
depends_on: data16build

- label: ":database: data: flaky tests"
key: data_flaky_tests
tags:
- python
- data
Expand All @@ -163,6 +164,7 @@ steps:
depends_on: data16build

- label: ":database: data: flaky gpu tests"
key: data_flaky_gpu_tests
tags:
- python
- data
Expand Down
18 changes: 8 additions & 10 deletions .buildkite/linux_aarch64.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,10 @@ steps:
- "3.10"
- "3.11"
cuda:
- "11.5.2"
- "11.6.2"
- "11.7.1"
- "11.8.0"
- "12.1.1"
- "11.7.1-cudnn8"
- "11.8.0-cudnn8"
- "12.1.1-cudnn8"
- "12.3.2-cudnn9"
instance_type: builder-arm64
env:
PYTHON_VERSION: "{{matrix.python}}"
Expand Down Expand Up @@ -78,11 +77,10 @@ steps:
instance_type: medium-arm64
commands:
- bazel run //ci/ray_ci:build_in_docker -- docker --python-version {{matrix}}
--platform cu11.5.2 --platform cu11.6.2 --platform cu11.7.1
--platform cu11.8.0 --platform cu12.1.1 --platform cpu
--image-type ray
--architecture aarch64
--upload
--platform cu11.7.1-cudnn8 --platform cu11.8.0-cudnn8
--platform cu12.1.1-cudnn8 --platform cu12.3.2-cudnn9
--platform cpu
--image-type ray --architecture aarch64 --upload
depends_on:
- manylinux-aarch64
- forge-aarch64
Expand Down
1 change: 1 addition & 0 deletions .buildkite/macos/macos.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ steps:
- RAY_INSTALL_JAVA=1 ./ci/ray_ci/macos/macos_ci.sh run_ray_cpp_and_java

- label: ":ray: core: :mac: flaky tests"
key: macos_flaky_tests
if: build.env("BUILDKITE_PIPELINE_ID") != "0189e759-8c96-4302-b6b5-b4274406bf89"
tags:
- core_cpp
Expand Down
2 changes: 2 additions & 0 deletions .buildkite/ml.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ steps:
depends_on: [ "mllightning2gpubuild", "forge" ]

- label: ":train: ml: flaky tests"
key: ml_flaky_tests
tags:
- train
- skip-on-premerge
Expand Down Expand Up @@ -252,6 +253,7 @@ steps:
soft_fail: true

- label: ":train: ml: train gpu flaky tests"
key: ml_flaky_gpu_tests
tags:
- train
- skip-on-premerge
Expand Down
2 changes: 1 addition & 1 deletion .buildkite/release-automation/forge.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@ steps:
- name: forge
wanda: .buildkite/release-automation/forge_x86_64.wanda.yaml

- name: forge_arm64
- name: forge-arm64
wanda: .buildkite/release-automation/forge_arm64.wanda.yaml
instance_type: builder-arm64
3 changes: 1 addition & 2 deletions .buildkite/release-automation/forge_arm64.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ apt-get install -y curl zip clang-12
ln -s /usr/bin/clang-12 /usr/bin/clang

# Install miniconda
curl -sfL https://repo.anaconda.com/miniconda/Miniconda3-py38_23.1.0-1-Linux-aarch64.sh > /tmp/miniconda.sh
curl -sfL https://repo.anaconda.com/miniconda/Miniconda3-py311_24.4.0-0-Linux-aarch64.sh > /tmp/miniconda.sh
bash /tmp/miniconda.sh -b -u -p /usr/local/bin/miniconda3
rm /tmp/miniconda.sh
/usr/local/bin/miniconda3/bin/conda init bash
Expand All @@ -31,6 +31,5 @@ EOF

ENV CC=clang
ENV CXX=clang++-12
ENV USE_BAZEL_VERSION=5.4.1

CMD ["echo", "ray release-automation forge"]
3 changes: 1 addition & 2 deletions .buildkite/release-automation/forge_x86_64.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ addgroup --gid 993 docker
ln -s /usr/bin/clang-12 /usr/bin/clang

# Install miniconda
curl -sfL https://repo.anaconda.com/miniconda/Miniconda3-py38_23.1.0-1-Linux-x86_64.sh > /tmp/miniconda.sh
curl -sfL https://repo.anaconda.com/miniconda/Miniconda3-py311_24.4.0-0-Linux-x86_64.sh > /tmp/miniconda.sh
bash /tmp/miniconda.sh -b -u -p /usr/local/bin/miniconda3
rm /tmp/miniconda.sh
/usr/local/bin/miniconda3/bin/conda init bash
Expand All @@ -40,6 +40,5 @@ EOF
USER forge
ENV CC=clang
ENV CXX=clang++-12
ENV USE_BAZEL_VERSION=5.4.1

CMD ["echo", "ray release-automation forge"]
6 changes: 0 additions & 6 deletions .buildkite/release-automation/pre_release.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,18 @@ depends_on:
steps:
- label: "Check release blockers"
key: check-release-blockers
job_env: forge
instance_type: small_branch
commands:
- bazel run //ci/ray_ci/automation:weekly_green_metric -- --check

- label: "Check commit hash"
key: check-commit-hash
job_env: forge
depends_on: check-release-blockers
allow_dependency_failure: true
commands:
- bash .buildkite/release-automation/check-commit-hash.sh

- label: "Build update version binary"
key: build-update-version-zip
instance_type: default
job_env: forge
commands:
- bazel build --build_python_zip --incompatible_use_python_toolchains=false --python_path=python //ci/ray_ci/automation:update_version
- cp bazel-bin/ci/ray_ci/automation/update_version.zip /artifact-mount/
Expand Down Expand Up @@ -118,7 +113,6 @@ steps:
if: build.branch !~ /^releases\// && build.env("RAYCI_WEEKLY_RELEASE_NIGHTLY") == "1"
depends_on: trigger-postmerge-nightly
allow_dependency_failure: true
job_env: forge
commands:
- bazel run //ci/ray_ci/automation:check_nightly_ray_commit -- --ray_type={{matrix}} --expected_commit="${BUILDKITE_COMMIT}"
matrix:
Expand Down
20 changes: 12 additions & 8 deletions .buildkite/release-automation/verify-linux-wheels.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,20 @@ set -euo pipefail
set -x

export PYTHON_VERSION="${PYTHON_VERSION}"
if [[ -z "$RAY_VERSION" ]]; then
echo "RAY_VERSION environment variable is not set"
if [[ -z "${RAY_VERSION}" ]]; then
echo "RAY_VERSION is not set"
exit 1
fi
if [[ -z "$BUILDKITE_COMMIT" ]]; then
echo "BUILDKITE_COMMIT environment variable is not set"
exit 1

if [[ "${RAY_COMMIT:-}" == "" ]]; then
if [[ "${BUILDKITE_COMMIT:-}" == "" ]]; then
echo "neither BUILDKITE_COMMIT nor RAY_COMMIT is set"
exit 1
fi
RAY_COMMIT="${BUILDKITE_COMMIT:-}"
fi

export PATH="/usr/local/bin/miniconda3/bin:$PATH"
export PATH="/usr/local/bin/miniconda3/bin:${PATH}"
source "/usr/local/bin/miniconda3/etc/profile.d/conda.sh"

conda create -n rayio python="${PYTHON_VERSION}" -y
Expand All @@ -24,11 +28,11 @@ conda activate rayio
pip install \
--index-url https://test.pypi.org/simple/ \
--extra-index-url https://pypi.org/simple \
"ray[cpp]==$RAY_VERSION"
"ray[cpp]==${RAY_VERSION}"

(
cd release/util
python sanity_check.py --ray_version="$RAY_VERSION" --ray_commit="$BUILDKITE_COMMIT"
python sanity_check.py --ray_version="${RAY_VERSION}" --ray_commit="${RAY_COMMIT}"
)

(
Expand Down
24 changes: 17 additions & 7 deletions .buildkite/release-automation/verify-macos-wheels.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ if [[ $# -ne 1 ]]; then
fi

mac_architecture=$1 # First argument is the architecture of the machine, e.g. x86_64, arm64
export USE_BAZEL_VERSION="${USE_BAZEL_VERSION:-5.4.1}"
export USE_BAZEL_VERSION="${USE_BAZEL_VERSION:-6.5.0}"

install_bazel() {
if [[ "${mac_architecture}" = "arm64" ]]; then
Expand All @@ -35,7 +35,7 @@ install_bazel() {
install_miniconda() {
# Install miniconda3 based on the architecture used
mkdir -p "$TMP_DIR/miniconda3"
curl https://repo.anaconda.com/miniconda/Miniconda3-py38_23.1.0-1-MacOSX-"$mac_architecture".sh -o "$TMP_DIR/miniconda3/miniconda.sh"
curl https://repo.anaconda.com/miniconda/Miniconda3-py311_24.4.0-0-MacOSX-"$mac_architecture".sh -o "$TMP_DIR/miniconda3/miniconda.sh"
bash "$TMP_DIR/miniconda3/miniconda.sh" -b -u -p "$TMP_DIR/miniconda3"
rm -rf "$TMP_DIR/miniconda3/miniconda.sh"

Expand All @@ -45,16 +45,26 @@ install_miniconda() {
}

run_sanity_check() {
local python_version="$1"
conda create -n "rayio_${python_version}" python="${python_version}" -y
conda activate "rayio_${python_version}"
local PYTHON_VERSION="$1"

if [[ "${RAY_COMMIT:-}" == "" ]]; then
if [[ "${BUILDKITE_COMMIT:-}" == "" ]]; then
echo "neither BUILDKITE_COMMIT nor RAY_COMMIT is set"
exit 1
fi
RAY_COMMIT="${BUILDKITE_COMMIT:-}"
fi

conda create -n "rayio_${PYTHON_VERSION}" python="${PYTHON_VERSION}" -y
conda activate "rayio_${PYTHON_VERSION}"

pip install \
--index-url https://test.pypi.org/simple/ \
--extra-index-url https://pypi.org/simple \
"ray[cpp]==$RAY_VERSION"
"ray[cpp]==${RAY_VERSION}"
(
cd release/util
python sanity_check.py --ray_version="$RAY_VERSION" --ray_commit="$BUILDKITE_COMMIT"
python sanity_check.py --ray_version="${RAY_VERSION}" --ray_commit="${RAY_COMMIT}"
bash sanity_check_cpp.sh
)
conda deactivate
Expand Down
2 changes: 2 additions & 0 deletions .buildkite/release-automation/wheels.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ steps:
commands:
- export PYTHON_VERSION={{matrix}}
- export RAY_VERSION="$RAY_VERSION"
- export RAY_COMMIT="$RAY_COMMIT"
- bash -i .buildkite/release-automation/verify-linux-wheels.sh
matrix:
- "3.9"
Expand All @@ -50,6 +51,7 @@ steps:
commands:
- export PYTHON_VERSION={{matrix}}
- export RAY_VERSION="$RAY_VERSION"
- export RAY_COMMIT="$RAY_COMMIT"
- bash -i .buildkite/release-automation/verify-linux-wheels.sh
matrix:
- "3.9"
Expand Down
8 changes: 4 additions & 4 deletions .buildkite/release/build.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ steps:
- label: ":tapioca: build: anyscale py{{matrix.python}}-{{matrix.platform}} docker"
tags: skip-on-premerge
key: anyscalebuild
instance_type: release
instance_type: release-medium
commands:
- bazel run //ci/ray_ci:build_in_docker -- anyscale
--python-version {{matrix.python}} --platform {{matrix.platform}}
Expand All @@ -21,16 +21,16 @@ steps:
- "3.9"
- "3.11"
platform:
- cu12.1.1
- cu12.3.2-cudnn9
- cpu

- label: ":tapioca: build: anyscale-ml py{{matrix}}-cu11.8.0 docker"
tags: skip-on-premerge
key: anyscalemlbuild
instance_type: release
instance_type: release-medium
commands:
- bazel run //ci/ray_ci:build_in_docker -- anyscale --python-version {{matrix}}
--platform cu11.8.0 --image-type ray-ml --upload
--platform cu11.8.0-cudnn8 --image-type ray-ml --upload
depends_on:
- manylinux
- forge
Expand Down
1 change: 1 addition & 0 deletions .buildkite/release/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ builder_queues:
builder: builder_queue_branch
runner_queues:
release: release_queue_small
release-medium: release_queue_medium
buildkite_dirs:
- .buildkite/release
env:
Expand Down
Loading

0 comments on commit 6141b7b

Please sign in to comment.