From b0ba95b6128feb9c45a3a56545d39d02152183ea Mon Sep 17 00:00:00 2001
From: ptaylor
Date: Tue, 3 Feb 2026 15:53:25 -0800
Subject: [PATCH] make test-rapids-build-times.yml smaller, do cached tests in same job

---
 .github/workflows/test-rapids-build-times.yml | 171 +++++++++++++-----
 1 file changed, 128 insertions(+), 43 deletions(-)

diff --git a/.github/workflows/test-rapids-build-times.yml b/.github/workflows/test-rapids-build-times.yml
index d9ef14a8..1a3cc12c 100644
--- a/.github/workflows/test-rapids-build-times.yml
+++ b/.github/workflows/test-rapids-build-times.yml
@@ -1,9 +1,5 @@
 name: Test RAPIDS build times
 
-concurrency:
-  group: test-rapids-build-times-from-${{ github.ref_name }}
-  cancel-in-progress: true
-
 on:
   workflow_dispatch:
     inputs:
@@ -11,54 +7,143 @@ on:
         type: string
         required: false
         default: main
+      node_type:
+        type: string
+        required: false
+        default: cpu32
 
 jobs:
-  uncached-builds:
+  check-event:  # gate: ok=true only for workflow_dispatch runs in rapidsai/devcontainers
+    name: Check GH Event
+    runs-on: ubuntu-latest
+    outputs:
+      ok: ${{ steps.check_gh_event.outputs.ok }}
+    steps:
+      - id: check_gh_event
+        name: Check GH Event
+        shell: bash
+        run: |
+          [[ '${{ github.event_name }}' == 'workflow_dispatch' && '${{ github.repository }}' == 'rapidsai/devcontainers' ]] \
+          && echo "ok=true" | tee -a "$GITHUB_OUTPUT" \
+          || echo "ok=false" | tee -a "$GITHUB_OUTPUT";
+
+  test-rapids-build-times:
     name: ${{ matrix.name }}
+    if: needs.check-event.outputs.ok == 'true'
+    needs: check-event
     secrets: inherit
-    uses: ./.github/workflows/build-all-rapids-repos.yml
-    with:
-      branch: ${{ inputs.branch }}
-      env: ${{ matrix.env }}
-      matrix: '{ "include": [{ "libs": "" }] }'
-      node_type: cpu32
+    uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@main
+    permissions:
+      actions: read
+      packages: read
+      id-token: write
+      contents: read
+      pull-requests: read
     strategy:
       fail-fast: false
       matrix:
         include:
-          - name: 'no sccache'
-            env: |
-              PARALLEL_LEVEL=
-              DISABLE_SCCACHE=1
-              SCCACHE_NO_CACHE=1
-              SCCACHE_NO_DIST_COMPILE=1
-              MAX_DEVICE_OBJ_TO_COMPILE_IN_PARALLEL=1
-          - name: 'recache, local'
+          - name: 'build cluster: no'
             env: |
-              SCCACHE_RECACHE=1
               SCCACHE_NO_DIST_COMPILE=1
-              MAX_DEVICE_OBJ_TO_COMPILE_IN_PARALLEL=1
-          - name: 'recache, remote'
-            env: |
-              SCCACHE_RECACHE=1
+              MAX_DEVICE_OBJ_TO_COMPILE_IN_PARALLEL=2
+          - name: 'build cluster: yes'
+            env: ""
 
-  cached-builds:
-    name: ${{ matrix.name }}
-    needs: [uncached-builds]
-    secrets: inherit
-    uses: ./.github/workflows/build-all-rapids-repos.yml
     with:
-      branch: ${{ inputs.branch }}
-      env: ${{ matrix.env }}
-      matrix: '{ "include": [{ "libs": "" }] }'
-      node_type: cpu32
-    strategy:
-      fail-fast: false
-      matrix:
-        include:
-          - name: 'preprocessor cache'
-            env: |
-              SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE=1
-          - name: 'no preprocessor cache'
-            env: |
-              SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE=0
+      arch: '["amd64", "arm64"]'
+      cuda: '["12.9", "13.1"]'
+      node_type: ${{ inputs.node_type }}
+      rapids-aux-secret-1: GIST_REPO_READ_ORG_GITHUB_TOKEN
+      timeout-minutes: 720
+      # 1. Prohibit sccache from shutting down automatically
+      # 2. Infinitely retry transient errors
+      # 3. Enable debug logging to track cache misses
+      # 4. Never fallback to locally compiling
+      # 5. Use RAPIDS_AUX_SECRET_1 as the sccache-dist auth token
+      env: |
+        CONDA_ENV_CREATE_QUIET=1
+        PARALLEL_LEVEL=0
+        SCCACHE_IDLE_TIMEOUT=0
+        SCCACHE_SERVER_LOG=sccache=debug
+        SCCACHE_DIST_MAX_RETRIES=inf
+        SCCACHE_DIST_FALLBACK_TO_LOCAL_COMPILE=false
+        SCCACHE_DIST_AUTH_TOKEN_VAR=RAPIDS_AUX_SECRET_1
+        ${{ matrix.env }}
+      build_command: |
+        function begin_group() {
+          local blue="34"
+          echo -e "::group::\e[${blue}m${1:-}\e[0m"
+        }
+
+        function end_group() {
+          local name="${1:-}"
+          local build_status="${2:-0}"
+          local red="31"
+
+          echo "::endgroup::"
+          if [ "$build_status" -ne 0 ]; then
+            echo -e "::error::\e[${red}m ${name} - Failed (⬆️ click above for full log ⬆️)\e[0m"
+          fi
+        }
+
+        function run_command() {
+          local -; # keep the shell options set below scoped to this function
+          set -euo pipefail;
+
+          local group="${1:-}";
+          shift;
+          local command=("$@");
+          local exit_code="0";
+
+          begin_group "$group";
+
+          echo "Working directory: $(pwd)";
+          echo "Running command: ${command[*]}";
+          "${command[@]}" || exit_code=$?;
+
+          end_group "$group" "$exit_code"
+
+          return "$exit_code"
+        }
+
+        # Clone all the repos
+        run_command "Clone RAPIDS repositories" \
+          clone-all -j$(nproc) -b ${{ inputs.branch }} -v -q --clone-upstream --depth 1 --single-branch --shallow-submodules --no-update-env;
+
+        run_command "Create RAPIDS python environment" \
+          rapids-post-start-command;
+
+        # Configure all the C++ libs
+        run_command "Configure C++ libraries" \
+          time configure-all \
+            -j${PARALLEL_LEVEL} \
+            -GNinja \
+            -Wno-dev \
+            -DBUILD_TESTS=ON \
+            -DBUILD_BENCHMARKS=ON \
+            -DBUILD_PRIMS_BENCH=ON \
+            -DBUILD_SHARED_LIBS=ON \
+            -DRAFT_COMPILE_LIBRARY=ON \
+            -DBUILD_CUGRAPH_MG_TESTS=ON
+
+        for ENVVAR in "SCCACHE_RECACHE=1" \
+                      "SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE=0" \
+                      "SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE=1" ; do
+
+          # Build all the C++ libs; `env` applies ${ENVVAR}, since an expanded VAR=value word is run as a command, not an assignment
+          run_command "Build C++ libraries (${ENVVAR})" time env "${ENVVAR}" build-all-cpp -j${PARALLEL_LEVEL}
+
+          # Print cache and dist stats
+          run_command "sccache stats (${ENVVAR})" sccache --show-adv-stats
+
+          # Print build times
+          run_command "Build times (${ENVVAR})" bash -c "\
+            find /var/log/devcontainer-utils/ -type f -name 'build-*-time.log' -print0 \
+            | xargs -0 -n1 grep -H real | sed 's/real\t/ /g' || :" # Nonfatal if not found
+
+          # Clean
+          sccache -z >/dev/null 2>&1
+          find /var/log/devcontainer-utils/ -type f -name 'build-*-time.log' -delete
+          find ~/ -maxdepth 4 -type l -path '*/cpp/build/latest' -print0 | xargs -P$(nproc) -0 -n1 ninja clean -C
+        done