From 093213b8085f23d3458a782eac2ce9ab543abcac Mon Sep 17 00:00:00 2001 From: Naman Lalit Date: Tue, 16 Sep 2025 10:41:23 -0700 Subject: [PATCH 1/6] upload profiling results to S3 --- .github/workflows/vllm-profiling.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/.github/workflows/vllm-profiling.yml b/.github/workflows/vllm-profiling.yml index c9b68e7..3a50571 100644 --- a/.github/workflows/vllm-profiling.yml +++ b/.github/workflows/vllm-profiling.yml @@ -214,6 +214,25 @@ jobs: ) docker exec -t "${container_name}" bash -c "cd vllm-profiling && bash ../.github/scripts/run_vllm_profiling.sh" + - name: Prepare S3 upload metadata + id: prepare_s3_upload + env: + REPOSITORY: ${{ github.repository }} + run: | + set -eux + + UPLOAD_DATE=$(date -u +"%Y-%m-%d") + echo "upload-date=${UPLOAD_DATE}" >> "${GITHUB_OUTPUT}" + echo "s3-prefix=${REPOSITORY}/${UPLOAD_DATE}/${HEAD_SHA}" >> "${GITHUB_OUTPUT}" + + - name: Upload profiling results to S3 + uses: seemethere/upload-artifact-s3@v5 + retention-days: 180 + with: + s3-prefix: ${{ steps.prepare_s3_upload.outputs.s3-prefix }} + path: vllm-profiling/profiling-results + if-no-files-found: warn + - uses: actions/upload-artifact@v4 with: name: profiling-results--${{ env.DEVICE_TYPE }} From 837965c89af653e424f13ed4cc06b3f52a367263 Mon Sep 17 00:00:00 2001 From: Naman Lalit Date: Tue, 16 Sep 2025 10:43:53 -0700 Subject: [PATCH 2/6] fix --- .github/workflows/vllm-profiling.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/vllm-profiling.yml b/.github/workflows/vllm-profiling.yml index 3a50571..9ee2cff 100644 --- a/.github/workflows/vllm-profiling.yml +++ b/.github/workflows/vllm-profiling.yml @@ -227,9 +227,9 @@ jobs: - name: Upload profiling results to S3 uses: seemethere/upload-artifact-s3@v5 - retention-days: 180 with: s3-prefix: ${{ steps.prepare_s3_upload.outputs.s3-prefix }} + retention-days: 180 path: vllm-profiling/profiling-results if-no-files-found: warn From 
2fd1df8921aed74a4f4c1f85c5d10fe63c5c7c01 Mon Sep 17 00:00:00 2001 From: Naman Lalit Date: Tue, 16 Sep 2025 11:37:42 -0700 Subject: [PATCH 3/6] fix repo name --- .github/workflows/vllm-profiling.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/vllm-profiling.yml b/.github/workflows/vllm-profiling.yml index 9ee2cff..a32b92a 100644 --- a/.github/workflows/vllm-profiling.yml +++ b/.github/workflows/vllm-profiling.yml @@ -217,7 +217,7 @@ jobs: - name: Prepare S3 upload metadata id: prepare_s3_upload env: - REPOSITORY: ${{ github.repository }} + REPOSITORY: vllm-project/vllm run: | set -eux From fb65db12559c2d87c514d1116ddb1cca20b1b15b Mon Sep 17 00:00:00 2001 From: Naman Lalit Date: Tue, 16 Sep 2025 12:37:27 -0700 Subject: [PATCH 4/6] add model level hierarchy --- .github/scripts/run_vllm_profiling.sh | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/.github/scripts/run_vllm_profiling.sh b/.github/scripts/run_vllm_profiling.sh index 6203e47..e6f6009 100644 --- a/.github/scripts/run_vllm_profiling.sh +++ b/.github/scripts/run_vllm_profiling.sh @@ -59,6 +59,12 @@ cleanup_server() { run_profiling_tests() { # run profiling tests using JSON configuration local profiling_test_file="$1" + local base_profiler_dir="${VLLM_TORCH_PROFILER_DIR:-}" + + if [[ -z "${base_profiler_dir}" ]]; then + echo "Error: VLLM_TORCH_PROFILER_DIR is not set." + exit 1 + fi if [[ ! -f "$profiling_test_file" ]]; then echo "Error: Profiling test file $profiling_test_file not found!" @@ -92,17 +98,27 @@ run_profiling_tests() { # Clean up any existing processes first kill_gpu_processes + # Create a profiling sub-directory for each test case to isolate the + # generated traces (e.g. 
using the model name hierarchy) + local sanitized_test_name="${TEST_NAME// /_}" + local test_name_directory="${base_profiler_dir}/${sanitized_test_name}" + mkdir -p "${test_name_directory}" + chmod 755 "${test_name_directory}" + + # Override the profiler output directory for this test only + export VLLM_TORCH_PROFILER_DIR="${test_name_directory}" + # Run the profiling test if start_vllm_server "$server_args"; then run_profiling "$client_args" cleanup_server # Debug: Check if profiling files were created - echo "DEBUG: Checking profiling directory: ${VLLM_TORCH_PROFILER_DIR}" - if [ -d "${VLLM_TORCH_PROFILER_DIR}" ]; then + echo "DEBUG: Checking profiling directory: $test_name_directory" + if [ -d "$test_name_directory" ]; then echo "DEBUG: Profiling directory exists for test $TEST_NAME" - ls -la "${VLLM_TORCH_PROFILER_DIR}" || echo "DEBUG: Directory is empty or inaccessible" - find "${VLLM_TORCH_PROFILER_DIR}" -type f 2>/dev/null | head -10 | while read file; do + ls -la "$test_name_directory" || echo "DEBUG: Directory is empty or inaccessible" + find "$test_name_directory" -type f 2>/dev/null | head -10 | while read file; do echo "DEBUG: Found profiling file: ${file}" done else @@ -115,6 +131,9 @@ run_profiling_tests() { continue fi done + + # Ensure the profiler directory is restored after processing all tests + export VLLM_TORCH_PROFILER_DIR="${base_profiler_dir}" } main() { From 8d45ce8ddc4e783879ca51b1adbedf9930dcc71a Mon Sep 17 00:00:00 2001 From: Naman Lalit Date: Tue, 16 Sep 2025 22:48:31 -0700 Subject: [PATCH 5/6] review comments --- .github/scripts/run_vllm_profiling.sh | 1 + .github/scripts/utilities.sh | 43 +++++++++++++++++++++++++++ .github/workflows/vllm-profiling.yml | 2 +- 3 files changed, 45 insertions(+), 1 deletion(-) diff --git a/.github/scripts/run_vllm_profiling.sh b/.github/scripts/run_vllm_profiling.sh index e6f6009..2b30e3f 100644 --- a/.github/scripts/run_vllm_profiling.sh +++ b/.github/scripts/run_vllm_profiling.sh @@ -120,6 +120,7 @@ 
run_profiling_tests() { ls -la "$test_name_directory" || echo "DEBUG: Directory is empty or inaccessible" find "$test_name_directory" -type f 2>/dev/null | head -10 | while read file; do echo "DEBUG: Found profiling file: ${file}" + rename_profiling_file "$file" "vllm" done else echo "DEBUG: Profiling directory does not exist for test $TEST_NAME!" diff --git a/.github/scripts/utilities.sh b/.github/scripts/utilities.sh index a4262e1..d8ca22e 100644 --- a/.github/scripts/utilities.sh +++ b/.github/scripts/utilities.sh @@ -133,3 +133,46 @@ check_hf_token() { echo "HF_TOKEN is set and valid." fi } + +rename_profiling_file() { + # Rename profiling files to standardized format + # $1: file path to rename + # $2: prefix name (e.g., "vllm", "sglang") + local file="$1" + local prefix_name="$2" + + # Process .pt.trace.json.gz files + if [[ "$file" == *.pt.trace.json.gz ]]; then + local dir_path=$(dirname "$file") + local basename_file=$(basename "$file") + + # Determine new filename based on content + local new_filename + if [[ "$basename_file" == *".async_llm."* ]]; then + new_filename="${prefix_name}.async_llm.pt.trace.json.gz" + else + new_filename="${prefix_name}.pt.trace.json.gz" + fi + + local new_filepath="${dir_path}/${new_filename}" + + # Only rename if the new filename is different + if [[ "$file" != "$new_filepath" ]]; then + echo "DEBUG: Renaming ${file} to ${new_filepath}" + mv "$file" "$new_filepath" + if [[ $? 
-eq 0 ]]; then + echo "DEBUG: Successfully renamed to ${new_filepath}" + return 0 + else + echo "DEBUG: Failed to rename ${file}" + return 1 + fi + else + echo "DEBUG: File ${file} already has correct name" + return 0 + fi + else + echo "DEBUG: Skipping non-profiling file: ${file}" + return 0 + fi +} diff --git a/.github/workflows/vllm-profiling.yml b/.github/workflows/vllm-profiling.yml index a32b92a..e366518 100644 --- a/.github/workflows/vllm-profiling.yml +++ b/.github/workflows/vllm-profiling.yml @@ -223,7 +223,7 @@ jobs: UPLOAD_DATE=$(date -u +"%Y-%m-%d") echo "upload-date=${UPLOAD_DATE}" >> "${GITHUB_OUTPUT}" - echo "s3-prefix=${REPOSITORY}/${UPLOAD_DATE}/${HEAD_SHA}" >> "${GITHUB_OUTPUT}" + echo "s3-prefix=${UPLOAD_DATE}/${REPOSITORY}/${HEAD_SHA}/${GITHUB_RUN_ID}/${GITHUB_JOB}" >> "${GITHUB_OUTPUT}" - name: Upload profiling results to S3 uses: seemethere/upload-artifact-s3@v5 From f51ee319f120403e96733acbfe90fa0a97c8bd63 Mon Sep 17 00:00:00 2001 From: Naman Lalit Date: Tue, 16 Sep 2025 23:09:29 -0700 Subject: [PATCH 6/6] rename test name --- vllm-profiling/cuda/profiling-tests.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm-profiling/cuda/profiling-tests.json b/vllm-profiling/cuda/profiling-tests.json index 1d44f31..f54d13f 100644 --- a/vllm-profiling/cuda/profiling-tests.json +++ b/vllm-profiling/cuda/profiling-tests.json @@ -1,6 +1,6 @@ [ { - "test_name": "profiling_opt_125m_tp1_random", + "test_name": "facebook_opt_125m_tp1_random", "server_parameters": { "model": "facebook/opt-125m", "swap_space": 16,