Skip to content

Commit adef44d

Browse files
authored
[HPU][BENCHMARK-CI] Fix Gaudi docker image check by using similar code as the rest (#99)
* Fix /tmp/vllm-gaudi exists issue Signed-off-by: Chendi Xue <chendi.xue@intel.com> * remove special handling for HPU docker image tag Same fix is done in vllm-ci-infra as well Signed-off-by: Chendi Xue <chendi.xue@intel.com> --------- Signed-off-by: Chendi Xue <chendi.xue@intel.com>
1 parent b437ff8 commit adef44d

File tree

1 file changed

+19
-71
lines changed

1 file changed

+19
-71
lines changed

.github/workflows/vllm-benchmark.yml

Lines changed: 19 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ jobs:
128128
elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
129129
DEVICE_TYPE=$(rocminfo | grep "Marketing Name" | tail -n1 | awk -F':' '{print $2}' | xargs)
130130
elif [[ "${DEVICE_NAME}" == "hpu" ]]; then
131-
DEVICE_TYPE=$(hl-smi -q | grep "Product Name" | head -n 1 | awk -F ':' '{print $2}' | sed 's/^ *//')
131+
DEVICE_TYPE="Intel Gaudi3 "$(hl-smi -q | grep "Product Name" | head -n 1 | awk -F ':' '{print $2}' | sed 's/^ *//')
132132
elif [[ "${DEVICE_NAME}" == "cpu" ]]; then
133133
DEVICE_TYPE=$(lscpu | grep 'Model name' | cut -f 2 -d ":" | awk '{$1=$1}1' | cut -f 2 -d " ")
134134
fi
@@ -202,77 +202,25 @@ jobs:
202202
203203
if [[ -z "${HEAD_SHA}" ]]; then
204204
pushd vllm
205-
206-
# Special handling for HPU: use vllm-gaudi compatible commit
207-
#
208-
# Problem: VLLM_STABLE_COMMIT might change between when CI builds the image
209-
# and when this benchmark runs (every 12 hours), causing image tag mismatches.
210-
#
211-
# Solution: Query git history of VLLM_STABLE_COMMIT file to find the most recent
212-
# compatible vLLM commit that has an actual Docker image built by CI.
213-
if [[ "${DEVICE_NAME}" == "hpu" ]]; then
214-
echo "HPU device detected - finding compatible vLLM commit from vllm-gaudi history"
215-
216-
# Clone only the last-good-commit-for-vllm-gaudi branch (lightweight, single file)
217-
git clone --depth 50 --single-branch --branch vllm/last-good-commit-for-vllm-gaudi \
218-
https://github.com/vllm-project/vllm-gaudi.git /tmp/vllm-gaudi
219-
pushd /tmp/vllm-gaudi
220-
221-
# Get the last 30 commits - each commit represents a VLLM_STABLE_COMMIT update
222-
# This gives us a history of compatible vLLM versions
223-
CANDIDATE_COMMITS=$(git log -30 --pretty=format:"%H")
224-
popd
225-
226-
# Try each candidate commit (newest to oldest) until we find an existing image
227-
FOUND_IMAGE=0
228-
for VLLM_GAUDI_COMMIT in ${CANDIDATE_COMMITS}; do
229-
# Get the vLLM commit from this version of the branch
230-
CANDIDATE_VLLM_COMMIT=$(curl -s "https://raw.githubusercontent.com/vllm-project/vllm-gaudi/${VLLM_GAUDI_COMMIT}/VLLM_STABLE_COMMIT" | tr -d '\n')
231-
232-
if [[ -z "${CANDIDATE_VLLM_COMMIT}" ]]; then
233-
continue
234-
fi
235-
236-
DOCKER_IMAGE="${DOCKER_IMAGE_PREFIX}:${CANDIDATE_VLLM_COMMIT}${DOCKER_IMAGE_SUFFIX}"
237-
echo "Checking if image exists: ${DOCKER_IMAGE}"
238-
239-
if docker manifest inspect "${DOCKER_IMAGE}" > /dev/null 2>&1; then
240-
echo "Found existing HPU image for vLLM commit: ${CANDIDATE_VLLM_COMMIT}"
241-
HEAD_SHA="${CANDIDATE_VLLM_COMMIT}"
242-
FOUND_IMAGE=1
243-
break
244-
fi
245-
done
246-
247-
if [[ ${FOUND_IMAGE} == 0 ]]; then
248-
echo "ERROR: No HPU Docker image found in the last 20 versions of VLLM_STABLE_COMMIT"
249-
echo "This likely means ci-infra hasn't successfully built any HPU images yet"
250-
exit 1
205+
# Looking back the latest 100 commits is enough
206+
for i in {0..99}
207+
do
208+
# Check if the image is there; if it isn't, then check an older one
209+
# because the commit is too recent
210+
HEAD_SHA=$(git rev-parse --verify HEAD~${i})
211+
DOCKER_IMAGE="${DOCKER_IMAGE_PREFIX}:${HEAD_SHA}${DOCKER_IMAGE_SUFFIX}"
212+
# No Docker image available yet because the commit is too recent
213+
if ! docker manifest inspect "${DOCKER_IMAGE}"; then
214+
continue
251215
fi
252-
else
253-
# For non-HPU devices: Looking back the latest 100 commits
254-
for i in {0..99}
255-
do
256-
# Check if the image is there, if it doesn't then check an older one
257-
# because the commit is too recent
258-
HEAD_SHA=$(git rev-parse --verify HEAD~${i})
259-
DOCKER_IMAGE="${DOCKER_IMAGE_PREFIX}:${HEAD_SHA}${DOCKER_IMAGE_SUFFIX}"
260-
261-
# No Docker image available yet because the commit is too recent
262-
if ! docker manifest inspect "${DOCKER_IMAGE}"; then
263-
continue
264-
fi
265-
266-
NOT_EXIST=0
267-
S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/${DEVICE_TYPE// /_}/benchmark_results_${MODELS//\//_}.json"
268-
aws s3api head-object --bucket ossci-benchmarks --key ${S3_PATH} || NOT_EXIST=1
269-
270-
if [[ ${NOT_EXIST} == "1" ]]; then
271-
echo "Found a vLLM commit ${HEAD_SHA} that hasn't been benchmarked yet"
272-
break
273-
fi
274-
done
275-
fi
216+
NOT_EXIST=0
217+
S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/${DEVICE_TYPE// /_}/benchmark_results_${MODELS//\//_}.json"
218+
aws s3api head-object --bucket ossci-benchmarks --key ${S3_PATH} || NOT_EXIST=1
219+
if [[ ${NOT_EXIST} == "1" ]]; then
220+
echo "Found a vLLM commit ${HEAD_SHA} that hasn't been benchmarked yet"
221+
break
222+
fi
223+
done
276224
popd
277225
fi
278226

0 commit comments

Comments
 (0)