@@ -128,7 +128,7 @@ jobs:
128128 elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
129129 DEVICE_TYPE=$(rocminfo | grep "Marketing Name" | tail -n1 | awk -F':' '{print $2}' | xargs)
130130 elif [[ "${DEVICE_NAME}" == "hpu" ]]; then
131- DEVICE_TYPE=$(hl-smi -q | grep "Product Name" | head -n 1 | awk -F ':' '{print $2}' | sed 's/^ *//')
131+ DEVICE_TYPE="Intel Gaudi3 $(hl-smi -q | grep "Product Name" | head -n 1 | awk -F ':' '{print $2}' | sed 's/^ *//')"  # one quoted word: prefix + product name reported by hl-smi
132132 elif [[ "${DEVICE_NAME}" == "cpu" ]]; then
133133 DEVICE_TYPE=$(lscpu | grep 'Model name' | cut -f 2 -d ":" | awk '{$1=$1}1' | cut -f 2 -d " ")
134134 fi
@@ -202,77 +202,25 @@ jobs:
202202
203203 if [[ -z "${HEAD_SHA}" ]]; then
204204 pushd vllm
205-
206- # Special handling for HPU: use vllm-gaudi compatible commit
207- #
208- # Problem: VLLM_STABLE_COMMIT might change between when CI builds the image
209- # and when this benchmark runs (every 12 hours), causing image tag mismatches.
210- #
211- # Solution: Query git history of VLLM_STABLE_COMMIT file to find the most recent
212- # compatible vLLM commit that has an actual Docker image built by CI.
213- if [[ "${DEVICE_NAME}" == "hpu" ]]; then
214- echo "HPU device detected - finding compatible vLLM commit from vllm-gaudi history"
215-
216- # Clone only the last-good-commit-for-vllm-gaudi branch (lightweight, single file)
217- git clone --depth 50 --single-branch --branch vllm/last-good-commit-for-vllm-gaudi \
218- https://github.com/vllm-project/vllm-gaudi.git /tmp/vllm-gaudi
219- pushd /tmp/vllm-gaudi
220-
221- # Get the last 30 commits - each commit represents a VLLM_STABLE_COMMIT update
222- # This gives us a history of compatible vLLM versions
223- CANDIDATE_COMMITS=$(git log -30 --pretty=format:"%H")
224- popd
225-
226- # Try each candidate commit (newest to oldest) until we find an existing image
227- FOUND_IMAGE=0
228- for VLLM_GAUDI_COMMIT in ${CANDIDATE_COMMITS}; do
229- # Get the vLLM commit from this version of the branch
230- CANDIDATE_VLLM_COMMIT=$(curl -s "https://raw.githubusercontent.com/vllm-project/vllm-gaudi/${VLLM_GAUDI_COMMIT}/VLLM_STABLE_COMMIT" | tr -d '\n')
231-
232- if [[ -z "${CANDIDATE_VLLM_COMMIT}" ]]; then
233- continue
234- fi
235-
236- DOCKER_IMAGE="${DOCKER_IMAGE_PREFIX}:${CANDIDATE_VLLM_COMMIT}${DOCKER_IMAGE_SUFFIX}"
237- echo "Checking if image exists: ${DOCKER_IMAGE}"
238-
239- if docker manifest inspect "${DOCKER_IMAGE}" > /dev/null 2>&1; then
240- echo "Found existing HPU image for vLLM commit: ${CANDIDATE_VLLM_COMMIT}"
241- HEAD_SHA="${CANDIDATE_VLLM_COMMIT}"
242- FOUND_IMAGE=1
243- break
244- fi
245- done
246-
247- if [[ ${FOUND_IMAGE} == 0 ]]; then
248- echo "ERROR: No HPU Docker image found in the last 20 versions of VLLM_STABLE_COMMIT"
249- echo "This likely means ci-infra hasn't successfully built any HPU images yet"
250- exit 1
205+ # Looking back the latest 100 commits is enough
206+ for i in {0..99}
207+ do
208+ # Check whether the image exists; if it does not, the commit is too
209+ # recent to have been built yet, so move on to an older commit
210+ HEAD_SHA=$(git rev-parse --verify HEAD~${i})
211+ DOCKER_IMAGE="${DOCKER_IMAGE_PREFIX}:${HEAD_SHA}${DOCKER_IMAGE_SUFFIX}"
212+ # No Docker image available yet because the commit is too recent
213+ if ! docker manifest inspect "${DOCKER_IMAGE}"; then
214+ continue
251215 fi
252- else
253- # For non-HPU devices: Looking back the latest 100 commits
254- for i in {0..99}
255- do
256- # Check if the image is there, if it doesn't then check an older one
257- # because the commit is too recent
258- HEAD_SHA=$(git rev-parse --verify HEAD~${i})
259- DOCKER_IMAGE="${DOCKER_IMAGE_PREFIX}:${HEAD_SHA}${DOCKER_IMAGE_SUFFIX}"
260-
261- # No Docker image available yet because the commit is too recent
262- if ! docker manifest inspect "${DOCKER_IMAGE}"; then
263- continue
264- fi
265-
266- NOT_EXIST=0
267- S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/${DEVICE_TYPE// /_}/benchmark_results_${MODELS//\//_}.json"
268- aws s3api head-object --bucket ossci-benchmarks --key ${S3_PATH} || NOT_EXIST=1
269-
270- if [[ ${NOT_EXIST} == "1" ]]; then
271- echo "Found a vLLM commit ${HEAD_SHA} that hasn't been benchmarked yet"
272- break
273- fi
274- done
275- fi
216+ NOT_EXIST=0
217+ S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/${DEVICE_TYPE// /_}/benchmark_results_${MODELS//\//_}.json"
218+ aws s3api head-object --bucket ossci-benchmarks --key "${S3_PATH}" || NOT_EXIST=1  # quoted so the key is always passed as a single argument
219+ if [[ ${NOT_EXIST} == "1" ]]; then
220+ echo "Found a vLLM commit ${HEAD_SHA} that hasn't been benchmarked yet"
221+ break
222+ fi
223+ done
276224 popd
277225 fi
278226
0 commit comments