# Patch summary: in each scripts/check-long-queue-*.js check below, the hard
# assertion `dd.expect(dd.response.statusCode).to.equal(200)` is replaced by an
# if/else. On a non-200 response the script logs a message and makes a
# trivially-true assertion; otherwise the existing queue-time filter, the
# console.error report and the final `highQueueItems` assertion run inside the
# else branch with the same filters and thresholds as before.
# NOTE(review): the line structure was rebuilt from a whitespace-collapsed copy
# of this diff; 2-space indentation is assumed - confirm against the target
# files before applying.
diff --git a/scripts/check-long-queue-intel.js b/scripts/check-long-queue-intel.js
index 40cc5a6..ae44dc4 100644
--- a/scripts/check-long-queue-intel.js
+++ b/scripts/check-long-queue-intel.js
@@ -1,19 +1,24 @@
-dd.expect(dd.response.statusCode).to.equal(200);
+if (dd.response.statusCode !== 200) {
+  // We do not want to fail due to hud.pytorch.org API failure.
+  console.log('Status code is not 200, stopping execution');
+  dd.expect(true).to.equal(true);
+}
+else {
+  const MACHINE_TYPE_FILTER = '.idc.';
+  const jsonData = dd.response.body;
+  const parsedData = JSON.parse(jsonData);
 
-const MACHINE_TYPE_FILTER = '.idc.';
-const jsonData = dd.response.body;
-const parsedData = JSON.parse(jsonData);
+  const highQueueItems = parsedData
+    .filter(item => item.machine_type.includes(MACHINE_TYPE_FILTER) && item.avg_queue_s > 10800)
+    .map(item => ({ machine_type: item.machine_type, avg_queue_s: item.avg_queue_s }));
 
-const highQueueItems = parsedData
-  .filter(item => item.machine_type.includes(MACHINE_TYPE_FILTER) && item.avg_queue_s > 10800)
-  .map(item => ({ machine_type: item.machine_type, avg_queue_s: item.avg_queue_s }));
+  if (highQueueItems.length > 0) {
+    const machineDetails = highQueueItems
+      .map(item => `${item.machine_type} (${item.avg_queue_s}s)`)
+      .join(', ');
+    const message = `High queue detected for machine types containing ${MACHINE_TYPE_FILTER}: ${machineDetails}`;
+    console.error(message);
+  }
 
-if (highQueueItems.length > 0) {
-  const machineDetails = highQueueItems
-    .map(item => `${item.machine_type} (${item.avg_queue_s}s)`)
-    .join(', ');
-  const message = `High queue detected for machine types containing ${MACHINE_TYPE_FILTER}: ${machineDetails}`;
-  console.error(message);
+  dd.expect(highQueueItems.length > 0).to.be.false;
 }
-
-dd.expect(highQueueItems.length > 0).to.be.false;
diff --git a/scripts/check-long-queue-lf.js b/scripts/check-long-queue-lf.js
index b090c29..682cde4 100644
--- a/scripts/check-long-queue-lf.js
+++ b/scripts/check-long-queue-lf.js
@@ -1,19 +1,24 @@
-dd.expect(dd.response.statusCode).to.equal(200);
+if (dd.response.statusCode !== 200) {
+  // We do not want to fail due to hud.pytorch.org API failure.
+  console.log('Status code is not 200, stopping execution');
+  dd.expect(true).to.equal(true);
+}
+else {
+  const MACHINE_TYPE_FILTER = 'lf.';
+  const jsonData = dd.response.body;
+  const parsedData = JSON.parse(jsonData);
 
-const MACHINE_TYPE_FILTER = 'lf.';
-const jsonData = dd.response.body;
-const parsedData = JSON.parse(jsonData);
+  const highQueueItems = parsedData
+    .filter(item => item.machine_type.startsWith(MACHINE_TYPE_FILTER) && item.avg_queue_s > 10800)
+    .map(item => ({ machine_type: item.machine_type, avg_queue_s: item.avg_queue_s }));
 
-const highQueueItems = parsedData
-  .filter(item => item.machine_type.startsWith(MACHINE_TYPE_FILTER) && item.avg_queue_s > 10800)
-  .map(item => ({ machine_type: item.machine_type, avg_queue_s: item.avg_queue_s }));
+  if (highQueueItems.length > 0) {
+    const machineDetails = highQueueItems
+      .map(item => `${item.machine_type} (${item.avg_queue_s}s)`)
+      .join(', ');
+    const message = `High queue detected for machine types containing ${MACHINE_TYPE_FILTER}: ${machineDetails}`;
+    console.error(message);
+  }
 
-if (highQueueItems.length > 0) {
-  const machineDetails = highQueueItems
-    .map(item => `${item.machine_type} (${item.avg_queue_s}s)`)
-    .join(', ');
-  const message = `High queue detected for machine types containing ${MACHINE_TYPE_FILTER}: ${machineDetails}`;
-  console.error(message);
+  dd.expect(highQueueItems.length > 0).to.be.false;
 }
-
-dd.expect(highQueueItems.length > 0).to.be.false;
diff --git a/scripts/check-long-queue-meta-h100.js b/scripts/check-long-queue-meta-h100.js
index a284913..74f06b7 100644
--- a/scripts/check-long-queue-meta-h100.js
+++ b/scripts/check-long-queue-meta-h100.js
@@ -1,19 +1,24 @@
-dd.expect(dd.response.statusCode).to.equal(200);
+if (dd.response.statusCode !== 200) {
+  // We do not want to fail due to hud.pytorch.org API failure.
+  console.log('Status code is not 200, stopping execution');
+  dd.expect(true).to.equal(true);
+}
+else {
+  const MACHINE_TYPE_FILTER = 'linux.aws.h100';
+  const jsonData = dd.response.body;
+  const parsedData = JSON.parse(jsonData);
 
-const MACHINE_TYPE_FILTER = 'linux.aws.h100';
-const jsonData = dd.response.body;
-const parsedData = JSON.parse(jsonData);
+  const highQueueItems = parsedData
+    .filter(item => item.machine_type === MACHINE_TYPE_FILTER && item.avg_queue_s > 21600)
+    .map(item => ({ machine_type: item.machine_type, avg_queue_s: item.avg_queue_s }));
 
-const highQueueItems = parsedData
-  .filter(item => item.machine_type === MACHINE_TYPE_FILTER && item.avg_queue_s > 21600)
-  .map(item => ({ machine_type: item.machine_type, avg_queue_s: item.avg_queue_s }));
+  if (highQueueItems.length > 0) {
+    const machineDetails = highQueueItems
+      .map(item => `${item.machine_type} (${item.avg_queue_s}s)`)
+      .join(', ');
+    const message = `High queue detected for machine type ${MACHINE_TYPE_FILTER}: ${machineDetails}`;
+    console.error(message);
+  }
 
-if (highQueueItems.length > 0) {
-  const machineDetails = highQueueItems
-    .map(item => `${item.machine_type} (${item.avg_queue_s}s)`)
-    .join(', ');
-  const message = `High queue detected for machine type ${MACHINE_TYPE_FILTER}: ${machineDetails}`;
-  console.error(message);
+  dd.expect(highQueueItems.length > 0).to.be.false;
 }
-
-dd.expect(highQueueItems.length > 0).to.be.false;
diff --git a/scripts/check-long-queue-meta.js b/scripts/check-long-queue-meta.js
index 6ee37a6..b1268d4 100644
--- a/scripts/check-long-queue-meta.js
+++ b/scripts/check-long-queue-meta.js
@@ -1,22 +1,28 @@
-dd.expect(dd.response.statusCode).to.equal(200);
-const EXCLUDED_MACHINE_PATTERNS = ['.dgx.', '.idc.', '.rocm.', '.s390x', '^lf\\.', '^linux.aws.h100'];
-const jsonData = dd.response.body;
-const parsedData = JSON.parse(jsonData);
-const highQueueItems = parsedData
-  .filter(item => {
-    const machineType = item.machine_type;
-    return !EXCLUDED_MACHINE_PATTERNS.some(pattern =>
-      pattern.startsWith('^') ?
-        new RegExp(pattern).test(machineType) :
-        machineType.includes(pattern)
-    ) && item.avg_queue_s > 10800;
-  })
-  .map(item => ({ machine_type: item.machine_type, avg_queue_s: item.avg_queue_s }));
-if (highQueueItems.length > 0) {
-  const machineDetails = highQueueItems
-    .map(item => `${item.machine_type} (${item.avg_queue_s}s)`)
-    .join(', ');
-  const message = `High queue detected for machine types: ${machineDetails}`;
-  console.error(message);
+if (dd.response.statusCode !== 200) {
+  // We do not want to fail due to hud.pytorch.org API failure.
+  console.log('Status code is not 200, stopping execution');
+  dd.expect(true).to.equal(true);
+}
+else {
+  const EXCLUDED_MACHINE_PATTERNS = ['.dgx.', '.idc.', '.rocm.', '.s390x', '^lf\\.', '^linux.aws.h100'];
+  const jsonData = dd.response.body;
+  const parsedData = JSON.parse(jsonData);
+  const highQueueItems = parsedData
+    .filter(item => {
+      const machineType = item.machine_type;
+      return !EXCLUDED_MACHINE_PATTERNS.some(pattern =>
+        pattern.startsWith('^') ?
+          new RegExp(pattern).test(machineType) :
+          machineType.includes(pattern)
+      ) && item.avg_queue_s > 10800;
+    })
+    .map(item => ({ machine_type: item.machine_type, avg_queue_s: item.avg_queue_s }));
+  if (highQueueItems.length > 0) {
+    const machineDetails = highQueueItems
+      .map(item => `${item.machine_type} (${item.avg_queue_s}s)`)
+      .join(', ');
+    const message = `High queue detected for machine types: ${machineDetails}`;
+    console.error(message);
+  }
+  dd.expect(highQueueItems.length > 0).to.be.false;
 }
-dd.expect(highQueueItems.length > 0).to.be.false;
diff --git a/scripts/check-long-queue-nvidia.js b/scripts/check-long-queue-nvidia.js
index 514e234..879fe43 100644
--- a/scripts/check-long-queue-nvidia.js
+++ b/scripts/check-long-queue-nvidia.js
@@ -1,19 +1,24 @@
-dd.expect(dd.response.statusCode).to.equal(200);
+if (dd.response.statusCode !== 200) {
+  // We do not want to fail due to hud.pytorch.org API failure.
+  console.log('Status code is not 200, stopping execution');
+  dd.expect(true).to.equal(true);
+}
+else {
+  const MACHINE_TYPE_FILTER = '.dgx.';
+  const jsonData = dd.response.body;
+  const parsedData = JSON.parse(jsonData);
 
-const MACHINE_TYPE_FILTER = '.dgx.';
-const jsonData = dd.response.body;
-const parsedData = JSON.parse(jsonData);
+  const highQueueItems = parsedData
+    .filter(item => item.machine_type.includes(MACHINE_TYPE_FILTER) && item.avg_queue_s > 10800)
+    .map(item => ({ machine_type: item.machine_type, avg_queue_s: item.avg_queue_s }));
 
-const highQueueItems = parsedData
-  .filter(item => item.machine_type.includes(MACHINE_TYPE_FILTER) && item.avg_queue_s > 10800)
-  .map(item => ({ machine_type: item.machine_type, avg_queue_s: item.avg_queue_s }));
+  if (highQueueItems.length > 0) {
+    const machineDetails = highQueueItems
+      .map(item => `${item.machine_type} (${item.avg_queue_s}s)`)
+      .join(', ');
+    const message = `High queue detected for machine types containing ${MACHINE_TYPE_FILTER}: ${machineDetails}`;
+    console.error(message);
+  }
 
-if (highQueueItems.length > 0) {
-  const machineDetails = highQueueItems
-    .map(item => `${item.machine_type} (${item.avg_queue_s}s)`)
-    .join(', ');
-  const message = `High queue detected for machine types containing ${MACHINE_TYPE_FILTER}: ${machineDetails}`;
-  console.error(message);
+  dd.expect(highQueueItems.length > 0).to.be.false;
 }
-
-dd.expect(highQueueItems.length > 0).to.be.false;
diff --git a/scripts/check-long-queue-rocm.js b/scripts/check-long-queue-rocm.js
index d9c0e89..e63dde6 100644
--- a/scripts/check-long-queue-rocm.js
+++ b/scripts/check-long-queue-rocm.js
@@ -1,19 +1,24 @@
-dd.expect(dd.response.statusCode).to.equal(200);
+if (dd.response.statusCode !== 200) {
+  // We do not want to fail due to hud.pytorch.org API failure.
+  console.log('Status code is not 200, stopping execution');
+  dd.expect(true).to.equal(true);
+}
+else {
+  const MACHINE_TYPE_FILTER = '.rocm.';
+  const jsonData = dd.response.body;
+  const parsedData = JSON.parse(jsonData);
 
-const MACHINE_TYPE_FILTER = '.rocm.';
-const jsonData = dd.response.body;
-const parsedData = JSON.parse(jsonData);
+  const highQueueItems = parsedData
+    .filter(item => item.machine_type.includes(MACHINE_TYPE_FILTER) && item.avg_queue_s > 14400)
+    .map(item => ({ machine_type: item.machine_type, avg_queue_s: item.avg_queue_s }));
 
-const highQueueItems = parsedData
-  .filter(item => item.machine_type.includes(MACHINE_TYPE_FILTER) && item.avg_queue_s > 14400)
-  .map(item => ({ machine_type: item.machine_type, avg_queue_s: item.avg_queue_s }));
+  if (highQueueItems.length > 0) {
+    const machineDetails = highQueueItems
+      .map(item => `${item.machine_type} (${item.avg_queue_s}s)`)
+      .join(', ');
+    const message = `High queue detected for machine types containing ${MACHINE_TYPE_FILTER}: ${machineDetails}`;
+    console.error(message);
+  }
 
-if (highQueueItems.length > 0) {
-  const machineDetails = highQueueItems
-    .map(item => `${item.machine_type} (${item.avg_queue_s}s)`)
-    .join(', ');
-  const message = `High queue detected for machine types containing ${MACHINE_TYPE_FILTER}: ${machineDetails}`;
-  console.error(message);
+  dd.expect(highQueueItems.length > 0).to.be.false;
 }
-
-dd.expect(highQueueItems.length > 0).to.be.false;
diff --git a/scripts/check-long-queue-s390x.js b/scripts/check-long-queue-s390x.js
index 8e4713b..6b38317 100644
--- a/scripts/check-long-queue-s390x.js
+++ b/scripts/check-long-queue-s390x.js
@@ -1,19 +1,24 @@
-dd.expect(dd.response.statusCode).to.equal(200);
+if (dd.response.statusCode !== 200) {
+  // We do not want to fail due to hud.pytorch.org API failure.
+  console.log('Status code is not 200, stopping execution');
+  dd.expect(true).to.equal(true);
+}
+else {
+  const MACHINE_TYPE_FILTER = '.s390x';
+  const jsonData = dd.response.body;
+  const parsedData = JSON.parse(jsonData);
 
-const MACHINE_TYPE_FILTER = '.s390x';
-const jsonData = dd.response.body;
-const parsedData = JSON.parse(jsonData);
+  const highQueueItems = parsedData
+    .filter(item => item.machine_type.includes(MACHINE_TYPE_FILTER) && item.avg_queue_s > 10800)
+    .map(item => ({ machine_type: item.machine_type, avg_queue_s: item.avg_queue_s }));
 
-const highQueueItems = parsedData
-  .filter(item => item.machine_type.includes(MACHINE_TYPE_FILTER) && item.avg_queue_s > 10800)
-  .map(item => ({ machine_type: item.machine_type, avg_queue_s: item.avg_queue_s }));
+  if (highQueueItems.length > 0) {
+    const machineDetails = highQueueItems
+      .map(item => `${item.machine_type} (${item.avg_queue_s}s)`)
+      .join(', ');
+    const message = `High queue detected for machine types containing ${MACHINE_TYPE_FILTER}: ${machineDetails}`;
+    console.error(message);
+  }
 
-if (highQueueItems.length > 0) {
-  const machineDetails = highQueueItems
-    .map(item => `${item.machine_type} (${item.avg_queue_s}s)`)
-    .join(', ');
-  const message = `High queue detected for machine types containing ${MACHINE_TYPE_FILTER}: ${machineDetails}`;
-  console.error(message);
+  dd.expect(highQueueItems.length > 0).to.be.false;
 }
-
-dd.expect(highQueueItems.length > 0).to.be.false;