Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 15 additions & 20 deletions scripts/check-long-queue-intel.js
Original file line number Diff line number Diff line change
@@ -1,24 +1,19 @@
if (dd.response.statusCode !== 200) {
// We do not want to fail due to hud.pytorch.org API failure.
console.log('Status code is not 200, stopping execution');
dd.expect(true).to.equal(true);
}
else {
const MACHINE_TYPE_FILTER = '.idc.';
const jsonData = dd.response.body;
const parsedData = JSON.parse(jsonData);
dd.expect(dd.response.statusCode).to.equal(200);

const highQueueItems = parsedData
.filter(item => item.machine_type.includes(MACHINE_TYPE_FILTER) && item.avg_queue_s > 10800)
.map(item => ({ machine_type: item.machine_type, avg_queue_s: item.avg_queue_s }));
const MACHINE_TYPE_FILTER = '.idc.';
const jsonData = dd.response.body;
const parsedData = JSON.parse(jsonData);

if (highQueueItems.length > 0) {
const machineDetails = highQueueItems
.map(item => `${item.machine_type} (${item.avg_queue_s}s)`)
.join(', ');
const message = `High queue detected for machine types containing ${MACHINE_TYPE_FILTER}: ${machineDetails}`;
console.error(message);
}
const highQueueItems = parsedData
.filter(item => item.machine_type.includes(MACHINE_TYPE_FILTER) && item.avg_queue_s > 10800)
.map(item => ({ machine_type: item.machine_type, avg_queue_s: item.avg_queue_s }));

dd.expect(highQueueItems.length > 0).to.be.false;
if (highQueueItems.length > 0) {
const machineDetails = highQueueItems
.map(item => `${item.machine_type} (${item.avg_queue_s}s)`)
.join(', ');
const message = `High queue detected for machine types containing ${MACHINE_TYPE_FILTER}: ${machineDetails}`;
console.error(message);
}

dd.expect(highQueueItems.length > 0).to.be.false;
35 changes: 15 additions & 20 deletions scripts/check-long-queue-lf.js
Original file line number Diff line number Diff line change
@@ -1,24 +1,19 @@
if (dd.response.statusCode !== 200) {
// We do not want to fail due to hud.pytorch.org API failure.
console.log('Status code is not 200, stopping execution');
dd.expect(true).to.equal(true);
}
else {
const MACHINE_TYPE_FILTER = 'lf.';
const jsonData = dd.response.body;
const parsedData = JSON.parse(jsonData);
dd.expect(dd.response.statusCode).to.equal(200);

const highQueueItems = parsedData
.filter(item => item.machine_type.startsWith(MACHINE_TYPE_FILTER) && item.avg_queue_s > 10800)
.map(item => ({ machine_type: item.machine_type, avg_queue_s: item.avg_queue_s }));
const MACHINE_TYPE_FILTER = 'lf.';
const jsonData = dd.response.body;
const parsedData = JSON.parse(jsonData);

if (highQueueItems.length > 0) {
const machineDetails = highQueueItems
.map(item => `${item.machine_type} (${item.avg_queue_s}s)`)
.join(', ');
const message = `High queue detected for machine types containing ${MACHINE_TYPE_FILTER}: ${machineDetails}`;
console.error(message);
}
const highQueueItems = parsedData
.filter(item => item.machine_type.startsWith(MACHINE_TYPE_FILTER) && item.avg_queue_s > 10800)
.map(item => ({ machine_type: item.machine_type, avg_queue_s: item.avg_queue_s }));

dd.expect(highQueueItems.length > 0).to.be.false;
if (highQueueItems.length > 0) {
const machineDetails = highQueueItems
.map(item => `${item.machine_type} (${item.avg_queue_s}s)`)
.join(', ');
const message = `High queue detected for machine types containing ${MACHINE_TYPE_FILTER}: ${machineDetails}`;
console.error(message);
}

dd.expect(highQueueItems.length > 0).to.be.false;
35 changes: 15 additions & 20 deletions scripts/check-long-queue-meta-h100.js
Original file line number Diff line number Diff line change
@@ -1,24 +1,19 @@
if (dd.response.statusCode !== 200) {
// We do not want to fail due to hud.pytorch.org API failure.
console.log('Status code is not 200, stopping execution');
dd.expect(true).to.equal(true);
}
else {
const MACHINE_TYPE_FILTER = 'linux.aws.h100';
const jsonData = dd.response.body;
const parsedData = JSON.parse(jsonData);
dd.expect(dd.response.statusCode).to.equal(200);

const highQueueItems = parsedData
.filter(item => item.machine_type === MACHINE_TYPE_FILTER && item.avg_queue_s > 21600)
.map(item => ({ machine_type: item.machine_type, avg_queue_s: item.avg_queue_s }));
const MACHINE_TYPE_FILTER = 'linux.aws.h100';
const jsonData = dd.response.body;
const parsedData = JSON.parse(jsonData);

if (highQueueItems.length > 0) {
const machineDetails = highQueueItems
.map(item => `${item.machine_type} (${item.avg_queue_s}s)`)
.join(', ');
const message = `High queue detected for machine type ${MACHINE_TYPE_FILTER}: ${machineDetails}`;
console.error(message);
}
const highQueueItems = parsedData
.filter(item => item.machine_type === MACHINE_TYPE_FILTER && item.avg_queue_s > 21600)
.map(item => ({ machine_type: item.machine_type, avg_queue_s: item.avg_queue_s }));

dd.expect(highQueueItems.length > 0).to.be.false;
if (highQueueItems.length > 0) {
const machineDetails = highQueueItems
.map(item => `${item.machine_type} (${item.avg_queue_s}s)`)
.join(', ');
const message = `High queue detected for machine type ${MACHINE_TYPE_FILTER}: ${machineDetails}`;
console.error(message);
}

dd.expect(highQueueItems.length > 0).to.be.false;
48 changes: 21 additions & 27 deletions scripts/check-long-queue-meta.js
Original file line number Diff line number Diff line change
@@ -1,28 +1,22 @@
if (dd.response.statusCode !== 200) {
// We do not want to fail due to hud.pytorch.org API failure.
console.log('Status code is not 200, stopping execution');
dd.expect(true).to.equal(true);
}
else {
const EXCLUDED_MACHINE_PATTERNS = ['.dgx.', '.idc.', '.rocm.', '.s390x', '^lf\\.', '^linux.aws.h100'];
const jsonData = dd.response.body;
const parsedData = JSON.parse(jsonData);
const highQueueItems = parsedData
.filter(item => {
const machineType = item.machine_type;
return !EXCLUDED_MACHINE_PATTERNS.some(pattern =>
pattern.startsWith('^') ?
new RegExp(pattern).test(machineType) :
machineType.includes(pattern)
) && item.avg_queue_s > 10800;
})
.map(item => ({ machine_type: item.machine_type, avg_queue_s: item.avg_queue_s }));
if (highQueueItems.length > 0) {
const machineDetails = highQueueItems
.map(item => `${item.machine_type} (${item.avg_queue_s}s)`)
.join(', ');
const message = `High queue detected for machine types: ${machineDetails}`;
console.error(message);
}
dd.expect(highQueueItems.length > 0).to.be.false;
dd.expect(dd.response.statusCode).to.equal(200);
const EXCLUDED_MACHINE_PATTERNS = ['.dgx.', '.idc.', '.rocm.', '.s390x', '^lf\\.', '^linux.aws.h100'];
const jsonData = dd.response.body;
const parsedData = JSON.parse(jsonData);
const highQueueItems = parsedData
.filter(item => {
const machineType = item.machine_type;
return !EXCLUDED_MACHINE_PATTERNS.some(pattern =>
pattern.startsWith('^') ?
new RegExp(pattern).test(machineType) :
machineType.includes(pattern)
) && item.avg_queue_s > 10800;
})
.map(item => ({ machine_type: item.machine_type, avg_queue_s: item.avg_queue_s }));
if (highQueueItems.length > 0) {
const machineDetails = highQueueItems
.map(item => `${item.machine_type} (${item.avg_queue_s}s)`)
.join(', ');
const message = `High queue detected for machine types: ${machineDetails}`;
console.error(message);
}
dd.expect(highQueueItems.length > 0).to.be.false;
35 changes: 15 additions & 20 deletions scripts/check-long-queue-nvidia.js
Original file line number Diff line number Diff line change
@@ -1,24 +1,19 @@
if (dd.response.statusCode !== 200) {
// We do not want to fail due to hud.pytorch.org API failure.
console.log('Status code is not 200, stopping execution');
dd.expect(true).to.equal(true);
}
else {
const MACHINE_TYPE_FILTER = '.dgx.';
const jsonData = dd.response.body;
const parsedData = JSON.parse(jsonData);
dd.expect(dd.response.statusCode).to.equal(200);

const highQueueItems = parsedData
.filter(item => item.machine_type.includes(MACHINE_TYPE_FILTER) && item.avg_queue_s > 10800)
.map(item => ({ machine_type: item.machine_type, avg_queue_s: item.avg_queue_s }));
const MACHINE_TYPE_FILTER = '.dgx.';
const jsonData = dd.response.body;
const parsedData = JSON.parse(jsonData);

if (highQueueItems.length > 0) {
const machineDetails = highQueueItems
.map(item => `${item.machine_type} (${item.avg_queue_s}s)`)
.join(', ');
const message = `High queue detected for machine types containing ${MACHINE_TYPE_FILTER}: ${machineDetails}`;
console.error(message);
}
const highQueueItems = parsedData
.filter(item => item.machine_type.includes(MACHINE_TYPE_FILTER) && item.avg_queue_s > 10800)
.map(item => ({ machine_type: item.machine_type, avg_queue_s: item.avg_queue_s }));

dd.expect(highQueueItems.length > 0).to.be.false;
if (highQueueItems.length > 0) {
const machineDetails = highQueueItems
.map(item => `${item.machine_type} (${item.avg_queue_s}s)`)
.join(', ');
const message = `High queue detected for machine types containing ${MACHINE_TYPE_FILTER}: ${machineDetails}`;
console.error(message);
}

dd.expect(highQueueItems.length > 0).to.be.false;
35 changes: 15 additions & 20 deletions scripts/check-long-queue-rocm.js
Original file line number Diff line number Diff line change
@@ -1,24 +1,19 @@
if (dd.response.statusCode !== 200) {
// We do not want to fail due to hud.pytorch.org API failure.
console.log('Status code is not 200, stopping execution');
dd.expect(true).to.equal(true);
}
else {
const MACHINE_TYPE_FILTER = '.rocm.';
const jsonData = dd.response.body;
const parsedData = JSON.parse(jsonData);
dd.expect(dd.response.statusCode).to.equal(200);

const highQueueItems = parsedData
.filter(item => item.machine_type.includes(MACHINE_TYPE_FILTER) && item.avg_queue_s > 14400)
.map(item => ({ machine_type: item.machine_type, avg_queue_s: item.avg_queue_s }));
const MACHINE_TYPE_FILTER = '.rocm.';
const jsonData = dd.response.body;
const parsedData = JSON.parse(jsonData);

if (highQueueItems.length > 0) {
const machineDetails = highQueueItems
.map(item => `${item.machine_type} (${item.avg_queue_s}s)`)
.join(', ');
const message = `High queue detected for machine types containing ${MACHINE_TYPE_FILTER}: ${machineDetails}`;
console.error(message);
}
const highQueueItems = parsedData
.filter(item => item.machine_type.includes(MACHINE_TYPE_FILTER) && item.avg_queue_s > 14400)
.map(item => ({ machine_type: item.machine_type, avg_queue_s: item.avg_queue_s }));

dd.expect(highQueueItems.length > 0).to.be.false;
if (highQueueItems.length > 0) {
const machineDetails = highQueueItems
.map(item => `${item.machine_type} (${item.avg_queue_s}s)`)
.join(', ');
const message = `High queue detected for machine types containing ${MACHINE_TYPE_FILTER}: ${machineDetails}`;
console.error(message);
}

dd.expect(highQueueItems.length > 0).to.be.false;
35 changes: 15 additions & 20 deletions scripts/check-long-queue-s390x.js
Original file line number Diff line number Diff line change
@@ -1,24 +1,19 @@
if (dd.response.statusCode !== 200) {
// We do not want to fail due to hud.pytorch.org API failure.
console.log('Status code is not 200, stopping execution');
dd.expect(true).to.equal(true);
}
else {
const MACHINE_TYPE_FILTER = '.s390x';
const jsonData = dd.response.body;
const parsedData = JSON.parse(jsonData);
dd.expect(dd.response.statusCode).to.equal(200);

const highQueueItems = parsedData
.filter(item => item.machine_type.includes(MACHINE_TYPE_FILTER) && item.avg_queue_s > 10800)
.map(item => ({ machine_type: item.machine_type, avg_queue_s: item.avg_queue_s }));
const MACHINE_TYPE_FILTER = '.s390x';
const jsonData = dd.response.body;
const parsedData = JSON.parse(jsonData);

if (highQueueItems.length > 0) {
const machineDetails = highQueueItems
.map(item => `${item.machine_type} (${item.avg_queue_s}s)`)
.join(', ');
const message = `High queue detected for machine types containing ${MACHINE_TYPE_FILTER}: ${machineDetails}`;
console.error(message);
}
const highQueueItems = parsedData
.filter(item => item.machine_type.includes(MACHINE_TYPE_FILTER) && item.avg_queue_s > 10800)
.map(item => ({ machine_type: item.machine_type, avg_queue_s: item.avg_queue_s }));

dd.expect(highQueueItems.length > 0).to.be.false;
if (highQueueItems.length > 0) {
const machineDetails = highQueueItems
.map(item => `${item.machine_type} (${item.avg_queue_s}s)`)
.join(', ');
const message = `High queue detected for machine types containing ${MACHINE_TYPE_FILTER}: ${machineDetails}`;
console.error(message);
}

dd.expect(highQueueItems.length > 0).to.be.false;