Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 65 additions & 30 deletions app/api/metrics/route.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { NextResponse } from "next/server";
import * as si from "systeminformation";
import { detectROCm } from "@/lib/system/rocm";
import { detectROCm, getAdvancedAmdGpuMetrics, isAmdSmiAvailable } from "@/lib/system/rocm";
import type { SystemMetrics } from "@/types/metrics";

export const dynamic = "force-dynamic";
Expand Down Expand Up @@ -138,40 +138,72 @@ interface GpuOutput {
}

async function getGpuMetrics(): Promise<{ gpus: GpuOutput[]; rocmDetected: boolean; rocmRuntimeVersion: string }> {
const amdSmiAvail = await isAmdSmiAvailable();

// First try ROCm detection
try {
const rocData = await detectROCm();
if (rocData.gpus && rocData.gpus.length > 0) {
// Fetch advanced metrics for each GPU if amd-smi is available
const enhancedGpus = await Promise.all(
rocData.gpus.map(async (gpu) => {
const baseGpu = {
index: gpu.index,
name: gpu.name,
marketingName: gpu.marketingName,
vendor: gpu.vendor,
usage: gpu.usage ?? 0,
memory: gpu.memory || { total: 0, used: 0 },
gttMemory: gpu.gttMemory,
temperature: gpu.temperature ?? null,
temperatureHotspot: gpu.temperatureHotspot ?? null,
temperatureMem: gpu.temperatureMem ?? null,
power: gpu.power ?? null,
driverVersion: gpu.driverVersion || "Unknown",
gfxVersion: gpu.gfxVersion,
deviceId: gpu.deviceId || "N/A",
computeUnits: gpu.computeUnits,
maxClockMHz: gpu.maxClockMHz,
currentClockMHz: gpu.currentClockMHz || 0,
memoryClockMHz: gpu.memoryClockMHz ?? null,
vbiosVersion: gpu.vbiosVersion,
pciBus: gpu.pciBus,
vramType: gpu.vramType,
vramBitWidth: gpu.vramBitWidth,
pcieWidth: gpu.pcieWidth ?? null,
pcieSpeed: gpu.pcieSpeed ?? null,
eccCorrectable: gpu.eccCorrectable ?? null,
eccUncorrectable: gpu.eccUncorrectable ?? null,
isThrottling: gpu.isThrottling ?? false,
};

// Add advanced metrics if amd-smi is available
if (amdSmiAvail) {
try {
const advancedMetrics = await getAdvancedAmdGpuMetrics(gpu.index);
return {
...baseGpu,
engineUtilization: advancedMetrics.engineMetrics,
thermal: advancedMetrics.thermalMetrics,
powerMetrics: advancedMetrics.powerMetrics,
clocks: advancedMetrics.clockMetrics,
pcieMetrics: advancedMetrics.pcieMetrics,
xgmiMetrics: advancedMetrics.xgmiMetrics,
mediaEngines: advancedMetrics.mediaMetrics,
eccMetrics: advancedMetrics.eccMetrics,
};
} catch {
// Advanced metrics failed, return base GPU
return baseGpu;
}
}

return baseGpu;
})
);

return {
gpus: rocData.gpus.map((gpu) => ({
index: gpu.index,
name: gpu.name,
marketingName: gpu.marketingName,
vendor: gpu.vendor,
usage: gpu.usage ?? 0,
memory: gpu.memory || { total: 0, used: 0 },
gttMemory: gpu.gttMemory,
temperature: gpu.temperature ?? null,
temperatureHotspot: gpu.temperatureHotspot ?? null,
temperatureMem: gpu.temperatureMem ?? null,
power: gpu.power ?? null,
driverVersion: gpu.driverVersion || "Unknown",
gfxVersion: gpu.gfxVersion,
deviceId: gpu.deviceId || "N/A",
computeUnits: gpu.computeUnits,
maxClockMHz: gpu.maxClockMHz,
currentClockMHz: gpu.currentClockMHz || 0,
memoryClockMHz: gpu.memoryClockMHz ?? null,
vbiosVersion: gpu.vbiosVersion,
pciBus: gpu.pciBus,
vramType: gpu.vramType,
vramBitWidth: gpu.vramBitWidth,
pcieWidth: gpu.pcieWidth ?? null,
pcieSpeed: gpu.pcieSpeed ?? null,
eccCorrectable: gpu.eccCorrectable ?? null,
eccUncorrectable: gpu.eccUncorrectable ?? null,
isThrottling: gpu.isThrottling ?? false,
})),
gpus: enhancedGpus,
rocmDetected: true,
rocmRuntimeVersion: rocData.runtimeVersion || "",
};
Expand Down Expand Up @@ -315,6 +347,8 @@ export async function GET(): Promise<NextResponse> {
arch: osInfo.arch || "Unknown",
};

const amdSmiAvail = await isAmdSmiAvailable();

const response: SystemMetrics = {
timestamp: Date.now(),
cpu: cpuMetrics,
Expand All @@ -325,6 +359,7 @@ export async function GET(): Promise<NextResponse> {
os: osMetrics,
rocmDetected: gpuData.rocmDetected,
rocmRuntimeVersion: gpuData.rocmRuntimeVersion,
amdSmiAvailable: amdSmiAvail,
};

return NextResponse.json(response, { headers: corsHeaders });
Expand Down
90 changes: 90 additions & 0 deletions lib/components/GpuEccPanel.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
"use client";

import type { GpuEcc } from "@/types/metrics";

/**
 * Props for {@link GpuEccPanel}.
 *
 * `ecc` is optional: when it is missing the panel renders an
 * "ECC data not available" placeholder instead of the error counters.
 */
interface GpuEccPanelProps {
ecc?: GpuEcc;
}

/** Outline path of the shield-with-checkmark icon shown in the panel header. */
const ECC_SHIELD_ICON_PATH =
  "M9 12l2 2 4-4m5.618-4.016A11.955 11.955 0 0112 2.944a11.955 11.955 0 01-8.618 3.04A12.02 12.02 0 003 9c0 5.591 3.824 10.29 9 11.622 5.176-1.332 9-6.03 9-11.622 0-1.042-.133-2.052-.382-3.016z";

/**
 * Card summarising GPU ECC error counters.
 *
 * - Without `ecc`: a grey shield and an "ECC data not available" message.
 * - With `ecc`: a green shield and "No Errors" when both counters are zero,
 *   otherwise a red shield and "Errors Detected", followed by one tile per
 *   counter (correctable / uncorrectable).
 */
export default function GpuEccPanel({ ecc }: GpuEccPanelProps) {
  const mutedText = { color: "var(--muted-foreground)" };

  // One entry per counter tile; empty when no ECC data was supplied.
  const counters = ecc
    ? [
        { label: "Correctable", count: ecc.correctable, alertColor: "#eab308" },
        { label: "Uncorrectable", count: ecc.uncorrectable, alertColor: "#ef4444" },
      ]
    : [];
  const errorsFound = counters.some(({ count }) => count > 0);

  // Header icon palette: grey = no data, red = errors present, green = clean.
  let iconBackground = "rgba(107, 114, 128, 0.2)";
  let iconColor = "var(--muted-foreground)";
  if (ecc) {
    iconBackground = errorsFound ? "rgba(239, 68, 68, 0.2)" : "rgba(34, 197, 94, 0.2)";
    iconColor = errorsFound ? "#ef4444" : "#22c55e";
  }

  return (
    <div className="p-4 rounded-lg border" style={{ background: "var(--surface-1)", borderColor: "var(--border)" }}>
      <div className="flex items-center gap-2 mb-3">
        <div className="p-2 rounded-md" style={{ background: iconBackground }}>
          <svg className="w-5 h-5" style={{ color: iconColor }} fill="none" viewBox="0 0 24 24" stroke="currentColor">
            <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d={ECC_SHIELD_ICON_PATH} />
          </svg>
        </div>
        <div>
          <p className="text-[10px] uppercase tracking-wider" style={mutedText}>
            ECC Status
          </p>
          {ecc ? (
            <p className={errorsFound ? "text-sm font-semibold text-red-500" : "text-sm font-semibold text-green-500"}>
              {errorsFound ? "Errors Detected" : "No Errors"}
            </p>
          ) : null}
        </div>
      </div>

      {ecc ? (
        <div className="grid grid-cols-2 gap-3">
          {counters.map(({ label, count, alertColor }) => (
            <div key={label} className="p-3 rounded-lg" style={{ background: "var(--surface-2)" }}>
              <div className="flex items-center justify-between">
                <span className="text-xs" style={mutedText}>{label}</span>
                <span className="text-sm font-bold" style={{ color: count > 0 ? alertColor : "var(--foreground)" }}>
                  {count}
                </span>
              </div>
            </div>
          ))}
        </div>
      ) : (
        <p className="text-sm text-center" style={mutedText}>
          ECC data not available
        </p>
      )}
    </div>
  );
}
101 changes: 101 additions & 0 deletions lib/components/GpuEnginesPanel.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
"use client";

import type { GpuEngineUtilization } from "@/types/metrics";

/**
 * Props for {@link GpuEnginesPanel}.
 *
 * The panel reads the `gfx`, `mem`, and `mm` fields of `engines` and renders
 * each one as a percentage with a progress bar.
 */
interface GpuEnginesPanelProps {
engines: GpuEngineUtilization;
}

/**
 * Convert a utilization reading into a value that is always a valid CSS
 * percentage: non-finite readings become 0 and everything else is clamped
 * to the 0–100 range.
 */
function engineBarWidth(value: number): number {
  if (!Number.isFinite(value)) return 0;
  return Math.min(Math.max(value, 0), 100);
}

/** One utilization row: caption, rounded percentage, and a progress bar. */
function EngineUtilizationRow({
  label,
  value,
  valueColor,
  barColor,
}: {
  label: string;
  value: number;
  valueColor: string;
  barColor: string;
}) {
  return (
    <div>
      <div className="flex items-center justify-between mb-1">
        <span className="text-xs" style={{ color: "var(--muted-foreground)" }}>{label}</span>
        <span className="text-xs font-medium" style={{ color: valueColor }}>
          {Math.round(value)}%
        </span>
      </div>
      <div className="w-full h-2 bg-surface-2 rounded-full overflow-hidden">
        <div
          className="h-full rounded-full transition-all duration-300"
          style={{ width: `${engineBarWidth(value)}%`, background: barColor }}
        />
      </div>
    </div>
  );
}

/**
 * Card showing GFX, MEM, and MM engine utilization plus a coarse workload
 * label derived from those three readings.
 *
 * @param engines - Per-engine utilization; `gfx`, `mem`, and `mm` are treated
 *   as percentages.
 */
export default function GpuEnginesPanel({ engines }: GpuEnginesPanelProps) {
  const { gfx, mem, mm } = engines;

  // Classify the workload from engine utilization. Order matters: the first
  // matching rule wins.
  const getWorkloadType = () => {
    if (gfx > 70 && mem < 40) return { label: "Compute-Bound", color: "text-blue-400" };
    if (mem > 70 && gfx < 40) return { label: "Memory-Bound", color: "text-yellow-400" };
    if (mm > 50) return { label: "Video Workload", color: "text-purple-400" };
    if (gfx > 70 && mem > 70) return { label: "Balanced Load", color: "text-green-400" };
    // One engine is busy while the other sits in the 40–70 band. Without this
    // rule such a GPU (e.g. gfx 90 / mem 55) was reported as "Light Load".
    if (gfx > 70 || mem > 70) return { label: "Mixed Load", color: "text-orange-400" };
    return { label: "Light Load", color: "text-muted-foreground" };
  };

  const workload = getWorkloadType();

  return (
    <div className="p-4 rounded-lg border" style={{ background: "var(--surface-1)", borderColor: "var(--border)" }}>
      <div className="flex items-center gap-2 mb-3">
        <div className="p-2 rounded-md" style={{ background: "rgba(255, 77, 77, 0.2)" }}>
          <svg className="w-5 h-5" style={{ color: "var(--primary)" }} fill="none" viewBox="0 0 24 24" stroke="currentColor">
            <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 3v2m6-2v2M9 19v2m6-2v2M5 9H3m2 6H3m18-6h-2m2 6h-2M7 19h10a2 2 0 002-2V7a2 2 0 00-2-2H7a2 2 0 00-2 2v10a2 2 0 002 2zM9 9h6v6H9V9z" />
          </svg>
        </div>
        <div>
          <p className="text-[10px] uppercase tracking-wider" style={{ color: "var(--muted-foreground)" }}>
            GPU Engines
          </p>
          <p className="text-sm font-semibold" style={{ color: "var(--foreground)" }}>
            <span className={workload.color}>{workload.label}</span>
          </p>
        </div>
      </div>

      <div className="space-y-3">
        {/* GFX Engine: value text and bar turn red above 80% */}
        <EngineUtilizationRow
          label="GFX (Compute)"
          value={gfx}
          valueColor={gfx > 80 ? "#ef4444" : "var(--foreground)"}
          barColor={gfx > 80 ? "#ef4444" : gfx > 50 ? "var(--primary)" : "#60a5fa"}
        />

        {/* MEM Engine: value text and bar turn red above 80% */}
        <EngineUtilizationRow
          label="MEM (Memory)"
          value={mem}
          valueColor={mem > 80 ? "#ef4444" : "var(--foreground)"}
          barColor={mem > 80 ? "#ef4444" : mem > 50 ? "#fbbf24" : "#60a5fa"}
        />

        {/* MM Engine: no red state, only a purple highlight above 50% */}
        <EngineUtilizationRow
          label="MM (Multimedia)"
          value={mm}
          valueColor="var(--foreground)"
          barColor={mm > 50 ? "#a855f7" : "#60a5fa"}
        />
      </div>
    </div>
  );
}
77 changes: 77 additions & 0 deletions lib/components/GpuMediaPanel.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"use client";

import type { GpuMedia } from "@/types/metrics";

/**
 * Props for {@link GpuMediaPanel}.
 *
 * The panel reads the `encoder` and `decoder` fields of `media`; a `null`
 * reading is displayed as "N/A" with no progress bar.
 */
interface GpuMediaPanelProps {
media: GpuMedia;
}

/**
 * Tile for a single media engine: caption, formatted utilization, and a
 * progress bar when a usable reading exists.
 *
 * A reading is usable only if it is a finite number. `null`, and any
 * `undefined`/`NaN` that slips through at runtime, is shown as "N/A" with no
 * bar rather than as "NaN%" with an invalid CSS width.
 */
function MediaEngineTile({ label, value }: { label: string; value: number | null }) {
  const reading = typeof value === "number" && Number.isFinite(value) ? value : null;

  return (
    <div className="p-3 rounded-lg" style={{ background: "var(--surface-2)" }}>
      <div className="flex items-center justify-between mb-2">
        <span className="text-xs" style={{ color: "var(--muted-foreground)" }}>{label}</span>
        <span className="text-sm font-bold" style={{ color: "var(--foreground)" }}>
          {reading === null ? "N/A" : `${Math.round(reading)}%`}
        </span>
      </div>
      {reading !== null && (
        // NOTE(review): the bg-surface-2 track sits on a var(--surface-2) tile,
        // so it may be hard to see. Confirm against the theme.
        <div className="w-full h-2 bg-surface-2 rounded-full overflow-hidden">
          <div
            className="h-full rounded-full transition-all duration-300"
            style={{
              // Clamp on both sides so the width is always a valid percentage.
              width: `${Math.min(Math.max(reading, 0), 100)}%`,
              background: reading > 70 ? "#a855f7" : "#60a5fa",
            }}
          />
        </div>
      )}
    </div>
  );
}

/**
 * Card showing GPU media engine (encoder / decoder) utilization.
 *
 * @param media - Encoder and decoder readings; each is a percentage or `null`
 *   when unavailable.
 */
export default function GpuMediaPanel({ media }: GpuMediaPanelProps) {
  const { encoder, decoder } = media;

  return (
    <div className="p-4 rounded-lg border" style={{ background: "var(--surface-1)", borderColor: "var(--border)" }}>
      <div className="flex items-center gap-2 mb-3">
        <div className="p-2 rounded-md" style={{ background: "rgba(168, 85, 247, 0.2)" }}>
          <svg className="w-5 h-5" style={{ color: "#a855f7" }} fill="none" viewBox="0 0 24 24" stroke="currentColor">
            <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M15 10l4.553-2.276A1 1 0 0121 8.618v6.764a1 1 0 01-1.447.894L15 14M5 18h8a2 2 0 002-2V8a2 2 0 00-2-2H5a2 2 0 00-2 2v8a2 2 0 002 2z" />
          </svg>
        </div>
        <div>
          <p className="text-[10px] uppercase tracking-wider" style={{ color: "var(--muted-foreground)" }}>
            Media Engines
          </p>
        </div>
      </div>

      <div className="grid grid-cols-2 gap-3">
        <MediaEngineTile label="Encoder" value={encoder} />
        <MediaEngineTile label="Decoder" value={decoder} />
      </div>
    </div>
  );
}
Loading
Loading