In [1]:
pip install psutil GPUtil rich

Collecting GPUtil
  Downloading GPUtil-1.4.0.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... done
Collecting rich
  Using cached rich-14.0.0-py3-none-any.whl.metadata (18 kB)
Collecting markdown-it-py>=2.2.0 (from rich)
  Using cached markdown_it_py-3.0.0-py3-none-any.whl.metadata (6.9 kB)
Collecting mdurl~=0.1 (from markdown-it-py>=2.2.0->rich)
  Using cached mdurl-0.1.2-py3-none-any.whl.metadata (1.6 kB)
Downloading rich-14.0.0-py3-none-any.whl (243 kB)
Downloading markdown_it_py-3.0.0-py3-none-any.whl (87 kB)
Downloading mdurl-0.1.2-py3-none-any.whl (10.0 kB)
Building wheels for collected packages: GPUtil
  Building wheel for GPUtil (setup.py) ... done
  Created wheel for GPUtil: filename=GPUtil-1.4.0-py3-none-any.whl size=7410 sha256=1f33eb4bd56ca9549daf3213d7eb3911a99bed7b9cd3e68ec50cb6db2077d953
  Stored in directory: /root/.cache/pip/wheels/a9/8a/bd/81082387151853ab8b6b3ef33426e98f5cbfebc3c397a9d4d0
Successfully built GPUtil
Installing collected packages: GPUtil, mdurl, ma

In [None]:
import time
import psutil
import GPUtil
from rich.live import Live
from rich.panel import Panel
from rich.table import Table
from rich.progress import Progress, BarColumn, TextColumn

def format_size(bytes_value):
    """Format a byte count as a string in units of 1024**3 bytes.

    Args:
        bytes_value: Size in bytes (any real number).

    Returns:
        The size divided by 1024**3, rounded to one decimal place and
        suffixed with " GB" (e.g. ``"1.5 GB"``).
    """
    bytes_per_unit = 1024 ** 3
    scaled = bytes_value / bytes_per_unit
    return f"{scaled:.1f} GB"

def make_bar(label: str, percent: float, usage_str: str, color: str = "green") -> Panel:
    """Build a panel containing a single, pre-filled progress bar.

    Args:
        label: Metric name; shown bold and left-aligned in a 12-character
            column, and also used as the task description.
        percent: Completed amount on a 0-100 scale (the task total is 100).
        usage_str: Free-form detail text rendered in white after the
            percentage.
        color: Rich style applied to the completed part of the bar.

    Returns:
        A ``Panel`` wrapping the configured ``Progress`` renderable.
    """
    columns = [
        TextColumn(f"[bold]{label:<12}"),
        BarColumn(bar_width=40, complete_style=color),
        TextColumn("{task.percentage:>5.1f}%"),
        TextColumn(f"[white]{usage_str}"),
    ]
    bar = Progress(*columns, expand=True)

    # Register one task out of 100 and move it straight to the requested level.
    bar_task = bar.add_task(label, total=100)
    bar.update(bar_task, completed=percent)

    return Panel(bar)

def get_color(percent):
    """Map a utilisation percentage to a severity colour name.

    Args:
        percent: Utilisation value to classify.

    Returns:
        ``"red"`` when the value is strictly above 90, ``"yellow"`` when it
        is strictly above 70, otherwise ``"green"``.
    """
    # Thresholds are checked from most to least severe; the first one
    # exceeded decides the colour.
    for threshold, color_name in ((90, "red"), (70, "yellow")):
        if percent > threshold:
            return color_name
    return "green"

def get_stats():
    """Collect one snapshot of CPU, RAM, disk and per-GPU utilisation.

    Returns:
        list[tuple[str, float, str, str]]: One ``(label, percent, usage_str,
        color)`` entry per metric, in the order CPU, RAM, Disk, followed by a
        "Load" and a "Mem" entry for every GPU returned by
        ``GPUtil.getGPUs()``. ``color`` comes from ``get_color(percent)``.
    """
    stats = []

    # CPU
    # NOTE: called without an interval, so psutil reports usage since the
    # previous call; the very first reading after import is not meaningful.
    cpu_percent = psutil.cpu_percent()
    # cpu_freq() returns None when the frequency cannot be determined
    # (typical on virtual machines / containers); show a placeholder instead
    # of crashing on `.current`.
    cpu_freq = psutil.cpu_freq()
    cpu_detail = f"{cpu_freq.current:.0f} MHz" if cpu_freq is not None else "N/A"
    stats.append(("CPU", cpu_percent, cpu_detail, get_color(cpu_percent)))

    # RAM
    ram = psutil.virtual_memory()
    ram_percent = ram.percent
    ram_used = format_size(ram.used)
    ram_total = format_size(ram.total)
    stats.append(("RAM", ram_percent, f"{ram_used} / {ram_total}", get_color(ram_percent)))

    # Disk (root filesystem only)
    disk = psutil.disk_usage("/")
    disk_percent = disk.percent
    disk_used = format_size(disk.used)
    disk_total = format_size(disk.total)
    stats.append(("Disk", disk_percent, f"{disk_used} / {disk_total}", get_color(disk_percent)))

    # GPUs
    for gpu in GPUtil.getGPUs():
        gpu_load = gpu.load * 100  # load is scaled by 100 to get a percentage
        # Guard against a reported total of 0 so one odd device cannot take
        # the whole dashboard down with a ZeroDivisionError.
        gpu_mem = gpu.memoryUsed / gpu.memoryTotal * 100 if gpu.memoryTotal else 0.0
        mem_str = f"{gpu.memoryUsed:.1f} MB / {gpu.memoryTotal:.1f} MB"

        stats.append((f"GPU{gpu.id} Load", gpu_load, "", get_color(gpu_load)))
        stats.append((f"GPU{gpu.id} Mem", gpu_mem, mem_str, get_color(gpu_mem)))

    return stats

# Live dashboard: rebuild the grid of bars from a fresh get_stats() snapshot
# once per second and push it to the Live display.
try:
    with Live(refresh_per_second=1) as live:
        while True:
            layout = Table.grid(padding=1)

            for name, percent, usage_str, color in get_stats():
                layout.add_row(make_bar(name, percent, usage_str, color))

            live.update(Panel(layout, title="🖥️ System Resource Monitor (All GPUs)", border_style="bright_blue"))
            time.sleep(1)
except KeyboardInterrupt:
    # The loop has no exit condition, so interrupting the kernel (or Ctrl-C)
    # is the intended way to stop it. The `with` block has already shut the
    # Live display down by the time we get here; swallow the interrupt so the
    # cell ends without a traceback.
    pass

Output()

In [None]:
OMP_NUM_THREADS=1 torchrun --standalone --nnodes=1 --nproc_per_node=8  GLM-4/finetune/finetune_vision.py  rstpreid_conv/  THUDM/glm-4v-9b  GLM-4/finetune/configs/lora.yaml 
