# 01 — Decorator quickstart

This notebook demonstrates:

- wrapping a function with `@gpu_profile`
- the importance of `sync_fn` for CUDA-async frameworks (PyTorch)
- capturing the summary programmatically

If PyTorch or CUDA is not available, the PyTorch demo cell prints a notice and skips itself.


In [None]:
from profgpu import gpu_profile


# By default the decorator prints a report.
@gpu_profile(strict=False, interval_s=0.2)
def do_work():
    """Placeholder workload: sleep for two seconds instead of doing GPU work."""
    from time import sleep

    # Stand-in only: swap this for the real GPU work from your codebase.
    pause_s = 2
    sleep(pause_s)


do_work()

In [None]:
# Optional PyTorch demo: runs only when torch imports and CUDA is usable.

try:
    import torch
except Exception as exc:
    # Any failure while importing torch is treated as "torch missing".
    torch = None
    print("torch not available:", exc)

if torch is not None and torch.cuda.is_available():
    from profgpu import gpu_profile

    # torch.cuda.synchronize is handed to the profiler as sync_fn; how profgpu
    # uses it is not visible here (presumably to wait for queued GPU work
    # around the measured region -- confirm against the profgpu docs).
    @gpu_profile(warmup_s=0.2, sync_fn=torch.cuda.synchronize, interval_s=0.1)
    def matmul_bench(n=4096, steps=30):
        """Multiply two random (n, n) CUDA tensors `steps` times, discarding each product."""
        lhs = torch.randn(n, n, device="cuda")
        rhs = torch.randn(n, n, device="cuda")
        for _ in range(steps):
            # The product itself is never used; only the GPU work matters.
            _ = lhs @ rhs

    matmul_bench()
else:
    print("CUDA not available; skipping PyTorch demo.")

In [None]:
# Get results back as a structured object

from profgpu import gpu_profile


# report=False and return_profile=True are passed so the call yields an object
# exposing `.value` and `.gpu` (both read below) -- see the profgpu docs for
# the exact semantics of each flag.
@gpu_profile(strict=False, interval_s=0.2, return_profile=True, report=False)
def work_and_return_value():
    """Sleep for one second, then return a small status dict."""
    from time import sleep

    sleep(1)
    outcome = {"ok": True}
    return outcome


res = work_and_return_value()
# The function's own return value and the GPU summary fields are read off `res`.
print("value:", res.value)
print("util mean:", res.gpu.util_gpu_mean)
print("p95:", res.gpu.util_gpu_p95)