# 06 — CuPy quick benchmark

This notebook demonstrates using `gpu-profile` with CuPy.

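It requires a CuPy build that matches your CUDA version (e.g., `cupy-cuda12x`). If CuPy cannot be imported, the import cell prints the error and the benchmark cell is skipped.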


In [None]:
import time

try:
    import cupy as cp
except Exception as e:
    cp = None
    print('CuPy not available:', e)

from gpu_profile import gpu_profile


In [None]:
if cp is not None:
    # Synchronize method of CuPy's null stream; passed to the profiler and
    # also called directly inside the benchmark.
    sync_fn = cp.cuda.Stream.null.synchronize

    @gpu_profile(interval_s=0.1, sync_fn=sync_fn, warmup_s=0.2)
    def cupy_matmul(n=8192, steps=10):
        """Time repeated float32 matrix products of two random CuPy arrays.

        Args:
            n: Side length of the two square ``(n, n)`` operands.
            steps: Number of products performed inside the timed section.

        Prints the wall-clock time of the timed section; returns nothing.
        """
        shape = (n, n)
        lhs = cp.random.randn(*shape, dtype=cp.float32)
        rhs = cp.random.randn(*shape, dtype=cp.float32)

        # Untimed warmup product, synchronized before the clock starts.
        _ = lhs @ rhs
        sync_fn()

        started = time.perf_counter()
        # `_` is deliberately both the loop variable and the product name:
        # rebinding it at the top of each iteration drops the reference to
        # the previous product before the next one is computed.
        for _ in range(steps):
            _ = lhs @ rhs
        # Synchronize before reading the clock (presumably because the
        # products are queued asynchronously -- see CuPy stream docs).
        sync_fn()
        dt = time.perf_counter() - started
        print(f'matmul wall={dt:.3f}s')

    cupy_matmul()
