In [1]:
import subprocess
import sys

from memory_profiler import memory_usage
from torch.profiler import profile, ProfilerActivity

from cubes import get_cube

In [None]:
# Configure the PyTorch profiler: CPU activity only, with memory accounting
# and operator input shapes recorded.
prof = profile(
    activities=[ProfilerActivity.CPU],
    record_shapes=True,
    profile_memory=True,
    # with_stack=True,
)

# Everything executed inside this block is profiled: building the cube and
# solving it.
with prof:
    cube = get_cube(20)
    solution = cube.solve(rtol=1e-5)

# Optional: memory allocation over time
# prof.export_memory_timeline("memory.html")

# Write the trace in Chrome Trace format; it can be opened in chrome://tracing
# or at https://ui.perfetto.dev/
prof.export_chrome_trace("trace.json")

In [None]:
# Plain run of the same problem as the profiler cell (get_cube(20), solved with
# rtol=1e-5), without any profiler context around it.
#
# To profile a specific function, add the @profile decorator to that function
# before running this cell.
# NOTE(review): in this notebook the name `profile` is bound to
# torch.profiler.profile by the import cell; presumably the decorator meant
# here is memory_profiler's `profile` -- confirm which one is intended.
cube = get_cube(20)
solution = cube.solve(rtol=1e-5)

In [2]:
# Benchmark table: run cubes.py once per grid size N in a fresh subprocess and
# append the peak memory measured for that run to the row the script prints.
print("|  N  |     DOFs |  FWD Time |  BWD Time |   Peak RAM |")
print("| --- | -------- | --------- | --------- | ---------- |")

device = "cpu"
for N in [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]:
    # sys.executable is the interpreter running this kernel, so the benchmark
    # uses the same environment as the notebook instead of whatever "python"
    # happens to resolve to on PATH.
    command = [sys.executable, "cubes.py", "-N", str(N), "-device", device]

    # memory_usage samples memory every `interval` seconds while the callable
    # runs. include_children=True counts the spawned subprocess, max_usage=True
    # reduces the samples to their peak, and retval=True also returns the
    # callable's result (the CompletedProcess).
    mem_usage, result = memory_usage(
        lambda: subprocess.run(command, capture_output=True, text=True),
        retval=True,
        include_children=True,
        max_usage=True,
        interval=0.1,
    )

    if result.returncode != 0:
        # Without this check a crashed run would print a row holding only the
        # memory figure, and the reason (captured stderr) would be discarded.
        stderr_lines = result.stderr.strip().splitlines()
        reason = stderr_lines[-1] if stderr_lines else "no stderr output"
        print(f"| {N:3d} | FAILED (exit code {result.returncode}): {reason}")
        continue

    # cubes.py prints the first columns of the row; the peak RAM column is
    # appended here.
    print(result.stdout.strip() + f" {mem_usage:8.1f}MB |")

|  N  |     DOFs |  FWD Time |  BWD Time |   Peak RAM |
| --- | -------- | --------- | --------- | ---------- |
|  10 |     3000 |     0.16s |     0.08s |    571.4MB |
|  20 |    24000 |     0.71s |     0.28s |    883.0MB |
|  30 |    81000 |     2.71s |     1.17s |   1453.3MB |
|  40 |   192000 |     7.57s |     3.73s |   2351.0MB |
|  50 |   375000 |    16.20s |     8.94s |   3919.8MB |
|  60 |   648000 |    27.59s |    18.82s |   4855.9MB |
|  70 |  1029000 |    48.86s |    40.32s |   6761.7MB |
|  80 |  1536000 |    85.89s |    68.61s |   7454.9MB |
|  90 |  2187000 |   131.45s |   110.14s |   8457.8MB |
| 100 |  3000000 |   193.44s |   162.16s |   9898.3MB |
