In [1]:
import cProfile
import pstats
import subprocess
import sys

from memory_profiler import memory_usage

from cubes import get_cube

In [2]:
# Benchmark table: one row per cube size N.
# Each size is run in its own subprocess so that memory_usage can report a
# peak for that run alone (children included) rather than for the kernel.
print("|  N  |     DOFs |  FWD Time |  BWD Time |   Peak RAM |")
print("| --- | -------- | --------- | --------- | ---------- |")

device = "cpu"
for N in [10, 20, 30, 40, 50, 60, 70, 80, 90]:
    # retval=True   -> also hand back the CompletedProcess from subprocess.run
    # max_usage=True -> return only the peak instead of the full sample trace
    # interval=0.1  -> sampling period in seconds
    mem_usage, result = memory_usage(
        lambda: subprocess.run(
            # sys.executable guarantees the benchmark runs in the same
            # interpreter/environment as this kernel; a bare "python" is
            # resolved through PATH and may pick a different installation.
            [sys.executable, "cubes.py", "-N", str(N), "-device", device],
            capture_output=True,
            text=True,
        ),
        retval=True,
        include_children=True,
        max_usage=True,
        interval=0.1,
    )

    # Fail loudly: with capture_output=True a crashed run would otherwise
    # yield an empty stdout and a silently malformed table row.
    if result.returncode != 0:
        raise RuntimeError(
            f"cubes.py failed for N={N} (exit code {result.returncode}):\n"
            f"{result.stderr}"
        )

    # cubes.py is expected to print the leading columns of the row; the peak
    # memory column is appended here.
    print(result.stdout.strip() + f" {mem_usage:8.1f}MB |")

|  N  |     DOFs |  FWD Time |  BWD Time |   Peak RAM |
| --- | -------- | --------- | --------- | ---------- |
|  10 |     3000 |     0.24s |     0.15s |    567.7MB |
|  20 |    24000 |     0.74s |     0.25s |    965.7MB |
|  30 |    81000 |     2.63s |     1.18s |   1797.9MB |
|  40 |   192000 |     7.18s |     3.66s |   2814.2MB |
|  50 |   375000 |    15.60s |     9.12s |   3784.7MB |
|  60 |   648000 |    32.22s |    19.24s |   4368.8MB |
|  70 |  1029000 |    55.33s |    34.54s |   5903.4MB |
|  80 |  1536000 |    87.58s |    56.95s |   7321.9MB |
|  90 |  2187000 |   137.29s |   106.87s |   8855.2MB |


### Memory and speed analysis for cube of size 70

In [2]:
# Line-by-line memory profile for a single cube of size 70.
# Prerequisite: add the @profile decorator to the function to be profiled
# (the report below was produced with it applied to `solve` in torchfem/base.py).
# NOTE(review): @profile presumably comes from memory_profiler — confirm.
cube = get_cube(70)
# max_iter=1 overrides solve's default of 10 — presumably to keep the profiled
# run short; verify that one iteration is representative.
solution = cube.solve(max_iter=1)

Filename: /Users/meyernil/Code/torch-fem/src/torchfem/base.py

Line #    Mem usage    Increment  Occurrences   Line Contents
   164   1980.1 MiB   1980.1 MiB           1       @profile
   165                                             def solve(
   166                                                 self,
   167                                                 increments: Tensor = torch.tensor([0.0, 1.0]),
   168                                                 max_iter: int = 10,
   169                                                 tol: float = 1e-4,
   170                                                 verbose: bool = False,
   171                                                 return_intermediate: bool = False,
   172                                                 aggregate_integration_points: bool = True,
   173                                             ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]:
   174                                                 """Solve the FEM 

In [3]:
# cProfile: time profile of a full solve for a cube of size 70.
# The raw profile is written to the file "stats" in the working directory and
# then loaded back with pstats for reporting.
cProfile.run("get_cube(70).solve()", "stats")
p = pstats.Stats("stats")
# Show the 10 entries with the largest internal time (time spent in the
# function itself, excluding callees). The trailing semicolon keeps the
# returned Stats object's repr out of the cell output.
p.sort_stats("tottime").print_stats(10);

Mon Nov  4 19:09:33 2024    stats

         48705 function calls (48274 primitive calls) in 50.813 seconds

   Ordered by: internal time
   List reduced from 472 to 10 due to restriction <10>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      317   18.207    0.057   18.207    0.057 {built-in method scipy.sparse._sparsetools.csr_matvec}
        1   15.898   15.898   15.898   15.898 {method 'coalesce' of 'torch._C.TensorBase' objects}
       96    9.260    0.096    9.260    0.096 {built-in method torch.einsum}
        1    2.459    2.459   20.850   20.850 /Users/meyernil/miniforge3/envs/torchfem/lib/python3.10/site-packages/scipy/sparse/linalg/_isolve/minres.py:10(minres)
        2    0.841    0.421   11.618    5.809 /Users/meyernil/Code/torch-fem/src/torchfem/base.py:76(integrate)
      198    0.819    0.004    0.819    0.004 {built-in method torch.stack}
        1    0.731    0.731   17.218   17.218 /Users/meyernil/Code/torch-fem/src/torchfem/base.py:128(ass

<pstats.Stats at 0x142350e80>