In [1]:
import torch
import time
import scipy
from memory_profiler import memory_usage
import cProfile
import pstats

from torchfem import Solid
from torchfem.materials import Isotropic

In [2]:
def get_cube(N):
    """Build a unit-cube `Solid` model with N nodes per edge and preset boundary conditions.

    Nodes lie on a regular N x N x N grid spanning [0, 1] in every direction and
    are connected into (N - 1)**3 eight-node elements. All degrees of freedom on
    the face x = 0 are constrained; on the face x = 1 the x-component is
    constrained and its displacement is prescribed to 0.1.

    Args:
        N: Number of grid nodes along each edge of the cube.

    Returns:
        The configured `Solid` instance.
    """
    # Node coordinates: one row per node, columns are x, y, z.
    axis = torch.linspace(0, 1, N)
    grid_axes = torch.meshgrid(axis, axis, axis, indexing="ij")
    nodes = torch.vstack([g.ravel() for g in grid_axes]).T

    # Element connectivity: every cell of the grid contributes its 8 corner nodes.
    # `lo` selects the lower corner along an axis, `hi` the upper one.
    node_ids = torch.arange(N**3).reshape((N, N, N))
    lo, hi = slice(None, -1), slice(1, None)
    # Corner order walks around the z-lower face, then around the z-upper face
    # (presumably the node ordering torchfem expects for this element -- confirm).
    corners = [
        (lo, lo, lo),
        (hi, lo, lo),
        (hi, hi, lo),
        (lo, hi, lo),
        (lo, lo, hi),
        (hi, lo, hi),
        (hi, hi, hi),
        (lo, hi, hi),
    ]
    elements = torch.vstack([node_ids[c].ravel() for c in corners]).T

    # Assemble the model with an isotropic material.
    cube = Solid(nodes, elements, Isotropic(E=1000.0, nu=0.3))

    # Zero nodal forces that track gradients, so a backward pass can be run later.
    cube.forces = torch.zeros_like(nodes, requires_grad=True)

    # Boundary conditions on the two faces normal to x.
    x_coord = nodes[:, 0]
    on_fixed_face = x_coord == 0.0
    on_loaded_face = x_coord == 1.0
    cube.constraints[on_fixed_face, :] = True
    cube.constraints[on_loaded_face, 0] = True
    cube.displacements[on_loaded_face, 0] = 0.1

    return cube

### Memory analysis for a cube of size N = 70

**Initializing object:** about 4000 MB in total, of which 3000 MB is taken by indices and 800 MB by the vectorized material

In [3]:
# Build and solve the N=70 cube once; the solution itself is discarded.
_ = get_cube(70).solve()

In [4]:
# Profile one solve of the N=40 cube (model construction included) and list the
# 10 functions with the largest internal time.
# The profile is collected in memory, so no "stats" dump file is left behind in
# the working directory.
with cProfile.Profile() as profiler:
    get_cube(40).solve()
p = pstats.Stats(profiler)
# print_stats() returns the Stats object; the trailing semicolon keeps its repr
# out of the cell output.
p.sort_stats("tottime").print_stats(10);

Sun Oct 27 17:22:25 2024    stats

         26815 function calls (26711 primitive calls) in 7.613 seconds

   Ordered by: internal time
   List reduced from 274 to 10 due to restriction <10>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    2.512    2.512    2.512    2.512 {method 'coalesce' of 'torch._C.TensorBase' objects}
      196    1.992    0.010    1.992    0.010 {built-in method scipy.sparse._sparsetools.csr_matvec}
       96    1.840    0.019    1.840    0.019 {built-in method torch.einsum}
        2    0.270    0.135    2.412    1.206 /Users/meyernil/Code/torch-fem/src/torchfem/base.py:75(integrate)
        1    0.207    0.207    2.221    2.221 /Users/meyernil/miniforge3/envs/torchfem/lib/python3.10/site-packages/scipy/sparse/linalg/_isolve/minres.py:10(minres)
      198    0.161    0.001    0.161    0.001 {built-in method torch.stack}
        1    0.136    0.136    2.737    2.737 /Users/meyernil/Code/torch-fem/src/torchfem/base.py:117(asse

<pstats.Stats at 0x122163490>

In [5]:
# Benchmark the forward solve and the backward pass for increasing grid sizes.
# results maps N -> (dofs, fwd_time, fwd_mem_usage, bwd_time, bwd_mem_usage);
# times are in seconds and memory values are in the units reported by
# memory_profiler.memory_usage.
# NOTE: each timed interval wraps the memory_usage() call, so the reported
# durations include the memory sampler's own overhead.
results = {}
for N in [10, 20, 30, 40, 50, 60, 70, 80, 90]:
    print(f"Running N={N}")
    box = get_cube(N)
    dofs = box.n_dofs

    # Forward pass
    # perf_counter() is monotonic and high-resolution, unlike the wall clock
    # time.time(), which can jump if the system clock is adjusted.
    start_time = time.perf_counter()
    mem_usage, (u, f, sigma, epsilon, state) = memory_usage(
        lambda: box.solve(), retval=True, interval=0.1
    )
    end_time = time.perf_counter()
    # Memory cost is the spread between the highest and lowest sample.
    fwd_mem_usage = max(mem_usage) - min(mem_usage)
    fwd_time = end_time - start_time
    print(f"  ... forward pass with {dofs} DOFs done in {fwd_time:.2f}s.")

    # Backward pass
    start_time = time.perf_counter()
    mem_usage = memory_usage(lambda: u.sum().backward(retain_graph=True), interval=0.1)
    end_time = time.perf_counter()
    bwd_mem_usage = max(mem_usage) - min(mem_usage)
    bwd_time = end_time - start_time
    # Unit suffix "s" added so this message matches the forward-pass message.
    print(f"  ... backward pass with {dofs} DOFs done in {bwd_time:.2f}s.")

    results[N] = (
        dofs,
        fwd_time,
        fwd_mem_usage,
        bwd_time,
        bwd_mem_usage,
    )

Running N=10
  ... forward pass with 3000 DOFs done in 0.84s.
  ... backward pass with 3000 DOFs done in 0.66.
Running N=20
  ... forward pass with 24000 DOFs done in 5.92s.
  ... backward pass with 24000 DOFs done in 5.45.
Running N=30
  ... forward pass with 81000 DOFs done in 2.94s.
  ... backward pass with 81000 DOFs done in 1.58.
Running N=40
  ... forward pass with 192000 DOFs done in 7.86s.
  ... backward pass with 192000 DOFs done in 4.08.
Running N=50
  ... forward pass with 375000 DOFs done in 16.45s.
  ... backward pass with 375000 DOFs done in 9.30.
Running N=60
  ... forward pass with 648000 DOFs done in 31.62s.
  ... backward pass with 648000 DOFs done in 19.37.
Running N=70
  ... forward pass with 1029000 DOFs done in 56.33s.
  ... backward pass with 1029000 DOFs done in 34.10.
Running N=80
  ... forward pass with 1536000 DOFs done in 93.71s.
  ... backward pass with 1536000 DOFs done in 56.53.
Running N=90
  ... forward pass with 2187000 DOFs done in 146.70s.
  ... back

In [6]:
# Render the benchmark results as a Markdown-style table, one row per grid size.
table_lines = [
    "|  N  |    DOFs | FWD Time |  FWD Memory | BWD Time |  BWD Memory |",
    "| --- | ------- | -------- | ----------- | -------- | ----------- |",
]
for N, (dofs, fwd_t, fwd_mem, bwd_t, bwd_mem) in results.items():
    table_lines.append(
        f"| {N:3d} | {dofs:7d} | {fwd_t:7.2f}s |  {fwd_mem:7.2f} MB |"
        f" {bwd_t:7.2f}s |  {bwd_mem:7.2f} MB |"
    )
print("\n".join(table_lines))

# Print SciPy's build configuration alongside the benchmark numbers.
scipy.show_config()

|  N  |    DOFs | FWD Time |  FWD Memory | BWD Time |  BWD Memory |
| --- | ------- | -------- | ----------- | -------- | ----------- |
|  10 |    3000 |    0.84s |     0.53 MB |    0.66s |     0.12 MB |
|  20 |   24000 |    5.92s |   268.69 MB |    5.45s |   235.25 MB |
|  30 |   81000 |    2.94s |   670.89 MB |    1.58s |     0.62 MB |
|  40 |  192000 |    7.86s |  1681.77 MB |    4.08s |   350.62 MB |
|  50 |  375000 |   16.45s |  3056.41 MB |    9.30s |   834.12 MB |
|  60 |  648000 |   31.62s |  4049.66 MB |   19.37s |  1296.44 MB |
|  70 | 1029000 |   56.33s |  4495.06 MB |   34.10s |  2405.62 MB |
|  80 | 1536000 |   93.71s |  6787.83 MB |   56.53s |  3716.17 MB |
|  90 | 2187000 |  146.70s |  8282.39 MB |  109.08s |  6407.16 MB |
Build Dependencies:
  blas:
    detection method: extraframeworks
    found: true
    include directory: unknown
    lib directory: unknown
    name: accelerate
    openblas configuration: unknown
    pc file directory: unknown
    version: unknown
  l