# Analyze parallel performance

In this notebook we look at the performance of the parallel simulation versus the serial reference case. Before comparing things such as execution speed and memory consumption, make sure that all cases used in the study have been run successfully to completion and the outcomes are equivalent.

### Imports

In [None]:
# General
import pathlib as pl
import numpy as np
import matplotlib.pyplot as plt

# FloPy
from flopy.mf6.utils import MfSimulationList

# Local
from utilities import *

## Collect performance data

First collect some of the performance data from the simulation workspace directories and assemble them into lists, one per metric. The next step will then be plotting the results in an overview.

In [None]:
# Workspaces of all cases in the study; the loop below treats the keys as the
# number of domains and the values as the workspace of that case.
all_ws = get_all_workspaces()

# Print the inventory: a header row followed by one row per case
print("ndomains, workspace")
for ndomains, wspace in all_ws.items():
    print(f"{ndomains}, {wspace}")

In a parallel run, every process simulates part of the entire model and writes its own list file with timings, memory information, etc. to disk. Parallel simulations therefore have multiple list files, following the pattern `mfsim.p*.lst`, where `*` is the process id. Here those per-process numbers are reduced to a single number per simulation, for example by taking the maximum of the run times over all processes (domains) or by summing the memory used by all of them.

In [None]:
def _max_over_processes(listfiles, metric):
    """Return the largest value of ``metric`` over the list files of one simulation.

    Parameters
    ----------
    listfiles : list
        List-file reader objects belonging to a single simulation.
    metric : callable
        Called with one list-file object; returns the number to compare.

    Returns
    -------
    The maximum of the metric, as computed by ``np.max``.
    """
    return np.max([metric(listfile) for listfile in listfiles])


def _timer_seconds(timer_name):
    """Build a metric that reads the simulation timer ``timer_name`` in seconds."""

    def read_timer(listfile):
        return listfile.get_runtime(units="seconds", simulation_timer=timer_name)

    return read_timer


# One entry per simulation, in the iteration order of all_ws
nr_cpus = []

run_time = []
form_time = []
sol_time = []
nr_outers = []
nr_iters_total = []

for ndomains, wspace in all_ws.items():
    # list with simulation list file objects
    mfsim_lists = [MfSimulationList(lst) for lst in get_simulation_listfiles(wspace)]

    # Fail early and clearly, before any result list is touched, so the lists
    # cannot end up with different lengths.
    if not mfsim_lists:
        raise ValueError(
            f"no simulation list files found in workspace {wspace} "
            f"(ndomains = {ndomains})"
        )

    # nr. of processors
    nr_cpus.append(ndomains)

    # max of elapsed run times, formulate times and solution times
    run_time.append(_max_over_processes(mfsim_lists, _timer_seconds("elapsed")))
    form_time.append(_max_over_processes(mfsim_lists, _timer_seconds("formulate")))
    sol_time.append(_max_over_processes(mfsim_lists, _timer_seconds("solution")))

    # max nr. of outer iterations
    nr_outers.append(
        _max_over_processes(mfsim_lists, lambda lf: lf.get_outer_iterations())
    )

    # max of the total nr. of iterations
    nr_iters_total.append(
        _max_over_processes(mfsim_lists, lambda lf: lf.get_total_iterations())
    )

In [None]:
# 2x2 grid of panels, addressed by the letters used in the mosaic string;
# all panels share the x-axis.
axd = plt.figure(
    layout="constrained",
    figsize=(6, 6),
).subplot_mosaic(
    """
    ab
    cd
    """,
    empty_sentinel="X",
    sharex=True,
)

# Data series shown in each panel, keyed by the panel letter
timing_data = {
    "a": run_time,
    "b": nr_iters_total,
    "c": form_time,
    "d": sol_time,
}

ylabels = {
    "a": "Runtime (sec)",
    "b": "Number of iterations",
    "c": "Formulation time (sec)",
    "d": "Solution time (sec)",
}

plotlabels = {
    "a": "Runtime",
    "b": "Inner iterations",
    "c": "Formulation time",
    "d": "Solution time",
}

for key, ax in axd.items():
    ax.plot(
        nr_cpus,
        timing_data[key],
        lw=0.5,
        color="black",
        marker="o",
        ms=4,
        mfc="none",
        mec="black",
        label=plotlabels[key],
    )
    # No legend is drawn, so show the label as the panel title to identify it
    ax.set_title(plotlabels[key])
    ax.set_ylabel(ylabels[key])
    # x-axis is shared: only the bottom row ("c", "d") gets an x label
    if key in ("c", "d"):
        ax.set_xlabel("Number of CPUs")


