# Parallelisation Benchmark (Strong Scaling)

A benchmarking workflow to measure **parallel scaling efficiency** of the OpenFOAM
CFD solver on the target HPC cluster.

The experiment fixes **one terrain**, **one wind direction**, and **one mesh** and
submits the same simulation with a range of core counts.  The resulting wall-clock
times are used to compute:

- **Speedup** = T(baseline) / T(N)
- **Efficiency** = Speedup / (N / N_baseline)

where `T(baseline)` is the wall time at the smallest core count tested and
`N_baseline` is that core count.

## How copying works — one transfer, not N

All variants share **exactly the same mesh**.  To avoid transferring the full
case directory once per core count, the workflow is structured as follows:

1. **Mesh once** — a single `BASE_CASE_DIR` is built locally.
2. **Copy once** — `BASE_CASE_DIR` is rsynced to the cluster once
   (`REMOTE_BASE_CASE_PATH`).
3. **Per-variant job scripts** — for each core count a small SLURM batch script
   is generated (< 1 KB) and uploaded to the cluster.  When the job starts,
   the script does a fast **local** `cp -r` on the cluster from the shared base
   to a variant subdirectory, writes the correct `decomposeParDict`, and then
   runs `decomposePar` + `simpleFoam -parallel`.  No large data transfer
   is repeated.

**Node arithmetic:** the cluster has **128 cores per node**, so:
```
nodes             = ceil(n_cores / 128)
ntasks_per_node   = min(n_cores, 128)
```

**Resume-safe:** Close and reopen at any time.  
All decisions are derived from `benchmark_metadata.json` files written into each
variant directory.

## 1. Configuration

Edit these settings before running the notebook.

In [None]:
import math
import os
import sys

# ── Paths ────────────────────────────────────────────────────────────────────
# Root of the CFD-dataset repository.
# NOTE: "__file__" is a quoted string literal here, so abspath() resolves it
# against the current working directory and REPO_ROOT ends up being the
# directory the kernel was started in.  Start the notebook from the repository
# root, otherwise every path below points to the wrong place.
REPO_ROOT = os.path.dirname(os.path.abspath("__file__"))

# Path to the single terrain directory to use for this benchmark
# (e.g. a folder produced by generateInputs.py under Data/downloads/)
FIXED_TERRAIN_DIR = os.path.join(REPO_ROOT, "Data", "downloads", "terrain_0001_example")

# Single wind direction to test (degrees, 0 = North).
# Formatted with :03d wherever it becomes part of a directory name.
FIXED_ROTATION_DEG = 270

# terrain_config.yaml — used as-is (no mesh modifications for this benchmark)
TERRAIN_CONFIG_PATH = os.path.join(REPO_ROOT, "terrain_config.yaml")

# Root output directory for all benchmark artefacts (base case + variants)
CASES_OUTPUT_DIR = os.path.join(REPO_ROOT, "parallelisation_benchmark")

# ── Single base case (shared mesh) ───────────────────────────────────────────
# All variants share this one meshed case.  Only the decomposition and SLURM
# resource allocation differ between variants.
BASE_CASE_DIR = os.path.join(
    CASES_OUTPUT_DIR, f"base_case_{FIXED_ROTATION_DEG:03d}deg"
)

# ── HPC paths ─────────────────────────────────────────────────────────────────
# Hostname alias used for SSH / rsync (must be configured in ~/.ssh/config)
HPC_HOST = "deucalion"

# Remote root directory on the cluster
DEUCALION_PATH = "/projects/EEHPC-BEN-2026B02-011/cfd_data"

# Remote path where the single base case is copied (done ONCE).
# Mirrors the local layout: <root>/parallelisation_benchmark/base_case_XXXdeg
REMOTE_BASE_CASE_PATH = (
    f"{DEUCALION_PATH}/parallelisation_benchmark/base_case_{FIXED_ROTATION_DEG:03d}deg"
)

# Remote root for per-variant directories created by the job scripts on the cluster
REMOTE_VARIANTS_ROOT = f"{DEUCALION_PATH}/parallelisation_benchmark"

# ── taskManager submodule ─────────────────────────────────────────────────────
# Also used below as the parent of the OpenFOAM "template" directory.
TASK_MANAGER_DIR = os.path.join(REPO_ROOT, "taskManager")

# Number of parallel workers for local meshing
N_PARALLEL_WORKERS = 4

# ── Core-count configuration ──────────────────────────────────────────────────
# Number of physical cores per node on the cluster — do not change
CORES_PER_NODE = 128

# List of total core counts to benchmark (strong scaling series).
# The smallest entry with a valid timing becomes the speedup baseline.
CORES_TO_TEST = [16, 32, 64, 128, 256, 512]

# SLURM settings common to all jobs
SLURM_PARTITION = "hpc"
SLURM_TIME      = "04:00:00"   # wall-clock time limit per job

def compute_slurm_resources(n_cores: int, cores_per_node: int = CORES_PER_NODE) -> tuple[int, int]:
    """
    Compute the SLURM --nodes and --ntasks-per-node values for a given
    total core count, assuming a fixed number of cores per node.

    Parameters
    ----------
    n_cores
        Total number of MPI ranks requested for the job (must be >= 1).
    cores_per_node
        Number of cores available on one node (must be >= 1).

    Returns
    -------
    (nodes, ntasks_per_node)

    Raises
    ------
    ValueError
        If ``n_cores`` or ``cores_per_node`` is smaller than 1.

    Notes
    -----
    When ``n_cores`` is larger than ``cores_per_node`` but not a multiple of
    it, ``nodes * ntasks_per_node`` exceeds ``n_cores`` (full nodes are
    requested).
    """
    # Reject values that would otherwise yield nonsense such as (0, 0) or a
    # bare ZeroDivisionError far away from the misconfiguration.
    if n_cores < 1:
        raise ValueError(f"n_cores must be a positive integer, got {n_cores!r}")
    if cores_per_node < 1:
        raise ValueError(f"cores_per_node must be a positive integer, got {cores_per_node!r}")

    nodes            = math.ceil(n_cores / cores_per_node)
    ntasks_per_node  = min(n_cores, cores_per_node)
    return nodes, ntasks_per_node


# ── Submodule path setup ──────────────────────────────────────────────────────
# Every entry is pushed to the FRONT of sys.path in turn, so the last entry of
# the tuple ends up with the highest import priority.
_submodule_entries = ("terrain_following_mesh_generator", "ABL_BC_generator", TASK_MANAGER_DIR)
for _entry in _submodule_entries:
    if os.path.isabs(_entry):
        _resolved = _entry
    else:
        _resolved = os.path.join(REPO_ROOT, _entry)
    if _resolved in sys.path:
        continue
    sys.path.insert(0, _resolved)

# ── Echo the effective configuration ─────────────────────────────────────────
# Labels are pre-padded so that the colons line up in the output.
_config_summary = (
    ("REPO_ROOT              ", REPO_ROOT),
    ("FIXED_TERRAIN_DIR      ", FIXED_TERRAIN_DIR),
    ("FIXED_ROTATION_DEG     ", FIXED_ROTATION_DEG),
    ("TERRAIN_CONFIG_PATH    ", TERRAIN_CONFIG_PATH),
    ("CASES_OUTPUT_DIR       ", CASES_OUTPUT_DIR),
    ("BASE_CASE_DIR          ", BASE_CASE_DIR),
    ("REMOTE_BASE_CASE_PATH  ", REMOTE_BASE_CASE_PATH),
    ("REMOTE_VARIANTS_ROOT   ", REMOTE_VARIANTS_ROOT),
    ("CORES_PER_NODE         ", CORES_PER_NODE),
    ("CORES_TO_TEST          ", CORES_TO_TEST),
)
for _label, _value in _config_summary:
    print(f"{_label}: {_value}")
print()

# ── Preview the SLURM allocation of each variant ─────────────────────────────
print("SLURM resource allocation per variant:")
for _core_count in CORES_TO_TEST:
    _node_count, _tasks_per_node = compute_slurm_resources(_core_count)
    print(f"  {_core_count:>4} cores → {_node_count} node(s), {_tasks_per_node} tasks/node")


## 2. Imports

In [None]:
import json
import subprocess
from pathlib import Path
from datetime import datetime

import yaml
import pandas as pd

# ── terrain_following_mesh_generator (submodule) ──────────────────────────────
# Optional import: a missing submodule is reported and recorded in _MESH_OK
# instead of aborting the notebook.
try:
    from terrain_following_mesh_generator import terrain_mesh as tm
    _MESH_OK = True
    print("✓ terrain_following_mesh_generator imported")
except ImportError as _e:
    _MESH_OK = False
    print(f"✗ terrain_following_mesh_generator not available: {_e}")
    print("  Run: git submodule update --init --recursive")

# ── taskManager (submodule) ───────────────────────────────────────────────────
# _TM_OK is checked by the base-case cell before it touches OpenFOAMCaseGenerator;
# the name is bound to None on failure so later references do not raise NameError.
try:
    from taskManager import OpenFOAMCaseGenerator
    _TM_OK = True
    print("✓ taskManager imported")
except ImportError as _e:
    _TM_OK = False
    OpenFOAMCaseGenerator = None
    print(f"✗ taskManager not available: {_e}")
    print("  Run: git submodule update --init --recursive")


## 3. Create Variant Metadata

For each core count in `CORES_TO_TEST`:
1. Compute `nodes` and `ntasks_per_node` via `compute_slurm_resources()`.
2. Create a lightweight variant directory `parallel_bench_{n_cores}cores_{rotation:03d}deg/`.
3. Write `benchmark_metadata.json` recording the variant parameters.

**No mesh data is duplicated here.** The mesh lives exclusively in `BASE_CASE_DIR`.
These directories later receive `system/decomposeParDict` and a SLURM run script
(both tiny), which are the only files uploaded to the cluster per variant.

Variants that already have a `benchmark_metadata.json` are **skipped** (resume-safe).

In [None]:
def _parallel_bench_dir(cases_output_dir: str, n_cores: int, rotation: int) -> Path:
    return Path(cases_output_dir) / f"parallel_bench_{n_cores}cores_{rotation:03d}deg"


# ── Ensure output root exists ─────────────────────────────────────────────────
_output_root = Path(CASES_OUTPUT_DIR)
_output_root.mkdir(parents=True, exist_ok=True)

# ── Create variant directories and metadata ───────────────────────────────────
# One lightweight directory per core count; its benchmark_metadata.json is the
# marker that makes this cell resume-safe.
for n_cores in CORES_TO_TEST:
    nodes, ntasks_per_node = compute_slurm_resources(n_cores)
    var_dir   = _parallel_bench_dir(CASES_OUTPUT_DIR, n_cores, FIXED_ROTATION_DEG)
    meta_file = var_dir / "benchmark_metadata.json"

    if not meta_file.exists():
        var_dir.mkdir(parents=True, exist_ok=True)

        # The remote variant directory carries the same name as the local one
        remote_variant_path = f"{REMOTE_VARIANTS_ROOT}/{var_dir.name}"
        meta = dict(
            n_cores=n_cores,
            nodes=nodes,
            ntasks_per_node=ntasks_per_node,
            rotation=FIXED_ROTATION_DEG,
            terrain_dir=str(FIXED_TERRAIN_DIR),
            base_case_dir=str(BASE_CASE_DIR),
            remote_base_case=REMOTE_BASE_CASE_PATH,
            remote_variant_path=remote_variant_path,
            status="pending",
            created_at=datetime.now().isoformat(),
        )
        with open(meta_file, "w") as fh:
            fh.write(json.dumps(meta, indent=2))
        print(f"  ✓ {n_cores} cores: created {var_dir.name}")
    else:
        # Resume check — metadata already written, leave the variant untouched
        print(f"  ↷ {n_cores} cores: metadata already exists, skipping")

print("\nVariant directory setup complete.")
print(f"Shared base case (to be meshed once): {BASE_CASE_DIR}")


## 4. Status Scanner

Reads every `benchmark_metadata.json` found under `CASES_OUTPUT_DIR` and assembles a
pandas DataFrame.  **Re-run this cell at any time to refresh the view.**

In [None]:
def scan_parallel_bench_status(cases_output_dir: str) -> pd.DataFrame:
    """Scan variant directories and return a status DataFrame."""
    output_path = Path(cases_output_dir)
    records = []

    if not output_path.exists():
        print(f"⚠  Output directory does not exist yet: {cases_output_dir}")
        print("   Run Section 3 first to create the variant directories.")
    else:
        for meta_file in sorted(output_path.glob("parallel_bench_*/benchmark_metadata.json")):
            try:
                with open(meta_file) as fh:
                    meta = json.load(fh)
            except (json.JSONDecodeError, OSError) as exc:
                print(f"⚠  Could not read {meta_file}: {exc}")
                continue

            # Read per-variant submission status (written by Section 8)
            submit_status_file = meta_file.parent / "submit_status.json"
            submit_status      = {}
            if submit_status_file.exists():
                try:
                    with open(submit_status_file) as fh:
                        submit_status = json.load(fh)
                except (json.JSONDecodeError, OSError):
                    pass

            records.append({
                "n_cores"             : meta.get("n_cores"),
                "nodes"               : meta.get("nodes"),
                "ntasks_per_node"     : meta.get("ntasks_per_node"),
                "pipeline_status"     : meta.get("status"),
                "base_copied_to_hpc"  : meta.get("base_copied_to_hpc", False),
                "job_id"              : submit_status.get("job_id"),
                "job_status"          : submit_status.get("job_status"),
                "wall_time"           : submit_status.get("wall_time"),
                "last_checked"        : submit_status.get("last_checked"),
                "variant_dir"         : str(meta_file.parent),
                "remote_variant_path" : meta.get("remote_variant_path"),
            })

    # Also check base case mesh status
    base_meta_file = Path(BASE_CASE_DIR) / "benchmark_metadata.json"
    base_mesh_status = "not_built"
    if base_meta_file.exists():
        try:
            bm = json.load(open(base_meta_file))
            base_mesh_status = bm.get("mesh_status", "unknown")
        except Exception:
            pass
    print(f"Base case mesh status : {base_mesh_status}  ({BASE_CASE_DIR})")

    columns = [
        "n_cores", "nodes", "ntasks_per_node", "pipeline_status",
        "base_copied_to_hpc", "job_id", "job_status", "wall_time",
        "last_checked", "variant_dir", "remote_variant_path",
    ]
    df = pd.DataFrame(records, columns=columns) if records else pd.DataFrame(columns=columns)
    if not df.empty and "n_cores" in df.columns:
        df = df.sort_values("n_cores").reset_index(drop=True)
    return df


# Take a fresh snapshot of all variants; later cells re-scan before acting so
# they never work on a stale table.
df_pbench = scan_parallel_bench_status(CASES_OUTPUT_DIR)
print(f"Scanned {len(df_pbench)} variant(s) at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")


## 5. Summary Dashboard

In [None]:
# Icon shown for each pipeline status; unknown statuses are rendered as "?"
STATUS_ICONS = dict(
    complete="★",
    pending="○",
    failed="✗",
    running="▶",
    meshed="✓",
    meshing="⟳",
)

_rule = "=" * 55
total = len(df_pbench)

# ── Header ────────────────────────────────────────────────────────────────────
print(_rule)
print("  PARALLELISATION BENCHMARK — STATUS SUMMARY")
print(f"  Terrain  : {Path(FIXED_TERRAIN_DIR).name}")
print(f"  Rotation : {FIXED_ROTATION_DEG}°")
print(f"  {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(_rule)
print(f"  Total variants : {total}")
print()

# ── Number of variants per pipeline status ───────────────────────────────────
if total > 0:
    for status, n in df_pbench["pipeline_status"].value_counts().items():
        status_label = str(status)
        icon = STATUS_ICONS.get(status_label, "?")
        print(f"  {icon}  {status_label:<16} : {n}")
print(_rule)


In [None]:
# ── Full variant table ────────────────────────────────────────────────────────
# Path columns are left out to keep the rendered table compact.
_table_columns = [
    "n_cores", "nodes", "ntasks_per_node", "pipeline_status",
    "base_copied_to_hpc", "job_id", "job_status", "wall_time", "last_checked",
]

if df_pbench.empty:
    print("No variants found. Run Section 3 first.")
else:
    _table = df_pbench[_table_columns]
    display(_table.reset_index(drop=True))


## 6. Build the Shared Base Case (mesh once)

Generates and meshes **one** OpenFOAM case from the fixed terrain inputs.
This is the only case that is ever meshed — all core-count variants reuse it.

The base case is placed in `BASE_CASE_DIR` (configured in Section 1).
If it already has `mesh_status == DONE` the cell is a no-op (resume-safe).

> **Why only one case?**  
> The parallel decomposition in OpenFOAM is done at *runtime* by `decomposePar`;
> the underlying mesh is identical regardless of how many cores are used.  
> Meshing N copies would waste both local disk and HPC transfer time.

In [None]:
if not _TM_OK:
    print("✗ taskManager not available — cannot generate or mesh the base case.")
    print("  Run: git submodule update --init --recursive")
else:
    generator = OpenFOAMCaseGenerator(
        template_path=os.path.join(TASK_MANAGER_DIR, "template"),
        input_dir=str(FIXED_TERRAIN_DIR),
        output_dir=CASES_OUTPUT_DIR,
        deucalion_path=DEUCALION_PATH,
    )

    # ── Check whether the base case has already been meshed ───────────────────
    base_meta    = Path(BASE_CASE_DIR) / "benchmark_metadata.json"
    already_done = False
    if base_meta.exists():
        try:
            with open(base_meta) as fh:
                bm = json.load(fh)
        except (ValueError, OSError) as exc:
            # Unreadable marker: report it and rebuild instead of guessing
            print(f"⚠  Could not read {base_meta}: {exc} — rebuilding the base case.")
        else:
            if isinstance(bm, dict):
                already_done = str(bm.get("mesh_status", "")).upper() == "DONE"

    if already_done:
        print(f"↷ Base case already meshed — skipping.  ({BASE_CASE_DIR})")
    else:
        # ── Generate the case (creates directory + OpenFOAM structure) ──────────
        Path(BASE_CASE_DIR).mkdir(parents=True, exist_ok=True)
        print(f"Generating base case at {BASE_CASE_DIR} …")
        setup_ok = False
        try:
            case_info = {
                "terrain_dir"      : str(FIXED_TERRAIN_DIR),
                "rotation_degree"  : FIXED_ROTATION_DEG,
                "output_dir"       : str(BASE_CASE_DIR),
            }
            output_path = generator.setup_case(case_info)
        except Exception as exc:
            # Notebook-level boundary: report whatever the generator raised
            print(f"  ✗ Case setup failed: {exc}")
        else:
            setup_ok = True
            print(f"  ✓ Base case created at {output_path}")

        if not setup_ok:
            # Meshing a case that was never set up cannot succeed and must not
            # end up flagged as DONE.
            print("  ✗ Meshing skipped because the case setup failed.")
        else:
            # ── Mesh the base case ────────────────────────────────────────────
            print(f"Meshing base case with {N_PARALLEL_WORKERS} worker(s) …")
            results = generator.mesh_cases_parallel([str(BASE_CASE_DIR)], n_workers=N_PARALLEL_WORKERS)
            if results and results[0]:
                print("  ✓ Meshing succeeded.")
                # Record mesh_status in base case metadata
                bm = {"mesh_status": "DONE", "meshed_at": datetime.now().isoformat()}
                with open(base_meta, "w") as fh:
                    json.dump(bm, fh, indent=2)
            else:
                print("  ✗ Meshing failed — check logs.")

    print("\nRe-run Section 4 to refresh the dashboard.")


## 7. Write Per-Variant `decomposeParDict`

Write (or overwrite) `system/decomposeParDict` in each lightweight variant directory.
The `scotch` method is used by default — it requires no geometric input and
typically produces well-balanced partitions.

These tiny files are later uploaded to the cluster alongside the per-variant
SLURM run script.  **The base case mesh data is never re-uploaded.**

In [None]:
DECOMPOSE_METHOD = "scotch"   # alternative: "simple" (requires coefficients)


def write_decompose_par_dict(case_dir: Path, n_subdomains: int, method: str = "scotch") -> Path:
    """
    Create or replace ``system/decomposeParDict`` below *case_dir*.

    The file consists of the OpenFOAM banner, the ``FoamFile`` header and the
    two entries ``numberOfSubdomains`` and ``method``.  The ``system``
    directory is created when missing.  Returns the path of the written file.
    """
    target = case_dir / "system" / "decomposeParDict"
    target.parent.mkdir(parents=True, exist_ok=True)

    # One list element per output line; the file ends with a single newline.
    dict_lines = [
        "/*--------------------------------*- C++ -*----------------------------------*\\",
        "  =========                 |",
        "  \\\\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox",
        "   \\\\    /   O peration     |",
        "    \\\\  /    A nd           |",
        "     \\\\/     M anipulation  |",
        "\\*---------------------------------------------------------------------------*/",
        "FoamFile",
        "{",
        "    version     2.0;",
        "    format      ascii;",
        "    class       dictionary;",
        '    location    "system";',
        "    object      decomposeParDict;",
        "}",
        "// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //",
        "",
        f"numberOfSubdomains  {n_subdomains};",
        "",
        f"method              {method};",
        "",
        "// ************************************************************************* //",
    ]
    target.write_text("\n".join(dict_lines) + "\n")
    return target


# ── Apply to every variant ────────────────────────────────────────────────────
df_pbench = scan_parallel_bench_status(CASES_OUTPUT_DIR)
_cases_root = Path(CASES_OUTPUT_DIR)

for _, row in df_pbench.iterrows():
    n_cores  = int(row["n_cores"])
    var_dir  = Path(row["variant_dir"])
    # One subdomain per core of the variant
    out_path = write_decompose_par_dict(var_dir, n_subdomains=n_cores, method=DECOMPOSE_METHOD)
    print(f"  ✓ {n_cores:>4} cores → {out_path.relative_to(_cases_root)}")

# ── Show the generated file for the first variant as a sanity check ───────────
if not df_pbench.empty:
    _first_variant = df_pbench.iloc[0]
    sample_path = Path(_first_variant["variant_dir"]) / "system" / "decomposeParDict"
    if sample_path.exists():
        print(f"\nSample decomposeParDict ({_first_variant['n_cores']} cores):")
        print(sample_path.read_text())


## 8. Copy Base Case to HPC (one-time transfer)

Rsyncs the single meshed `BASE_CASE_DIR` to `REMOTE_BASE_CASE_PATH` on the cluster.

This transfer happens **once**.  Subsequent variants do **not** re-upload the mesh
data — their SLURM job scripts perform a fast `cp -r` from the remote base
directory to the variant directory entirely within the cluster filesystem.

The cell is **resume-safe**: if `base_copied_to_hpc` is already `true` in the
variant metadata files the rsync is skipped.  Set `FORCE_RECOPY = True` to
override and re-transfer.

In [None]:
import shlex

FORCE_RECOPY = False   # set True to force re-transfer of the base case


def _mark_base_copied(refresh: bool) -> int:
    """Flag the variants in CASES_OUTPUT_DIR as having the base case on the cluster.

    With ``refresh=False`` only variants that are not flagged yet are
    touched; with ``refresh=True`` every variant gets a new timestamp.
    Returns the number of metadata files that were rewritten.
    """
    updated = 0
    pattern = "parallel_bench_*/benchmark_metadata.json"
    for meta_file in sorted(Path(CASES_OUTPUT_DIR).glob(pattern)):
        try:
            with open(meta_file) as fh:
                meta = json.load(fh)
            if meta.get("base_copied_to_hpc") and not refresh:
                continue
            meta["base_copied_to_hpc"] = True
            meta["base_copied_at"]     = datetime.now().isoformat()
            with open(meta_file, "w") as fh:
                json.dump(meta, fh, indent=2)
            updated += 1
        except (OSError, ValueError, AttributeError, TypeError) as exc:
            print(f"  ⚠  Could not update metadata for {meta_file.parent.name}: {exc}")
    return updated


# ── Check whether the base case has already been copied ──────────────────────
# The flag is stored redundantly in every variant; one flagged variant is
# enough to know that the transfer happened.
_base_already_copied = False
df_pbench = scan_parallel_bench_status(CASES_OUTPUT_DIR)
if not df_pbench.empty:
    _base_already_copied = bool(df_pbench["base_copied_to_hpc"].astype(bool).any())

if _base_already_copied and not FORCE_RECOPY:
    print("↷ Base case already flagged as copied to HPC — skipping rsync.")
    print("  Set FORCE_RECOPY = True to re-transfer.")
    # Variants created after the transfer would otherwise never become
    # eligible for submission.
    _n_flagged = _mark_base_copied(refresh=False)
    if _n_flagged:
        print(f"  Flagged {_n_flagged} variant(s) created after the transfer.")
elif not Path(BASE_CASE_DIR).is_dir():
    print(f"✗ Base case directory not found: {BASE_CASE_DIR}")
    print("  Run Section 6 first to build and mesh the base case.")
else:
    base_src = str(BASE_CASE_DIR).rstrip("/") + "/"
    remote_dest = f"{HPC_HOST}:{REMOTE_BASE_CASE_PATH}/"

    print(f"Copying base case to HPC (one-time transfer)")
    print(f"  Local  : {base_src}")
    print(f"  Remote : {remote_dest}")
    print()

    rsync_cmd = [
        "rsync", "-az", "--progress",
        "--exclude=processor*/",   # exclude any pre-existing decomposed data
        base_src,
        remote_dest,
    ]
    try:
        # rsync only creates the last component of the destination path, so
        # make sure the whole remote directory chain exists first.
        subprocess.run(
            ["ssh", HPC_HOST, f"mkdir -p {shlex.quote(REMOTE_BASE_CASE_PATH)}"],
            check=True, capture_output=True, text=True,
        )
        # Output is not captured so that rsync's --progress display stays visible
        subprocess.run(rsync_cmd, check=True, capture_output=False, text=True)
        print("  ✓ Base case transferred successfully.")

        # ── Mark all variants as having the base copied ───────────────────────
        _mark_base_copied(refresh=True)
        print("  Variant metadata updated (base_copied_to_hpc = true).")
    except subprocess.CalledProcessError as exc:
        print(f"  ✗ {exc.cmd[0]} failed (exit code {exc.returncode}).")
        if exc.stderr:
            print(f"    {exc.stderr.strip()}")
        print(f"  Check that the '{HPC_HOST}' host is reachable and ~/.ssh/config is set up.")
    except FileNotFoundError as exc:
        print(f"  ✗ Required command not found on PATH ({exc}).")
        print("  Install rsync/ssh or manually copy BASE_CASE_DIR to the cluster before submitting.")

print("\nRe-run Section 4 to refresh the dashboard.")


## 9. Submit Per-Variant Jobs to HPC

For each core-count variant this cell:
1. Generates a **self-contained SLURM batch script** locally (< 1 KB).
2. Uploads that script **and** the matching `decomposeParDict` to the cluster
   (two tiny files, no mesh data).
3. Submits the script via `sbatch`.

When the job starts on the cluster the script:
- Does a fast **local** `cp -r <remote_base> <remote_variant>` inside the
  cluster filesystem (no network transfer).
- Overwrites `system/decomposeParDict` with the variant-specific file.
- Runs `decomposePar` followed by `mpirun simpleFoam -parallel`.

**Pre-requisites:** The base case must have been copied (Section 8).
Variants that already have a `job_id` in `submit_status.json` are skipped
(resume-safe).

In [None]:
def _generate_slurm_script(
    n_cores: int,
    nodes: int,
    ntasks_per_node: int,
    remote_base: str,
    remote_variant: str,
    decompose_dict_content: str,
    partition: str,
    time_limit: str,
    job_name: str,
) -> str:
    """
    Return a SLURM batch script string.

    The script copies the pre-uploaded base case to a variant directory on the
    cluster (fast local cp, no network transfer), installs the correct
    decomposeParDict, decomposes, and runs the solver in parallel.
    """
    # Escape the decomposeParDict content for the heredoc
    dict_escaped = decompose_dict_content.replace("'", "'\"'\"'")
    return f"""#!/bin/bash
#SBATCH --job-name={job_name}
#SBATCH --nodes={nodes}
#SBATCH --ntasks-per-node={ntasks_per_node}
#SBATCH --partition={partition}
#SBATCH --time={time_limit}
#SBATCH --output={remote_variant}/slurm_%j.out
#SBATCH --error={remote_variant}/slurm_%j.err

set -euo pipefail

BASE_DIR=\"{remote_base}\"
VARIANT_DIR=\"{remote_variant}\"

# ── Fast local copy of the shared base case (no network transfer) ──────────
if [ ! -d \"$VARIANT_DIR/constant\" ]; then
    echo \"Copying base case to variant directory…\"
    cp -r \"$BASE_DIR/.\" \"$VARIANT_DIR/\"
fi

# ── Install the correct decomposeParDict ────────────────────────────────────
mkdir -p \"$VARIANT_DIR/system\"
cat > \"$VARIANT_DIR/system/decomposeParDict\" << 'DICTEOF'
{dict_escaped}
DICTEOF

# ── Decompose and run ───────────────────────────────────────────────────────
cd \"$VARIANT_DIR\"
decomposePar -force > log.decomposePar 2>&1
mpirun -np {n_cores} simpleFoam -parallel > log.simpleFoam 2>&1
"""


# Re-scan so that the flags written by the copy step and any earlier
# submissions are taken into account.
df_pbench = scan_parallel_bench_status(CASES_OUTPUT_DIR)

# Only submit variants whose base case has been copied and that have no job yet
ready = df_pbench[
    (df_pbench["base_copied_to_hpc"] == True) &
    (df_pbench["job_id"].isna())
]

if ready.empty:
    print("No variants ready for submission.")
    print("Possible reasons:")
    print("  • Base case has not been copied yet — run Section 8.")
    print("  • All variants have already been submitted.")
else:
    print(f"Submitting {len(ready)} variant(s) to SLURM…")

    for _, row in ready.iterrows():
        n_cores         = int(row["n_cores"])
        nodes           = int(row["nodes"])
        ntasks_per_node = int(row["ntasks_per_node"])
        var_dir         = Path(row["variant_dir"])
        remote_variant  = row["remote_variant_path"]
        job_name        = f"pbench_{n_cores}c_{FIXED_ROTATION_DEG}deg"

        # ── Read the decomposeParDict written in Section 7 ────────────────────
        # Its text is embedded into the job script as well as uploaded as a file.
        decomp_file = var_dir / "system" / "decomposeParDict"
        if not decomp_file.exists():
            print(f"  ✗ {n_cores} cores: decomposeParDict not found — run Section 7 first.")
            continue
        decompose_dict_content = decomp_file.read_text()

        # ── Generate SLURM script ─────────────────────────────────────────────
        slurm_script = _generate_slurm_script(
            n_cores=n_cores,
            nodes=nodes,
            ntasks_per_node=ntasks_per_node,
            remote_base=REMOTE_BASE_CASE_PATH,
            remote_variant=remote_variant,
            decompose_dict_content=decompose_dict_content,
            partition=SLURM_PARTITION,
            time_limit=SLURM_TIME,
            job_name=job_name,
        )

        # ── Write SLURM script locally ─────────────────────────────────────────
        # A local copy is kept next to the variant metadata.
        local_script = var_dir / "run.sh"
        local_script.write_text(slurm_script)

        # ── Upload decomposeParDict + run script (two tiny files) ─────────────
        remote_system_dir = f"{remote_variant}/system"
        try:
            # Ensure remote variant + system directories exist.  The variant
            # directory is also where the #SBATCH --output/--error files go.
            subprocess.run(
                ["ssh", HPC_HOST, f"mkdir -p {remote_system_dir}"],
                check=True, capture_output=True, text=True,
            )
            # Upload decomposeParDict
            subprocess.run(
                ["rsync", "-az", str(decomp_file),
                 f"{HPC_HOST}:{remote_system_dir}/decomposeParDict"],
                check=True, capture_output=True, text=True,
            )
            # Upload run script
            subprocess.run(
                ["rsync", "-az", str(local_script),
                 f"{HPC_HOST}:{remote_variant}/run.sh"],
                check=True, capture_output=True, text=True,
            )

            # ── sbatch ────────────────────────────────────────────────────────
            sbatch_result = subprocess.run(
                ["ssh", HPC_HOST, f"sbatch {remote_variant}/run.sh"],
                check=True, capture_output=True, text=True,
            )
            # Parse job ID from "Submitted batch job XXXXXX": the first
            # all-digit token of stdout is taken; job_id stays None when
            # there is no such token.
            job_id = None
            for tok in sbatch_result.stdout.split():
                if tok.isdigit():
                    job_id = tok
                    break

            # ── Persist submission status ──────────────────────────────────────
            # The presence of a job_id in this file is what marks the variant
            # as submitted on the next scan.
            submit_status = {
                "job_id"       : job_id,
                "job_status"   : "PENDING",
                "submitted_at" : datetime.now().isoformat(),
                "wall_time"    : None,
                "last_checked" : datetime.now().isoformat(),
            }
            with open(var_dir / "submit_status.json", "w") as fh:
                json.dump(submit_status, fh, indent=2)

            print(f"  ✓ {n_cores:>4} cores ({nodes}×{ntasks_per_node}): "
                  f"job_id={job_id}  (uploaded: decomposeParDict + run.sh only)")

        except subprocess.CalledProcessError as exc:
            # A failing remote command only aborts this variant; the loop goes on
            err = exc.stderr.strip() if exc.stderr else str(exc)
            print(f"  ✗ {n_cores:>4} cores: submission failed: {err}")
        except FileNotFoundError as exc:
            print(f"  ✗ {n_cores:>4} cores: command not found ({exc})")
            print("    Make sure rsync and ssh are available and the host alias is configured.")
            break  # no point continuing if rsync/ssh are missing

    print("\nSubmission complete. Re-run Section 4 to refresh the dashboard.")


## 10. Refresh Job Statuses

Polls SLURM (`sacct`) on the cluster for every submitted variant and writes the
updated status (including elapsed wall time) back to `submit_status.json`.
Requires SSH access to the `deucalion` host.

In [None]:
df_pbench = scan_parallel_bench_status(CASES_OUTPUT_DIR)
submitted = df_pbench[~df_pbench["job_id"].isna()]

# Icon printed for each SLURM state; any other state is shown as "?"
_state_icons = {"COMPLETED": "★", "RUNNING": "▶", "PENDING": "○",
                "FAILED": "✗", "CANCELLED": "✗"}

if submitted.empty:
    print("No submitted jobs to refresh.")
else:
    print(f"Refreshing {len(submitted)} job status(es) via sacct…")
    for _, row in submitted.iterrows():
        job_id  = str(row["job_id"])
        n_cores = int(row["n_cores"])
        var_dir = Path(row["variant_dir"])

        try:
            sacct_cmd = f"sacct -j {job_id} --format=State,Elapsed --noheader --parsable2"
            result = subprocess.run(
                ["ssh", HPC_HOST, sacct_cmd],
                capture_output=True, text=True, check=True,
            )

            # Only the first non-empty record is used: "<State>|<Elapsed>"
            records = [ln for ln in result.stdout.strip().splitlines() if ln]
            job_status = None
            wall_time  = None
            if records:
                fields     = records[0].split("|")
                job_status = fields[0].strip()
                if len(fields) > 1:
                    wall_time = fields[1].strip()

            # Merge into the existing submission record (start from an empty
            # one when the file cannot be read)
            submit_status_file = var_dir / "submit_status.json"
            try:
                with open(submit_status_file) as fh:
                    ss = json.load(fh)
            except Exception:
                ss = {}
            ss["job_status"]   = job_status
            ss["wall_time"]    = wall_time
            ss["last_checked"] = datetime.now().isoformat()
            with open(submit_status_file, "w") as fh:
                json.dump(ss, fh, indent=2)

            icon = _state_icons.get(str(job_status), "?")
            print(f"  {icon} {n_cores:>4} cores (job {job_id}): {job_status}  elapsed={wall_time}")

        except subprocess.CalledProcessError as exc:
            print(f"  ✗ {n_cores:>4} cores: sacct query failed: {exc.stderr.strip()}")
        except FileNotFoundError:
            print("  ✗ ssh not found — cannot poll SLURM.")
            break

    print("\nJob statuses updated. Re-run Section 4 to refresh the dashboard.")


## 11. Results: Scaling Analysis

Parse wall-clock times from `submit_status.json` or `log.simpleFoam` (fetched
from the cluster) and compute the classic strong-scaling metrics:

| Metric | Formula |
|---|---|
| Speedup | T(N_baseline) / T(N) |
| Efficiency | Speedup / (N / N_baseline) × 100 % |

Re-run this cell after all jobs have reached `COMPLETED` status.

In [None]:
import re

try:
    import matplotlib.pyplot as plt
    _PLT_OK = True
except ImportError:
    _PLT_OK = False
    print("⚠  matplotlib not available — plots will be skipped.")


def _parse_solver_time_remote(n_cores: int, remote_variant: str) -> float | None:
    """
    Read the last ``ExecutionTime`` entry of ``log.simpleFoam`` in
    *remote_variant* on the cluster (via SSH to ``HPC_HOST``).

    Returns the value in seconds, or None when the entry cannot be obtained
    for any reason.  *n_cores* is accepted for symmetry with the caller and is
    not used.
    """
    try:
        grep_cmd = f"grep 'ExecutionTime' {remote_variant}/log.simpleFoam 2>/dev/null | tail -1"
        completed = subprocess.run(
            ["ssh", HPC_HOST, grep_cmd],
            capture_output=True,
            text=True,
            check=False,
        )
        match = re.search(r"ExecutionTime\s*=\s*([0-9.]+)\s*s", completed.stdout)
        if match is None:
            return None
        return float(match.group(1))
    except Exception:
        # Best effort: any failure simply means "no timing available"
        return None


# ── Build results table ───────────────────────────────────────────────────────
def _elapsed_to_seconds(elapsed) -> float | None:
    """Convert a sacct Elapsed value ([D-]HH:MM:SS or MM:SS) to seconds.

    Returns None when *elapsed* is not a string in one of these formats.
    """
    if not isinstance(elapsed, str) or ":" not in elapsed:
        return None
    days_part, _, clock = elapsed.strip().rpartition("-")
    fields = clock.split(":")
    if len(fields) not in (2, 3):
        return None
    try:
        days = int(days_part) if days_part else 0
        *whole_fields, seconds_field = fields
        total = 0.0
        for field in whole_fields:          # hours and/or minutes
            total = total * 60 + int(field)
        total = total * 60 + float(seconds_field)
    except ValueError:
        return None
    return days * 86400 + total


df_pbench = scan_parallel_bench_status(CASES_OUTPUT_DIR)

results = []
for _, row in df_pbench.iterrows():
    n_cores        = int(row["n_cores"])
    remote_variant = row.get("remote_variant_path")
    job_status     = row.get("job_status")
    wall_time_s    = None

    # The Elapsed value of a pending, running or failed job is not a valid
    # timing (it may even be 00:00:00), so only completed jobs contribute.
    if isinstance(job_status, str) and job_status.startswith("COMPLETED"):
        # Prefer wall_time from submit_status.json (updated by Section 10)
        wall_time_s = _elapsed_to_seconds(row.get("wall_time"))
        if wall_time_s is None and isinstance(remote_variant, str) and remote_variant:
            # Fall back to parsing log.simpleFoam on the cluster
            wall_time_s = _parse_solver_time_remote(n_cores, remote_variant)
        if wall_time_s is not None and wall_time_s <= 0:
            wall_time_s = None              # unusable for speedup ratios
    elif job_status is not None:
        print(f"  ↷ {n_cores:>4} cores: job status is {job_status} — timing ignored "
              "(run Section 10 to refresh).")

    results.append({
        "n_cores"    : n_cores,
        "nodes"      : row["nodes"],
        "job_status" : job_status,
        "wall_time_s": wall_time_s,
    })

# Explicit columns keep sort_values() working when no variant exists yet
df_results = (
    pd.DataFrame(results, columns=["n_cores", "nodes", "job_status", "wall_time_s"])
    .sort_values("n_cores")
    .reset_index(drop=True)
)

# ── Compute speedup and efficiency ────────────────────────────────────────────
# Baseline = smallest core count that has a valid timing
valid_times = df_results[df_results["wall_time_s"].notna()]
t_baseline  = float(valid_times.iloc[0]["wall_time_s"]) if not valid_times.empty else None
n_baseline  = int(valid_times.iloc[0]["n_cores"])       if not valid_times.empty else None

def _speedup(t, t0):
    return t0 / t if (t and t0 and t > 0) else None

def _efficiency(speedup, n, n0):
    return speedup / (n / n0) * 100 if (speedup and n and n0) else None

# Build both columns by plain iteration: unlike a row-wise DataFrame.apply,
# this also works when the results table has no rows.
df_results["speedup"] = [
    _speedup(t, t_baseline) for t in df_results["wall_time_s"]
]
df_results["efficiency"] = [
    _efficiency(s, n, n_baseline)
    for s, n in zip(df_results["speedup"], df_results["n_cores"])
]

print("Scaling analysis:")
display(df_results.reset_index(drop=True))

# Without a baseline no speedup can be computed — tell the user what to do
if t_baseline is None:
    print("\n⚠  No completed jobs with wall times yet.")
    print("   Wait for jobs to finish and re-run this cell.")


In [None]:
# ── Scaling plots ─────────────────────────────────────────────────────────────
if not _PLT_OK:
    print("matplotlib not available — install with: pip install matplotlib")
elif valid_times.empty:
    print("No completed results to plot yet.")
else:
    # Only variants with a timing are plotted
    plot_df = df_results.dropna(subset=["wall_time_s"])
    cores   = plot_df["n_cores"].astype(int).tolist()
    speedup = plot_df["speedup"].tolist()
    effic   = plot_df["efficiency"].tolist()

    fig, (ax_speedup, ax_effic) = plt.subplots(1, 2, figsize=(12, 5))

    # ── Speedup plot ──────────────────────────────────────────────────────────
    # The ideal line runs from speedup 1 at the smallest plotted core count to
    # the core-count ratio at the largest one.
    ideal_x = [cores[0], cores[-1]]
    ideal_y = [1, cores[-1] / cores[0]]
    ax_speedup.plot(cores, speedup, "o-", color="steelblue", linewidth=2, label="Measured")
    ax_speedup.plot(ideal_x, ideal_y, "--", color="gray", linewidth=1, label="Ideal (linear)")
    ax_speedup.set_xlabel("Number of cores")
    ax_speedup.set_ylabel("Speedup")
    ax_speedup.set_title("Strong Scaling — Speedup")
    ax_speedup.legend()
    ax_speedup.grid(True, linestyle="--", alpha=0.5)

    # ── Efficiency plot ───────────────────────────────────────────────────────
    ax_effic.plot(cores, effic, "s-", color="darkorange", linewidth=2)
    ax_effic.axhline(y=100, color="gray", linestyle="--", linewidth=1, label="Ideal (100%)")
    ax_effic.set_xlabel("Number of cores")
    ax_effic.set_ylabel("Parallel efficiency (%)")
    ax_effic.set_title("Strong Scaling — Efficiency")
    ax_effic.legend()
    ax_effic.grid(True, linestyle="--", alpha=0.5)

    plt.tight_layout()
    plt.show()
