# CFD Pipeline Dashboard

A lightweight workflow dashboard and controlled batch runner for the CFD pipeline.

- **Dataset preparation** is handled by `generateInputs.py` (this repo).
- **Simulation execution** is handled by `taskManager/taskManager.py` (submodule), which creates cases, meshes them, submits them to SLURM on HPC, and maintains a `case_status.json` per case.

This notebook does **not** reimplement any execution logic — it only orchestrates and monitors by reading `case_status.json` files and calling existing `taskManager` methods when needed.

**Resume-safe:** Close and reopen at any time. All decisions are derived from the `case_status.json` files.

## 1. Configuration

Edit these paths and settings before running the notebook.

In [None]:
import os
import sys

# ── Paths ────────────────────────────────────────────────────────────────────
# Root of the CFD-dataset repository. This is the absolute form of the current
# working directory, so the notebook must be started from (or chdir'ed into)
# the repository checkout for the paths below to be correct.
REPO_ROOT = os.path.abspath(os.curdir)

# taskManager submodule checkout (imported from in the next section)
TASK_MANAGER_DIR = os.path.join(REPO_ROOT, "taskManager")

# OpenFOAM case template folder (used by taskManager to create new cases)
TEMPLATE_PATH = os.path.join(TASK_MANAGER_DIR, "template")

# Directory that contains the generated OpenFOAM case folders
# (each sub-folder has a case_status.json produced by taskManager)
CASES_OUTPUT_DIR = os.path.join(REPO_ROOT, "openFoamCases")

# Input data directory (downloads from generateInputs.py)
INPUT_DATA_DIR = os.path.join(REPO_ROOT, "Data", "downloads")

# Remote HPC path on Deucalion (used by taskManager for rsync/sbatch)
DEUCALION_PATH = "/projects/EEHPC-BEN-2026B02-011/cfd_data"

# ── Batch settings ────────────────────────────────────────────────────────────
# Upper bound on the number of cases meshed by one run of the batch runner
# (Section 5)
BATCH_SIZE = 5

# Number of parallel workers for meshing
N_PARALLEL_WORKERS = 4

# ── taskManager import ────────────────────────────────────────────────────────
# Put the submodule directory at the front of sys.path so that
# `from taskManager import ...` resolves to it; the membership test keeps
# repeated runs of this cell from inserting duplicate entries.
if TASK_MANAGER_DIR not in sys.path:
    sys.path.insert(0, TASK_MANAGER_DIR)

# Echo the effective configuration so a wrong working directory is obvious
print(f"REPO_ROOT          : {REPO_ROOT}")
print(f"CASES_OUTPUT_DIR   : {CASES_OUTPUT_DIR}")
print(f"TEMPLATE_PATH      : {TEMPLATE_PATH}")
print(f"INPUT_DATA_DIR     : {INPUT_DATA_DIR}")
print(f"BATCH_SIZE         : {BATCH_SIZE}")
print(f"N_PARALLEL_WORKERS : {N_PARALLEL_WORKERS}")

## 2. Imports and Initialisation

In [None]:
import json
from pathlib import Path
from datetime import datetime

import pandas as pd

# Import the case generator from the taskManager submodule. A missing
# submodule is reported but not fatal: both names are set to None so that the
# read-only dashboard cells keep working and the action cells can detect it.
try:
    from taskManager import OpenFOAMCaseGenerator
except ImportError as e:
    print(f"✗ Could not import taskManager: {e}")
    print("  Make sure the taskManager submodule is initialised:")
    print("    git submodule update --init --recursive")
    OpenFOAMCaseGenerator = None
else:
    print("✓ taskManager imported successfully")

# Build the generator used later for meshing / submission, wired to the paths
# from the configuration cell.
if OpenFOAMCaseGenerator is None:
    generator = None
else:
    generator = OpenFOAMCaseGenerator(
        template_path=TEMPLATE_PATH,
        input_dir=INPUT_DATA_DIR,
        output_dir=CASES_OUTPUT_DIR,
        deucalion_path=DEUCALION_PATH,
    )
    print("✓ OpenFOAMCaseGenerator initialised")

## 3. Status Scanner

Reads every `case_status.json` found under `CASES_OUTPUT_DIR` and assembles a pandas DataFrame.  
Each row represents one case.  **Re-run this cell at any time to refresh the view.**

In [None]:
def derive_pipeline_status(status: dict) -> str:
    """
    Collapse the raw fields of one case_status.json into a single pipeline
    status label.

    Only ``mesh_status``, ``job_status`` and ``submitted`` are read. The two
    status strings are compared case-insensitively; a missing or empty
    ``mesh_status`` counts as NOT_RUN and a missing ``job_status`` as "".
    Rules are tried in this order and the first match wins:

        failed         – mesh_status is FAILED/ERROR, or job_status is
                         FAILED/CANCELLED/TIMEOUT
        complete       – job_status is COMPLETED
        running        – submitted, and job_status is PENDING, RUNNING or empty
        meshed         – mesh_status is DONE
        meshing        – mesh_status is IN_PROGRESS
        ready_to_mesh  – mesh_status is NOT_RUN
        unknown        – anything else
    """
    mesh = (status.get("mesh_status") or "NOT_RUN").upper()
    job = (status.get("job_status") or "").upper()
    was_submitted = status.get("submitted", False)

    # A failure at either stage outranks every other signal.
    if mesh in ("FAILED", "ERROR") or job in ("FAILED", "CANCELLED", "TIMEOUT"):
        return "failed"

    if job == "COMPLETED":
        return "complete"

    # A submitted case with no job state recorded yet is still reported as
    # running rather than falling back to its mesh stage.
    if was_submitted and job in ("PENDING", "RUNNING", ""):
        return "running"

    # No decisive job information: classify by the meshing stage alone.
    mesh_stage_labels = {
        "DONE": "meshed",
        "IN_PROGRESS": "meshing",
        "NOT_RUN": "ready_to_mesh",
    }
    return mesh_stage_labels.get(mesh, "unknown")


def scan_cases(output_dir: str) -> pd.DataFrame:
    """
    Walk *output_dir* recursively and collect every case_status.json into a
    DataFrame with one row per case.

    The case name and path are taken from the directory holding the status
    file; the remaining columns are copied from the JSON document (missing
    keys become None) plus the label from derive_pipeline_status().

    A status file that cannot be used is reported with a warning and skipped
    instead of aborting the scan. That covers unreadable files, invalid JSON,
    bytes that are not valid UTF-8, and documents whose top level is not a
    JSON object.

    Returns an empty DataFrame (with the expected columns) if *output_dir*
    does not exist or no usable status file is found.
    """
    columns = [
        "case_name", "case_path", "pipeline_status",
        "mesh_status", "mesh_ok", "copied_to_hpc",
        "submitted", "job_id", "job_status", "last_checked",
    ]
    output_path = Path(output_dir)
    records = []

    if not output_path.exists():
        print(f"⚠  Cases output directory does not exist yet: {output_dir}")
        print("   Run generateInputs.py and the taskManager case generator first.")
        return pd.DataFrame(columns=columns)

    # Sorted so the row order is stable between refreshes.
    for status_file in sorted(output_path.rglob("case_status.json")):
        case_dir = status_file.parent
        try:
            # JSON text is UTF-8; do not depend on the platform locale.
            with open(status_file, encoding="utf-8") as fh:
                status = json.load(fh)
        except (ValueError, OSError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"⚠  Could not read {status_file}: {exc}")
            continue

        if not isinstance(status, dict):
            # e.g. a file holding `null` or a list: there are no fields to read.
            print(
                f"⚠  Could not read {status_file}: "
                f"expected a JSON object, got {type(status).__name__}"
            )
            continue

        records.append({
            "case_name"       : case_dir.name,
            "case_path"       : str(case_dir),
            "pipeline_status" : derive_pipeline_status(status),
            "mesh_status"     : status.get("mesh_status"),
            "mesh_ok"         : status.get("mesh_ok"),
            "copied_to_hpc"   : status.get("copied_to_hpc"),
            "submitted"       : status.get("submitted"),
            "job_id"          : status.get("job_id"),
            "job_status"      : status.get("job_status"),
            "last_checked"    : status.get("last_checked"),
        })

    if not records:
        return pd.DataFrame(columns=columns)
    return pd.DataFrame(records, columns=columns)


# ── Scan ──────────────────────────────────────────────────────────────────────
# Rebuild the case table from the status files on disk; the dashboard cells
# below read this DataFrame.
df_cases = scan_cases(CASES_OUTPUT_DIR)
print(
    f"Scanned {len(df_cases)} case(s) at "
    f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
)

## 4. Summary Dashboard

Total case count, per-status breakdown, and focused tables for the statuses that need attention.

In [None]:
# ── High-level counts ────────────────────────────────────────────────────────
# Display order of the pipeline statuses (also used by the count table below)
STATUS_ORDER = ["ready_to_mesh", "meshing", "meshed", "running", "complete", "failed", "unknown"]

# Number of scanned cases; the following dashboard cells reuse this value
total = len(df_cases)

print(f"{'='*50}")
print("  CFD PIPELINE STATUS SUMMARY")
print(f"  {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"{'='*50}")
print(f"  Total cases : {total}")
print()

if total > 0:
    # Statuses without an entry here (i.e. "unknown") are shown with "?"
    icons = {"ready_to_mesh": "○", "meshing": "⟳", "meshed": "✓",
             "running": "▶", "complete": "★", "failed": "✗"}
    per_status = df_cases["pipeline_status"].value_counts()
    for label in STATUS_ORDER:
        n_cases = per_status.get(label, 0)
        if n_cases <= 0:
            # Leave out statuses that no case currently has
            continue
        icon = icons.get(label, "?")
        print(f"  {icon}  {label:<16} : {n_cases}")
print(f"{'='*50}")

In [None]:
# ── Per-status counts as a styled DataFrame ───────────────────────────────────
if total == 0:
    print("No cases found.")
else:
    count_df = (
        df_cases["pipeline_status"]
        .value_counts()
        # Put the counts in dashboard order; absent statuses get a zero count
        .reindex(STATUS_ORDER, fill_value=0)
        .astype(int)
        .reset_index()
    )
    # Name the columns explicitly rather than relying on reset_index() naming
    count_df.columns = ["pipeline_status", "count"]
    # Show only the statuses that actually occur
    count_df = count_df.loc[count_df["count"] > 0]
    display(count_df.style.hide(axis="index").set_caption("Cases per pipeline status"))

In [None]:
# ── Ready-to-mesh cases ──────────────────────────────────────────────────────
# Cases whose derived status says meshing has not been started yet
df_ready = df_cases.loc[
    df_cases["pipeline_status"].eq("ready_to_mesh"),
    ["case_name", "mesh_status"],
]
print(f"Ready to mesh: {len(df_ready)} case(s)")
if len(df_ready) > 0:
    # Renumber from 0 so the table does not show df_cases row positions
    display(df_ready.reset_index(drop=True))

In [None]:
# ── Running cases ────────────────────────────────────────────────────────────
# Cases derived as "running", with the job fields recorded in their status file
df_running = df_cases.loc[
    df_cases["pipeline_status"].eq("running"),
    ["case_name", "job_id", "job_status", "last_checked"],
]
print(f"Running on HPC: {len(df_running)} case(s)")
if len(df_running) > 0:
    # Renumber from 0 so the table does not show df_cases row positions
    display(df_running.reset_index(drop=True))

In [None]:
# ── Failed cases ─────────────────────────────────────────────────────────────
# Both mesh and job columns are shown because either stage can mark a case
# as failed
df_failed = df_cases.loc[
    df_cases["pipeline_status"].eq("failed"),
    ["case_name", "mesh_status", "job_id", "job_status"],
]
print(f"Failed cases: {len(df_failed)}")
if len(df_failed) > 0:
    # Renumber from 0 so the table does not show df_cases row positions
    display(df_failed.reset_index(drop=True))

In [None]:
# ── Full case table ───────────────────────────────────────────────────────────
# Every scanned case, limited to the columns useful for a quick overview
# (case_path, mesh_ok and copied_to_hpc are left out)
print("Full case table:")
if total == 0:
    print("No cases found. Run generateInputs.py and the taskManager case generator first.")
else:
    display(
        df_cases.loc[
            :,
            ["case_name", "pipeline_status", "mesh_status",
             "submitted", "job_id", "job_status", "last_checked"],
        ].reset_index(drop=True)
    )

## 5. Controlled Batch Runner

Selects up to `BATCH_SIZE` cases whose `pipeline_status` is `ready_to_mesh` and triggers  
`generator.mesh_cases_parallel()`.  

**Resume-safe:** cases already meshed (or failed) are never re-selected — all filtering is based on `case_status.json`.  
Re-run the scanner cell in *Section 3* (and then the Section 4 summary cells) after the batch completes to refresh the dashboard.

In [None]:
if generator is not None:
    # Ask taskManager for the NOT_RUN cases instead of filtering the in-memory
    # DataFrame, so a stale df_cases cannot cause a case to be selected again.
    all_ready = generator.list_cases_by_status(mesh_status="NOT_RUN")
    # Cap this run at the first BATCH_SIZE candidates
    batch = all_ready[:BATCH_SIZE]

    print(f"Ready-to-mesh cases found : {len(all_ready)}")
    print(f"Batch size configured     : {BATCH_SIZE}")
    print(f"Cases selected for batch  : {len(batch)}")

    if batch:
        print("\nCases to be meshed in this batch:")
        for case_path in batch:
            print(f"  • {Path(case_path).name}")

        print(f"\nStarting parallel meshing with {N_PARALLEL_WORKERS} worker(s)…")
        results = generator.mesh_cases_parallel(batch, n_workers=N_PARALLEL_WORKERS)

        # NOTE(review): sum() assumes `results` holds one bool-like outcome per
        # case (True = meshed OK) — confirm against taskManager.
        succeeded = sum(results)
        failed    = len(results) - succeeded
        print(f"\nBatch complete: {succeeded} succeeded, {failed} failed.")
        print("Re-run Cell 3 to refresh the dashboard.")
    else:
        print("\nNothing to do — no cases with mesh_status=NOT_RUN.")
        print("Possible reasons:")
        print("  • All cases have already been meshed (or are running/complete).")
        print("  • generateInputs.py has not been run yet.")
        print("  • CASES_OUTPUT_DIR is set incorrectly.")
else:
    # The import in Section 2 failed, so there is nothing to run the batch with
    print("✗ taskManager not available — cannot run batch.")

## 6. (Optional) Refresh HPC Job Statuses

Polls the SLURM scheduler on Deucalion for every submitted case and updates the local `case_status.json` files.  
Requires SSH access to the `deucalion` host.

In [None]:
if generator is not None:
    # Cases that taskManager lists as submitted
    submitted_cases = generator.list_cases_by_status(submitted=True)
    print(f"Submitted cases to check: {len(submitted_cases)}")

    if submitted_cases:
        # One update call per case; the value it returns is echoed next to
        # the case name
        for case_path in submitted_cases:
            new_status = generator.update_job_status(case_path)
            print(f"  {Path(case_path).name}: {new_status}")

        print("\nJob statuses updated. Re-run Cell 3 to refresh the dashboard.")
    else:
        print("No submitted jobs to refresh.")
else:
    # The import in Section 2 failed, so job statuses cannot be polled
    print("✗ taskManager not available.")