# CFD Input Generation Dashboard

A lightweight workflow dashboard and controlled batch runner for `generateInputs.py`.

This notebook orchestrates the dataset-preparation pipeline:
1. Download DEM + roughness tiles for each coordinate in `coords.csv`
2. Generate terrain meshes and ABL boundary conditions for each wind direction
3. Produce `pipeline_metadata.json` per rotation (one per OpenFOAM case input)

**Resume-safe:** Close and reopen at any time.  
All decisions are derived from the filesystem: terrain folder existence and
`pipeline_metadata.json` files inside each `rotatedTerrain_*_deg/` sub-directory.

This notebook does **not** reimplement any pipeline logic — it only orchestrates
by calling existing functions from `generateInputs.py` modules.

## 1. Configuration

Edit these settings before running the notebook.

In [None]:
import os
import sys

# ── Paths ────────────────────────────────────────────────────────────────────
# Root of the CFD-dataset repository.  `__file__` is not defined inside a
# notebook, so the kernel's current working directory is used as the anchor
# (Jupyter normally starts the kernel in the directory containing the notebook).
# If the kernel was started elsewhere, set REPO_ROOT explicitly.
REPO_ROOT = os.getcwd()

# Path to the CSV file listing all (lat, lon) coordinates to process
CSV_PATH = os.path.join(REPO_ROOT, "coords.csv")

# Root folder where terrain sub-directories will be created
# Structure: DATA_DIR / terrain_{index}_{lat}_{lon} / rotatedTerrain_{dir}_deg /
DATA_DIR = os.path.join(REPO_ROOT, "Data", "downloads")

# terrain_config.yaml used by the mesh pipeline
TERRAIN_CONFIG_PATH = os.path.join(REPO_ROOT, "terrain_config.yaml")

# ── Batch settings ────────────────────────────────────────────────────────────
# Number of terrain coordinates to process in one run
BATCH_SIZE = 10

# Number of wind directions to generate per terrain
# Total OpenFOAM case inputs = BATCH_SIZE * N_DIRECTIONS
N_DIRECTIONS = 8

# ── Download settings ────────────────────────────────────────────────────────
# Tile side length in km for DEM / roughness download
SIDE_LENGTH_KM = 50

# Set to False to skip roughness map download (faster, but no z0 field)
INCLUDE_ROUGHNESS = True

# ── Submodule path setup ──────────────────────────────────────────────────────
# Add submodule dirs to sys.path so their packages can be imported.
# The membership check keeps re-running this cell from piling up duplicates.
for _submod in ["terrain_following_mesh_generator", "ABL_BC_generator"]:
    _p = os.path.join(REPO_ROOT, _submod)
    if _p not in sys.path:
        sys.path.insert(0, _p)

print(f"REPO_ROOT          : {REPO_ROOT}")
print(f"CSV_PATH           : {CSV_PATH}")
print(f"DATA_DIR           : {DATA_DIR}")
print(f"TERRAIN_CONFIG     : {TERRAIN_CONFIG_PATH}")
print(f"BATCH_SIZE         : {BATCH_SIZE}")
print(f"N_DIRECTIONS       : {N_DIRECTIONS}")
print(f"Total inputs/batch : {BATCH_SIZE * N_DIRECTIONS}")

## 2. Imports

In [None]:
import json
import glob
from pathlib import Path
from datetime import datetime

import pandas as pd

# ── fetchData (always available in this repo) ─────────────────────────────────
from fetchData.csv_utils import load_coordinates_from_csv
from fetchData.parameter_generation import generate_directions
from fetchData import download_raster_data, create_output_dir, DownloadConfig
from fetchData.download_raster import format_coord

print("✓ fetchData imported")

# ── terrain_following_mesh_generator (submodule) ──────────────────────────────
# Optional dependency: the import is attempted inside try/except so that a
# missing submodule is reported with a hint instead of stopping the notebook.
# Only ImportError is handled; any other error raised while importing propagates.
# `_MESH_OK` records whether `tm` is usable.
try:
    from terrain_following_mesh_generator import terrain_mesh as tm
    _MESH_OK = True
    print("✓ terrain_following_mesh_generator imported")
except ImportError as _e:
    _MESH_OK = False
    print(f"✗ terrain_following_mesh_generator not available: {_e}")
    print("  Run: git submodule update --init --recursive")

# ── ABL_BC_generator (submodule) ──────────────────────────────────────────────
# Same pattern as above; `_ABL_OK` records whether
# `generate_inlet_data_workflow` and `ABLConfig` are usable.
try:
    from ABL_BC_generator.generateBCs import generate_inlet_data_workflow, ABLConfig
    _ABL_OK = True
    print("✓ ABL_BC_generator imported")
except ImportError as _e:
    _ABL_OK = False
    print(f"✗ ABL_BC_generator not available: {_e}")
    print("  Run: git submodule update --init --recursive")

# True only when both submodules imported. The batch-runner cell checks this
# flag and, when it is False, prints the submodule hint instead of running.
_PIPELINE_AVAILABLE = _MESH_OK and _ABL_OK

## 3. Status Scanner

Reads `coords.csv` and checks the filesystem to determine the generation state
of every coordinate.  
**Re-run this cell at any time to refresh the view.**

State definitions:
| status | meaning |
|---|---|
| `not_started` | terrain folder does not exist |
| `partial` | terrain folder exists with at least one, but fewer than `N_DIRECTIONS`, rotation metadata files |
| `complete` | `≥ N_DIRECTIONS` rotation metadata files found |
| `failed` | terrain folder exists but contains **zero** rotation metadata files |

In [None]:
def _terrain_folder_path(lat: float, lon: float, index: int, data_dir: str) -> Path:
    """Build the path of the terrain directory that belongs to one coordinate.

    The directory name has the form ``terrain_<NNNN>_<lat>_<lon>``: ``NNNN`` is
    the one-based position (``index + 1``) zero-padded to four digits, and the
    two coordinate parts come from ``format_coord`` called with ``precision=3``.
    The path is only computed; nothing is created or checked on disk.
    """
    one_based = index + 1
    lat_part = format_coord(lat, is_lat=True, precision=3)
    lon_part = format_coord(lon, is_lat=False, precision=3)
    return Path(data_dir).joinpath(f"terrain_{one_based:04d}_{lat_part}_{lon_part}")


def _count_completed_rotations(terrain_path: Path) -> tuple[int, list[int]]:
    """
    Count rotation sub-directories that have a `pipeline_metadata.json`.

    Only entries of `terrain_path` matching ``rotatedTerrain_*_deg`` are
    inspected. An entry counts as completed when it contains a
    `pipeline_metadata.json` and the second ``_``-separated field of its name
    parses as an integer (e.g. ``rotatedTerrain_045_deg`` -> 45). Entries whose
    name does not yield an integer are ignored.

    Returns
    -------
    (count, directions)
        count      : number of completed rotations
        directions : list of completed direction angles, sorted numerically
    """
    completed = []
    for rotation_dir in terrain_path.glob("rotatedTerrain_*_deg"):
        if not (rotation_dir / "pipeline_metadata.json").exists():
            continue
        try:
            completed.append(int(rotation_dir.name.split("_")[1]))
        except (IndexError, ValueError):
            # Folder name carries no parseable angle: not a rotation we can track.
            continue
    # Sort the angles themselves. Sorting the paths would order them as text,
    # which puts "100" before "90" when folder names are not zero-padded.
    completed.sort()
    return len(completed), completed


def scan_generation_status(csv_path: str, data_dir: str, n_directions: int) -> pd.DataFrame:
    """
    Scan the filesystem against `coords.csv` and return a status DataFrame.

    Parameters
    ----------
    csv_path    : path to coords.csv
    data_dir    : root directory for terrain sub-folders
    n_directions: target number of wind directions per terrain

    Returns
    -------
    DataFrame with one row per coordinate and the columns
    ``index, lat, lon, terrain_folder, folder_exists, n_rotations_done,
    target_rotations, status, done_directions``.  ``index`` is the zero-based
    position in the coordinate list (the folder name uses ``index + 1``).
    ``status`` is one of ``not_started`` (folder missing), ``failed`` (folder
    present, no completed rotation), ``partial`` (some, but fewer than
    `n_directions`) or ``complete``.  The columns are present even when there
    are no coordinates, so downstream column lookups do not fail on an empty
    result.
    """
    columns = [
        "index",
        "lat",
        "lon",
        "terrain_folder",
        "folder_exists",
        "n_rotations_done",
        "target_rotations",
        "status",
        "done_directions",
    ]

    coordinates = load_coordinates_from_csv(csv_path, verbose=False)
    records = []

    for idx, (lat, lon) in enumerate(coordinates):
        terrain_path = _terrain_folder_path(lat, lon, idx, data_dir)
        exists = terrain_path.exists()

        if exists:
            n_done, done_dirs = _count_completed_rotations(terrain_path)
        else:
            n_done, done_dirs = 0, []

        if not exists:
            status = "not_started"
        elif n_done == 0:
            status = "failed"
        elif n_done < n_directions:
            status = "partial"
        else:
            status = "complete"

        records.append({
            "index": idx,
            "lat": lat,
            "lon": lon,
            "terrain_folder": terrain_path.name,
            "folder_exists": exists,
            "n_rotations_done": n_done,
            "target_rotations": n_directions,
            "status": status,
            "done_directions": done_dirs,
        })

    # Passing `columns` fixes the schema: with no records, DataFrame([]) would
    # have no columns at all and e.g. df["n_rotations_done"] would raise KeyError.
    return pd.DataFrame(records, columns=columns)


# ── Run the scan ──────────────────────────────────────────────────────────────
# Build the status table; the dashboard cells below read `df_status`.
df_status = scan_generation_status(
    csv_path=CSV_PATH,
    data_dir=DATA_DIR,
    n_directions=N_DIRECTIONS,
)
_scanned_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print(f"Scanned {len(df_status)} coordinate(s) at {_scanned_at}")

## 4. Summary Dashboard

Overall progress, per-status counts, and a per-terrain breakdown.

In [None]:
# Display order and icon for each generation status.
STATUS_ORDER = ["not_started", "partial", "failed", "complete"]
STATUS_ICONS = {
    "not_started": "○",
    "partial": "◑",
    "failed": "✗",
    "complete": "★",
}

# Totals derived from the most recent scan held in `df_status`.
total_coords = len(df_status)
total_expected = total_coords * N_DIRECTIONS
total_done = int(df_status["n_rotations_done"].sum())

_rule = "=" * 55
_header_lines = (
    f"{_rule}",
    "  CFD INPUT GENERATION STATUS",
    f"  {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
    f"{_rule}",
    f"  Total coordinates        : {total_coords}",
    f"  Target directions/terrain: {N_DIRECTIONS}",
    f"  Total inputs expected    : {total_expected}",
    f"  Total inputs generated   : {total_done}",
)
for _line in _header_lines:
    print(_line)

# The percentage is only formatted when there is something to divide by.
if total_expected > 0:
    _percent = f"({100*total_done/total_expected:.1f}%)"
else:
    _percent = ""
print(f"  Progress                 : {total_done}/{total_expected}", _percent)
print()

# One line per status that actually occurs, in STATUS_ORDER.
counts = df_status["status"].value_counts()
for _status_name in STATUS_ORDER:
    _n_terrains = counts.get(_status_name, 0)
    if not _n_terrains > 0:
        continue
    print(f"  {STATUS_ICONS[_status_name]}  {_status_name:<14} : {_n_terrains}")
print(f"{_rule}")

In [None]:
# ── Per-status count table ────────────────────────────────────────────────────
# Count terrains per status, aligned to STATUS_ORDER; statuses that do not
# occur become 0 here and are dropped again before display.
_per_status = df_status["status"].value_counts().reindex(STATUS_ORDER)
_per_status = _per_status.fillna(0).astype(int)

count_df = _per_status.reset_index()
count_df.columns = ["status", "count"]
count_df = count_df[count_df["count"] > 0]

_styled_counts = count_df.style.hide(axis="index")
display(_styled_counts.set_caption("Terrains per generation status"))

In [None]:
# ── Full per-terrain table ────────────────────────────────────────────────────
# Columns shown in the overview, in display order.
display_cols = [
    "index",
    "lat",
    "lon",
    "status",
    "n_rotations_done",
    "target_rotations",
    "terrain_folder",
]
print("Per-terrain status table:")
_per_terrain = df_status.loc[:, display_cols]
display(_per_terrain.reset_index(drop=True))

In [None]:
# ── Partial / failed cases ────────────────────────────────────────────────────
# Every terrain whose status is one of the three unfinished states.
_unfinished_states = ["not_started", "partial", "failed"]
_is_unfinished = df_status["status"].isin(_unfinished_states)
df_needs_work = df_status[_is_unfinished]
print(f"Coordinates needing work: {len(df_needs_work)}")
if not df_needs_work.empty:
    _work_cols = ["index", "lat", "lon", "status", "n_rotations_done"]
    display(df_needs_work[_work_cols].reset_index(drop=True))

## 5. Controlled Batch Runner

Re-scans the filesystem, selects the first `BATCH_SIZE` coordinates (in CSV order)
whose status is not `complete`, and generates up to `N_DIRECTIONS` wind directions
for each of them.

- **Already-complete terrains** are never selected into the batch.
- **Partially-complete terrains** skip any rotation that already has a
  `pipeline_metadata.json`, generating only the missing ones.
- **Not-started terrains** download DEM + roughness, then generate all directions.

**Total new inputs this run ≤ `BATCH_SIZE` × `N_DIRECTIONS`**

After the batch completes, re-run the Status Scanner cell (Section 3) and then the
Summary Dashboard cells (Section 4) to refresh the dashboard.

In [None]:
def _find_existing_rasters(terrain_path):
    """Locate the raster files already present in an existing terrain folder.

    Looks for ``terrain_*.tif`` (DEM) and ``roughness_*.tif`` directly inside
    `terrain_path`, ignoring files whose name contains ``_raw``, and takes the
    first match in sorted order.

    Returns ``(dem_file, roughness_file)`` as strings, where `roughness_file`
    is ``None`` when no roughness raster is present, or ``None`` when the
    folder contains no usable DEM.
    """
    def _first_non_raw(pattern):
        # Prefer the UTM (non-raw) versions
        matches = [f for f in sorted(terrain_path.glob(pattern)) if "_raw" not in f.name]
        return str(matches[0]) if matches else None

    dem_file = _first_non_raw("terrain_*.tif")
    if dem_file is None:
        return None
    return dem_file, _first_non_raw("roughness_*.tif")


def _download_rasters(lat, lon, coord_idx):
    """Download the DEM (and, if enabled, roughness) raster for one coordinate.

    Returns ``(dem_file, roughness_file)`` as returned by
    `download_raster_data`, or ``None`` when creating the output folder or the
    download raised.  The failure is printed and swallowed on purpose so that
    one bad coordinate does not abort the rest of the batch.
    """
    download_config = DownloadConfig(
        side_length_km=SIDE_LENGTH_KM,
        include_roughness_map=INCLUDE_ROUGHNESS,
        save_raw_files=True,
        verbose=True,
        show_plots=False,
    )
    try:
        # NOTE(review): assumes create_output_dir yields the same folder that
        # _terrain_folder_path computes and tolerates an existing one — confirm.
        download_path = create_output_dir(lat, lon, coord_idx, DATA_DIR)
        dem_file, roughness_file = download_raster_data(
            lat=lat, lon=lon, index=coord_idx,
            out_dir=download_path, config=download_config,
        )
    except Exception as exc:
        print(f"    ✗ Download failed: {exc}")
        return None

    print(f"    ✓ DEM downloaded: {dem_file}")
    if roughness_file:
        print(f"    ✓ Roughness downloaded: {roughness_file}")
    return dem_file, roughness_file


if not _PIPELINE_AVAILABLE:
    print("✗ One or more submodules are not available.")
    print("  Run: git submodule update --init --recursive")
else:
    # ── Re-scan from disk to ensure we have fresh state ───────────────────────
    df_fresh = scan_generation_status(CSV_PATH, DATA_DIR, N_DIRECTIONS)

    # ── Find batch: first BATCH_SIZE incomplete terrains ─────────────────────
    incomplete = df_fresh[df_fresh["status"] != "complete"]
    batch_df   = incomplete.head(BATCH_SIZE)

    if batch_df.empty:
        print("✓ All coordinates are complete — nothing to do.")
        print("  Add more coordinates to coords.csv and re-run.")
    else:
        resume_idx = int(batch_df.iloc[0]["index"])
        print(f"Resuming from coordinate index {resume_idx}")
        print(f"Batch: {len(batch_df)} terrain(s) × {N_DIRECTIONS} direction(s) = "
              f"up to {len(batch_df) * N_DIRECTIONS} new inputs\n")

        # Tallies for the end-of-batch summary; per-rotation errors are caught
        # below, so without these a run full of failures looks like a success.
        n_generated = 0
        n_failed    = 0

        for _, row in batch_df.iterrows():
            coord_idx = int(row["index"])
            lat, lon  = row["lat"], row["lon"]
            status    = row["status"]
            done_dirs = row["done_directions"]

            print(f"  [{coord_idx:04d}] lat={lat:.5f}, lon={lon:.5f}  status={status}")

            terrain_path = _terrain_folder_path(lat, lon, coord_idx, DATA_DIR)

            # ── Step 1: Obtain DEM + roughness ─────────────────────────────────
            if status == "not_started":
                rasters = _download_rasters(lat, lon, coord_idx)
            else:
                # Terrain folder exists — reuse its DEM and roughness files
                rasters = _find_existing_rasters(terrain_path)
                if rasters is None:
                    # The folder exists but holds no DEM (e.g. an earlier
                    # download failed).  Skipping here would leave the terrain
                    # stuck forever and occupying a batch slot on every run,
                    # so try the download again instead.
                    print(f"    ! No DEM file found in {terrain_path} — retrying download")
                    rasters = _download_rasters(lat, lon, coord_idx)
                else:
                    print(f"    ✓ Using existing DEM: {Path(rasters[0]).name}")

            if rasters is None:
                continue
            dem_file, roughness_file = rasters

            # ── Step 2: Generate missing wind direction rotations ─────────────
            # Config objects are rebuilt per terrain because they are mutated
            # per direction below.
            mesh_config         = tm.load_config(TERRAIN_CONFIG_PATH)
            inletBC_config      = ABLConfig()
            terrain_mesh_pipeline = tm.TerrainMeshPipeline()

            # Generate N_DIRECTIONS candidate directions, then skip any already done
            candidate_directions = generate_directions(N_DIRECTIONS)
            # Avoid re-running directions whose rotation folder already has metadata
            new_directions = [d for d in candidate_directions if d not in done_dirs]
            # Only top the terrain up to N_DIRECTIONS completed rotations
            still_needed = N_DIRECTIONS - len(done_dirs)
            if still_needed <= 0:
                print(f"    ✓ Already has {len(done_dirs)} rotations — skipping")
                continue

            directions_to_run = new_directions[:still_needed]
            print(f"    Generating {len(directions_to_run)} rotation(s): {directions_to_run}")

            for direction in directions_to_run:
                # NOTE(review): the `:03d` format requires an integer; assumes
                # generate_directions returns integer degrees — confirm.
                subdir = f"rotatedTerrain_{direction:03d}_deg"
                rotation_path = terrain_path / subdir

                # Skip if metadata already exists (fine-grained resume)
                if (rotation_path / "pipeline_metadata.json").exists():
                    print(f"      ↷ {subdir}: already done, skipping")
                    continue

                rotation_path.mkdir(parents=True, exist_ok=True)

                try:
                    mesh_config["terrain_config"].rotation_deg = direction
                    inletBC_config.flow_dir_deg                = direction

                    terrain_mesh_pipeline.run(
                        dem_path=dem_file,
                        rmap_path=roughness_file,
                        output_dir=str(rotation_path),
                        **mesh_config,
                    )
                    generate_inlet_data_workflow(str(rotation_path), inletBC_config)
                except Exception as exc:
                    # Deliberately broad: a failing rotation is reported and the
                    # batch continues.  It has no metadata file, so the next
                    # scan still treats it as missing.
                    n_failed += 1
                    print(f"      ✗ {subdir}: {exc}")
                else:
                    n_generated += 1
                    print(f"      ✓ {subdir}")

        print(f"\nBatch summary: {n_generated} rotation(s) generated, {n_failed} failed.")
        print("Batch complete. Re-run Cell 3 to refresh the dashboard.")