In [None]:
from pathlib import Path
import numpy as np
import pandas as pd

from is2retreat.config import Params
from is2retreat.pipeline import run_workflow
from is2retreat.bluff import process_cluster_with_reference
from is2retreat.metrics import compute_cluster_statistics

# Parameter sweep for one track: for every (cluster size, bias tolerance)
# combination, run the workflow, compute per-cluster statistics and append one
# row per (gt_family, cluster_id) to OUTFILE. Combinations that already have
# rows in OUTFILE are skipped, so the cell can be re-run to resume a sweep.
#
# NOTE(review): `dataset_raw` and `shoreline_gdf` are not defined in this cell;
# they are presumably created by an earlier cell - confirm before running.

P = Params()
TRACK_ID = 129
TRACK_LABEL = f"{int(TRACK_ID):04d}"  # zero-padded form stored in the CSV

CLUSTER_SIZES   = [6, 12, 18, 24, 30, 36, 42]
BIAS_TOLERANCES = [0.25, 0.5, 0.75, 1.0]
OUTFILE = Path("DSAS_clusters.csv")

# Columns that together identify one sweep combination in OUTFILE.
KEY_COLUMNS = ("track_id", "ClusterSize", "bias_tolerance")


def _load_existing(path):
    """Return previously saved rows from *path*, or an empty frame.

    A missing file and a file without any parseable columns both yield an
    empty DataFrame. ``track_id`` is read as ``str`` so its zero padding
    survives the round trip.
    """
    if not path.exists():
        return pd.DataFrame()
    try:
        return pd.read_csv(path, dtype={"track_id": str})
    except pd.errors.EmptyDataError:
        return pd.DataFrame()


def _already_processed(done, track_label, cluster_size, tolerance):
    """Return True if *done* holds a row for this sweep combination.

    Returns False when *done* is empty or lacks any of the key columns.
    Without this guard, ``DataFrame.get`` returns ``None`` for a missing
    column, the mask collapses to the scalar ``False`` and ``done[False]``
    raises ``KeyError``.
    """
    if done.empty or not set(KEY_COLUMNS).issubset(done.columns):
        return False
    saved_tol = pd.to_numeric(done["bias_tolerance"], errors="coerce")
    mask = (
        (done["track_id"] == track_label)
        & (done["ClusterSize"] == cluster_size)
        # Floats read back from CSV are compared with a tight tolerance
        # rather than exact equality.
        & np.isclose(saved_tol, tolerance, rtol=1e-9, atol=1e-12)
    )
    return bool(mask.any())


def _first_cluster_value(clusters, cluster_id, column):
    """Return *column* of the first row matching *cluster_id* as a float.

    Returns NaN when *column* is not present in *clusters*.
    """
    if column not in clusters.columns:
        return np.nan
    return float(clusters.loc[clusters["cluster_id"] == cluster_id, column].iloc[0])


existing = _load_existing(OUTFILE)

all_rows = []

for cs in CLUSTER_SIZES:
    for tol in BIAS_TOLERANCES:
        # P is mutated before the skip check, exactly as before, so it always
        # reflects the combination currently being considered.
        P.CLUSTER_DISTANCE_M = float(cs)
        P.BIAS_TOLERANCE = float(tol)

        if _already_processed(existing, TRACK_LABEL, cs, tol):
            print(f"Skip cs={cs}, tol={tol} (exists)")
            continue

        # All eight outputs stay bound at module level; only some are used
        # below, but later cells may rely on the others.
        (
            summary_fam,
            summary_clust,
            dataset_clean,
            clusters_gdf,
            selected_clusters,
            filtered_profiles,
            bias_summary,
            bias_df,
        ) = run_workflow(TRACK_ID, dataset_raw, shoreline_gdf, P, verbose=False)

        if selected_clusters is None or selected_clusters.empty:
            continue

        for fam in selected_clusters["gt_family"].unique():
            fam_clusters = selected_clusters[selected_clusters["gt_family"] == fam].copy()

            for cid in fam_clusters["cluster_id"].unique():
                bluff_df, y_ref = process_cluster_with_reference(
                    filtered_profiles=filtered_profiles,
                    selected_clusters=fam_clusters,
                    params=P,
                    cluster_id=int(cid),
                    gt_family=str(fam),
                    which="first",
                    bias_df=bias_df,
                    debug=False,
                )
                if bluff_df is None or bluff_df.empty:
                    continue

                stats = compute_cluster_statistics(
                    bluff_df,
                    confidence=P.CONFIDENCE,
                    min_span_days=P.MIN_SPAN_DAYS,
                )

                # Key order matters: the geometry columns come after **stats
                # so they win if stats carries keys of the same name.
                all_rows.append({
                    "track_id": TRACK_LABEL,
                    "ClusterSize": int(cs),
                    "bias_tolerance": float(tol),
                    "gt_family": fam,
                    "cluster_id": int(cid),
                    **stats,
                    "angle_deg": _first_cluster_value(fam_clusters, cid, "angle_deg"),
                    "center_lat": _first_cluster_value(fam_clusters, cid, "center_lat"),
                    "center_lon": _first_cluster_value(fam_clusters, cid, "center_lon"),
                })

new_df = pd.DataFrame(all_rows)
non_empty = [frame for frame in (existing, new_df) if not frame.empty]
combined = pd.concat(non_empty, ignore_index=True) if non_empty else new_df

if combined.empty:
    # Writing an empty frame would leave a header-less CSV that the next run
    # could not parse, so the file is left as it is.
    print(f"No rows to write; {OUTFILE} left untouched")
else:
    combined.to_csv(OUTFILE, index=False)
combined.tail(10)
