In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import DBSCAN

# Paths (resolved from the parent of the current working directory)
project_root = Path("..").resolve()
patch_path = project_root / "data/patches/sst_patches_64x64.npy"
coord_path = project_root / "data/patches/sst_patch_coords.npy"
umap_path  = project_root / "outputs/sst_umap_embeddings.npy"

# DBSCAN hyperparameters, named so they can be tuned in one place
DBSCAN_EPS = 0.5
DBSCAN_MIN_SAMPLES = 10

# Load data
patches = np.load(patch_path)
coords = np.load(coord_path)
embeddings_all = np.load(umap_path)

# Fail fast on misaligned inputs. `labels` gets one entry per embedding row,
# and boolean masks built from `labels` are later applied to both `patches`
# and `coords`, so all three must share the same leading length. Without these
# checks a mismatch only surfaces later as an opaque boolean-index IndexError.
if len(coords) != len(patches):
    raise ValueError(
        f"coords has {len(coords)} rows but patches has {len(patches)}; "
        "expected one coordinate row per patch"
    )
if len(embeddings_all) < len(patches):
    raise ValueError(
        f"embeddings has {len(embeddings_all)} rows but patches has "
        f"{len(patches)}; cannot assign a cluster label to every patch"
    )

# Align embeddings & run clustering again.
# Extra trailing embedding rows are dropped.
# NOTE(review): this assumes the first len(patches) embedding rows correspond
# to `patches` in the same order — confirm against the embedding notebook.
embeddings = embeddings_all[:len(patches)]
labels = DBSCAN(eps=DBSCAN_EPS, min_samples=DBSCAN_MIN_SAMPLES).fit_predict(embeddings)

📊 Per-cluster SST and latitude stats

In [2]:
# Print a short summary for each distinct value in `labels`:
# patch count, NaN-aware mean/std of the patch values, NaN share, and the
# spread of the first coordinate column (reported as latitude).
for cluster_id in np.unique(labels):
    # Boolean selector picking the rows that carry this label
    mask = labels == cluster_id
    cluster_patches, cluster_coords = patches[mask], coords[mask]
    cluster_lats = cluster_coords[:, 0]

    # Statistics are pooled over every value in the selected patches
    mean_sst, std_sst = np.nanmean(cluster_patches), np.nanstd(cluster_patches)
    nan_frac = np.isnan(cluster_patches).mean()
    mean_lat = cluster_lats.mean()
    lat_range = cluster_lats.min(), cluster_lats.max()

    # Assemble the whole report first, then emit it with a single print
    report_lines = [
        f"\n🌀 Cluster {cluster_id} — {mask.sum()} patches",
        f"🌡️  Mean SST: {mean_sst:.2f} °C",
        f"📉  Std Dev:  {std_sst:.2f}",
        f"❄️  NaN fraction: {nan_frac*100:.1f}%",
        f"🧭  Lat range: {lat_range[0]:.1f} to {lat_range[1]:.1f} (mean: {mean_lat:.1f})",
    ]
    print("\n".join(report_lines))



🌀 Cluster -1 — 1 patches
🌡️  Mean SST: 25.64 °C
📉  Std Dev:  0.95
❄️  NaN fraction: 0.0%
🧭  Lat range: -5.4 to -5.4 (mean: -5.4)

🌀 Cluster 0 — 828 patches
🌡️  Mean SST: 7.11 °C
📉  Std Dev:  6.32
❄️  NaN fraction: 0.9%
🧭  Lat range: -71.6 to -26.9 (mean: -50.9)

🌀 Cluster 1 — 129 patches
🌡️  Mean SST: 24.85 °C
📉  Std Dev:  2.08
❄️  NaN fraction: 0.9%
🧭  Lat range: -30.6 to 35.1 (mean: -16.6)

🌀 Cluster 2 — 1191 patches
🌡️  Mean SST: 21.71 °C
📉  Std Dev:  9.86
❄️  NaN fraction: 1.3%
🧭  Lat range: -71.4 to 81.9 (mean: 20.3)

🌀 Cluster 3 — 250 patches
🌡️  Mean SST: 20.67 °C
📉  Std Dev:  3.31
❄️  NaN fraction: 0.6%
🧭  Lat range: -42.1 to -9.1 (mean: -29.6)
