In [4]:
import sys
import os
from pathlib import Path
import tensorflow as tf
import numpy as np
import shutil
import time

# Set ROOT path to access other directories in project
ROOT = Path.cwd().parent
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

import SnowDepth.data_loader as DL
import SnowDepth.data_splitter as DS
import SnowDepth.architecture as ARCH
import SnowDepth.optimal_features as OF
from SnowDepth.config import HOLDOUT_AOI
from SnowDepth.config import SEED

In [5]:
# Select holdout AOI
holdout_aoi = HOLDOUT_AOI

# H5 directory and expected output files
h5_dir = ROOT  / "data" / "h5_dir"
h5_path_HSIC = h5_dir / "HSIC" / "data_HSIC.h5"
h5_path_PCC  = h5_dir / "PCC"  / "data_PCC.h5"
h5_path_MI   = h5_dir / "MI"   / "data_MI.h5"

# If we have already written H5 files, we can skip a lot of steps here:
if all(p.exists() for p in [h5_path_HSIC, h5_path_PCC, h5_path_MI]):
    print("All H5 files already exist. Skipping feature selection and H5 generation.")
else:
    print("H5 files missing. Running feature selection and writing new H5 files")

    # SET IMPORTANT VARIABLES HERE:
    
    # Path to TIFF files
    data_dir = ROOT / "data" / "tif_files"

    # Number of features to select
    top_k = 10

    # Load dataframe
    df = DL.build_df(str(data_dir), drop_invalid=True, upper_threshold=3)
    dev_df = df[df["aoi_name"] != holdout_aoi].copy()

    # Run feature selection
    ff_algos = OF.optimal_feature_sets(dev_df, top_k=top_k, n_per_aoi=10000)

    # Write one H5 file per feature-set
    for name, feats in ff_algos.items():
        out_dir = h5_dir / name
        out_dir.mkdir(parents=True, exist_ok=True)
        DL.build_h5(
            data_dir=str(data_dir),
            out_dir=str(out_dir),
            write_mask=True,
            upper_threshold=3.0,
            selected_features=feats,
            out_name=f"data_{name}.h5",
        )
        print(f"Wrote new H5: {out_dir / f'data_{name}.h5'}")

All H5 files already exist. Skipping feature selection and H5 generation.


In [6]:
# Train models with each feature set. Do this in loop so the code is not so long

# Config
FEATURESETS = [
    ("HSIC", h5_path_HSIC),
    ("PCC",  h5_path_PCC),
    ("MI",   h5_path_MI),
]
PATCH_SIZE   = 128
STRIDE       = 64
MIN_VALID_FR = 0.80
VAL_FRACTION = 0.10
LR           = 1e-3
EPOCHS       = 50
BATCH_SIZE   = 4
EPS          = 1e-6

# Where to store temporary artifacts
ARTIFACTS_DIR = "Transformer_weights_and_norm"
os.makedirs(ARTIFACTS_DIR, exist_ok=True)

# Final (winner) artifacts
FINAL_NORM    = os.path.join(ARTIFACTS_DIR, "best_norm_stats.npz")
FINAL_WEIGHTS = os.path.join(ARTIFACTS_DIR, "transformer_best.weights.h5")
FINAL_TAG     = os.path.join(ARTIFACTS_DIR, "transformer_best.tag.txt")

results = []  # (name, best_val_loss, train_time_min, tmp_norm_path, tmp_weights_path)

for name, h5_path in FEATURESETS:

    # Split data (per-feature set)
    (X_train, y_train, m_train), (X_val, y_val, m_val), (_, _, _) = DS.DL_split(
        h5_path=str(h5_path),
        holdout_aoi=holdout_aoi,
        val_fraction=VAL_FRACTION,
        patch_size=PATCH_SIZE,
        stride=STRIDE,
        min_valid_frac=MIN_VALID_FR,
    )
    print(f"{name} Shapes: X_train {X_train.shape}, y_train {y_train.shape}, X_val {X_val.shape}, y_val {y_val.shape}")

    # Compute and save normalization stats (train only; NaN-safe)
    tmp_norm_path = os.path.join(ARTIFACTS_DIR, f"{name}_norm_stats.tmp.npz")
    mean = np.nanmean(X_train, axis=(0, 1, 2), keepdims=True)
    std  = np.nanstd (X_train, axis=(0, 1, 2), keepdims=True)
    std  = np.where(std < EPS, 1.0, std).astype("float32")
    np.savez(tmp_norm_path, mean=mean.astype("float32"), std=std.astype("float32"))

    # Normalize (replace non-finite before scaling)
    with np.load(tmp_norm_path) as f:
        mu, sigma = f["mean"], f["std"]
    X_train = np.nan_to_num(X_train, nan=mu, posinf=mu, neginf=mu).astype("float32")
    X_val   = np.nan_to_num(X_val,   nan=mu, posinf=mu, neginf=mu).astype("float32")
    X_train_n = ((X_train - mu) / sigma).astype("float32")
    X_val_n   = ((X_val   - mu) / sigma).astype("float32")

    # Labels and per-pixel weights
    y_train_f, w_train = ARCH.fill_nan_and_mask(y_train)
    y_val_f,   w_val   = ARCH.fill_nan_and_mask(y_val)
    w_train_4d = w_train[..., None].astype("float32")
    w_val_4d   = w_val[..., None].astype("float32")

    # Build and compile Transformer (keep hyperparams same across feature sets)
    model = ARCH.transformer_seg_model(
        input_shape=X_train_n.shape[1:],  # (H,W,C)
        patch_size=16,
        d_model=256,
        depth=4,
        num_heads=4,
        mlp_dim=512,
        dropout=0.0,
    )
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=LR),
        loss=tf.keras.losses.Huber(delta=1.0),
        metrics=[tf.keras.metrics.MeanAbsoluteError(name="MAE")],
    )

    # Train and save only best weights
    tmp_weights = os.path.join(ARTIFACTS_DIR, f"transformer_{name}.tmp.weights.h5")
    callbacks = [
        tf.keras.callbacks.ModelCheckpoint(tmp_weights, monitor="val_loss", save_best_only=True, save_weights_only=True, verbose=1),
        tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True),
        tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=5, min_lr=1e-5),
    ]

    start_time = time.time()
    hist = model.fit(
        X_train_n, y_train_f,
        sample_weight=w_train_4d,
        validation_data=(X_val_n, y_val_f, w_val_4d),
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        callbacks=callbacks,
        verbose=1,
    )
    train_time = (time.time() - start_time) / 60.0
    print(f"{name}: Training took {train_time:.2f} minutes")

    # Track best val loss
    best_val = float(np.min(hist.history["val_loss"]))
    results.append((name, best_val, train_time, tmp_norm_path, tmp_weights))
    print(f"{name}: best val_loss = {best_val:.6f}")



HSIC Shapes: X_train (576, 128, 128, 10), y_train (576, 128, 128, 1), X_val (64, 128, 128, 10), y_val (64, 128, 128, 1)
Epoch 1/50
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 167ms/step - MAE: 0.5451 - loss: 0.2281
Epoch 1: val_loss improved from inf to 0.08889, saving model to Transformer_weights_and_norm\transformer_HSIC.tmp.weights.h5
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 179ms/step - MAE: 0.5444 - loss: 0.2275 - val_MAE: 0.3268 - val_loss: 0.0889 - learning_rate: 0.0010
Epoch 2/50
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 164ms/step - MAE: 0.3053 - loss: 0.0755
Epoch 2: val_loss improved from 0.08889 to 0.07000, saving model to Transformer_weights_and_norm\transformer_HSIC.tmp.weights.h5
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 172ms/step - MAE: 0.3052 - loss: 0.0755 - val_MAE: 0.2828 - val_loss: 0.0700 - learning_rate: 0.0010
Epoch 3/50
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [7]:
# Pick the global winner
results_sorted = sorted(results, key=lambda t: t[1])  # by val_loss ascending
winner_name, winner_loss, winner_time, winner_norm, winner_weights = results_sorted[0]

print("\nValidation leaderboard (lower is better)")
for rank, (n, v, tmin, _, _) in enumerate(results_sorted, 1):
    print(f"{rank}. {n:>4s}  val_loss={v:.6f}  time={tmin:.2f}m")
print(f"\nWINNER: {winner_name} (val_loss={winner_loss:.6f})")

# Clean previous finals if exist
for p in [FINAL_NORM, FINAL_WEIGHTS, FINAL_TAG]:
    if os.path.exists(p):
        os.remove(p)

# Move winning artifacts to final names
shutil.move(winner_norm, FINAL_NORM)
shutil.move(winner_weights, FINAL_WEIGHTS)
with open(FINAL_TAG, "w") as f:
    f.write(f"winner={winner_name}\nval_loss={winner_loss:.6f}\n")

# Delete other temp files
for n, _, _, norm_path, weight_path in results_sorted[1:]:
    if os.path.exists(norm_path):  os.remove(norm_path)
    if os.path.exists(weight_path): os.remove(weight_path)

print(f"\nSaved winner norm stats -> {FINAL_NORM}")
print(f"Saved winner weights -> {FINAL_WEIGHTS}")
print(f"Wrote tag file -> {FINAL_TAG}")



Validation leaderboard (lower is better)
1.  PCC  val_loss=0.042974  time=13.23m
2.   MI  val_loss=0.047800  time=5.64m
3. HSIC  val_loss=0.058163  time=5.24m

WINNER: PCC (val_loss=0.042974)

Saved winner norm stats -> Transformer_weights_and_norm\best_norm_stats.npz
Saved winner weights -> Transformer_weights_and_norm\transformer_best.weights.h5
Wrote tag file -> Transformer_weights_and_norm\transformer_best.tag.txt
