### Selected samples IDW validation

**Author:** Jakub Walczak, PhD

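This notebook validates the inverse distance weighting (IDW) reconstruction method on selected samples, first with fixed hyperparameters and then with optimised ones.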

In [None]:
import csv
import shutil
from functools import partial
from pathlib import Path
from typing import Any, Callable

import xarray as xr
from rich.console import Console

import climatrix as cm

%load_ext rich

In [None]:
console = Console()

# Name of the NaN-handling policy; it is only reported here and is not
# referenced by any other cell visible in this notebook.
NAN_POLICY = "resample"
console.print("[bold green]Using NaN policy: [/bold green]", NAN_POLICY)

# Seed handed to `cm.seed_all` and to the hyperparameter search.
SEED = 1
console.print("[bold green]Using seed: [/bold green]", SEED)

# The data directory lives two levels above the path stored in `__session__`
# (presumably the notebook file path injected by the Jupyter front end —
# TODO confirm for the environment in use).
DSET_PATH = Path(__session__).parent.parent / "data"
console.print("[bold green]Using dataset path: [/bold green]", DSET_PATH)

# Latitude/longitude bounds of the dense target domain, sampled with a 0.1
# step in both directions.
EUROPE_BOUNDS = dict(north=71, south=36, west=-24, east=35)
EUROPE_DOMAIN = cm.Domain.from_lat_lon(
    lat=slice(EUROPE_BOUNDS["south"], EUROPE_BOUNDS["north"], 0.1),
    lon=slice(EUROPE_BOUNDS["west"], EUROPE_BOUNDS["east"], 0.1),
    kind="dense",
)

cm.seed_all(SEED)

In [None]:
def get_all_dataset_idx() -> list[str]:
    """
    List the distinct dataset identifiers found in ``DSET_PATH``.

    The identifier of a file is the text after the last underscore in the
    stem of each ``*.nc`` file, so files that share this suffix contribute
    a single entry.

    Returns
    -------
    list[str]
        Unique identifiers in ascending (lexicographic) order.
    """
    unique_ids = set()
    for nc_file in DSET_PATH.glob("*.nc"):
        unique_ids.add(nc_file.stem.rsplit("_", 1)[-1])
    return sorted(unique_ids)

In [None]:
def run_single_method(
    d: str, i: int, method: str, reconstruct_dense: bool = True, **params
):
    """
    Reconstruct one sample with ``method`` and return it with its validation set.

    Parameters
    ----------
    d : str
        Dataset identifier used to build the train/validation file names.
    i : int
        Index of the dataset; accepted for call compatibility but not used.
    method : str
        Name of the reconstruction method passed to ``reconstruct``.
    reconstruct_dense : bool, optional
        When true (default), additionally reconstruct on ``EUROPE_DOMAIN``.
    **params
        Extra keyword arguments forwarded unchanged to ``reconstruct``.

    Returns
    -------
    tuple
        ``(val_dset, reconstructed_dset, reconstructed_dense)``; the last
        element is ``None`` when ``reconstruct_dense`` is false.
    """
    # Re-seed on every call so repeated runs of the cell are reproducible.
    cm.seed_all(SEED)
    train_dset = xr.open_dataset(
        DSET_PATH / f"ecad_obs_europe_train_{d}.nc"
    ).cm
    val_dset = xr.open_dataset(DSET_PATH / f"ecad_obs_europe_val_{d}.nc").cm
    reconstructed_dset = train_dset.reconstruct(
        val_dset.domain,
        method=method,
        **params,
    )
    # Bind the name up front: without this, skipping the dense pass made the
    # return statement raise UnboundLocalError.
    reconstructed_dense = None
    if reconstruct_dense:
        reconstructed_dense = train_dset.reconstruct(
            EUROPE_DOMAIN, method=method, **params
        )
    return val_dset, reconstructed_dset, reconstructed_dense

In [None]:
# Collect the identifiers of every sample found in the data directory and
# report how many there are.
dset_idx = get_all_dataset_idx()
console.print(
    f"[bold green]There is [bold yellow]{len(dset_idx)}[/bold yellow] samples available [/bold green]"
)

In [None]:
# Position, within the sorted `dset_idx` list, of the sample examined below.
IDX = 0

In [None]:
# Baseline: IDW reconstruction with hand-picked hyperparameters.
# The `sinet_` prefix is kept because later cells refer to these names,
# even though the method used here is "idw".
sinet_val_dset, sinet_reconstructed_dset, sinet_reconstructed_dense = run_single_method(
    dset_idx[IDX], IDX, "idw", k=20, k_min=10, power=2
)

In [None]:
# Metrics report for the fixed-hyperparameter reconstruction.
# NOTE(review): `run_single_experiment` passes the reconstruction first and
# the validation set second, while this cell uses the reverse order —
# confirm which order `cm.Comparison` expects.
cm.Comparison(sinet_val_dset, sinet_reconstructed_dset).compute_report()

### After optimising hyperparameters

In [None]:
# Search interval for each IDW hyperparameter (presumably (lower, upper) —
# verify against the optimiser's documentation).
BOUNDS = dict(k=(1, 50), power=(1e-7, 5.0), k_min=(1, 40))
console.print("[bold green]Hyperparameter bounds: [/bold green]", BOUNDS)

# Number of initial points requested for the search.
OPTIM_INIT_POINTS: int = 50
console.print(
    "[bold green]Using nbr initial points for optimization: [/bold green]",
    OPTIM_INIT_POINTS,
)

# Number of optimisation iterations.
OPTIM_N_ITERS: int = 100
console.print("[bold green]Using iterations for optimization[/bold green]", OPTIM_N_ITERS)

# Identifier of the sample the search will run on.
console.print("[bold green]Dataset: [/bold green]", dset_idx[IDX])

In [None]:
def find_hyperparameters(
    train_dset: cm.BaseClimatrixDataset,
    val_dset: cm.BaseClimatrixDataset,
    bounds: dict[str, tuple],
    n_init_points: int = 30,
    n_iter: int = 200,
    seed: int = 0,
    verbose: int = 2,
) -> dict[str, Any]:
    """
    Search for IDW hyperparameters that minimise the MAE on ``val_dset``.

    Parameters
    ----------
    train_dset : cm.BaseClimatrixDataset
        Dataset the reconstruction is built from.
    val_dset : cm.BaseClimatrixDataset
        Dataset the reconstruction is scored against.
    bounds : dict[str, tuple]
        Search interval per hyperparameter name, forwarded to the finder.
    n_init_points : int, optional
        Accepted for signature compatibility; not forwarded to the finder.
    n_iter : int, optional
        Number of optimisation iterations (forwarded as ``n_iters``).
    seed : int, optional
        Random seed of the search (forwarded as ``random_seed``).
    verbose : int, optional
        Accepted for signature compatibility; not forwarded to the finder.

    Returns
    -------
    dict[str, Any]
        Whatever ``HParamFinder.optimize`` returns; callers in this notebook
        read its ``"best_params"`` and ``"best_score"`` entries.
    """
    # Use the arguments supplied by the caller. The previous body read the
    # notebook globals BOUNDS / OPTIM_N_ITERS / SEED, silently ignoring
    # whatever was passed in.
    finder = cm.optim.HParamFinder(
        "idw",
        train_dset,
        val_dset,
        metric="mae",
        n_iters=n_iter,
        bounds=bounds,
        random_seed=seed,
    )
    return finder.optimize()


def run_single_experiment(d: str):
    """
    Optimise IDW hyperparameters for one sample and evaluate the result.

    The train/validation files of dataset ``d`` are opened, the
    hyperparameter search is run with the notebook-level settings, the best
    values are printed, and the training data is reconstructed on the
    validation domain with those values.

    Parameters
    ----------
    d : str
        Dataset identifier used to build the train/validation file names.

    Returns
    -------
    tuple
        ``(metrics, hyperparams)``: the comparison report extended with a
        ``"dataset_id"`` entry, and a dict holding the dataset id, the best
        ``k`` / ``k_min`` / ``power`` and the best score (``"opt_loss"``).
    """
    train_dset = xr.open_dataset(DSET_PATH / f"ecad_obs_europe_train_{d}.nc").cm
    val_dset = xr.open_dataset(DSET_PATH / f"ecad_obs_europe_val_{d}.nc").cm

    search = find_hyperparameters(
        train_dset,
        val_dset,
        BOUNDS,
        n_init_points=OPTIM_INIT_POINTS,
        n_iter=OPTIM_N_ITERS,
        seed=SEED,
        verbose=2,
    )

    # Report the outcome of the search.
    console.print("[bold yellow]Optimized parameters:[/bold yellow]")
    best = search["best_params"]
    console.print("[yellow]Power:[/yellow]", best["power"])
    console.print("[yellow]k:[/yellow]", best["k"])
    console.print("[yellow]k_min:[/yellow]", best["k_min"])
    console.print("[yellow]Best loss:[/yellow]", search["best_score"])

    # Rebuild the validation points with the best hyperparameters found.
    reconstructed_dset = train_dset.reconstruct(
        val_dset.domain,
        method="idw",
        k=best["k"],
        power=best["power"],
        k_min=best["k_min"],
    )

    metrics = cm.Comparison(reconstructed_dset, val_dset).compute_report()
    metrics["dataset_id"] = d

    hyperparams = {
        "dataset_id": d,
        **{name: best[name] for name in ("k", "k_min", "power")},
        "opt_loss": search["best_score"],
    }
    return (metrics, hyperparams)

In [None]:
# Run the hyperparameter search and evaluation for the selected sample.
metrics, hyperparams = run_single_experiment(dset_idx[IDX])

In [None]:
# Repeat the reconstruction with the optimised hyperparameters; the dense
# Europe-domain reconstruction is produced as well (`reconstruct_dense`
# defaults to true).
idw_val_dset, idw_reconstructed_dset, idw_reconstructed_dense = run_single_method(
    dset_idx[IDX],
    IDX,
    "idw",
    **{name: hyperparams[name] for name in ("k", "power", "k_min")},
)

In [None]:
# Plot the reconstruction computed on the dense Europe domain.
idw_reconstructed_dense.plot()

In [None]:
# Plot the validation dataset of the selected sample.
idw_val_dset.plot()

In [None]:
# Plot the training dataset of the selected sample.
# NOTE(review): `train_dset` ends up bound to whatever `.plot()` returns,
# not to the dataset itself — the name is misleading if reused later.
train_dset = xr.open_dataset(
    DSET_PATH / f"ecad_obs_europe_train_{dset_idx[IDX]}.nc"
).cm.plot()

In [None]:
# Metrics report for the reconstruction obtained with the optimised
# hyperparameters (the `idw_*` results), so that it can be set against the
# fixed-hyperparameter report from the first section.
cm.Comparison(idw_val_dset, idw_reconstructed_dset).compute_report()

In [None]:
# Recomputes the report for the fixed-hyperparameter (`sinet_*`)
# reconstruction; the same expression already appears earlier in the
# notebook, so this cell is a candidate for removal.
cm.Comparison(sinet_val_dset, sinet_reconstructed_dset).compute_report()

In [None]:
# Recomputes the report for the fixed-hyperparameter (`sinet_*`)
# reconstruction; the same expression already appears earlier in the
# notebook, so this cell is a candidate for removal.
cm.Comparison(sinet_val_dset, sinet_reconstructed_dset).compute_report()