In [None]:
import os, pickle, warnings
from tqdm import tqdm
from pprint import pprint

import pandas as pd
import numpy as np

# modeling/experimentation
import mlflow
from scipy import interpolate
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# to load parameters of the experiments
from kedro.config import ConfigLoader

# utils
from joblib import Parallel, delayed
from pandas import IndexSlice as idx
from IPython.display import display

# viz
import seaborn as sns
import matplotlib.pyplot as plt

# experiments
from experiments.configs.evaluation import eval_sets
from experiments.noaa.deterministic import NOAADeterministicExperiment
from spatial_interpolation.visualization import map_viz
# print("Available experiment configs:",NOAADeterministicExperiment.config.config.keys())

# local utils and other imports
from spatial_interpolation.interpolators import ScipyInterpolator
from spatial_interpolation import data, utils
from spatial_interpolation.utils.experiments import conf 
from spatial_interpolation.data.load_data import get_ml_workspace
from spatial_interpolation.utils import tqdm_joblib
from spatial_interpolation.visualization import plot_interpolation, map_viz


# notebook configuration
import dotenv

# Silence FutureWarning messages for the remainder of the session.
warnings.simplefilter("ignore", FutureWarning)
# NOTE(review): presumably reads a local .env file into the environment — confirm.
dotenv.load_dotenv()

In [None]:
# load data
# Build the experiment, read its configuration and load the buoy dataset the
# configuration points to.
experiment = NOAADeterministicExperiment("linear_set3", n_jobs=1)
config = experiment.get_config()
dataset = data.NDBCDataLoader(**config.data_loading).load()
df, gdf = dataset.buoys_data, dataset.buoys_geo

# Hourly grid running from the first to the last timestamp present in the data.
available_time = df.index.get_level_values("time").unique()
time_range = pd.date_range(available_time.min(), available_time.max(), freq="H")

# Restrict both frames to the buoys that lie within the evaluation area.
eval_spec = eval_sets.ndbc[config.eval_set]
inside_area = gdf.within(eval_spec.area)
locations_within_area = gdf.loc[inside_area].index.get_level_values("buoy_id").unique()
buoy_ids = df.index.get_level_values("buoy_id")
df = df.loc[buoy_ids.isin(locations_within_area)]
gdf = gdf.loc[idx[:, locations_within_area],:]
dataset = data.NDBCData(df, gdf)

# Hold out the evaluation slice and join each split into a single frame.
train, test = dataset.split_slice(test=eval_spec.eval)
train_df = train.join()
test_df = test.join()

# Rank of every hourly timestamp on the grid, exposed as a "time_step" column.
time_steps = time_range.to_series(name="time_step").rename_axis("time").rank(ascending=True)

target = config.target


def _index_by_time(frame, target_col, steps):
    """Re-index ``frame`` by (time, location_id) and attach the time step.

    The "buoy_id" column is renamed to "location_id", rows without a value in
    ``target_col`` are dropped, and ``steps`` is joined onto the result.
    """
    reindexed = (
        frame
        .reset_index()
        .rename(columns={"buoy_id": "location_id"})
        .set_index(["time", "location_id"])
        .sort_index()
    )
    return reindexed.dropna(subset=[target_col]).join(steps)


train_by_times = _index_by_time(train_df, target, time_steps)
test_by_times = _index_by_time(test_df, target, time_steps)



In [None]:
# Timestamp at which the interpolation examples below are evaluated.
time = pd.Timestamp("2020-05-10 05:00")

In [None]:
from scipy.spatial import Delaunay

dims = ["longitude","latitude"]
# Coordinates of the training and test locations observed at `time`.
points = train_by_times.loc[time,dims].values
test_points = test_by_times.loc[time,dims].values
tri = Delaunay(points, incremental=True)

# Locate the simplex that contains each test point. find_simplex() reports -1
# for points outside the triangulation; used as an index, -1 would wrap around
# and wrongly highlight the last simplex, so those entries are filtered out.
ind_tri = tri.find_simplex(test_points)
enclosing_simplices = ind_tri[ind_tri >= 0]
tri_mask = np.zeros(len(tri.simplices))
tri_mask[enclosing_simplices] = 1
# Vertices of the simplex around the first enclosed test point (empty when no
# test point falls inside the triangulation).
if enclosing_simplices.size:
    closest_points = points[tri.simplices[enclosing_simplices[0]]]
else:
    closest_points = np.empty((0, len(dims)))

# plot the triangulation
fig,ax = plt.subplots(1,1,figsize=(10,10))
df_countries = data.load_world_borders()
df_countries.plot(ax=ax,alpha=0.5)
# Zoom the map to the bounding box of the evaluation area.
xmin, ymin, xmax, ymax = eval_sets.ndbc[config.eval_set].area.bounds
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
ax.plot(points[:,0], points[:,1], 'o', color="green",label="Training points")
ax.set(xlabel="Longitude",ylabel="Latitude",title=f"Delaunay triangulation of the NDBC data (Area {config.eval_set[-1]}, {time: %b %d, %Y})")
ax.triplot(points[:,0], points[:,1], tri.simplices.copy(), color="grey")
ax.plot(test_points[:,0], test_points[:,1], 'o', color="red", label="Test points")
# Shade only the simplices that actually enclose a test point.
ax.tripcolor(
    points[:,0], points[:,1], tri.simplices.copy(), tri_mask, cmap="Reds", alpha=0.25,
    label="Simplices of the triangulation of the test points")
ax.legend();

In [None]:
# Piecewise-linear interpolation of the target over (longitude, latitude),
# fitted on the training locations observed at `time`.
linear_interpolator = ScipyInterpolator(
    interpolate.LinearNDInterpolator,
    dimensions=[["longitude","latitude"]],
)
# Same timestamp as the one used for the triangulation plot above.
time = pd.Timestamp("2020-05-10 05:00")
linear_interpolator.fit(train_by_times.loc[time],y=target)
# The return value is kept in `cm`; the next cell reads `cm.cmap` from it.
cm = plot_interpolation(
    train_by_times.loc[time,"longitude"],train_by_times.loc[time,"latitude"],
    test_by_times.loc[time,"longitude"],test_by_times.loc[time, "latitude"],
    test_by_times.loc[time][target],
    interpolator=linear_interpolator,
    bbox=eval_sets.ndbc[config.eval_set].area.bounds,
    radius=.4,
    # `time.day` replaces the "%-d" directive, which only some platforms' strftime accepts.
    title=f"Linear Barycentric interpolation on {time:%b} {time.day}, {time:%Y}",
    cmap="rainbow",
    zmin=0,zmax=3
)

In [None]:
# Radial-basis-function interpolation of the target over (longitude, latitude),
# fitted on the training locations observed at `time`.
kernel = "gaussian"
rbf_interpolator = ScipyInterpolator(
    interpolate.RBFInterpolator,
    dimensions=[["longitude","latitude"]],
    kernel=kernel,
    epsilon=.6
)
rbf_interpolator.fit(train_by_times.loc[time],y=target)
plot_interpolation(
    train_by_times.loc[time,"longitude"],train_by_times.loc[time,"latitude"],
    test_by_times.loc[time,"longitude"],test_by_times.loc[time, "latitude"],
    test_by_times.loc[time][target],
    rbf_interpolator,
    bbox=eval_sets.ndbc[config.eval_set].area.bounds,
    radius=.4,
    # Reuse the colormap from the linear-interpolation figure so both plots share colours.
    cmap=cm.cmap,
    zmin=0,zmax=3,
    # `time.day` replaces the "%-d" directive, which only some platforms' strftime accepts.
    title=f"Radial Basis Function (RBF) Interpolation with {kernel=}\n{time:%b} {time.day}, {time:%Y}",
)

In [None]:
# Show the interpolator's predictions at the test locations next to the
# observed target values for the same timestamp.
(
    rbf_interpolator.predict(test_by_times.loc[time]),
    test_by_times.loc[time, target].values,
)

### Temporal-Spatial Interpolation:

In [None]:
# Training coordinates and time steps for the one-day window ending at `time`.
delta = pd.Timedelta(days=1)
train_by_times.loc[slice(time - delta, time), ["longitude", "latitude", "time_step"]]

In [None]:
# Gaussian RBF interpolation at `time` with an explicit shape parameter.
kernel = "gaussian"
time = pd.Timestamp(time)
epsilon=0.5
rbf_interpolator = ScipyInterpolator(
    interpolate.RBFInterpolator,
    dimensions=[["longitude","latitude"]],
    kernel=kernel,
    epsilon=epsilon
)
rbf_interpolator.fit(train_by_times.loc[time],y=target)
plot_interpolation(
    train_by_times.loc[time,rbf_interpolator.dim_cols[0]], train_by_times.loc[time,rbf_interpolator.dim_cols[1]],
    test_by_times.loc[time, rbf_interpolator.dim_cols[0]], test_by_times.loc[time, rbf_interpolator.dim_cols[1]],
    test_by_times.loc[time][target],
    rbf_interpolator,
    bbox=eval_sets.ndbc[config.eval_set].area.bounds,
    radius=.4,
    cmap="rainbow",
    zmin=0,zmax=3,
    # "\\" emits the literal backslash of "$\epsilon=...$"; the previous "\{" was an
    # invalid escape sequence. `time.day` replaces the platform-specific "%-d".
    title=f"{kernel.capitalize()} RBF Interpolation ($\\{epsilon=}$) on {time:%b} {time.day}, {time:%Y}",
)

In [None]:
from spatial_interpolation.interpolators import IDWInterpolator

# IDW interpolation at `time`. The model gets its own name so that it no longer
# overwrites `rbf_interpolator`, which still refers to the Gaussian RBF model.
time = pd.Timestamp(time)
idw_interpolator = ScipyInterpolator(
    IDWInterpolator,
    dimensions=[["longitude","latitude"]],
)
idw_interpolator.fit(train_by_times.loc[time],y=target)
plot_interpolation(
    train_by_times.loc[time,idw_interpolator.dim_cols[0]], train_by_times.loc[time,idw_interpolator.dim_cols[1]],
    test_by_times.loc[time, idw_interpolator.dim_cols[0]], test_by_times.loc[time, idw_interpolator.dim_cols[1]],
    test_by_times.loc[time][target],
    idw_interpolator,
    bbox=eval_sets.ndbc[config.eval_set].area.bounds,
    radius=.4,
    cmap="rainbow",
    zmin=0,zmax=3,
    # `time.day` replaces the "%-d" directive, which only some platforms' strftime accepts.
    title=f"IDW Interpolation on {time:%b} {time.day}, {time:%Y}",
)