In [None]:
import pandas as pd
import os
import json
import zipfile
from io import StringIO, BytesIO
import numpy as np
from scipy.interpolate import griddata, NearestNDInterpolator
from sklearn.neighbors import KDTree

In [None]:
def _idw_weights(dist, power):
    """Turn neighbour distances into row-normalised interpolation weights.

    ``dist`` is the (n_query, k) distance array returned by the tree query.
    The result has the same shape; every row is normalised by its own sum.
    ``power`` is expected to be >= 0 (the caller enforces this).
    """
    if power == 0:
        # uniform weights across the k neighbours
        return np.full_like(dist, 1.0 / dist.shape[1], dtype=float)

    eps = 1e-12
    # eps keeps the denominator strictly positive, so no division by zero can occur
    w = 1.0 / (np.power(dist, power) + eps)

    # A query point that coincides with one or more input points takes its value
    # from those points only (shared equally if several coincide).
    exact = dist < eps
    hit_rows = exact.any(axis=1)
    if hit_rows.any():
        hits = exact[hit_rows]
        w[hit_rows] = hits / hits.sum(axis=1, keepdims=True)

    row_sums = w.sum(axis=1, keepdims=True)
    # Safeguard against an all-zero row so the division below stays finite
    row_sums[row_sums == 0] = 1.0
    return w / row_sums


def interpolate_data(df, grid_params, k=3, power=1.0, average_duplicates=True):
    """Regrid all numeric variables in df using k-NN inverse distance weighting (IDW).

    Parameters
    ----------
    df : DataFrame
        Must contain 'x' and 'y' columns and any number of numeric variables to interpolate.
        Rows whose x or y is NaN/inf are ignored.
    grid_params : dict
        {
          "x": {"min": ..., "max": ..., "n": ...},
          "y": {"min": ..., "max": ..., "n": ...}
        }
    k : int
        Number of neighbors for weighting (>= 1). Capped at the number of input points.
    power : float
        IDW power parameter (>= 0). Set to 0 for uniform averaging of neighbors.
    average_duplicates : bool
        If True, average duplicate (x,y) rows before building the tree.

    Returns
    -------
    DataFrame
        One column per interpolated variable followed by 'x' and 'y'; one row per
        grid node, with x varying fastest.

    Raises
    ------
    ValueError
        If k < 1, power < 0, no usable input points remain, or df has no numeric
        variables besides x and y.
    """
    if k < 1:
        raise ValueError(f"k must be >= 1, got {k}")
    if power < 0:
        raise ValueError(f"power must be >= 0, got {power}")

    print("Applying interpolation with IDW (k={}, power={})".format(k, power))
    x_min, x_max, x_n = grid_params["x"]["min"], grid_params["x"]["max"], grid_params["x"]["n"]
    y_min, y_max, y_n = grid_params["y"]["min"], grid_params["y"]["max"], grid_params["y"]["n"]
    print(grid_params)

    # Average duplicate (x,y) points
    if average_duplicates:
        # only average numeric columns; non-numerics are dropped
        numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
        if "x" not in numeric_cols:
            numeric_cols.append("x")
        if "y" not in numeric_cols:
            numeric_cols.append("y")
        dfu = (df[numeric_cols]
               .groupby(["x", "y"], as_index=False)
               .mean(numeric_only=True))
    else:
        dfu = df.copy()

    # Drop rows with non-finite coordinates in BOTH branches. groupby already
    # discards NaN keys, but the non-averaging path used to hand NaNs straight
    # to the tree, which rejects them.
    pts = dfu[["x", "y"]].to_numpy(dtype=float, na_value=np.nan)
    finite = np.isfinite(pts).all(axis=1)
    if not finite.all():
        dfu = dfu.loc[finite]
        pts = pts[finite]

    # Build query grid
    xi = np.linspace(x_min, x_max, x_n)
    yi = np.linspace(y_min, y_max, y_n)
    X, Y = np.meshgrid(xi, yi, indexing="xy")
    grid_points = np.column_stack([X.ravel(), Y.ravel()])

    # KDTree on the (unique, finite) input points
    n_pts = len(pts)
    if n_pts == 0:
        raise ValueError("No input points to interpolate.")

    k_eff = min(k, n_pts)  # in case dataset smaller than k
    tree = KDTree(pts)
    dist, ind = tree.query(grid_points, k=k_eff)

    # Prepare variables to interpolate (numeric, excluding x,y)
    all_numeric = dfu.select_dtypes(include=[np.number]).columns.tolist()
    variables = [c for c in all_numeric if c not in ("x", "y")]
    if not variables:
        raise ValueError("No numeric variables (besides x,y) found to interpolate.")

    # Weights depend only on geometry, so compute them once for all variables
    weights = _idw_weights(dist, power)

    # Interpolate each variable with the weights
    out = {}
    for var in variables:
        print(f"Interpolating {var} (k={k_eff}, power={power})")
        vals = dfu[var].to_numpy()
        # Gather neighbor values for each grid point and apply weights
        neigh_vals = vals[ind]                   # shape (n_grid, k_eff)
        out[var] = np.sum(weights * neigh_vals, axis=1)

    # Return a flat DataFrame (x,y alongside all variables)
    out["x"] = grid_points[:, 0]
    out["y"] = grid_points[:, 1]
    interpolated_df = pd.DataFrame(out)

    return interpolated_df

# test interpolation

In [None]:
# Load the whitespace-delimited surface file; lines starting with '#' are skipped.
# The separator is a raw string: '\s' in a normal string literal is an invalid
# escape sequence and triggers a SyntaxWarning on recent Python versions.
df = pd.read_csv("../resources/testing_surf/MAFE_tsunami030s.csv", comment='#', sep=r'\s+')
df

In [None]:
# Load the benchmark template and regrid `df` onto the first grid it declares.
template_path = "../resources/benchmark_templates/ttpv1.json"
with open(template_path, 'r', encoding="utf-8") as f:
    template = json.load(f)

# Only the first entry that carries a "grid" section is used (enough for testing).
grid_spec = next(
    (file_info["grid"] for file_info in template["files"] if "grid" in file_info),
    None,
)
if grid_spec is None:
    # Fail loudly instead of hitting a NameError (or showing a stale
    # `interpolated_df` left over in the kernel) further down.
    raise ValueError(f"No entry with a 'grid' section found in {template_path}")

interpolated_df = interpolate_data(df, grid_spec)
interpolated_df

# Plots

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [None]:
# === inputs ===
# Use a dedicated name for the plotting frame: rebinding `df` here would clobber
# the raw input read by the interpolation cell, so re-running that cell afterwards
# would regrid already-regridded data.
# Sorting by (y, x) makes the row-major reshape below independent of row order.
plot_df = interpolated_df.sort_values(["y", "x"])
var = "eta"                       # variable to plot
variable_dict = {"name": var, "unit": "m"}  # tweak as needed

# grid vectors
x_unique = np.sort(plot_df["x"].unique())
y_unique = np.sort(plot_df["y"].unique())
nx, ny = len(x_unique), len(y_unique)
if nx * ny != len(plot_df):
    raise ValueError(
        f"Expected a full {ny}x{nx} grid but found {len(plot_df)} rows."
    )

# 2D field: rows follow y, columns follow x
Z = plot_df[var].to_numpy().reshape(ny, nx)

# color limits (optional)
zmin = float(np.nanmin(Z))
zmax = float(np.nanmax(Z))

# index of y closest to 0 (works even if 0 isn't exactly on grid)
iy0 = int(np.argmin(np.abs(y_unique - 0.0)))
y0 = y_unique[iy0]
line_profile = Z[iy0, :]

# figure with two columns: heatmap (left) + cross-section (right)
fig = make_subplots(
    rows=1, cols=2,
    column_widths=[0.78, 0.22],
    horizontal_spacing=0.06,
    specs=[[{"type": "heatmap"}, {"type": "xy"}]],
    subplot_titles=(
        f"Heatmap: {variable_dict['name']} [{variable_dict['unit']}]",
        f"Cross-section at y={y0:.1f}"
    )
)

# heatmap (left)
fig.add_trace(
    go.Heatmap(
        x=x_unique,
        y=y_unique,
        z=Z,
        zmin=zmin, zmax=zmax,
        colorscale="RdBu_r",
        colorbar=dict(title=f"{variable_dict['name']} ({variable_dict['unit']})")
    ),
    row=1, col=1
)

# draw y=0 (or closest) line on top of the heatmap
fig.add_shape(
    type="line",
    x0=x_unique.min(), x1=x_unique.max(),
    y0=y0, y1=y0,
    line=dict(width=2, dash="dash"),
    row=1, col=1
)

# cross-section (right): value vs x at y≈0
fig.add_trace(
    go.Scatter(
        x=x_unique,
        y=line_profile,
        mode="lines",
        name=f"{variable_dict['name']} @ y={y0:.1f}"
    ),
    row=1, col=2
)

# make the heatmap square: lock y to x scale on the left subplot
fig.update_yaxes(scaleanchor="x", scaleratio=1, row=1, col=1)

# labels & layout
fig.update_xaxes(title_text="x (m)", row=1, col=1)
fig.update_yaxes(title_text="y (m)", row=1, col=1)
fig.update_xaxes(title_text="x (m)", row=1, col=2)
fig.update_yaxes(title_text=f"{variable_dict['name']} ({variable_dict['unit']})", row=1, col=2)

fig.update_layout(
    template="plotly_white",
    title="MAFE V2 with cross-section at y=0",
    margin=dict(l=60, r=20, t=60, b=50),
    showlegend=False,
    height=800
)

fig.show()

In [None]:
# === inputs ===
df_cur = interpolated_df                      # current regridded dataframe
prev_parquet_path = "../resources/testing_surf/MAFE_old_tsunami030s.parquet"    # <-- set your path
var = "eta"
variable_dict = {"name": var, "unit": "m"}
prev_col = f"{var}_prev"                      # column name for the previous run's values

# --- load previous ---
df_prev = pd.read_parquet(prev_parquet_path)

# --- align grids (handle any row ordering differences) ---
# Inner join on (x, y) keeps only nodes present in both datasets.
# NOTE(review): the join compares float coordinates exactly; this presumably
# works because both frames were produced on the same grid — confirm.
merged = (df_cur[["x", "y", var]]
          .merge(df_prev[["x", "y", var]].rename(columns={var: prev_col}),
                 on=["x", "y"], how="inner"))
if merged.empty:
    raise ValueError("No overlapping (x,y) points between current and previous datasets.")

# Recompute x/y vectors from the merged frame (guarantees consistent ordering)
x_unique = np.sort(merged["x"].unique())
y_unique = np.sort(merged["y"].unique())

# Pivot into (ny, nx) grids. This covers both full rectangular grids and masked
# ones (missing nodes become NaN), and raises on duplicated (x, y) nodes instead
# of silently mis-shaping the field.
Z_cur = (merged.pivot(index="y", columns="x", values=var)
         .reindex(index=y_unique, columns=x_unique)
         .to_numpy())
Z_prev = (merged.pivot(index="y", columns="x", values=prev_col)
          .reindex(index=y_unique, columns=x_unique)
          .to_numpy())

# color limits shared (so colors are comparable)
zmin = float(np.nanmin([np.nanmin(Z_cur), np.nanmin(Z_prev)]))
zmax = float(np.nanmax([np.nanmax(Z_cur), np.nanmax(Z_prev)]))

# cross-section at y≈0
iy0 = int(np.argmin(np.abs(y_unique - 0.0)))
y0 = y_unique[iy0]
profile_cur  = Z_cur[iy0, :]
profile_prev = Z_prev[iy0, :]

# --- figure: 1 row, 3 cols (current, previous, cross-section) ---
fig = make_subplots(
    rows=1, cols=3,
    column_widths=[0.42, 0.42, 0.16],
    horizontal_spacing=0.06,
    specs=[[{"type": "heatmap"}, {"type": "heatmap"}, {"type": "xy"}]],
    subplot_titles=(
        f"Current: {variable_dict['name']} [{variable_dict['unit']}]",
        f"Previous: {variable_dict['name']} [{variable_dict['unit']}]",
        f"Cross-section at y={y0:.1f} m"
    )
)

# current heatmap
fig.add_trace(
    go.Heatmap(
        x=x_unique, y=y_unique, z=Z_cur,
        zmin=zmin, zmax=zmax, colorscale="RdBu_r",
        colorbar=dict(title=f"{variable_dict['name']} ({variable_dict['unit']})")
    ),
    row=1, col=1
)

# previous heatmap
fig.add_trace(
    go.Heatmap(
        x=x_unique, y=y_unique, z=Z_prev,
        zmin=zmin, zmax=zmax, colorscale="RdBu_r",
        showscale=False  # share colorbar visually with left
    ),
    row=1, col=2
)

# y=0 guide lines on both heatmaps
for c in (1, 2):
    fig.add_shape(
        type="line",
        x0=x_unique.min(), x1=x_unique.max(),
        y0=y0, y1=y0,
        line=dict(width=2, dash="dash"),
        row=1, col=c
    )

# cross-section overlay (current vs previous)
fig.add_trace(
    go.Scatter(
        x=x_unique, y=profile_cur,
        mode="lines", name="Current", line=dict(width=2)
    ),
    row=1, col=3
)
fig.add_trace(
    go.Scatter(
        x=x_unique, y=profile_prev,
        mode="lines", name="Previous", line=dict(width=2)
    ),
    row=1, col=3
)

# square aspect for both heatmaps
fig.update_yaxes(scaleanchor="x", scaleratio=1, row=1, col=1)
fig.update_yaxes(scaleanchor="x2", scaleratio=1, row=1, col=2)  # anchor by subplot ref

# labels & layout
for c in (1, 2):
    fig.update_xaxes(title_text="x (m)", row=1, col=c)
    fig.update_yaxes(title_text="y (m)", row=1, col=c)

fig.update_xaxes(title_text="x (m)", row=1, col=3)
fig.update_yaxes(title_text=f"{variable_dict['name']} ({variable_dict['unit']})", row=1, col=3)

fig.update_layout(
    template="plotly_white",
    title="Fabian V5: Comparison: Current vs Previous (heatmaps) + Cross-section at y=0",
    margin=dict(l=60, r=20, t=60, b=50),
    height=800,
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1.0)
)

fig.show()

In [None]:
# Write the figure currently bound to `fig` to an HTML file at this relative path
# (in notebook order, `fig` is the comparison figure built in the preceding cell).
fig.write_html("fabian_v5_compare.html")

In [None]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go


# Variable under comparison and its display metadata
var = "eta"
variable_dict = dict(name=var, unit="m")

# --- inner-join current and previous fields on their shared (x, y) nodes ---
merged = df_cur[["x", "y", var]].merge(
    df_prev[["x", "y", var]].rename(columns={var: f"{var}_prev"}),
    how="inner",
    on=["x", "y"],
)
if merged.empty:
    raise ValueError("No overlapping (x,y) points between current and previous datasets.")

# sorted coordinate axes of the shared nodes
x_unique = np.sort(merged["x"].unique())
y_unique = np.sort(merged["y"].unique())
nx = len(x_unique)
ny = len(y_unique)

# reshape both fields into y-by-x tables laid out on those axes
cur_grid = (
    merged
    .pivot(index="y", columns="x", values=var)
    .reindex(index=y_unique, columns=x_unique)
)
prev_grid = (
    merged
    .pivot(index="y", columns="x", values=f"{var}_prev")
    .reindex(index=y_unique, columns=x_unique)
)

# current minus previous, node by node (both tables share the same labels)
Z_diff = (cur_grid - prev_grid).to_numpy()

# colour range is centred on zero: +/- the largest absolute difference
max_abs = np.nanmax(np.abs(Z_diff))

fig = go.Figure(
    data=[
        go.Heatmap(
            x=x_unique,
            y=y_unique,
            z=Z_diff,
            zmin=-max_abs,
            zmax=max_abs,
            colorscale="RdBu_r",
            colorbar=dict(title=f"Δ {variable_dict['name']} ({variable_dict['unit']})"),
        )
    ]
)

fig.update_layout(
    template="plotly_white",
    title=f"Difference Heatmap (Current − Previous) of {variable_dict['name']} [{variable_dict['unit']}]",
    xaxis_title="x (m)",
    yaxis_title="y (m)",
    height=800,
)
# lock the y scale to the x scale so the map keeps a square aspect
fig.update_yaxes(scaleanchor="x", scaleratio=1)

fig.show()