# Coastal altimetry tide model rankings

This code compares multiple global ocean tide models against satellite altimetry data, and returns tide model performance and rankings in a standardised format for further analysis.

X-TRACK coastal altimetry data (v2.1, [10.24400/527896/a01-2022.020](https://doi.org/10.24400/527896/a01-2022.020)) used in this study were developed, validated by the CTOH/LEGOS, France and distributed by Aviso+.

> Birol, F., N. Fuller, F. Lyard, M. Cancet, F. Niño, C. Delebecque, S. Fleury, F. Toublanc, A. Melet, M. Saraceno, F. Léger, 2017. “Coastal Applications from Nadir Altimetry: Example of the X-TRACK Regional Products.” Advances in Space Research, 2017, 59 (4), p.936-953. doi:10.1016/j.asr.2016.11.005

## Getting started
Set the working directory to the top level of the repo so that relative paths (e.g. `requirements.in` and `data/raw/`) resolve correctly:

In [None]:
cd ../..

Install additional packages directly from the requirements file:

In [None]:
pip install -r requirements.in --quiet

### Load packages

In [None]:
%load_ext autoreload
%autoreload 2

import datetime
import glob

import geopandas as gpd
import numpy as np
import pandas as pd
import tqdm
import xarray as xr
from eo_tides.model import model_tides

# Set paths to data
# `tide_model_dir` is passed to `model_tides(directory=...)`;
# `altimetry_path` is the folder searched for `ctoh.sla.ref.*.nc` files.
# NOTE(review): the folder name says "2.2" while the introduction cites
# X-TRACK v2.1 — confirm which product version is actually used.
tide_model_dir = "/var/share/tide_models/"
altimetry_path = "/gdata1/data/altimetry/X-TRACK 2.2"

# Models to run
# These names are passed to `model_tides(model=...)` and are reused later as
# the per-model column names when reshaping, summarising and ranking results.
models = [
    "EOT20",
    "FES2012",
    "FES2014_extrapolated",
    "FES2022_extrapolated",
    "HAMTIDE11",
    "GOT4.10",
    "GOT5.6_extrapolated",
    "TPXO10-atlas-v2-nc",
    "TPXO8-atlas-nc",
    "TPXO9-atlas-v5-nc",
]

### Preprocess altimetry data

In [None]:
# Load the Collection 3 summary grid and take its bounding box, which is used
# below to subset altimetry points by longitude/latitude.
# NOTE(review): no reprojection is applied, so the bounds are assumed to
# already be in lon/lat degrees — confirm the CRS of the grid file.
c3_path = "https://data.dea.ga.gov.au/derivative/ga_summary_grid_c3.geojson"
c3_grid = gpd.read_file(c3_path)
xmin, ymin, xmax, ymax = c3_grid.total_bounds

# Choose which data to load
# paths = glob.glob(f"{altimetry_path}/ctoh.sla.ref.*.nc")  # All files
paths = glob.glob(f"{altimetry_path}/ctoh.sla.ref.S3*.nc")  # Sentinel-3 only
# paths = glob.glob(f"{altimetry_path}/ctoh.sla.ref.TP+*.nc")  # Topex/Jason-1/Jason-2/Jason-3
# paths = glob.glob(f"{altimetry_path}/ctoh.sla.ref.TPN*.nc")  # Topex/Jason-1/Jason-2 Interleaved orbit
# paths = glob.glob(f"{altimetry_path}/ctoh.sla.ref.TP*.nc")  # All Topex/Jason-1/Jason-2/Jason-3
# paths = glob.glob(f"{altimetry_path}/ctoh.sla.ref.ERS1*.nc")  # ERS-1/ERS-2/Envisat/SARAL/AltiKa
# paths = glob.glob(f"{altimetry_path}/ctoh.sla.ref.HY2*.nc")  # Haiyang-2A
# paths = glob.glob(f"{altimetry_path}/ctoh.sla.ref.GFO*.nc")  # Geosat Follow On

# Fail early with a clear message instead of the opaque
# "No objects to concatenate" error that `pd.concat` raises on an empty list
if not paths:
    raise FileNotFoundError(f"No X-TRACK altimetry files found in {altimetry_path!r}")

out = []

for path in tqdm.tqdm(paths):
    # Open each file in a context manager so its handle is released as soon
    # as the data has been copied into a dataframe
    with xr.open_dataset(path, decode_times=False) as ds:
        # Points that fall strictly inside the study area bounding box
        in_bounds = (ds.lon > xmin) & (ds.lon < xmax) & (ds.lat > ymin) & (ds.lat < ymax)

        ds_clean = (
            ds[["time", "ocean_tide", "sla", "solid_tide", "mssh"]]
            # Subset to study area
            .sel(points_numbers=in_bounds)
            # Combine point and cycle dimensions into a single "z" dim
            # so we can convert to a table-like dataframe
            .stack({"z": ["points_numbers", "cycles_numbers"]})
            .to_dataframe()
            # Add pass number (from the file's `Pass` attribute) and satellite
            # series (fourth dot-separated token of the file name), treating
            # Sentinel-3A and 3B as a single "S3" series
            .assign(pass_number=ds.Pass, satellites=path.split("/")[-1].split(".")[3])
            .replace({"satellites": {"S3A": "S3", "S3B": "S3"}})
            # Remove any rows with missing Sea Level Anomaly or tide data
            .dropna(how="any", axis=0, subset=["sla", "ocean_tide"])
            .reset_index(drop=True)
        )
    out.append(ds_clean)

# Combine all files; `ignore_index=True` gives each row a unique label rather
# than repeating every file's 0..n index
df = pd.concat(out, axis=0, ignore_index=True)

# Fix time: files were opened with `decode_times=False`, so convert the raw
# values from days since 1950-01-01 into timestamps
df["time"] = datetime.datetime(1950, 1, 1) + pd.to_timedelta(df.time, unit="days")

# Remove duplicate observations (same time and location), keeping the first
df = df.drop_duplicates(subset=["time", "lat", "lon"])

#### Select subset

In [None]:
# Choose the rows to analyse. Alternative subsets, kept for quick experiments:
# df_subset = df
# df_subset = df.iloc[::1000]
# df_subset = df.iloc[0:30000]
# df_subset = df_subset.iloc[0:100000]
# df_subset = df.query("lon==111.84197313580249")

# Default: observations from the start of 2017 up to (not including) 2020
in_period = (df.time >= "2017") & (df.time < "2020")
df_subset = df.loc[in_period]

# Preview
df_subset

## Tide modelling

In [None]:
%%time
tide_df = model_tides(
    x=df_subset.lon,
    y=df_subset.lat,
    time=df_subset.time,
    model=models,
    mode="one-to-one",
    output_format="wide",
    directory=tide_model_dir,
    crop_buffer=5,
)
tide_df

### Apply tide correction using each model

In [None]:
# Add non-tide corrected SLA data to dataframe by adding the supplied
# `ocean_tide` values back onto `sla`.
# Values are assigned by position: this assumes `tide_df` has one row per row
# of `df_subset`, in the same order.
tide_df["sla_notidecorr"] = (df_subset.sla + df_subset.ocean_tide).to_numpy()

# Add satellite annotation (also by position)
tide_df["satellites"] = df_subset.satellites.to_numpy()

# Reshape to long format: one row per observation and tide model.
# `var_name` is set explicitly because later cells group and query on a
# "tide_model" column; without it the name depends on `tide_df.columns.name`.
tide_df_long = tide_df.melt(
    ignore_index=False,
    id_vars=["satellites", "sla_notidecorr"],
    value_vars=models,
    var_name="tide_model",
    value_name="tide_m",
)

# Re-apply tide correction with each of our models
tide_df_long["sla_tidecorr"] = tide_df_long.sla_notidecorr - tide_df_long.tide_m
tide_df_long

### Calculate RMS error at every point and model

In [None]:
# Treat Sentinel-3A and Sentinel-3B as a single "S3" series
tide_df_long["satellites"] = tide_df_long.satellites.replace(["S3A", "S3B"], "S3")

# Squared tide-corrected SLA for every observation and model
tide_df_long["sla_tidecorr_sq"] = tide_df_long["sla_tidecorr"] ** 2

# Mean of squares per point, model and satellite series
mean_sq = tide_df_long.groupby(["x", "y", "tide_model", "satellites"])["sla_tidecorr_sq"].mean()

# Root of the mean square gives the RMS; pivot so each model is a column
df_rms = np.sqrt(mean_sq).unstack("tide_model").reset_index()
df_rms

### Filter to points with sufficient observations

In [None]:
# Count observed timesteps per point and satellite series. The long table has
# one row per observation for every model, so counting the rows of a single
# model (the first in `models`) gives the number of observations.
n_obs = (
    tide_df_long.loc[tide_df_long["tide_model"] == models[0]]
    .groupby(["x", "y", "satellites"])
    .size()
)

# Attach counts by matching on (x, y, satellites) rather than by position, so
# a difference in row order or group count cannot silently misalign them
point_keys = pd.MultiIndex.from_frame(df_rms[["x", "y", "satellites"]])
df_rms["n"] = n_obs.reindex(point_keys).to_numpy()

# Keep only points with more than 20 observations.
# NOTE(review): the original comment described this as "three years of data";
# the code applies a count threshold only — confirm the intended criterion.
df_rms = df_rms.query("n > 20")
df_rms

### Process to standard format

In [None]:
# Columns of the standardised output, in order: identifiers first, then one
# RMS column per tide model
standard_cols = ["x", "y", "valid_perc", "source", "statistic"] + models

combined_df = (
    df_rms.rename_axis("point_id")
    .assign(valid_perc=1.0, statistic="rms")
    .rename(columns={"satellites": "source"})
    .reindex(standard_cols, axis=1)
)

# Describe the data source, including the satellite series
combined_df["source"] = "x-track altimetry (" + combined_df["source"] + ")"

# Per-point summary statistics across all models
model_rms = combined_df[models]
combined_df = combined_df.assign(
    **{
        "min": model_rms.min(axis=1),
        "max": model_rms.max(axis=1),
        "diff": model_rms.max(axis=1) - model_rms.min(axis=1),
        "ave": model_rms.mean(axis=1),
        "median": model_rms.median(axis=1),
        "std": model_rms.std(axis=1),
        "missing": model_rms.isna().sum(axis=1),
    }
)

# Rank models at each point (rank 1 = lowest RMS) and append as "rank_*" columns
model_ranks = model_rms.rank(axis=1, ascending=True).add_prefix("rank_")
combined_df = pd.concat([combined_df, model_ranks], axis=1)

# Best/worst model per point: column with the lowest/highest rank, with the
# five-character "rank_" prefix stripped to recover the model name
combined_df["top_model"] = model_ranks.idxmin(axis=1).str[5:]
combined_df["worst_model"] = model_ranks.idxmax(axis=1).str[5:]
combined_df

### Export

In [None]:
# Optional: uncomment to install `pyogrio`, which the export cell below
# requests via `engine="pyogrio"`
# pip install pyogrio

In [None]:
# Export to GeoJSON
xtrack_rms_gdf = gpd.GeoDataFrame(
    data=combined_df,
    geometry=gpd.points_from_xy(x=combined_df.x, y=combined_df.y),
    crs="EPSG:4326",
)
xtrack_rms_gdf.to_file("data/raw/rankings_altimetry_2017-2019_v2.fgb", engine="pyogrio")