In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib widget

import warnings
# Silence every FutureWarning and RuntimeWarning for the rest of the session.
# NOTE(review): these filters are process-wide, so deprecation notices and
# numerical warnings raised by any imported library are hidden as well --
# consider narrowing them to the specific messages that are known to be benign.
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)

import os

import numpy as np
import pandas as pd
import xarray as xr

import requests
import matplotlib.pyplot as plt

from station_analysis import station_analysis

# Load locations and meta

In [None]:
# Station metadata table, indexed by UHSLC id. It is cached on disk in `fn`;
# when the cache is missing it is rebuilt from the UHSLC ERDDAP server, a local
# ISO-3166 table, and the HDR data API, then written to `fn`.
fn = "./data/tide_gauge_locations.csv"

# load the metadata file if it already exists
if os.path.exists(fn):
    locations = pd.read_csv(
        fn,
        dtype=dict(
            station_country_code=int,
            uhslc_id=int,
        ),
        index_col="uhslc_id",
    )

# otherwise create it
else:

    # --------------------------------------------------------------------
    # get basic metadata from UHSLC server
    locations = pd.read_csv(
        "https://uhslc.soest.hawaii.edu/erddap/tabledap/global_hourly_fast.csv?latitude%2Clongitude%2Cstation_name%2Cstation_country%2Cstation_country_code%2Cuhslc_id&time%3E=2023-12-24T00%3A00%3A00Z&time%3C=2023-12-31T22%3A59%3A59Z&distinct()",
        dtype=dict(station_country_code="Int64", uhslc_id="Int64"),
        index_col="uhslc_id",
    )
    # keep only rows that carry a station id; an explicit notna() mask is used
    # because an `== pd.NA` comparison never evaluates to True
    # NOTE(review): presumably the id-less row is a non-data (units) line in the
    # server's CSV output -- confirm against the ERDDAP response
    locations = locations.loc[locations.index.notna()]

    # --------------------------------------------------------------------
    # add alpha-3 country codes
    # NOTE(review): index_col=5 is presumably the "numeric" ISO code column,
    # since it is matched against station_country_code below -- confirm
    country_codes = pd.read_csv(
        "./data/iso_3166_country_codes.csv", dtype=dict(numeric=int), index_col=5
    )
    # stations whose numeric code is absent from the ISO table keep None
    locations["station_country_alpha3"] = None
    for uhid in locations.index:
        num_cc = locations.loc[uhid, "station_country_code"]
        if num_cc in country_codes.index:
            locations.loc[uhid, "station_country_alpha3"] = country_codes.loc[
                num_cc, "alpha_3"
            ]

    # --------------------------------------------------------------------
    # get hdi values
    def get_hdi_from_api(a3):
        """Return the 2021 "hdi" indicator for the alpha-3 country code `a3`.

        Queries the HDR data API and returns the "value" field of the first
        record in the response. Returns None when `a3` is not a string (no
        country code is known) or when the API responds with an empty list.
        """
        if not isinstance(a3, str):
            # no country code to ask for; skip the request entirely
            return None
        response = requests.get(
            f"https://api.hdrdata.org/CountryIndicators/filter?country={a3}&year=2021&indicator=hdi",
            timeout=60,  # do not hang indefinitely on an unresponsive server
        ).json()
        return response[0]["value"] if len(response) > 0 else None

    locations["hdi"] = None
    for uhid in locations.index:
        hdi = get_hdi_from_api(locations.loc[uhid, "station_country_alpha3"])

        # if the api returns no hdi value, try getting the hdi of the sovereign;
        # the API result is tested directly instead of being read back from the
        # frame, and the lookup is skipped when the country code or the
        # sovereignty entry is missing
        num_cc = locations.loc[uhid, "station_country_code"]
        if hdi is None and num_cc in country_codes.index:
            sov = country_codes.loc[num_cc, "sovereignty"]
            if isinstance(sov, str) and sov:
                # countries whose name begins with the sovereign's name
                sov_cca3 = [
                    c.alpha_3
                    for _, c in country_codes.iterrows()
                    if str(c["name"]).startswith(sov)
                ]
                if len(sov_cca3) > 0:
                    # prefer USA when it is among the matches, else the first match
                    sov_cca3 = sov_cca3[0] if "USA" not in sov_cca3 else "USA"
                    hdi = get_hdi_from_api(sov_cca3)

        locations.loc[uhid, "hdi"] = hdi

    # --------------------------------------------------------------------
    # save
    locations.to_csv(fn, index=True)

locations

# Perform station analysis 
Analyze the global set of stations. This step includes the calculation of $\Delta h$.

In [None]:
# Thresholds forwarded unchanged to station_analysis() for every station.
# NOTE(review): the names suggest data-completeness requirements (hours per
# day, days per year, years per station) -- confirm the exact meaning against
# the station_analysis module.
min_hours_per_day = 20
min_days_per_year = 320
min_years_for_inclusion = 9

In [None]:
# Locations handed to the station analysis and used for its saved results.
qc_fig_dir = "./figures/quality_control/"
tide_prd_dir = "./data/tide_predictions/"
ga_file = "./output/global_analysis.csv"

# make sure every directory exists before anything is written into it
for _required_dir in (qc_fig_dir, tide_prd_dir, "./output"):
    os.makedirs(_required_dir, exist_ok=True)

In [None]:
# Run station_analysis() for every station in `locations` and collect the
# results in `global_analysis` (one row per station, indexed by "uhid").
# Results are checkpointed to `ga_file` after each new row, and stations that
# are already present in a previously saved file are skipped.
if os.path.exists(ga_file):
    global_analysis = pd.read_csv(ga_file, index_col=0)
    global_analysis.index.name = "uhid"
else:
    global_analysis = None

for n, (uhid, tg) in enumerate(locations.iterrows()):
    if global_analysis is not None and uhid in global_analysis.index:
        continue
    # progress line is overwritten in place until the last station
    print(
        f"Location {n+1} of {locations.index.size}",
        end="\r" if n + 1 < locations.index.size else "\n\n",
    )
    analysis = station_analysis(
        tg,
        min_hours_per_day,
        min_days_per_year,
        min_years_for_inclusion,
        tide_prd_dir,
        qc_fig_dir,
    )
    if analysis is None:
        # nothing to record for this station, and nothing new to save; this
        # also covers the case where no results table exists yet
        continue

    if global_analysis is None:
        # first result: create an empty table whose columns are the labels of
        # the returned analysis
        # NOTE(review): "hdi" is given an integer dtype here -- confirm that
        # this is intended for that column
        dtypes = {
            c: (
                int
                if c in ["hdi", "n_good_years"]
                else (object if c == "name" else float)
            )
            for c in analysis.index
        }
        global_analysis = pd.DataFrame(columns=analysis.index)
        global_analysis = global_analysis.astype(dtypes)
        global_analysis.index.name = "uhid"

    global_analysis.loc[uhid, :] = analysis

    # checkpoint after every added station so an interrupted run can resume
    global_analysis.to_csv(ga_file, index=True)

global_analysis

# Quick comparisons

In [None]:
# Compare one reference column (c1) against several candidate columns (c2):
# print the correlation coefficient of each pair, then scatter them together.
c1 = "dh_median_26_days"
c2 = ["tide_dymx_std", "res_momn_std", "res_hf_dymx_std"]

reference = global_analysis[c1]

print(f"Correlation between {c1} and ...")
for c2i in c2:
    # off-diagonal entry of the 2x2 correlation matrix
    r = np.corrcoef([reference, global_analysis[c2i]])[0, 1]
    print(f"{c2i}: {r}")

plt.figure()
for c2i in c2:
    plt.plot(reference, global_analysis[c2i], "o", label=c2i)
_ = plt.legend()

In [None]:
# Same comparison as a correlation printout plus scatter plot, here for the
# monthly-residual columns listed in c2 against the reference column c1.
c1 = "dh_median_26_days"
c2 = ["res_momn_std", "res_momn_q75_std", "res_momn_amx_std"]

reference = global_analysis[c1]

print(f"Correlation between {c1} and ...")
for c2i in c2:
    # off-diagonal entry of the 2x2 correlation matrix
    r = np.corrcoef([reference, global_analysis[c2i]])[0, 1]
    print(f"{c2i}: {r}")

plt.figure()
for c2i in c2:
    plt.plot(reference, global_analysis[c2i], "o", label=c2i)
_ = plt.legend()

# Temporal analysis
This step includes calculation of $\Delta t$ and SLR contributions.

First load the gridded Interagency SLR projections.

In [None]:
# Open the gridded projections and, for every scenario, add a derived
# "rsl_massanddeformation_<scenario>" variable equal to the sum of the
# AIS, GIS, glaciers and land-water-storage variables of that scenario.
slr = xr.open_dataset("./data/slr_scenarios/TR_gridded_projections.nc")
massdef_components = ["AIS", "GIS", "glaciers", "landwaterstorage"]

massdef_totals = []
for scn in ["Low", "IntLow", "Int", "IntHigh", "High"]:
    # stack the component fields along a temporary dimension and collapse it
    stacked = xr.concat(
        [slr[f"rsl_{c}_{scn}"] for c in massdef_components], dim="component"
    )
    massdef_totals.append(
        stacked.sum(dim="component").rename(f"rsl_massanddeformation_{scn}")
    )

# attach all derived variables to the dataset in one merge
slr = xr.merge([slr, *massdef_totals])
slr

Then stack the lon/lat coordinates and isolate ocean points.

In [None]:
# Flatten the lon/lat grid into a single "location" dimension.
slr_stack = slr.stack({"location": ("lon", "lat")})

# Build the ocean mask from one reference field (first year, second percentile
# entry of the Int-scenario total) and keep only locations above the threshold.
# NOTE(review): -3e4 presumably separates a large negative fill value used for
# non-ocean cells from real data -- confirm against the dataset's metadata.
mask_field = slr_stack["rsl_total_Int"].isel(years=0, percentiles=1)
ocean = mask_field.values > -3e4
slr_stack = slr_stack.sel(location=ocean)
slr_stack

Loop over each station and perform the temporal calculations.

In [None]:
# For every analysed station:
#   1. find the nearest ocean cell of the stacked SLR grid,
#   2. for each dh column, start year and scenario, compute dt: the number of
#      years after the start year at which total SLR has first risen by dh,
#   3. tabulate the change of each SLR component over the `slr_duration` years
#      following each start year.
# The results are merged into `global_analysis` and saved to `ga_file`.
scn_names = ["Low", "IntLow", "Int", "IntHigh", "High"]
dh_days = [c for c in global_analysis.columns if c[:2] == "dh"]
dt_start_years = [2020, 2030, 2040, 2050, 2060]
slr_duration = 30  # years

# output label -> name stem of the matching "rsl_<stem>_<scenario>" variable
slr_components = dict(
    total="total",
    ocean_dyn="oceandynamics",
    vlm="verticallandmotion",
    ais="AIS",
    gis="GIS",
    glaciers="glaciers",
    landwater="landwaterstorage",
    massdef="massanddeformation",
)
total_var_names = [f"rsl_total_{s}" for s in scn_names]

# annual time axis spanning the projection years (identical for all stations)
annual_index = range(int(slr_stack.years[0]), int(slr_stack.years[-1]) + 1)

t_analysis = dict()
for tgi, tg in global_analysis.iterrows():

    # results for this station, keyed by output column name
    station_results = dict()

    # isolate the closest location in the SLR grid
    # NOTE(review): plain Euclidean distance in degrees; assumes tg.lon uses
    # the same longitude convention as the grid -- confirm
    closest_ocean = np.sqrt(
        (slr_stack.lon - tg.lon) ** 2 + (slr_stack.lat - tg.lat) ** 2
    ).idxmin()

    # all variables of the closest grid cell at percentile 50, indexed by year;
    # selected once and reused for both the dt and the contribution tables
    tg_cell = slr_stack.sel(
        lon=closest_ocean.lon, lat=closest_ocean.lat, percentiles=50
    ).to_pandas()

    # time series of total SLR per scenario
    tg_slr = tg_cell[total_var_names] / 10  # cm
    tg_slr.columns = scn_names

    # interpolate to annual resolution
    tg_slr = tg_slr.reindex(annual_index).interpolate(method="cubicspline")

    # loop over the dh values, start years, and scenarios; calculate and tabulate dt
    for dh in dh_days:
        dt_str_base = f"dt{dh[2:]}"
        for sy in dt_start_years:
            # rise relative to the start year, looking forward only so that
            # years before the start year cannot be picked as the crossing
            rise = tg_slr.loc[sy:] - tg_slr.loc[sy]
            # idxmax gives the first year the threshold is met, or the start
            # year itself when it is never met
            dt = (rise >= tg[dh]).idxmax(axis=0) - sy
            # dt values <= zero mean the transition never occurs due to, for
            # example, uplift; assign such transitions to have infinite
            # duration. The integer result is cast to float first because
            # writing inf into an integer Series in place relies on implicit
            # upcasting, which pandas has deprecated.
            dt = dt.astype(float).where(dt > 0, np.inf)
            for scn in scn_names:
                station_results[f"{dt_str_base}_{scn}_{sy}"] = dt.loc[scn]

    # get change in each contribution to SLR during intervals following the dt start yrs
    for y0 in dt_start_years:
        y1 = y0 + slr_duration
        for scn in scn_names:
            for c, stem in slr_components.items():
                var_name = f"rsl_{stem}_{scn}"
                station_results[f"slr_{c}_{scn}_{y0}_{y1}"] = (
                    tg_cell.loc[y1, var_name] - tg_cell.loc[y0, var_name]
                ) / 10  # cm

    t_analysis[tgi] = station_results

# make a dataframe from the temporal analysis (one row per station)
t_analysis_df = pd.DataFrame.from_dict(t_analysis, orient="index")
t_analysis_df.index.name = global_analysis.index.name

# replace existing t_analysis columns in global_analysis; append new ones.
# New columns are appended with a single concat rather than inserted one at a
# time, which avoids fragmenting the frame.
new_ga_columns = []
for tc in t_analysis_df.columns:
    if tc in global_analysis.columns:
        global_analysis[tc] = t_analysis_df[tc]
    else:
        new_ga_columns.append(tc)
global_analysis = pd.concat([global_analysis, t_analysis_df[new_ga_columns]], axis=1)

# save and show
global_analysis.to_csv(ga_file, index=True)
global_analysis