In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib widget

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)

import os

import numpy as np
import pandas as pd
import xarray as xr

import matplotlib.pyplot as plt

from station_analysis import station_analysis, load_and_process_station_data

## Setup

In [None]:
# Station metadata table. The "uhslc_id" column becomes the row index so a
# station's row can be fetched directly by its id.
fn = "./data/tide_gauge_locations.csv"
locations = pd.read_csv(fn).set_index("uhslc_id")
locations

In [None]:
# Data-completeness thresholds passed to load_and_process_station_data and
# station_analysis in the cells below.
# NOTE(review): the meanings below are inferred from the names only — confirm
# against station_analysis.py.
min_hours_per_day = 20  # presumably: hourly samples required for a day to count
min_days_per_year = 320  # presumably: counted days required for a year to count
min_years_for_inclusion = 9  # presumably: counted years required to keep a station

In [None]:
# Output locations used by the analysis functions below: one for the quality
# control figures, one for the per-station tide prediction files.
qc_fig_dir = "./figures/quality_control/"
tide_prd_dir = "./data/tide_predictions/"

# Create both directories up front; already-existing directories are fine.
for out_dir in (qc_fig_dir, tide_prd_dir):
    os.makedirs(out_dir, exist_ok=True)

## Choose a station

In [None]:
# Station under review; change uhid to inspect a different gauge.
uhid = 370
# Metadata row for that station, looked up by index label in `locations`.
tg = locations.loc[uhid]
tg

## Show QC'd version
If QC instructions already exist in `quality_control.py`, then the following figure will include the QC adjustments.

In [None]:
# Run the project's loader for the selected station, passing the completeness
# thresholds and the tide-prediction / QC-figure directories.
hsl, hsl_trnd, tide, quality_years = load_and_process_station_data(
    tg,
    min_hours_per_day,
    min_days_per_year,
    min_years_for_inclusion,
    tide_prd_dir,
    qc_fig_dir,
)

# Subtract the trend from the hourly values, then subtract the predicted tide
# to leave the nontidal residual.
hsldt = hsl - hsl_trnd
res = hsldt - tide

# One figure with both series overlaid.
fig, ax = plt.subplots()
ax.plot(hsldt, label="detrended hourly")
# ax.plot(tide, label="predicted tide")
ax.plot(res, label="nontidal residuals")
ax.legend()
ax.set_title(f"{uhid:03d}: {tg.station_name} ({len(quality_years)} quality years)")
plt.xticks(rotation=45)
fig.tight_layout()

## Identify station issues

In [None]:
# Number of hourly samples in 19 years of 365.25 days.
hrs_in_epoch = int(19 * 365.25 * 24)

# Read the station's raw file and take the sea_level variable of the first
# record as a pandas object indexed by time.
raw_file = f"./data/tide_gauge_data/h{uhid:03d}.nc"
raw_ds = xr.load_dataset(raw_file)
hsl = raw_ds.isel(record_id=0)["sea_level"].to_pandas()

# Snap timestamps to the whole hour; where rounding produces repeated
# timestamps only the first occurrence is kept.
hsl.index = hsl.index.round("h")
is_first = ~hsl.index.duplicated(keep="first")

# Scale the stored values by 1/10.
# NOTE(review): presumably a unit conversion (mm -> cm) — confirm.
hsl = hsl.loc[is_first] / 10
# hsl.loc[hsl < -50] = None

# Least-squares fit of: constant offset + linear trend (in sample number) +
# one step function per date in t_steps. Each step column is 0 before its
# date and 1 from that date onward.
t_steps = ["2014-02-01", "2015-10-20", "2016-07-01", "2017-09-01"]
n_hours = hsl.size

step = pd.DataFrame(0, index=hsl.index, columns=range(len(t_steps)))
for col, start in enumerate(t_steps):
    step.loc[start:, col] = 1

# Design matrix columns: [1, sample number, step_0, step_1, ...].
design = np.column_stack([np.ones(n_hours), np.arange(n_hours), step.values])

# Only samples with an actual observation take part in the fit.
obs = hsl.values
valid = ~np.isnan(obs)
coef, *_ = np.linalg.lstsq(design[valid], obs[valid], rcond=None)

# Fitted curve evaluated at every timestamp, including the gaps.
y = pd.Series(design @ coef, index=hsl.index)

# Read this station's tide prediction and shift it so that the mean over its
# last hrs_in_epoch samples is zero.
tide_prd_file = f"{tide_prd_dir}t{uhid:03d}.csv"
tide_table = pd.read_csv(tide_prd_file, index_col="time", parse_dates=True)
tide = tide_table["tide_prediction"]
tide = tide - tide.iloc[-hrs_in_epoch:].mean()

# Flag residuals above 170 inside the 2022-01-25 .. 2022-03 window; `drop`
# holds the timestamps of the flagged samples.
residual_window = (hsl - tide).loc["2022-01-25":"2022-03"]
condition = residual_window > 170
drop = condition.index[condition]

# Diagnostic figure: raw hourly record, nontidal residual, and the samples
# flagged in `drop` (red dots).
# The residual is computed once and reused for both the line and the markers.
residual = hsl - tide

plt.figure()
plt.plot(hsl, label="hourly sea level")
# Optional overlays, enable as needed while investigating a station:
# plt.plot(hsl.loc[drop], ".r")
# plt.plot(tide, label="predicted tide")
plt.plot(residual, label="nontidal residuals")
plt.plot(residual.loc[drop], ".r")
# plt.plot((hsl - tide).loc[hsl - tide > 180], ".r", label="detrended hourly")
# plt.plot(y, "r", label="trend with step")
plt.legend()
plt.title(f"{uhid:03d}: {tg.station_name}")
_ = plt.xticks(rotation=45)
plt.tight_layout()

In [None]:
# Timestamps flagged by the threshold test in the previous cell.
drop

In [None]:
# Print every flagged value without pandas truncating the output.
flagged_values = hsl.loc[drop]
no_truncation = ("display.max_rows", None, "display.max_columns", None)
with pd.option_context(*no_truncation):
    print(flagged_values)

In [None]:
def _variance_without_gaps(values):
    """Variance of one group's values after discarding missing samples."""
    return values.dropna().var()


# Residual series overlaid with its variance per calendar-year group.
# NOTE(review): newer pandas releases deprecate the "A" alias in favour of
# "YE" — confirm against the pandas version this project pins.
plt.figure()
residual_series = hsl - tide
plt.plot(residual_series)
yearly_groups = residual_series.groupby(pd.Grouper(freq="A"))
plt.plot(yearly_groups.apply(_variance_without_gaps))
_ = plt.xticks(rotation=45)

## Rerun station analysis and save

In [None]:
# Delete the stored tide prediction for this station before rerunning.
# NOTE(review): presumably this forces station_analysis to regenerate the
# prediction with the current QC — confirm.
# The existence check keeps the cell re-runnable: the file may already be gone
# if a previous run of this cell removed it and then failed.
stale_prediction = f"{tide_prd_dir}t{uhid:03d}.csv"
if os.path.exists(stale_prediction):
    os.remove(stale_prediction)

# Table of per-station results; the first CSV column is the station id.
ga_file = "./output/global_analysis.csv"

global_analysis = pd.read_csv(ga_file, index_col=0)
global_analysis.index.name = "uhid"

analysis = station_analysis(
    tg,
    min_hours_per_day,
    min_days_per_year,
    min_years_for_inclusion,
    tide_prd_dir,
    qc_fig_dir,
)

# Overwrite (or add) this station's row and write the table back only when
# the analysis actually produced a result.
if analysis is not None:
    global_analysis.loc[uhid, :] = analysis
    global_analysis.to_csv(ga_file, index=True)
    print("Analysis complete.")
else:
    print("Analysis not performed, because station_analysis returned None.")

global_analysis

In [None]:
# Close every figure opened by the cells above.
plt.close("all")