In [None]:
import pandas as pd
import holoviews as hv
import hvplot.pandas

from seastats.stats import get_stats

## Load files

In [None]:
# Read the observed and simulated data from the repository's test fixtures.
# Both are loaded with pandas' parquet reader; the two reads are independent.
obs = pd.read_parquet('../tests/data/abed_obs.parquet')
sim = pd.read_parquet('../tests/data/abed_sim.parquet')

In [None]:
def first_column_as_series(data):
    """Return ``data`` as a Series.

    A DataFrame is reduced to its first column. Anything else (i.e. a Series
    that was already extracted) is returned unchanged, so this cell can be
    re-run without failing on the missing ``.columns`` attribute.
    """
    if isinstance(data, pd.DataFrame):
        return data[data.columns[0]]
    return data


# sim and obs need to be Series
obs = first_column_as_series(obs)
sim = first_column_as_series(sim)

In [None]:
# Overlay the two series: observations drawn dashed, simulation with the
# default line style. The overlay is the cell's displayed output.
obs_curve = obs.hvplot(line_dash="dashed")
sim_curve = sim.hvplot()
obs_curve * sim_curve

## Basic metrics
Comparing the `obs` `pd.Series` with the `sim` `pd.Series`.

In [None]:
# NOTE: arguments are passed as (sim, obs), the same order used by the
# match_extremes / storm_metrics calls later in this notebook.
stats = get_stats(sim, obs)

# One-row table of the metrics, with 'abed' as the row label.
pd.DataFrame(stats, index=['abed'])

## Storm analysis

In [None]:
from seastats.storms import storm_metrics
from pyextremes import get_extremes

# Settings shared by the storm-analysis cells below.
# `cluster_duration` is formatted as f"{cluster_duration}h" when passed to
# get_extremes, i.e. it is expressed in hours.
cluster_duration = 72
# `quantile` is the quantile of each series used as the extremes threshold.
quantile = 0.99

modeled extremes

In [None]:
# The threshold is the `quantile`-quantile of the simulated series.
threshold = sim.quantile(quantile)

# Extract the "POT" extremes of the simulation above that threshold, using
# `cluster_duration` (in hours) for the `r` argument.
ext_ = get_extremes(
    sim,
    "POT",
    threshold=threshold,
    r=f"{cluster_duration}h",
)

# Keep the peak values together with their timestamps, indexed by time.
extremes1 = pd.DataFrame(
    {"modeled": ext_, "time_model": ext_.index},
    index=ext_.index,
)
ext_

In [None]:
# Simulated series with its extracted extremes highlighted as red points.
sim_plot = (
    sim.hvplot()
    * hv.Scatter((extremes1.index, extremes1.modeled)).opts(color="red")
)
sim_plot

observed extremes

In [None]:
threshold = obs.quantile(quantile)
ext_ = get_extremes(obs, "POT", threshold=threshold, r=f"{cluster_duration}h")
extremes2 = pd.DataFrame({"modeled" : ext_, "time_model" : ext_.index}, index=ext_.index)
ext_

In [None]:
obs_plot = obs.hvplot(line_dash = "dashed") * hv.Scatter((extremes2.index, extremes2.modeled)).opts(color="red")
obs_plot

match extremes

In [None]:
from seastats.storms import match_extremes

# Reuse the shared `quantile` / `cluster_duration` settings instead of
# repeating the literals 0.99 and 72, so this table stays consistent with the
# extremes extracted earlier if the settings are changed.
extremes_df = match_extremes(sim, obs, quantile, cluster=cluster_duration)
extremes_df

In [None]:
# Map the raw column names onto human-readable axis labels.
axis_labels = {
    'tdiff': 'time difference (hours)',
    'diff': 'peak difference (m)',
}
peak_offsets = extremes_df[['tdiff', 'diff']].rename(columns=axis_labels)

# One point per row of `extremes_df`: time difference against peak difference.
hv.Points(peak_offsets, kdims=list(axis_labels.values())).opts(
    size=8,
    tools=['hover'],
)

We define the following metrics for the storm events:

* `R1`/`R3`/`error_metric`: we select the biggest observed storms, and then calculate the error (i.e. the absolute value of the difference between the modeled and the observed peaks)
  * `R1` is the error for the biggest storm
  * `R3` is the mean error for the 3 biggest storms
  * `error_metric` is the mean error for all the storms above the threshold.

* `R1_norm`/`R3_norm`/`error`: Same methodology, but the values are normalised (in %) by the observed peaks.

In [None]:
# Use the shared `quantile` / `cluster_duration` settings rather than the
# literals 0.99 and 72, so the storm metrics are computed with the same
# settings as the extremes shown above.
metrics = storm_metrics(sim, obs, quantile=quantile, cluster=cluster_duration)

# Combine the basic statistics and the storm metrics into a single one-row
# table; on a key clash the storm metric wins, as with dict(stats, **metrics).
pd.DataFrame({**stats, **metrics}, index=['abed'])