# Protocol for skill assessment

The skill assessment covers most of the year 2023: from 2023-01-01 to 2023-11-30. 

It is the most energetic period of the year for cylones, shown is the next figure ![figure](https://private-user-images.githubusercontent.com/18373442/311166159-412c182f-9dd5-43d7-936e-00393c6854f2.png?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3MTM0MjkxODgsIm5iZiI6MTcxMzQyODg4OCwicGF0aCI6Ii8xODM3MzQ0Mi8zMTExNjYxNTktNDEyYzE4MmYtOWRkNS00M2Q3LTkzNmUtMDAzOTNjNjg1NGYyLnBuZz9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNDA0MTglMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjQwNDE4VDA4MjgwOFomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPTE1ZjM2ZjZhMmQxZDdjNjUyZTJlOWU2MGYxZjJjNGE3NzcxZmFmNzBlOGE0OTAyZDY1ZTI0NTM5ZGU2MDgzOWQmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0JmFjdG9yX2lkPTAma2V5X2lkPTAmcmVwb19pZD0wIn0.onbyTy-cJv_DgcedHqPRQhbauxBuV2inXMvttdvYgZw)

It seems that the beginning of the year was marked by cyclones in the South Indian Ocean. 

During the fall, in contrast, cyclones were more frequent in the Pacific and North Atlantic oceans. 

## 1 - get cyclone / event information  
We will extract the cyclone tracks from the [IBTrACS](https://www.ncei.noaa.gov/products/international-best-track-archive) data.

In [None]:
# Create ./data if needed, then download the IBTrACS v04r00 "ALL" NetCDF file into it.
! mkdir -p data
! wget https://www.ncei.noaa.gov/data/international-best-track-archive-for-climate-stewardship-ibtracs/v04r00/access/netcdf/IBTrACS.ALL.v04r00.nc -O data/IBTrACS.ALL.v04r00.nc

In [None]:
# load IBtracks data 
import xarray as xr 
import numpy as np
import pandas as pd
import tqdm
import glob
import os
import json
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from analysea.tide import detide 
import geopandas as gpd
from scipy.spatial import cKDTree

# Open the downloaded IBTrACS file; the bare name on the last line displays the dataset in the notebook.
cyclones = xr.open_dataset('data/IBTrACS.ALL.v04r00.nc')
cyclones

In [None]:
# select only the storms active during the 2023 assessment period (2023-01-01 to 2023-11-30)
indice_storm_2023 = []
def is_in_summer_2023(times, start='2023-01-01', end='2023-11-30'):
    """Tell whether at least one timestamp lies strictly inside (start, end).

    Despite the name, the default window is not limited to summer: it runs
    from 2023-01-01 to 2023-11-30, both bounds excluded.

    Parameters
    ----------
    times : iterable of bytes or str
        Timestamps in a format `pd.Timestamp` can parse. Bytes entries are
        decoded first. Empty entries parse to NaT and never match.
    start, end : str or timestamp-like
        Exclusive lower and upper bounds of the window.

    Returns
    -------
    numpy.bool_
        True if any entry falls inside the window, False otherwise
        (including for an empty input).
    """
    lower, upper = pd.Timestamp(start), pd.Timestamp(end)
    # Parse each entry once and reuse it for both bound checks.
    stamps = [
        pd.Timestamp(t.decode() if isinstance(t, bytes) else t) for t in times
    ]
    # Comparisons against NaT are False, so padding entries are ignored.
    return np.any([lower < stamp < upper for stamp in stamps])

# Scan only the storms from position 13300 onwards (no need to loop over the
# 19th/20th century!!) and keep the ones with a timestamp inside the window.
indice_storm_2023.extend(
    candidate
    for candidate in tqdm.tqdm(cyclones.storm.values[13300:])
    if is_in_summer_2023(cyclones.isel(storm=candidate).iso_time.values)
)
# takes ~ 20sec to run

In [None]:
def extract_specs(istorm): 
    """Pull the track of one storm out of the global `cyclones` dataset.

    Positions whose longitude is NaN are dropped from every per-point array.

    Parameters
    ----------
    istorm : int or int-like
        Position of the storm along the `storm` dimension (cast with `int`).

    Returns
    -------
    tuple
        `(x, y, time, wind_max, r64, name, basin)` where `x`/`y` are the
        `lon`/`lat` values, `wind_max` is `usa_wind`, `r64` is `usa_rmw`
        divided by `111 * cos(lat)`, `name` is the storm name as a numpy
        array and `basin` the per-point basin values.
    """
    storm = cyclones.isel(storm=int(istorm))
    lon = np.array(storm.lon)
    valid = ~np.isnan(lon)  # True where the track actually has a position

    def _valid(variable):
        # numpy copy of a per-point variable, restricted to valid positions
        return np.array(variable)[valid]

    x = lon[valid]
    y = _valid(storm.lat)
    time = _valid(storm.time)
    wind_max = _valid(storm.usa_wind)
    # NOTE(review): despite the name `r64`, this is built from `usa_rmw`; the
    # division looks like a km -> degrees-of-longitude conversion, so confirm
    # the units of `usa_rmw` before relying on the radius.
    r64 = _valid(storm.usa_rmw) / (111 * np.cos(np.deg2rad(y)))
    basin = _valid(storm.basin)
    name = np.array(storm.name)
    return x, y, time, wind_max, r64, name, basin

def plot_cyclone_track(x, y, wind_max, r64, name, ax = None):
    """Draw one cyclone track: points coloured by wind plus one circle per point.

    Parameters
    ----------
    x, y : array-like
        Track coordinates.
    wind_max : array-like
        Value used for the colour of each point (colour scale fixed to 0-100).
    r64 : array-like
        Radius of the hatched circle drawn around each point, in axis units.
    name : object
        Used as the legend label of the scatter.
    ax : matplotlib axes, optional
        Axes to draw on; a new 16x8 figure is created when omitted.

    Returns
    -------
    tuple
        `(ax, im)` with `im` the scatter artist (usable for a colorbar).
    """
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(16, 8))
    im = ax.scatter(x, y, c=wind_max, label=name, s=1, cmap='jet', vmin=0, vmax=100)
    for ix, (cx, cy) in enumerate(zip(x, y)):
        # same 0-100 scale as the scatter, capped at the top of the colormap
        shade = cm.jet(np.min([1, wind_max[ix] / 100]))
        ring = plt.Circle(
            (cx, cy),
            radius=r64[ix],
            fill=False,
            hatch='////',
            color=shade,
            alpha=0.3,
        )
        ax.add_patch(ring)
    # the aspect ratio is left untouched here; callers set it if they need to
    return ax, im

In [None]:
# check 2023 cyclone tracks: overview map of every selected storm, and a
# printed line for each storm whose peak wind exceeds 100
fig, ax = plt.subplots(1,1, figsize=(16,8))
for istorm in indice_storm_2023:
    x, y, time, wind_max, r64, name, basin = extract_specs(istorm)
    ax, im = plot_cyclone_track(x, y, wind_max, r64, name, ax)
    # `extract_specs` only filters on missing longitudes, so the wind array may
    # still hold NaN. Builtin max() is order-dependent with NaN and np.max()
    # returns NaN, so take the peak over the valid values only.
    valid_wind = wind_max[~np.isnan(wind_max)]
    if valid_wind.size and valid_wind.max() > 100: 
        print(f"index IBtracks #{istorm}, name: {name}, max wind {valid_wind.max()}, start: {np.min(time)} end: {np.max(time)}")
plt.colorbar(im, ax = ax, orientation = 'horizontal', fraction = 0.05, aspect = 60)
plt.tight_layout()

## 2 - compare the cyclone tracks with tide gauges locations
### 2.1 - get clean tide gauges 
The clean tide gauge records come from `ioc_cleanup`: https://github.com/seareport/ioc_cleanup

In [None]:
# get clean tide gauges 
def files_in_folder(folder, ext = '.csv'):
    """List the base names of the files matching `folder*` that end with `ext`.

    Parameters
    ----------
    folder : str
        Path prefix handed to `glob` as `folder + '*'`; pass a directory with
        its trailing separator to list that directory.
    ext : str
        Required file-name ending (any extension, not only .csv/.parquet).

    Returns
    -------
    list of str
        File names without directory and without the trailing `ext`, in the
        order returned by `glob`.
    """
    names = []
    for path in glob.glob(folder + '*'):
        if not path.endswith(ext):
            continue
        base = os.path.basename(path)
        # Remove only the trailing extension: an `ext` occurring earlier in the
        # path (e.g. in a directory name) must not truncate the result.
        names.append(base[: len(base) - len(ext)])
    return names

In [None]:
# Folder holding the cleaned tide-gauge parquet files (absolute, machine-specific path: adjust locally).
CLEAN_FOLDER = "/home/tomsail/work/python/seareport_org/ioc_cleanup/clean/"
# Names (file name without extension) of every parquet file found in that folder.
list_TG_2023 = files_in_folder(CLEAN_FOLDER, ext = '.parquet')
len(list_TG_2023)

### 2.2 - identify tide gauges with SEASET

In [None]:
## get SEASET stations -- latest version
# (absolute, machine-specific path: adjust locally; the first CSV column is used as the index)
seaset_full = pd.read_csv("/home/tomsail/Documents/work/python/oceanmodelling/seaset/Notebooks/catalog_full_updated.csv", index_col=0)
# some IOC codes were removed from the catalog when two codes share a location, e.g. prin/prin2

def is_similar_station(ioc_code, station_list):
    """Return True when at least one name in `station_list` begins with `ioc_code`."""
    for candidate in station_list:
        if candidate.startswith(ioc_code):
            return True
    return False

# Keep only the catalog rows that carry an IOC code ...
ioc_seaset = seaset_full.dropna(subset='ioc_code')
# ... and, among those, the rows whose code is a prefix of at least one cleaned station name
# (prefix matching, so a catalog code such as `prin` also matches a file named `prin2`).
subset_2023 = ioc_seaset[ioc_seaset['ioc_code'].apply(is_similar_station, station_list=list_TG_2023)]
subset_2023

### 2.3 - refine to stations which are in the vicinity of the cyclone tracks

In [None]:
dist_max = 5 # in degrees

def dist(lon1, lat1, lon2, lat2):
    """Planar distance, in degrees, between lon/lat positions.

    The longitude difference is wrapped into [-180, 180) so that two points
    on either side of the antimeridian (e.g. 179 and -179) are 2 degrees
    apart instead of 358. Longitudes may therefore use either the -180..180
    or the 0..360 convention.

    Parameters
    ----------
    lon1, lat1 : float
        First position.
    lon2, lat2 : float or numpy array
        Second position(s); arrays are handled element-wise.

    Returns
    -------
    float or numpy array
        `sqrt(dlat**2 + dlon**2)` with the wrapped `dlon`.
    """
    dlon = (lon2 - lon1 + 180) % 360 - 180
    dlat = lat2 - lat1
    return np.sqrt(dlat ** 2 + dlon ** 2)

def is_close_station(station, lons, lats, dist_max = 5):
    """Tell whether a station lies within `dist_max` degrees of any track point.

    Parameters
    ----------
    station : tuple
        `(longitude, latitude)` of the station.
    lons, lats : array-like
        Track coordinates. They are not modified; NaN entries never match.
    dist_max : float
        Distance threshold in degrees (strict comparison).

    Returns
    -------
    numpy.bool_
    """
    lon, lat = station
    # No in-place renormalisation of `lons`: `dist` wraps the longitude
    # difference itself, so the caller's array stays untouched.
    track_lons = np.asarray(lons, dtype=float)
    track_lats = np.asarray(lats, dtype=float)
    return np.any(dist(lon, lat, track_lons, track_lats) < dist_max)

def get_tracks(cyclone_data):
    """Concatenate the `lon` / `lat` values of every storm into two flat lists.

    Parameters
    ----------
    cyclone_data : dataset
        Object exposing `storm.values`, `isel(storm=...)` and `lon` / `lat`.

    Returns
    -------
    tuple of list
        `(lons, lats)`, storm after storm, in the order of `storm.values`.
    """
    lons, lats = [], []
    for storm in cyclone_data.storm.values:
        track = cyclone_data.isel(storm=storm)
        lons.extend(track.lon.values)
        lats.extend(track.lat.values)
    return lons, lats

def subset_from_cyclone(df, cyclone_data, dist_max = 5):
    """Select the rows of `df` lying close to any track held in `cyclone_data`.

    The tracks are flattened with `get_tracks` and the row selection is the
    one implemented by `subset_from_tracks`.

    Parameters
    ----------
    df : DataFrame
        Stations, with `longitude` and `latitude` columns.
    cyclone_data : dataset
        Storms to consider (see `get_tracks`).
    dist_max : float
        Maximum distance, in degrees, for a station to be kept.

    Returns
    -------
    DataFrame
        The rows of `df` within `dist_max` of at least one track point.
    """
    track_lons, track_lats = get_tracks(cyclone_data)
    return subset_from_tracks(df, track_lons, track_lats, dist_max = dist_max)

def subset_from_tracks(df, lons, lats, dist_max = 5):
    """Select the rows of `df` lying within `dist_max` degrees of a track.

    Parameters
    ----------
    df : DataFrame
        Stations, with `longitude` and `latitude` columns.
    lons, lats : array-like
        Track coordinates.
    dist_max : float
        Maximum distance, in degrees, for a station to be kept.

    Returns
    -------
    DataFrame
        The rows of `df` for which `is_close_station` is True (an empty
        selection when `df` has no rows).
    """
    # Convert the track once instead of once per station row. `np.array`
    # copies, so the caller's arrays are never touched.
    track_lons = np.array(lons)
    track_lats = np.array(lats)
    is_close = [
        is_close_station((lon, lat), track_lons, track_lats, dist_max = dist_max)
        for lon, lat in zip(df['longitude'], df['latitude'])
    ]
    # A boolean ndarray selects rows by position and also handles an empty frame.
    return df[np.asarray(is_close, dtype=bool)]


In [None]:
# superpose with 2023 cyclone tracks
# Draw every selected storm on one figure ...
fig, ax = plt.subplots(1,1, figsize=(16,8))
for istorm in indice_storm_2023:
    x, y, time, wind_max, r64, name, basin = extract_specs(istorm)
    ax, im = plot_cyclone_track(x, y, wind_max, r64, name, ax)
# ... then overlay, as red stars, the stations within the default distance (5 degrees) of any of those tracks.
stations_impacted_all = subset_from_cyclone(subset_2023, cyclones.isel(storm=indice_storm_2023))
stations_impacted_all.plot.scatter(x='longitude', y='latitude', ax=ax, s= 100, c = 'r', marker = "*", edgecolor = 'k')

### 2.4 - split the analysis into basins
The basins in IBTrACS are the following:

In [None]:
# Display name -> two-letter code; the codes are compared below with the decoded per-point `basin` values of each storm.
basins = {
    "East Pacific":  'EP',
    "North Atlantic":'NA',
    "North Indian":  'NI',
    "South Indian":  'SI',
    "South Pacific": 'SP',
    "West Pacific":  'WP',
}

In [None]:
# For each storm, collect the tide gauges close to its track and save the result as JSON.
# Entries are keyed by storm index, so a storm crossing several basins is simply written again with the same content.
out = dict()
for basin in basins.keys():
    zone = basins[basin]
    for istorm in indice_storm_2023:
        x, y, time, wind_max, r64, name, basins_ = extract_specs(istorm)
        list_ = [b.decode() for b in basins_]
        # keep the storm only if at least one of its track points carries this basin code
        if zone in list_:
            stations_impacted = subset_from_tracks(subset_2023, x, y)
            # NOTE(review): `ax` is not created in this cell, so this draws on whichever axes an earlier cell left behind; confirm that is intended
            stations_impacted.plot.scatter(x='longitude', y='latitude',ax=ax, s= 100, c = 'r', marker = "*", edgecolor = 'k')
            if len(stations_impacted) > 0:
                params = {
                    'name': str(name)[2:-1], # strip the b'...' wrapper of the bytes representation
                    'start' : pd.Timestamp(time[0]).strftime('%Y-%m-%d %H:%M:%S'),
                    'end' : pd.Timestamp(time[-1]).strftime('%Y-%m-%d %H:%M:%S'),
                    'stations': [c_ for c_ in stations_impacted.ioc_code.values]
                }
                out[str(istorm)] = params
                print(params)
# NOTE(review): the comparison section further down loads ./data/storms/cyclones_2023.json, not this file; confirm how the two relate
with open('stations_impacted_2023.json', 'w') as f:
    json.dump(out, f, indent=2)

## 3 - detide the selected stations
First, create a folder for the detided station data.

In [None]:
# Create the output folder for the detided series (no error if it already exists).
! mkdir -p data/surge

In [None]:
# Folder where the detided (surge) series are written and later read back, one parquet file per station.
SURGE_FOLDER = "./data/surge/"

In [None]:
# Detide each selected station once: stations that already have a surge file are skipped.
for ioc_code, lat in zip(subset_2023.ioc_code, subset_2023.latitude):
    surge_file = SURGE_FOLDER + f"{ioc_code}.parquet"
    if os.path.exists(surge_file):
        continue
    # the first column of the cleaned file is taken as the water level signal
    df = pd.read_parquet(CLEAN_FOLDER + f"{ioc_code}.parquet")
    surge = detide(df[df.columns[0]], lat=lat, resample_detide = True)
    surge.to_frame().to_parquet(surge_file)
# 1 hour without resampling 
# 30 sec 

## 4 - compare with model results 

In [None]:
# Locations of the model result files (absolute, machine-specific paths: adjust locally).
# NOTE(review): `BASE` is not used by the two paths below, which point to a different root; confirm which one is current.
BASE = "/home/tomsail/Documents/work/python/pyPoseidon/Tutorial/models/"
v0 = "/home/tomsail/work/python/pyPoseidon/Tutorial/models/v0.0/telemac/results_2D.nc"
v0p2 = "/home/tomsail/work/python/pyPoseidon/Tutorial/models/v0.2/telemac/results_2D.nc"
# v2p0 = "/home/tomsail/work/models/results/global-v2/202307_2D_tri.zarr"

In [None]:
## load result file -- 2D
# One dataset per model version; the bare name on the last line displays the v0.2 dataset in the notebook.
model_tel_v0 = xr.open_dataset(v0)
model_tel_v0p2 = xr.open_dataset(v0p2)
# model_tel_v2 = xr.open_dataset(v2p0)
model_tel_v0p2

In [None]:
# load json
# `events` is indexed by storm id; each entry is read below through its 'start', 'end', 'name' and 'stations' keys.
import json

with open("./data/storms/cyclones_2023.json", "r") as f:
    events = json.load(f)

global variables for the following plots

In [None]:
# Model datasets to compare, with the label, line opacity and colour used when plotting each of them.
# NOTE(review): `version2D` has four labels for two datasets; labels are looked up by position, so only the first two are used.
ds_2D = [model_tel_v0, model_tel_v0p2 ]
version2D = ["v0.0", "v0.2.0","v0.2.1", "v2.0"]
alpha = [1, 0.6, 0.4, 0.3,  0.3]
colors = ['blue', 'red', 'purple', 'brown', 'green']

useful functions

In [None]:

def is_overlapping(tris, meshx):
    """Flag the triangles whose vertex x-coordinates are more than 180 apart.

    Parameters
    ----------
    tris : integer array, shape (n_triangles, 3)
        Node indices of each triangle.
    meshx : array
        x-coordinate of each node.

    Returns
    -------
    boolean array
        True where the second or third vertex is more than 180 away (in x)
        from the first vertex of the triangle.
    """
    half_span = 180
    first, second, third = meshx[tris].T
    far_second = abs(second - first) > half_span
    far_third = abs(third - first) > half_span
    return np.logical_or(far_second, far_third)

def extract_max_elev(ds, times): 
    """Return the maximum of `ds.elev` over the time steps nearest to `times`.

    The selection uses `method='nearest'` and the maximum is taken along the
    `time` dimension.
    """
    return ds.elev.sel(time=times, method='nearest').max(dim='time')

def plot_max_elev(ax, ds, times): 
    """Fill-contour the maximum elevation reached over `times` on `ax`.

    Triangles flagged by `is_overlapping` are left out of the triangulation.
    Contour levels run from 0 to 0.5 in steps of 0.02, extended on both sides.

    Returns
    -------
    The object returned by `ax.tricontourf` (usable for a colorbar).
    """
    wrapped = is_overlapping(ds.face_nodes,ds.longitude)
    peak = extract_max_elev(ds,times)
    contour_levels = np.arange(-0, 0.5, 0.02)
    return ax.tricontourf(
        ds.longitude,
        ds.latitude,
        ds.face_nodes[~wrapped],
        peak.values,
        levels = contour_levels,
        extend = 'both',
    )

def closest_n_points(nodes, N, meshXY, dist_max=np.inf):
    """Find the `N` mesh points closest to `nodes`, dropping those beyond `dist_max`.

    Parameters
    ----------
    nodes : array
        Query coordinates.
    N : int
        Number of neighbours requested per query point.
    meshXY : array, shape (n_points, n_dims)
        Coordinates the KD-tree is built from.
    dist_max : float
        Neighbours farther than this are discarded.

    Returns
    -------
    tuple of arrays
        `(indices, distances)` of the retained neighbours, flattened by the
        boolean selection.
    """
    tree = cKDTree(meshXY)
    distances, neighbours = tree.query(nodes, range(1, N + 1))
    # keep everything that is not strictly farther than the threshold
    within = ~(distances > dist_max)
    return neighbours[within].T, distances[within].T

def extract_t_elev_1D(ds, seaset_id):
    """Return the elevation time series of the model node tagged with `seaset_id`.

    Parameters
    ----------
    ds : dataset
        Model results exposing `seaset_id`, `isel(node=...)`, `elev` and `time`.
    seaset_id : scalar
        Station identifier to look up in `ds.seaset_id`.

    Returns
    -------
    pandas.Series
        Elevation indexed by `pd.Timestamp`; an empty series when the
        identifier is not present (a message is printed in that case). The
        first match is used when several nodes carry the identifier.
    """
    idx_ds = np.where(ds.seaset_id == seaset_id)[0]
    if len(idx_ds) == 0:
        # Report only what the function knows: the previous message read an
        # `ioc_code` name that is neither a parameter nor a local.
        print(f"seaset_id: {seaset_id} not found in model")
        return pd.Series(None, index=None)
    station = ds.isel(node=idx_ds[0])
    elev_ = station.elev.values
    t_ = [pd.Timestamp(ti) for ti in station.time.values]
    return pd.Series(elev_, index=t_)

def extract_t_elev_2D(ds, x, y):
    """Return the elevation time series at the mesh node closest to (x, y).

    Parameters
    ----------
    ds : dataset
        Model results exposing `longitude`, `latitude`, `elev`, `time` and
        `isel(node=...)`.
    x, y : float
        Target position, in the same coordinates as the mesh.

    Returns
    -------
    tuple
        `(series, distance)`: elevation indexed by `pd.Timestamp`, and the
        distance to the selected node rounded to two decimals.
    """
    mesh_xy = np.array([ds.longitude.values, ds.latitude.values]).T
    target = np.array([x, y]).T
    nearest, distance = closest_n_points(target, 1, mesh_xy)
    node = ds.isel(node=nearest[0])
    stamps = [pd.Timestamp(ti) for ti in node.time.values]
    return pd.Series(node.elev.values, index=stamps), np.round(distance, 2)

def get_corr(df1: pd.DataFrame, df2: pd.Series): 
    """Correlate two time series after putting them on a common index.

    `df1` is aligned with `df2` (union of both indexes), gaps in `df1` are
    filled by linear interpolation, and every position where either series
    is still missing is dropped before computing the correlation.

    Parameters
    ----------
    df1 : pandas.Series or one-column pandas.DataFrame
        Series to interpolate onto the common index (first column is used
        when a DataFrame is given).
    df2 : pandas.Series
        Reference series; it is not interpolated.

    Returns
    -------
    tuple
        `(corr, ts1, ts2)`: the correlation rounded to two decimals and the
        two aligned, NaN-free series it was computed from.
    """
    if isinstance(df1, pd.DataFrame):
        df1 = df1.iloc[:, 0]
    ts1, ts2 = df1.align(df2, axis = 0)
    ts1 = ts1.interpolate()
    # Element-wise validity mask. Interpolation leaves leading gaps of `ts1`
    # unfilled, so they must be dropped position by position rather than
    # judged from the first element alone.
    valid = ts1.notna().values & ts2.notna().values
    ts1 = ts1[valid]
    ts2 = ts2[valid]
    corr = ts1.corr(ts2)
    return np.round(corr, 2), ts1, ts2

def get_percentiles(ts1, ts2):
    """Evaluate both series at a common set of quantile levels.

    The levels go from 0 to 0.99 in steps of 0.001, then from 0.99 up to
    (but excluding) 1 in steps of 0.0001, which gives a finer sampling of
    the upper tail.

    Returns
    -------
    tuple of numpy arrays
        `(pc1, pc2)`: the quantiles of `ts1` and `ts2` at those levels.
    """
    coarse = np.arange(0, 0.99, 0.001)
    fine = np.arange(0.99, 1, 0.0001)
    levels = np.hstack([coarse, fine])
    n_levels = len(levels)
    pc1 = np.fromiter((ts1.quantile(q) for q in levels), dtype=float, count=n_levels)
    pc2 = np.fromiter((ts2.quantile(q) for q in levels), dtype=float, count=n_levels)
    return pc1, pc2

Here we compare the model with the observations for each cyclone.

For Europe there is the special case of [Babet](https://en.wikipedia.org/wiki/Storm_Babet), which is not in the IBTrACS database.

Autumn 2023 was an active season for Northern Europe ([source](https://en.wikipedia.org/wiki/2023%E2%80%9324_European_windstorm_season)), with important storms like [Ciaran](https://en.wikipedia.org/wiki/Storm_Ciar%C3%A1n) or [Babet](https://en.wikipedia.org/wiki/Storm_Babet). 

In [None]:
%matplotlib widget

In [None]:
# For every storm of `events`: one map (maximum modelled elevation + cyclone track + stations)
# and one figure with a time-series panel per impacted station (observations vs each model version).
for storm in events:
    fig, ax = plt.subplots(1,1, figsize=(14,8))
    x, y, time, wind_max, r64, name, basin = extract_specs(storm)
    # map extent from the track; the time window and the display name come from the JSON entry
    xmin, xmax = np.min(x), np.max(x)
    ymin, ymax = np.min(y), np.max(y)
    tmin = pd.Timestamp(events[storm]['start'])
    tmax = pd.Timestamp(events[storm]['end'])
    name = events[storm]['name']
    # maximum elevation of the last dataset of `ds_2D`, sampled hourly over the storm window
    im = plot_max_elev(ax, ds_2D[-1], pd.date_range(tmin, tmax, freq='1h'))
    ax, im2 = plot_cyclone_track(x, y, wind_max, r64, name, ax = ax)
    ax.axis('equal')
    if name == "BABET": xmin, xmax, ymin, ymax = -10, 20, 40, 70 # fixed map window for this storm
    ax.set_xlim(xmin - 5 , xmax + 5 )
    ax.set_ylim(ymin - 5 , ymax + 5 )
    ax.set_title(f"cyclone {name}, id#{storm} from {pd.Timestamp(tmin)} to {pd.Timestamp(tmax)}")
    # NOTE(review): the unit of the wind values is not visible in this notebook; confirm the '(m/s)' label
    plt.colorbar(im2, ax = ax, orientation = 'horizontal', pad = 0.07, fraction = 0.05, aspect = 60, label = 'max wind speed (m/s)')
    plt.colorbar(im, ax = ax, pad = 0.02, fraction = 0.05, label = 'max elevation (m)')
    stations_impacted = subset_2023[subset_2023.ioc_code.isin(events[storm]['stations'])]
    plt.tight_layout()
    # second figure: one row per impacted station
    fig1, ax1 = plt.subplots(len(stations_impacted), 1, figsize=(14,3*len(stations_impacted)))
    # with a single row `plt.subplots` returns one axes object, not an array: wrap it so indexing works
    if len(stations_impacted) == 1: ax1 = [ax1]
    for i_s, ioc_code in enumerate(stations_impacted.ioc_code):
        s = stations_impacted.iloc[i_s]
        xl, yl = s.longitude, s.latitude
        # station marker on the map, in the colour also used for its model curves
        ax.scatter(xl, yl,
            s=100, 
            lw=0.5, 
            c = colors[i_s % len(colors)], 
            marker = "*", 
            edgecolors = 'white', 
            label = ioc_code)
        obs = pd.read_parquet(SURGE_FOLDER + f"{ioc_code}.parquet")
        seaset_id = s.seaset_id
        
        # observations
        obs = obs.loc[tmin:tmax]
        ax1[i_s].plot(obs.index, obs.values , label=f"{ioc_code}", color = 'k', linestyle = '--')

        # models
        for ids, ds in enumerate(ds_2D):
            mod , d_= extract_t_elev_2D(ds, xl, yl)
            # the correlation is computed here but not shown on this figure
            corr, ts1, ts2 = get_corr(mod, obs[obs.columns[0]])
            ax1[i_s].plot(mod.index,mod.values , label=f"model 2D {version2D[ids]}", color = colors[i_s % len(colors)], alpha = alpha[ids])

        ax1[i_s].set_xlim(tmin, tmax)
        ax1[i_s].legend()
    ax1[0].set_title(f"cyclone {name}, id#{storm}")
    ax.legend()
    plt.tight_layout()
    print(storm, name)

What about the whole simulation period?

Some storms might not have been saved in the IBTrACS database but may still have induced a significant surge at some stations.

Let's check the signal for all the stations in the surge folder

In [None]:
# Time window used for the whole-period comparison below (2023-01-01 to 2023-10-31).
tmin = pd.Timestamp(2023,1,1)
tmax = pd.Timestamp(2023,10,31)

list_surge_manual = ['viti', 'prus', 'mill', 'pkem', 'vard', 'wood', 'kahu', 'cald2', 'hmda', 'kinl', 'benc', 'dzao', 
                     'lerw2', 'live', 'cres', 'hie2', 'ptmt', 'ilfa', 'wick', 'rorv', 'pwil2', 'dkwa', 'bame', 'abed', 'lime', 'nshi', 
                     'newl',  'chst', 'bamf', 'kush', 'greg', 'bamd', 'honn', 'dutc',  'talc2', 'dpnc', 'quir2', 'fpnt', 'chrp', 'ishig', 
                     'bgct',  'vhbc', 'thev', 'npor', 'pslu', 'cher', 'boma', 'stjo', 'djve', 'ross', 'elak', 'hana', 'saig', 'pich2', 
                     'dapi', 'ptal2', 'stor', 'ande', 'aren', 'ohig3', 'newl2', 'trst', 'kusm', 'alam', 'leit', 'sado', 'asto', 'shee', 
                     'malo', 'corr2', 'prin2', 'nkfa', 'heys', 'helg', 'kawa', 'atka', 'qtro2', 'dove', 'waka', 'treg', 'cuxh', 'tosa', 
                     'yaku', 'brom', 'smog', 'darw', 'cwfl', 'huat', 'sprg', 'fue2', 'abur', 'guam', 'kwfl', 'pcha2', 'barn', 
                     'bapj', 'amal', 'hilo', 'wpwa', 'plym', 'coru', 'gokr', 'pmur', 'whit', 'nhav', 'porp', 'kungr', 'mumb', 'stqy2', 'crom', 
                     'sitk', 'stqy', 'oste', 'acnj', 'bang', 'naga', 'mare', 'fuka', 'herb', 'pagb', 'work', 'mhav', 'lajo', 'harw', 
                     'omae', 'coqu2', 'holy2', 'horn', 'sdpt', 'lowe', 'naha']
# Restrict the station catalog to the manually selected codes listed above.
surge_stations = subset_2023[subset_2023.ioc_code.isin(list_surge_manual)]
# Country outlines used as background of the small location maps.
# NOTE(review): `gpd.datasets` may not be available in recent geopandas releases; confirm against the installed version.
countries = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# Whole-period comparison, one figure per station: time series (left), scatter and
# quantile-quantile plot of model vs observations (middle), location map (right).

# Only the last dataset of `ds_2D` is used in this cell. Enumerating `ds_2D[-1:]`
# restarts at 0, so the legend label is looked up with the real position of that
# dataset instead of the loop counter (which would give the first version's label).
model_label = version2D[len(ds_2D) - 1]

for i_s, ioc_code in tqdm.tqdm(enumerate(surge_stations.ioc_code)):        
    # get station coordinates
    s = surge_stations.iloc[i_s]
    xl, yl = s.longitude, s.latitude
    # get observations
    obs = pd.read_parquet(SURGE_FOLDER + f"{ioc_code}.parquet")
    obs = obs.loc[tmin:tmax]
    seaset_id = s.seaset_id
    # model series at the closest node; `get_corr` returns (corr, model, obs),
    # so `ts1` holds the observations and `ts2` the model
    mod , d_= extract_t_elev_2D(ds_2D[-1], xl, yl) # we take the v0.2 results only
    corr, ts2, ts1 = get_corr(mod, obs[obs.columns[0]])
    if corr > 0.0: 
        # stations with a zero, negative or undefined correlation are not plotted
        fig = plt.figure(figsize=(16,5))
        gs = fig.add_gridspec(1,3,  width_ratios=(2, 1, 1),
                            left=0, right=0.99, bottom=0, top=0.99,
                            wspace=0.07, hspace=0.03)
        ax_plot1 = fig.add_subplot(gs[ 0])
        ax_plot2 = fig.add_subplot(gs[ 1], sharey = ax_plot1)
        ax_map = fig.add_subplot(gs[ 2])

        # NOTE(review): `ax` is not created in this cell, so this marker lands on
        # whichever axes an earlier cell left behind; confirm that is intended
        ax.scatter(xl, yl,
            s=100, 
            lw=0.5, 
            c = colors[i_s % len(colors)], 
            marker = "*", 
            edgecolors = 'white', 
            label = ioc_code)
        
        # models
        for ids, ds in enumerate(ds_2D[-1:]):
            mod , d_= extract_t_elev_2D(ds, xl, yl)
            corr, ts2, ts1 = get_corr(mod, obs[obs.columns[0]])
            # correlations restricted to the upper tail of the model (computed, not displayed)
            mod95 = mod.quantile(0.95)
            corr95, _, _ = get_corr(mod[mod > mod95], obs[obs.columns[0]])
            mod99 = mod.quantile(0.99)
            corr99, _, _ = get_corr(mod[mod > mod99], obs[obs.columns[0]])
            ax_plot1.plot(mod.index,mod.values , label=f"model {model_label}", color = colors[i_s % len(colors)], alpha = alpha[ids])
        ax_plot2.scatter(ts1.values, ts2.values, c= 'k', label=f"model {model_label}, dist={d_}, Cr={corr}", s=1, alpha = 0.3)
        ax_plot2.set_xlabel('measured data')
        ax_plot2.set_ylabel('modelled data')
        pc1, pc2 = get_percentiles(ts1, ts2)
        # 1:1 reference line
        ax_plot2.axline([0,0],slope = 1, lw =1, linestyle = '--', color = 'k')
        ax_plot2.scatter(pc1, pc2, color = colors[i_s % len(colors)], alpha = alpha[ids])
        ax_plot2.plot(pc1, pc2, color = colors[i_s % len(colors)], alpha = alpha[ids])
        # observations
        ax_plot1.plot(obs.index, obs.values , label=f"{ioc_code}", color = 'k', linestyle = '--')
        ax_plot1.set_xlim(tmin, tmax)
        ax_plot1.legend()
        ax_plot2.legend()
        ax_plot1.grid(axis='both', color = 'grey')
        ax_plot2.grid(axis='both', color = 'grey')
        # map: 40 x 40 degree window centred on the station
        ax_map.scatter(xl, yl, marker = "*", c = 'r')
        _ = countries.plot(color='lightgrey', ax=ax_map, zorder=-1)
        ax_map.set_xlim(xl-20, xl+20)
        ax_map.set_ylim(yl-20, yl+20)