to get this notebook working: 

```
poetry add ipykernel hvplot shapely searvey geoviews utide
```
let's also add a package we use for cleaning tide gauge data ([link](https://github.com/seareport/ioc_cleanup/tree/ioc_2024)):
```
poetry add git+https://github.com/seareport/ioc_cleanup.git#ioc_2024
```

In [None]:
import searvey
import shapely
import utide
import pandas as pd
import hvplot.pandas

import ioc_cleanup as C

some functions

In [None]:
def data_availability(series: pd.Series, freq="60min") -> float:
    """Return the fraction of ``freq``-wide time bins that contain data.

    The series is resampled to ``freq`` using the bin mean; a bin counts as
    available when that mean is not NaN.

    Parameters
    ----------
    series : pd.Series
        Time series to assess. ``Series.resample`` needs a datetime-like index.
    freq : str, default "60min"
        Bin width handed to ``Series.resample``.

    Returns
    -------
    float
        Share of non-empty bins, between 0 and 1. ``0.0`` is returned when
        the series holds no samples at all.
    """
    # An empty series produces no bins, so the ratio below would be 0 / 0
    # (NaN plus a RuntimeWarning). Treat it like the all-NaN case: no data.
    if series.empty:
        return 0.0
    resampled = series.resample(freq).mean()
    missing_ratio = resampled.isna().sum() / len(resampled)
    return float(1 - missing_ratio)

# Detide a water-level signal with UTide (https://github.com/wesleybowman/UTide)
def surge(ts: pd.Series, lat: float, resample: int = None): 
    """Return the non-tidal residual of ``ts``.

    Tidal constituents are fitted with ``utide.solve`` and the tide is then
    reconstructed at the timestamps of the original series and subtracted
    from it.

    Parameters
    ----------
    ts : pd.Series
        Water-level series; its index is passed to UTide as the time axis.
    lat : float
        Latitude forwarded to ``utide.solve`` through the ``lat`` option.
    resample : int, optional
        When given, the fit is done on ``resample``-minute bin means instead
        of the original samples. The residual is still computed on the
        original samples.

    Returns
    -------
    pd.Series
        ``ts`` minus the reconstructed tide, on the index of ``ts``.
    """
    observed = ts.copy()

    # Optionally thin the series before the harmonic fit.
    fit_input = ts
    if resample is not None:
        binned = ts.resample(f"{resample}min").mean()
        # Move every bin label forward by half a bin width
        # (presumably to stamp each mean at the centre of its bin).
        fit_input = binned.shift(freq=f"{resample / 2}min")

    solve_options = dict(
        constit="auto",
        method="ols",
        order_constit="frequency",
        Rayleigh_min=0.97,
        lat=lat,
        verbose=True,
    )
    coef = utide.solve(fit_input.index, fit_input, **solve_options)

    # Rebuild the tide on the original timestamps, not the resampled ones.
    tide = utide.reconstruct(observed.index, coef, verbose=solve_options["verbose"])
    residual = observed.values - tide.h
    return pd.Series(data=residual, index=observed.index)

In [None]:
# Albatross project sites, one (name, lat, lon) record per site
_ALBATROSS_RECORDS = (
    ("Keta Basin, Ghana", 5.9000, 0.9833),
    ("Kigamboni District Hub, Tanzania", -6.8500, 39.3000),
    ("Morondava District Hub, Madagascar", -20.2833, 44.3167),
)

# Pivot the records into the column-oriented mapping handed to pandas.
albat_dict = {
    column: [record[position] for record in _ALBATROSS_RECORDS]
    for position, column in enumerate(("Site", "lat", "lon"))
}

albatross_sites = pd.DataFrame(albat_dict)
# Last expression of the cell: renders the table in the notebook.
albatross_sites

get stations around Africa

In [None]:
# Bounding box (xmin, ymin, xmax, ymax) used to select stations around Africa.
africa = shapely.box(-26, -35, 63, 38)
# Full IOC station catalogue, then the subset whose geometry lies within the box.
ioc_df = searvey.get_ioc_stations()
ioc_africa = ioc_df.loc[ioc_df.geometry.within(africa)]

example for `zanz`, station in Zanzibar: 

In [None]:
# IOC code of the station to analyse and the sensor column read from its data.
station, sensor = "zanz", "prs"

details about the station, and location:

In [None]:
# Select the row(s) of the chosen station once; the selection feeds both the
# details table and the highlighted map layer.
station_row = ioc_df[ioc_df.ioc_code == station]
# A notebook only renders the last expression of a cell, so the station
# details are shown explicitly (`display` is the built-in that
# IPython/Jupyter provides).
display(station_row)

# Overlay three layers: all IOC stations around Africa, the selected station
# in red, and the Albatross project sites as large green stars.
plot = ioc_africa.hvplot(
    tiles=True,
    hover_cols=["ioc_code"],
    label="IOC stations",
) * station_row.hvplot(
    geo=True,
    hover_cols=["ioc_code"],
    c="r",
    # Follow the `station` variable so the legend stays right when it changes.
    label=station,
) * albatross_sites.hvplot.points(
    x="lon",
    y="lat",
    geo=True,
    hover_cols=["Site"],
    c="g",
    s=700,
    marker="*",
    label="albatross sites",
)
plot.opts(width=1000, height=1000)

let's extract the data and check its availability (extracting 25 years should take around 3 min with an average internet connection)

In [None]:
# Download the station record from 2000-01-01 up to the current time.
raw = searvey.fetch_ioc_station(station, "2000-01-01", pd.Timestamp.now())
# Summary statistics of the downloaded columns, rendered as the cell output.
raw.describe()

In [None]:
# Latitude of the selected station (first matching catalogue row); later passed to `surge`.
_lat = ioc_df.loc[ioc_df.ioc_code == station, "lat"].values[0]

In [None]:
# Hourly means of the raw sensor record from October 2014 onwards.
raw_hourly = raw[sensor].loc["2014-10":].resample("1h").mean()
raw_hourly.hvplot()

let's clean the data, using [ioc_cleanup](https://github.com/seareport/ioc_cleanup/tree/ioc_2024)

In [None]:
# Download the cleaning transformation for this station/sensor from the
# ioc_cleanup repository into ./transformations/.
from pathlib import Path

import requests

transformation_file = Path("transformations") / f"{station}_{sensor}.json"
# Portable equivalent of `mkdir -p transformations`.
transformation_file.parent.mkdir(parents=True, exist_ok=True)

response = requests.get(
    f'https://raw.githubusercontent.com/seareport/ioc_cleanup/refs/heads/ioc_2024/transformations/{station}_{sensor}.json',
    timeout=60,  # do not hang the notebook forever on a stalled connection
)
# Fail loudly when the file is missing (e.g. 404) instead of saving the
# error page as if it were a valid JSON transformation.
response.raise_for_status()
# write_bytes opens, writes and closes the file, so no handle is left open.
transformation_file.write_bytes(response.content)

In [None]:
# Shell escape: print the first 20 lines of the transformation file for this station/sensor.
! head -20 "transformations/{station}_{sensor}.json" 

let's clean the signal using the transformation

In [None]:
# Load the transformation for this station/sensor and apply it to the raw data.
trans = C.load_transformation(station, sensor)
cleaned = C.transform(raw, trans)
# Keep only the selected sensor column and plot its hourly means.
ts = cleaned[sensor]
ts_hourly = ts.resample("1h").mean()
ts_hourly.hvplot(title=f"water level signal in '{station}'")

let's detide the signal to isolate storm surges

In [None]:
# Detide the cleaned series; `surge` fits the tide on 2-minute bin means
# and returns the residual on the original timestamps.
detided = surge(ts, lat = _lat, resample=2)

visualise the detided (surge) signal

In [None]:
# Hourly means of the residual, with empty hours removed before plotting.
surge_hourly = detided.resample("1h").mean().dropna()
surge_plot = surge_hourly.hvplot()
surge_plot.opts(width=1300, height=400, title=f"surge level signal in '{station}'")

not really interesting: no particular event happened in 2022-2024, although we can see a small contribution in September 2024

Unfortunately we only cleaned from 2022 to 2025, for our model validation purposes. 

However, you can still change the start date to 2000 to get more data

let's have a look at the bigger time series.. and modify the transformation accordingly


In [None]:
# NOTE(review): bare expression that is not the last statement of the cell,
# so its value is presumably never rendered — confirm whether it should be
# printed or removed.
trans.start 
# Move the start of the transformation back to 2014-10-10
# (presumably the first timestamp kept by `C.transform` — TODO confirm).
trans.start = pd.Timestamp(2014,10,10).to_pydatetime()
# Add 2020-04-14 .. 2021-10-27 to the date ranges to drop.
# NOTE(review): `append` adds the same range again each time this cell is re-run.
trans.dropped_date_ranges.append([pd.Timestamp(2020,4,14).to_pydatetime(), pd.Timestamp(2021,10,27).to_pydatetime()])

In [None]:
# Re-apply the (now modified) transformation and plot hourly means of the sensor column.
cleaned = C.transform(raw, trans)
ts = cleaned[sensor]
ts_hourly = ts.resample("1h").mean()
ts_hourly.hvplot(title=f"water level signal in '{station}'")

detide for the last 10 years (this might take time.. and some processing)

In [None]:
# Detide the longer series; `surge` fits the tide on 20-minute bin means
# and returns the residual on the original timestamps.
detided = surge(ts, lat = _lat, resample=20)

In [None]:
# First year of the window to inspect.
YEAR = 2014
# Label-based slice bounded by the YEAR and YEAR+1 labels, plotted as hourly means.
surge_window = detided.loc[f"{YEAR}":f"{YEAR+1}"]
surge_window_hourly = surge_window.resample("1h").mean()
surge_window_hourly.hvplot().opts(width=1300, height = 400, title=f"surge level signal in '{station}'")