In [None]:
import copernicusmarine as cm
import pandas as pd
import xarray as xr
import hvplot.xarray
import hvplot.pandas
import numpy as np
import holoviews as hv

In [None]:
# Authenticate with the copernicusmarine client before the download cell below
# calls `cm.subset`.
# NOTE(review): presumably prompts for (or reuses stored) credentials — confirm
# against the copernicusmarine version in use.
cm.login()

In [None]:
# Load the station table; the first CSV column becomes the DataFrame index.
# Later cells read the `ioc_code`, `longitude` and `latitude` columns of each row.
ioc_cleanup = pd.read_csv('ioc_cleanup_2023.csv', index_col=0)
ioc_cleanup

In [None]:
# Keep only the rows whose `ioc_code` equals 'boma'.
is_boma = ioc_cleanup['ioc_code'] == 'boma'
ioc_re = ioc_cleanup.loc[is_boma]
ioc_re

In [4]:
import os

# Make sure both output folders exist: one for the downloaded NetCDF subsets,
# one for the extracted Parquet series. Existing folders are left untouched.
for out_dir in ('data/nc/', 'data/parquet/'):
    os.makedirs(out_dir, exist_ok=True)

In [None]:
# Download one small `zos` subset per station into data/nc/<ioc_code>.nc.
# Each request spans 2022-01-01 .. 2023-12-31 (the `PT1H` in the dataset id
# suggests hourly fields), so this cell is expensive. Stations whose file is
# already on disk are skipped so that "Restart & Run All" does not fetch
# everything again; delete a file to have that station downloaded afresh.
for it, item in ioc_cleanup.iterrows():
    output_filename = f"data/nc/{item.ioc_code}.nc"
    if os.path.exists(output_filename):
        continue
    # Bounding box: the integer-degree cell containing the station, plus one
    # extra degree on the maximum-longitude side.
    # NOTE(review): only the longitude is padded, and a station sitting exactly
    # on an integer latitude gets latmin == latmax — confirm this is intended.
    lonmax = np.ceil(item.longitude) + 1
    lonmin = np.floor(item.longitude)
    latmax = np.ceil(item.latitude)
    latmin = np.floor(item.latitude)
    cm.subset(
        dataset_id="cmems_mod_glo_phy_anfc_0.083deg_PT1H-m",
        variables=["zos"],
        minimum_longitude=lonmin,
        maximum_longitude=lonmax,
        minimum_latitude=latmin,
        maximum_latitude=latmax,
        start_datetime="2022-01-01T00:00:00",
        end_datetime="2023-12-31T23:00:00",
        output_filename=output_filename,
        force_download=True
    )

In [30]:
def get_closest_coordinates(x, y, ds):
    """Return the grid indices of the valid `zos` cell closest to a point.

    Validity is judged on a single 2-D slice, ``ds.zos[-1, 0, :, :]`` (last
    index along the first axis, first index along the second): cells that are
    NaN in that slice are ignored. Closeness is the plain Euclidean distance
    between coordinate values, i.e. measured in the units of ``ds.longitude``
    and ``ds.latitude`` with no spherical correction.

    Parameters
    ----------
    x, y : float
        Target longitude and latitude.
    ds : dataset-like
        Must expose 1-D ``longitude`` and ``latitude`` coordinates (via
        ``.values``) and a ``zos`` variable indexable as ``zos[-1, 0, :, :]``
        whose last two axes are ordered (latitude, longitude).

    Returns
    -------
    tuple of int
        ``(ilon, ilat)``: positions along ``ds.longitude`` and ``ds.latitude``
        of the selected cell.

    Raises
    ------
    ValueError
        If every cell of the reference slice is NaN, so no valid cell exists.
    """
    lon = np.asarray(ds.longitude.values)
    lat = np.asarray(ds.latitude.values)
    reference = np.asarray(ds.zos[-1, 0, :, :].values).ravel()

    # Flat positions (row-major, latitude-major) of the cells that hold data.
    valid = np.flatnonzero(~np.isnan(reference))
    if valid.size == 0:
        raise ValueError(
            "no valid (non-NaN) zos cell in the reference slice; "
            "cannot pick a closest grid point"
        )

    # meshgrid(lon, lat) has shape (nlat, nlon), matching the raveled slice.
    lon_grid, lat_grid = np.meshgrid(lon, lat)
    distances_squared = (lon_grid.ravel()[valid] - x) ** 2 + (lat_grid.ravel()[valid] - y) ** 2

    # Map the winner back to 2-D indices directly instead of searching the
    # coordinate arrays for its values a second time.
    flat_index = valid[np.argmin(distances_squared)]
    ilat, ilon = np.unravel_index(flat_index, (lat.size, lon.size))
    return int(ilon), int(ilat)

def extract_parquet(stations: pd.DataFrame):
    """Write one Parquet time series of `zos` per station.

    For every row of ``stations`` this opens ``data/nc/<ioc_code>.nc``, picks
    the valid grid cell closest to the station with
    ``get_closest_coordinates`` and saves the series at that cell (index 0
    along the second ``zos`` axis), indexed by ``ds.time``, to
    ``data/parquet/<ioc_code>.parquet``. The station code is printed as a
    progress marker.

    Parameters
    ----------
    stations : pd.DataFrame
        Needs the columns ``ioc_code``, ``longitude`` and ``latitude``.
    """
    for _, item in stations.iterrows():
        print(item.ioc_code)
        # The context manager closes the NetCDF file after each station;
        # otherwise one open handle per station piles up during the loop.
        with xr.open_dataset(f'data/nc/{item.ioc_code}.nc') as ds:
            ilon, ilat = get_closest_coordinates(item.longitude, item.latitude, ds)
            # `.values` pulls the data into memory while the file is still open.
            df = pd.DataFrame(
                {'zos': ds.zos[:, 0, ilat, ilon].values},
                index=ds.time.values,
            )
        df.to_parquet(f'data/parquet/{item.ioc_code}.parquet')


In [None]:
# Run the extraction for every station in the table: reads data/nc/<ioc_code>.nc
# and writes data/parquet/<ioc_code>.parquet for each row.
extract_parquet(ioc_cleanup)

In [None]:
# check if we have data: read each station's Parquet file back and plot it.
# A bare `df.hvplot()` inside a loop is built and then discarded (a cell only
# renders its last expression), so the plots are collected and shown together.
plots = []
for it, item in ioc_cleanup.iterrows():
    print(it, item.ioc_code)
    df = pd.read_parquet(f'./data/parquet/{item.ioc_code}.parquet')
    plots.append(df.hvplot(title=str(item.ioc_code)))
    # Stop once the index label exceeds 200 to keep the check manageable.
    if it > 200:
        break
hv.Layout(plots).cols(1)