In [None]:
import os
import glob
import time

from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import xarray as xa
from cartopy.crs import PlateCarree, LambertAzimuthalEqualArea

In [None]:
# Coordinate reference systems used below: plain lon/lat (source of the buoy
# positions) and a Lambert azimuthal equal-area projection centred on the
# North Pole (target of the transform).
pc = PlateCarree()
laea = LambertAzimuthalEqualArea(central_latitude=90, central_longitude=0)

In [None]:
# Load the buoy records, then discard every row that contains a missing value.
buoys = pd.read_feather('data/buoys.feather')
buoys = buoys.dropna()

In [None]:
sic_dir = '/Data/sim/data/OSISAF_ice_conc_CDR_v3p0/'
years = range(2024, 2002, -1)
min_size = 1


def grid_index(coords, axis):
    """Return the nearest grid index along `axis` for each value in `coords`.

    `axis` is treated as evenly spaced grid-point coordinates: its first value
    maps to index 0 and its last value to index `axis.size - 1`. Indices that
    fall outside the grid are clipped to the valid range.
    """
    span = axis[-1] - axis[0]
    # `axis.size` points span `axis.size - 1` intervals, hence the `- 1`.
    idx = np.round((coords - axis[0]) / span * (axis.size - 1)).astype(int)
    return np.clip(idx, 0, axis.size - 1)


def nearest_time_index(grid_times, times):
    """Return, for each entry of `times`, the index of the closest `grid_times` entry.

    `grid_times` must be sorted in ascending order. When a time is exactly
    half-way between two grid times the earlier one is chosen (same result as
    `np.argmin(np.abs(grid_times - t))`).
    """
    grid_times = np.asarray(grid_times)
    times = np.asarray(times)
    if grid_times.size < 2:
        return np.zeros(times.shape, dtype=int)
    right = np.clip(np.searchsorted(grid_times, times), 1, grid_times.size - 1)
    left = right - 1
    pick_left = (times - grid_times[left]) <= (grid_times[right] - times)
    return np.where(pick_left, left, right)


# For every year: sample the ice concentration at each buoy record (nearest
# grid cell, nearest time step), cache the result in a per-year feather file
# and collect it in `buoys_subs`.
buoys_subs = []
for year in years:
    ofile = f'data/buoys_sic_{year}.feather'
    if os.path.exists(ofile):
        # Reuse the cached result of a previous run.
        print(f'{ofile} exists')
        buoys_sub = pd.read_feather(ofile)
        buoys_subs.append(buoys_sub)
        continue

    sic_files = sorted(glob.glob(f'{sic_dir}/{year}/*/*.nc'))
    if not sic_files:
        # Nothing to sample from; skip instead of failing on sic_files[0].
        print(f'No SIC files found for {year}')
        continue
    print(len(sic_files), os.path.basename(sic_files[0]), os.path.basename(sic_files[-1]))

    # The grid coordinates are taken from the first file of the year.
    # NOTE(review): the factor 1000 presumably converts km to m - confirm.
    with xa.open_dataset(sic_files[0]) as ds0:
        ds0x = ds0.xc.values * 1000
        ds0y = ds0.yc.values * 1000
        ds0sic = ds0.ice_conc.values[0]

    t0 = time.time()
    with xa.open_mfdataset(sic_files, combine='by_coords') as ds:
        ds_time = ds.time.values
        ds_ice_conc = ds.ice_conc.values
    print('Time:', time.time() - t0)

    # Keep only buoy records inside the time span covered by this year's files.
    buoys_sub = buoys[(buoys.index >= ds_time[0]) & (buoys.index <= ds_time[-1])].copy()
    if buoys_sub.shape[0] < min_size:
        print('Too small:', buoys_sub.shape[0])
        continue

    # Project lon/lat onto the LAEA plane and convert to grid column/row.
    x, y = laea.transform_points(pc, buoys_sub.Lon.values, buoys_sub.Lat.values).T[:2]
    cols = grid_index(x, ds0x)
    rows = grid_index(y, ds0y)
    time_indices = nearest_time_index(ds_time, buoys_sub.index.values)
    ice_conc_sub = ds_ice_conc[time_indices, rows, cols]
    buoys_sub['sic'] = ice_conc_sub
    buoys_subs.append(buoys_sub)

    # Quick visual check: buoy positions over the first ice concentration field.
    plt.imshow(ds0sic, cmap='jet')
    plt.scatter(cols, rows, c=ice_conc_sub)
    print('Size:', ice_conc_sub.size)
    plt.show()
    buoys_sub.to_feather(ofile)


In [None]:
# Stack the per-year tables row-wise into one table and save it.
# After this line `buoys_subs` is a DataFrame, no longer a list.
buoys_subs = pd.concat(buoys_subs)
buoys_subs.to_feather('data/buoys_sic.feather')

# Split buoy trajectories by low SIC

In [None]:
def split_high_sic_runs(buoy, min_sic):
    """Split one buoy's records into runs of consecutive rows with sic > min_sic.

    The records are first sorted by index. Returns a list of DataFrame copies,
    one per run. Each copy gets a `segment_id` column (run number within this
    buoy, starting at 0) and has its `Age` column shifted so that the minimum
    within the run is 0. The list is empty when no row exceeds `min_sic`.
    """
    buoy = buoy.sort_index()
    hi_sic_idx = np.where(buoy.sic > min_sic)[0]
    if hi_sic_idx.size == 0:
        return []
    # A new run starts wherever consecutive high-SIC row positions are not adjacent.
    starts = np.concatenate(([0], np.where(np.diff(hi_sic_idx) > 1)[0] + 1))
    # Position (within hi_sic_idx) of the last row of each run, inclusive.
    ends = np.concatenate((starts[1:], [hi_sic_idx.size])) - 1
    runs = []
    for segment_id, (start, end) in enumerate(zip(starts, ends)):
        # iloc slices exclude their end, so add 1 to keep the run's last row.
        segment = buoy.iloc[hi_sic_idx[start]:hi_sic_idx[end] + 1].copy()
        segment['segment_id'] = segment_id
        segment['Age'] -= segment['Age'].min()
        runs.append(segment)
    return runs


buoys = pd.read_feather('data/buoys_sic.feather').dropna()
groups = buoys.groupby('BuoyId')

# Rows with sic at or below this value break a trajectory into separate segments.
min_sic = 15
segments = []
for bi, buoy in tqdm(groups):
    segments.extend(split_high_sic_runs(buoy, min_sic))
segments = pd.concat(segments, axis=0)
segments.to_feather('data/segments.feather')