In [None]:
import os
import glob

import pandas as pd
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import numpy as np
import xarray as xr

from scipy.ndimage import label

from cartopy.crs import LambertAzimuthalEqualArea, PlateCarree
# Target projection for all buoy positions: Lambert azimuthal equal-area,
# centred on the North Pole (central_latitude=90) with the 0-degree meridian as
# central longitude.
crs_laea = LambertAzimuthalEqualArea(central_longitude=0, central_latitude=90)
# Source CRS passed to `transform_points` for the raw lon/lat columns.
crs_pc = PlateCarree()

%matplotlib inline

In [None]:
# Locate the raw IABP buoy tables (`*.dat` files) in `iabp_dir`.
iabp_dir = 'IABP_live'
iabp_files = sorted(glob.glob(f'{iabp_dir}/*.dat'))
# Buoy id = file name up to its first dot. The basename is taken *before*
# splitting on '.', so a dot in the directory part (e.g. './IABP_live') cannot
# truncate the id to an empty/wrong string, and the OS path separator is honoured.
iabp_ids = [os.path.basename(f).split('.')[0] for f in iabp_files]
print(f'Found {len(iabp_files)} IABP files')

In [None]:
# Smallest number of valid position fixes a file must contain to be ingested.
min_df_shape = 10

used_files = []
buoys = []
for i, ifile in tqdm(enumerate(iabp_files), total=len(iabp_files)):
    # Files whose name starts with 'FR_' are not ingested.
    if os.path.basename(ifile).startswith('FR_'):
        continue

    df = pd.read_csv(ifile, sep='\\s+', usecols=['Year', 'DOY', 'Lat', 'Lon'])
    df = df.dropna()

    # Keep fixes with longitude in [-180, 180], latitude in (50, 90] and a
    # year in [1990, 2025]; everything else is discarded.
    lon_ok = df.Lon.between(-180, 180)
    lat_ok = (df.Lat > 50) & (df.Lat <= 90)
    year_ok = df.Year.between(1990, 2025)
    df = df[(lon_ok & lat_ok) & year_ok]
    if len(df) < min_df_shape:
        continue

    # Time stamp = 1 January of `Year` plus `DOY` days.
    # NOTE(review): if DOY is 1-based (1.0 == 1 Jan 00:00) every time stamp is
    # one day late -- confirm against the IABP file format description.
    year_start = pd.to_datetime(df['Year'], format='%Y')
    df.index = pd.to_timedelta(df['DOY'], unit='D') + year_start

    # Project lon/lat to the polar equal-area grid; only the x and y columns of
    # the transformed points are kept.
    xy = crs_laea.transform_points(crs_pc, df.Lon.values, df.Lat.values)[:, :2]
    buoy = pd.DataFrame({'x': xy[:, 0], 'y': xy[:, 1]}, index=df.index)

    # Smooth with a trailing 1-day window, average onto a daily grid, then drop
    # the first and last day and any day left without data.
    daily = buoy.sort_index().rolling('1D').mean().resample('1D').mean()
    buoy = daily.iloc[1:-1].dropna()

    buoys.append(buoy)
    used_files.append(ifile)

print(len(buoys), 'IABP buoys read successfully.')

In [None]:
# Folder with the NetCDF buoy trajectories; every entry whose name ends in 'nc'
# is collected, in sorted order.
nc_dir = 'Arctic_buoys'
nc_files = glob.glob(f'{nc_dir}/*nc')
nc_files.sort()
print(f'Found {len(nc_files)} trajectory files.')

In [None]:
# Ids of every IABP `.dat` file that was found (including files skipped while
# reading). A set gives constant-time membership tests instead of scanning the
# whole list once per NetCDF file.
known_iabp_ids = set(iabp_ids)

for nc_file in tqdm(nc_files):
    # File name up to its first dot, without the optional 'iabp_' marker.
    nc_id = os.path.basename(nc_file).split('.')[0].replace('iabp_', '')
    if nc_id in known_iabp_ids:
        # This buoy also exists as an IABP `.dat` file; do not add it again.
        continue
    with xr.open_dataset(nc_file) as ds:
        df = ds[['lat', 'lon']].to_dataframe().dropna()
    # Keep only positions north of 50N.
    df = df[df.lat > 50]
    if df.shape[0] < 1:
        continue

    # Project lon/lat to the polar equal-area grid (x, y columns only).
    x, y = crs_laea.transform_points(crs_pc, df.lon.values, df.lat.values)[:, :2].T
    buoy = pd.DataFrame({
            'x': x,
            'y': y,
        },
        index=df.index)
    # Trailing 1-day smoothing, daily averaging, then drop the first/last day
    # and any day without data.
    # NOTE(review): this requires the dataframe index to be datetime-like --
    # presumably the dataset's time dimension; confirm against the files.
    buoy = buoy.sort_index().rolling('1D').mean().resample('1D').mean().iloc[1:-1].dropna()
    if buoy.empty:
        # Tracks covering two days or fewer vanish after trimming the first and
        # last day; do not count them as successfully read.
        continue
    buoys.append(buoy)

print(len(buoys), 'Total buoys read successfully.')

In [None]:
max_dtime = 90   # longest accepted gap between consecutive positions, days
max_drift = 500  # largest accepted displacement between consecutive positions, km
max_speed = 100  # largest accepted speed between consecutive positions, km/day
n_iters = 5      # maximum number of despiking passes per buoy


def _step_stats(track):
    """Return per-step time gap, displacement and speed of a track.

    Parameters
    ----------
    track : pandas.DataFrame
        Positions in columns ``x`` and ``y`` with a datetime index.

    Returns
    -------
    tuple of pandas.Series
        ``(dtime, drift, speed)``: days since the previous row, distance from
        the previous row divided by 1000 (km, given x/y in metres) and their
        ratio. The first row has no predecessor, so all three are NaN there.
    """
    dtime = track.index.to_series().diff().dt.total_seconds() / (24 * 60 * 60)  # in days
    drift = np.hypot(track.x.diff(), track.y.diff()) / 1000  # in km
    return dtime, drift, drift / dtime


dtimes = []
drifts = []
speeds = []
clean_buoys = []
for abuoy in tqdm(buoys):
    buoy = abuoy.copy()
    # Despike: repeatedly drop positions reached at an implausible speed.
    # Only rows that are actually too fast are removed. The first row of a
    # track has NaN speed (no predecessor); filtering with `speed < max_speed`
    # would discard it on every pass and silently eat `n_iters` valid days.
    for i in range(n_iters):
        speed = _step_stats(buoy)[2]
        too_fast = speed >= max_speed  # NaN compares False, so it is kept
        if not too_fast.any():
            break  # nothing left to remove; further passes would be no-ops
        buoy = buoy[~too_fast]
    dtime, drift, speed = _step_stats(buoy)
    dtimes.append(dtime)
    drifts.append(drift)
    speeds.append(speed)

    # `gpi` flags good *steps*; each flag sits on the step's arrival row, so the
    # first row of a track (NaN step) and the row right after a rejected step
    # are False and act as separators between segments.
    gpi = (dtime < max_dtime) & (drift < max_drift) & (speed < max_speed)
    # Consecutive runs of good steps get labels 1..num_labels; each run becomes
    # its own clean segment.
    labels, num_labels = label(gpi)
    clean_buoys.extend([buoy[labels == label_u] for label_u in range(1, num_labels + 1)])

print(len(clean_buoys), 'Clean buoys created successfully.')

In [None]:
# Flatten the per-buoy step statistics into single 1-D arrays.
dtimes = np.hstack(dtimes)
drifts = np.hstack(drifts)
speeds = np.hstack(speeds)
# Time between the first and the last position of every clean segment, in days.
maxage = np.array([(buoy.index[-1] - buoy.index[0]).total_seconds()/ (24*60*60) for buoy in clean_buoys])

# Steps that pass all three thresholds (NaN entries compare False and drop out).
gpi = (dtimes < max_dtime) & (drifts < max_drift) & (speeds < max_speed)

# One histogram per quantity, each labelled with its quantity and unit so the
# figure can be read on its own.
fig, axs = plt.subplots(1, 4, figsize=(15, 3))
panels = [
    (dtimes[gpi], [0, max_dtime], 'Time step (days)'),
    (drifts[gpi], [0, max_drift], 'Drift per step (km)'),
    (speeds[gpi], [0, max_speed], 'Speed (km/day)'),
    (maxage/365, [0.5, 6], 'Segment duration (years)'),
]
for ax, (values, value_range, xlabel) in zip(axs, panels):
    ax.hist(values, bins=50, range=value_range)
    ax.set_yscale('log')
    ax.set_xlabel(xlabel)
axs[0].set_ylabel('Count')
fig.tight_layout()
plt.show()

In [None]:
# Tag every clean segment with the age of each position (days since the
# segment's first position) and a running integer id, then stack all segments
# into one table.
# `assign` returns new frames, so the frames in `clean_buoys` -- which came out
# of boolean indexing -- are not written to (no SettingWithCopyWarning) and
# re-running this cell gives the same result.
tagged_buoys = [
    buoy.assign(
        age=(buoy.index - buoy.index[0]).total_seconds() / (24*60*60),
        id=i,
    )
    for i, buoy in enumerate(clean_buoys)
]
buoys = pd.concat(tagged_buoys)

In [None]:
# Age of every position versus its date, coloured by the distance of the
# projected (x, y) position from the projection origin.
fig, ax = plt.subplots(figsize=(20, 5))
points = ax.scatter(buoys.index, buoys.age/365, c=np.hypot(buoys.x, buoys.y), cmap='jet', s=1, alpha=0.1)
cbar = fig.colorbar(points, ax=ax)
# The colorbar inherits the scatter's alpha=0.1 and would be almost invisible;
# draw it fully opaque.
cbar.solids.set_alpha(1)
cbar.set_label('Distance from projection origin (m)')
ax.set_xlabel('Date')
ax.set_ylabel('Age (years)')
plt.show()

In [None]:
# Save the combined table. The output folder is created first so that a fresh
# checkout without a `data/` directory does not fail on write.
out_path = 'data/buoys.feather'
os.makedirs(os.path.dirname(out_path), exist_ok=True)
buoys.to_feather(out_path)