In [None]:
import os
import glob

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import xarray as xa
from cartopy.crs import PlateCarree, LambertAzimuthalEqualArea

In [None]:
# Root directory of the gridded sea-ice-age NetCDF files; later cells glob
# '{idir}/{year}/...' underneath it.
idir = '../NERSC_arctic25km_sea_ice_age_v2p1/nc'

# Coordinate reference systems used to convert buoy lon/lat into grid x/y.
# NOTE(review): the LAEA parameters are presumably those of the product grid - confirm.
pc = PlateCarree()
laea = LambertAzimuthalEqualArea(central_longitude=0, central_latitude=90)

# Buoy segments table; rows containing any missing value are discarded up front.
buoys_raw = pd.read_feather('data/segments.feather')
buoys = buoys_raw.dropna()

In [None]:
# Collocate buoy records with the gridded sea-ice-age product, one year at a time.
# Each year's table is cached in data/buoys_sia_{year}.feather; years whose cache
# file already exists are skipped, so the cell is cheap to re-run.
for year in range(2024, 2002, -1):
    ofile = f'data/buoys_sia_{year}.feather'
    if os.path.exists(ofile):
        print(f'{ofile} exists, skipping')
        continue

    print(f'Processing {year}')
    # File names carry the same product version (v2p1) as the directory in `idir`.
    sia_files = sorted(glob.glob(f'{idir}/{year}/arctic25km_sea_ice_age_v2p1_*.nc'))
    print(len(sia_files))
    if not sia_files:
        # Nothing to open for this year; avoid an IndexError on sia_files[0].
        print(f'No sea ice age files found for {year}, skipping')
        continue

    # Grid axes are read once from the first file of the year.
    with xa.open_dataset(sia_files[0]) as ds:
        ds_x = ds.x.values
        ds_y = ds.y.values

    concs = []
    with xa.open_mfdataset(sia_files, combine='by_coords') as ds:
        ds_time = ds.time.values
        # Collect every variable whose name contains 'conc_', in reverse name order.
        for var_name in sorted(ds.variables, reverse=True):
            if 'conc_' in var_name:
                concs.append(ds.variables[var_name].values)
        ds_sia = ds.sia.values
    concs = np.array(concs)
    # Running sum over the reverse-sorted variables, flipped so that index 0 holds the
    # sum over all of them.
    # NOTE(review): presumably index i is then the concentration of ice in class i and
    # older - confirm against the product's variable naming.
    conc_cumsum = concs.cumsum(axis=0)[::-1]

    # Keep only buoy records inside the time span covered by this year's files.
    buoys_sub = buoys[(buoys.index >= ds_time[0]) & (buoys.index <= ds_time[-1])].copy()
    print(buoys_sub.shape)
    if buoys_sub.empty:
        # An empty selection would yield a float index array and fail on indexing.
        print(f'No buoy records within {year}, skipping')
        continue

    # Nearest product time step for every buoy record.
    time_indices = np.array(
        [np.argmin(np.abs(ds_time - np.datetime64(date))).item() for date in buoys_sub.index],
        dtype=int,
    )
    # Buoy lon/lat -> projected x/y -> fractional grid index -> nearest node.
    x, y = laea.transform_points(pc, buoys_sub.Lon.values, buoys_sub.Lat.values).T[:2]
    # An axis with N nodes spans N - 1 steps between its first and last coordinate
    # (regular spacing assumed), so the scale factor is size - 1, not size.
    cols = np.round((x - ds_x[0]) / (ds_x[-1] - ds_x[0]) * (ds_x.size - 1)).astype(int)
    rows = np.round((y - ds_y[0]) / (ds_y[-1] - ds_y[0]) * (ds_y.size - 1)).astype(int)
    # Records falling outside the grid are assigned to the nearest edge node.
    rows = np.clip(rows, 0, ds_y.size - 1)
    cols = np.clip(cols, 0, ds_x.size - 1)
    buoy_sia = ds_sia[time_indices, rows, cols]
    buoy_conc_cumsum = conc_cumsum[:, time_indices, rows, cols]

    buoys_sub['sia'] = buoy_sia
    for i in range(buoy_conc_cumsum.shape[0]):
        buoys_sub[f'conc{i}'] = buoy_conc_cumsum[i]
    # Rows where any sampled product value is missing are dropped before caching.
    buoys_sub.dropna().to_feather(ofile)

In [None]:
# Stitch the cached per-year collocation tables into one frame and pull the
# seven cumulative-concentration columns (conc0 .. conc6) out as a 2-D array.
buoys_sia_files = sorted(glob.glob('data/buoys_sia_*.feather'))
yearly_tables = []
for feather_path in buoys_sia_files:
    yearly_tables.append(pd.read_feather(feather_path))
buoys_sia = pd.concat(yearly_tables)

conc_columns = ['conc' + str(k) for k in range(7)]
conc_array = buoys_sia[conc_columns].values


In [None]:
# Index of the last column (conc0, conc1, ...) whose value still exceeds 0.15, per buoy
# record; -1 when even conc0 is at or below the threshold.
above_threshold = conc_array > 0.15
first_below = np.argmin(above_threshold, axis=1)
# np.argmin returns 0 for a row that is True everywhere, which would be mistaken for
# "conc0 below threshold". Such rows exceed the threshold in every column, so point
# one past the last column instead.
first_below[above_threshold.all(axis=1)] = above_threshold.shape[1]
buoys_max_age = first_below - 1

# Buoy age converted by dividing by 365.
# NOTE(review): assumes Age is stored in days - confirm.
buoy_age_years = buoys_sia.Age / 365
# Share of records where the product age is at least the buoy age.
correct_ratio_weighted_sia = (buoys_sia.sia >= buoy_age_years).mean()
correct_ratio_max_sia = ((1 + buoys_max_age) >= buoy_age_years).mean()

max_age = 5
n_bins = 30
# Staircase following the histogram bin edges: a histogram with n_bins bins over
# [0, max_age] has n_bins + 1 edges.
bins = np.linspace(0, max_age, n_bins + 1)
# Every edge twice, then offset the x and y sequences by one element to get the steps.
bins_i = np.repeat(bins, 2)
bins_y = bins_i[2:]
bins_x = bins_i[1:-1]


In [None]:
# Amount by which the product age (sia) falls short of the buoy age (Age / 365);
# zero wherever the product age is not smaller than the buoy age.
buoy_age_yr = buoys_sia.Age / 365
age_shortfall = buoy_age_yr - buoys_sia.sia
age_underestimation = np.where(buoys_sia.sia < buoy_age_yr, age_shortfall, 0)

In [None]:
# Read the grid axes and status flag from one reference file (first file of 2020)
# and map every collocated buoy record onto that grid.
year = 2020
sia_files = sorted(glob.glob(f'{idir}/{year}/arctic25km_sea_ice_age_v2p1_*.nc'))
print(len(sia_files))
if not sia_files:
    # Fail with a clear message instead of an IndexError on sia_files[0].
    raise FileNotFoundError(f'No sea ice age files found in {idir}/{year}')

with xa.open_dataset(sia_files[0]) as ds:
    ds_x = ds.x.values
    ds_y = ds.y.values
    ds_status_flag = ds.status_flag.values
# Mask that is 1 where the first time step's status flag equals 1 and NaN elsewhere.
# NOTE(review): flag value 1 presumably marks land - confirm against the product docs.
land = np.where(ds_status_flag[0] == 1, 1, np.nan)

x, y = laea.transform_points(pc, buoys_sia.Lon.values, buoys_sia.Lat.values).T[:2]
# An axis with N nodes spans N - 1 steps between its first and last coordinate
# (regular spacing assumed), so the scale factor is size - 1, not size.
cols = np.round((x - ds_x[0]) / (ds_x[-1] - ds_x[0]) * (ds_x.size - 1)).astype(int)
rows = np.round((y - ds_y[0]) / (ds_y[-1] - ds_y[0]) * (ds_y.size - 1)).astype(int)
# Clip as in the per-year collocation loop: negative indices would otherwise wrap
# around silently and indices past the edge would raise when used for accumulation.
cols = np.clip(cols, 0, ds_x.size - 1)
rows = np.clip(rows, 0, ds_y.size - 1)


In [None]:
# Per-grid-cell mean of age_underestimation over all buoy records that fall in the cell.
grid_shape = (ds_y.size, ds_x.size)
overest_sum = np.zeros(grid_shape)
overest_cnt = np.zeros(grid_shape)
# np.add.at accumulates correctly when several records share the same (row, col).
np.add.at(overest_sum, (rows, cols), age_underestimation)
np.add.at(overest_cnt, (rows, cols), 1)
# Divide only where at least one record landed; cells without records stay 0.
# This avoids the 0/0 RuntimeWarning and the NaN patch-up afterwards.
overest_avg = np.divide(
    overest_sum,
    overest_cnt,
    out=np.zeros_like(overest_sum),
    where=overest_cnt > 0,
)

In [None]:
# Three-panel validation figure:
#   left   - 2-D histogram of product age (sia) against buoy age
#   middle - 2-D histogram of buoys_max_age against buoy age
#   right  - map of the per-cell mean age underestimation with the land mask on top
fig, axs = plt.subplots(1, 3, figsize=(15, 5))
ax_avg, ax_max, ax_map = axs
buoy_age_for_plot = buoys_sia.Age/365

# Left panel; hist2d returns (counts, xedges, yedges, image), only the image is kept.
*_, h0 = ax_avg.hist2d(
    buoys_sia.sia,
    buoy_age_for_plot,
    bins=[n_bins, n_bins],
    range=[[0, max_age], [0, max_age]],
    cmin=1,
    vmax=10000,
)
fig.colorbar(h0, ax=ax_avg, shrink=0.5)
ax_avg.plot(bins_x, bins_y, 'r-')
ax_avg.set_title(f'Correct: {correct_ratio_weighted_sia * 100:.1f} %')

# Middle panel with a one-year staircase (0,1) -> (1,1) -> (1,2) -> ... -> (6,6).
*_, h1 = ax_max.hist2d(
    buoys_max_age,
    buoy_age_for_plot,
    bins=[6, 24],
    range=[[0, 6], [0, 6]],
    cmin=1,
    vmax=20000,
)
stair_x = np.repeat(np.arange(7), 2)[1:-1]
stair_y = np.repeat(np.arange(1, 7), 2)
ax_max.plot(stair_x, stair_y, 'r--')
fig.colorbar(h1, ax=ax_max, shrink=0.5)
ax_max.set_title(f'Correct: {correct_ratio_max_sia * 100:.1f} %')

for panel in axs:
    panel.set_aspect('equal')
ax_avg.set_xlabel('LM-SIAge average age, years')
ax_max.set_xlabel('LM-SIAge max age, years')
ax_avg.set_ylabel('IABP buoy age, years')

# Right panel: the land mask is drawn after the average so that it sits on top.
imsh0 = ax_map.imshow(overest_avg, interpolation='nearest', clim=[0, 2])
ax_map.imshow(land, interpolation='nearest', clim=[0, 2], cmap='gray')
fig.colorbar(imsh0, ax=ax_map, shrink=0.5)
# Zoom to a sub-window of the grid; the y limits are given in descending order.
ax_map.set_xlim(100, 300)
ax_map.set_ylim(330, 100)
ax_map.set_axis_off()
ax_map.set_title('Age underestimation, years')

fig.tight_layout()
fig.savefig('../figures/figure15_validation.png', dpi=100, bbox_inches='tight', pad_inches=0.1)
plt.show()