In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle


In [None]:
import netCDF4 as nc
from scipy.interpolate import griddata
import netCDF4 as nc
from scipy.interpolate import RegularGridInterpolator
import time

# Field Data
#
# Read the static grid fields (lon/lat, land mask, bathymetry, grid angle) from
# one model output file, then load a pre-computed z_r array from disk.

fname = '/srv/scratch/z3533156/26year_BRAN2020/outer_avg_01461.nc'

# Open the file with a context manager so the handle is closed as soon as the
# fields are read. np.transpose and the [0, 0] index both materialise the data
# in memory, so nothing below needs the file to stay open.
# NOTE: `dataset` is still bound after this block but refers to a closed file.
with nc.Dataset(fname) as dataset:
    # 2-D fields are transposed so that their two axes are swapped relative to
    # the on-disk layout.
    lon_rho = np.transpose(dataset.variables['lon_rho'], axes=(1, 0))
    lat_rho = np.transpose(dataset.variables['lat_rho'], axes=(1, 0))
    mask_rho = np.transpose(dataset.variables['mask_rho'], axes=(1, 0))
    h = np.transpose(dataset.variables['h'], axes=(1, 0))
    # Only the value at index [0, 0] of the angle field is kept.
    angle = dataset.variables['angle'][0, 0]

# z_r is stored with its last-kept axis first; after moving that axis to the
# end, keep the 1-D profile at index (150, 150).
# NOTE(review): presumably a vertical depth profile at one grid point - confirm.
z_r = np.load('/srv/scratch/z5297792/z_r.npy')
z_r = np.transpose(z_r, (1, 2, 0))[150, 150, :]

def distance(lat1, lon1, lat2, lon2):
    """Great-circle distance between two points using the haversine formula.

    Parameters
    ----------
    lat1, lon1 : float or array-like
        Latitude and longitude of the first point(s), in degrees.
    lat2, lon2 : float or array-like
        Latitude and longitude of the second point(s), in degrees.

    Returns
    -------
    float or numpy.ndarray
        Distance on a sphere of radius ``EARTH_RADIUS`` (6357), expressed in
        the same length unit as that radius. Array inputs are handled
        element-wise.
    """
    EARTH_RADIUS = 6357
    # Work in radians throughout.
    phi1, lam1, phi2, lam2 = (np.radians(v) for v in (lat1, lon1, lat2, lon2))
    half_dphi = (phi2 - phi1) / 2
    half_dlam = (lam2 - lam1) / 2
    # Haversine of the central angle between the two points.
    hav = np.sin(half_dphi)**2 + np.cos(phi1)*np.cos(phi2)*np.sin(half_dlam)**2
    central_angle = 2 * np.arctan2(np.sqrt(hav), np.sqrt(1 - hav))
    return EARTH_RADIUS * central_angle

# Build along-grid distance axes from the lon/lat fields, then resample
# lon/lat onto a uniform grid in that distance space.

# Indices of the central row/column of the grid.
i_mid, j_mid = (extent // 2 for extent in lon_rho.shape)

# Spacing between neighbouring points, measured along the central column
# (first axis -> dx) and along the central row (second axis -> dy).
lat_col, lon_col = lat_rho[:, j_mid], lon_rho[:, j_mid]
dx = distance(lat_col[:-1], lon_col[:-1], lat_col[1:], lon_col[1:])
lat_row, lon_row = lat_rho[i_mid, :], lon_rho[i_mid, :]
dy = distance(lat_row[:-1], lon_row[:-1], lat_row[1:], lon_row[1:])

# Cumulative distance from the first grid point, with a leading 0 for the origin.
x_grid = np.insert(dx.cumsum(), 0, 0)
y_grid = np.insert(dy.cumsum(), 0, 0)
X_grid, Y_grid = np.meshgrid(x_grid, y_grid, indexing='ij')

# Uniform target axes; the end points x_grid[-1] / y_grid[-1] are excluded
# by np.arange.
res = 1  # 1 km resolution
x_new = np.arange(0, x_grid[-1], res)
y_new = np.arange(0, y_grid[-1], res)
X_new, Y_new = np.meshgrid(x_new, y_new, indexing='ij')
new_points = np.stack((X_new.ravel(), Y_new.ravel()), axis=-1)

# Linear interpolators over the (x_grid, y_grid) axes; queries outside the
# axes yield NaN instead of raising.
_interp_opts = dict(method='linear', bounds_error=False, fill_value=np.nan)
interp_lon = RegularGridInterpolator((x_grid, y_grid), lon_rho, **_interp_opts)
interp_lat = RegularGridInterpolator((x_grid, y_grid), lat_rho, **_interp_opts)

# Evaluate on the uniform grid and restore the 2-D (x, y) layout.
lon_new = interp_lon(new_points).reshape(X_new.shape)
lat_new = interp_lat(new_points).reshape(X_new.shape)


In [None]:
# Load the raw eddy table from a pickle file and display it.
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
df_eddies_true = pd.read_pickle("/srv/scratch/z5297792/Chapter2/df_eddies_1462_10650.pkl")
df_eddies_true


In [None]:
# Refine eddy data to mesoscale
# Sort by track and day, and discard the two columns that are not carried forward.
df_eddies = (
    df_eddies_true
    .sort_values(by=['ID', 'Day'])
    .drop(['Eddy', 'next_num'], axis=1)
    .copy()
)
# Keep only tracks with at least 21 rows (the "3 Weeks" threshold).
rows_per_track = df_eddies.groupby('ID')['ID'].transform('count')
df_eddies = df_eddies[rows_per_track >= 21]
# Renumber the surviving IDs as consecutive integers starting at 1.
df_eddies['ID'] = df_eddies['ID'].rank(method='dense').astype(int)
# Move 'ID' to the first column, leaving the other columns in their order.
non_id_cols = [name for name in df_eddies.columns if name != 'ID']
df_eddies = df_eddies[['ID', *non_id_cols]]

# Find Lon and Lat values
# The 1-D axes of the uniform grid: first column of X_new, first row of Y_new.
xg = X_new[:, 0]
yg = Y_new[0, :]
# Interpolators are indexed (y, x), hence the transposed fields; points that
# fall outside the grid evaluate to NaN.
nan_outside = dict(bounds_error=False, fill_value=np.nan)
lon_interp = RegularGridInterpolator((yg, xg), lon_new.T, **nan_outside)
lat_interp = RegularGridInterpolator((yg, xg), lat_new.T, **nan_outside)
points = np.column_stack((df_eddies['y0'], df_eddies['x0']))
lon_vals = lon_interp(points)
lat_vals = lat_interp(points)
df_eddies['Lon'] = lon_vals
df_eddies['Lat'] = lat_vals

# Reorder so that 'Lon' and 'Lat' sit directly after the 'njc' column.
cols = list(df_eddies.columns)
njc_idx = cols.index('njc')
leading = cols[:njc_idx + 1]
already_placed = set(leading) | {'Lon', 'Lat'}
new_order = leading + ['Lon', 'Lat'] + [name for name in cols if name not in already_placed]
df_eddies = df_eddies[new_order].reset_index(drop=True)

# Drop Nencioli data: every column whose name starts with 'n'.
n_prefixed = [name for name in df_eddies.columns if name.startswith('n')]
df_eddies = df_eddies.drop(columns=n_prefixed).copy()
df_eddies


In [None]:
# Fill in missing eddy data

def interpolate_eddy_tracks(df):
    """Fill day gaps in every eddy track.

    For each 'ID', the track is expanded to one row per integer day between
    its first and last 'Day'. On the inserted rows, 'Lon', 'Lat', 'x0' and
    'y0' are linearly interpolated and 'Cyc' is forward- then backward-filled.
    Any other column is left as NaN on inserted rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'ID', 'Day' (integer), 'Lon', 'Lat', 'x0',
        'y0' and 'Cyc'. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Gap-filled table sorted by ['ID', 'Day'], with 'Day' and 'ID' as the
        first two columns. If the input yields no groups (for example an
        empty frame), an empty frame with the same column layout is returned
        instead of letting ``pd.concat`` raise ``ValueError``.
    """
    interpolated_cols = ('Lon', 'Lat', 'x0', 'y0')
    filled_tracks = []

    for eddy_id, track in df.groupby('ID'):
        # Every day between the first and last observation of this eddy.
        full_days = pd.DataFrame({'Day': range(track['Day'].min(), track['Day'].max() + 1)})
        full_days['ID'] = eddy_id

        # Left merge: days without an observation become rows of NaN.
        merged = pd.merge(full_days, track, on=['ID', 'Day'], how='left')

        # Linear interpolation across the gaps for the positional columns.
        for col in interpolated_cols:
            merged[col] = merged[col].interpolate()

        # Categorical column: propagate the nearest known value.
        merged['Cyc'] = merged['Cyc'].ffill().bfill()

        filled_tracks.append(merged)

    if not filled_tracks:
        # pd.concat([]) raises ValueError; return an empty frame laid out like
        # the normal result ('Day', 'ID' first, then the remaining columns).
        lead = ['Day', 'ID']
        ordered = lead + [col for col in df.columns if col not in lead]
        return df.iloc[:0][ordered].copy()

    return pd.concat(filled_tracks, ignore_index=True).sort_values(by=['ID', 'Day'])

# Gap-fill every track, then record how many rows each filled track has.
df_eddies = interpolate_eddy_tracks(df_eddies)
df_eddies['Age'] = df_eddies.groupby('ID')['ID'].transform('count')


def _outer_avg_path(day):
    """Return the model output file path for `day`.

    File numbers start at 1461 and advance in steps of 30; days are counted
    from 1462.
    """
    file_number = 1461 + ((day - 1462) // 30) * 30
    return f"/srv/scratch/z3533156/26year_BRAN2020/outer_avg_{file_number:05}.nc"


fnames = list(map(_outer_avg_path, df_eddies['Day']))
df_eddies['fname'] = fnames

# Find closest grid points
# A KD-tree over every (x, y) node of the grid gives the flat index of the node
# nearest to each eddy centre.
from scipy.spatial import cKDTree
points = np.stack((X_grid.ravel(), Y_grid.ravel()), axis=-1)
tree = cKDTree(points)
x0s, y0s = (df_eddies[name].to_numpy() for name in ('x0', 'y0'))
query_points = np.stack((x0s, y0s), axis=-1)
indices = tree.query(query_points)[1]  # [0] holds the distances, unused here
# Convert flat indices back to (i, j) positions in the 2-D grid.
ics, jcs = (axis_idx.astype(int) for axis_idx in np.unravel_index(indices, X_grid.shape))

# Place 'ic' and 'jc' immediately after the 'Lat' column.
insert_at = df_eddies.columns.get_loc('Lat') + 1
df_eddies.insert(insert_at, 'ic', ics)
df_eddies.insert(insert_at + 1, 'jc', jcs)
df_eddies


In [None]:
# Write the processed eddy table to disk as a pickle file.
df_eddies.to_pickle('/srv/scratch/z5297792/Clim_data/df_eddies_processed.pkl')
