In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle


In [2]:
import netCDF4 as nc
from scipy.interpolate import griddata
import netCDF4 as nc
from scipy.interpolate import RegularGridInterpolator
import time

# Field data: static grid variables are read once from a single model output file
fname = '/srv/scratch/z3533156/26year_BRAN2020/outer_avg_01461.nc'
dataset = nc.Dataset(fname)

# Swap the two axes of every 2-D grid field in one pass
lon_rho, lat_rho, mask_rho, h = (
    np.transpose(dataset.variables[name], axes=(1, 0))
    for name in ('lon_rho', 'lat_rho', 'mask_rho', 'h')
)
# Single value of the 'angle' variable, sampled at index [0, 0]
angle = dataset.variables['angle'][0, 0]

# 1-D profile: the whole first axis of the stored array at index 150 of its
# second and third axes (same values as transposing to (1, 2, 0) and taking [150, 150, :])
z_r = np.load('/srv/scratch/z5297792/z_r.npy')[:, 150, 150]

def distance(lat1, lon1, lat2, lon2, radius=6357):
    """Great-circle distance between two points using the haversine formula.

    Parameters
    ----------
    lat1, lon1 : float or array-like
        Latitude and longitude of the first point(s), in degrees.
    lat2, lon2 : float or array-like
        Latitude and longitude of the second point(s), in degrees.
        All four inputs are combined element-wise with NumPy broadcasting.
    radius : float, optional
        Sphere radius; the result is expressed in the same unit. Defaults to
        6357, the value this notebook has always used.
        NOTE(review): 6357 km is close to Earth's polar radius, the mean radius
        is ~6371 km -- confirm which one the stored xc/yc coordinates assume
        before changing the default.

    Returns
    -------
    float or numpy.ndarray
        Distance along the sphere, ``radius`` times the central angle.
    """
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlat, dlon = lat2 - lat1, lon2 - lon1
    a = np.sin(dlat/2)**2 + np.cos(lat1)*np.cos(lat2)*np.sin(dlon/2)**2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) NaN.
    a = np.clip(a, 0.0, 1.0)
    return radius * 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

# Indices of the central row/column of the (transposed) rho grid
i_mid = lon_rho.shape[0] // 2
j_mid = lon_rho.shape[1] // 2

# Spacing between neighbouring points along the first axis, measured on the central column
mid_col_lat, mid_col_lon = lat_rho[:, j_mid], lon_rho[:, j_mid]
dx = distance(mid_col_lat[:-1], mid_col_lon[:-1], mid_col_lat[1:], mid_col_lon[1:])

# Spacing between neighbouring points along the second axis, measured on the central row
mid_row_lat, mid_row_lon = lat_rho[i_mid, :], lon_rho[i_mid, :]
dy = distance(mid_row_lat[:-1], mid_row_lon[:-1], mid_row_lat[1:], mid_row_lon[1:])

# Cumulative distance from the first grid point gives the 1-D axes (origin at 0)
x_grid = np.insert(np.cumsum(dx), 0, 0)
y_grid = np.insert(np.cumsum(dy), 0, 0)
X_grid, Y_grid = np.meshgrid(x_grid, y_grid, indexing='ij')


In [3]:
# Raw eddy table as stored on disk; read_pickle unpickles the file, so only point it at trusted data
eddies_true_path = "/srv/scratch/z5297792/Chapter2/SEACOFS_26yr_Eddy_Dataset/df_eddies_1462_10650.pkl"
df_eddies_true = pd.read_pickle(eddies_true_path)
df_eddies_true


Unnamed: 0,Eddy_idx,Day,Cyc,nLon,nLat,nxc,nyc,nic,njc,xc,yc,w,Q11,Q12,Q22,Rc,psi0,Eddy,next_num
0,0,1462,AE,160.569072,-28.075269,830.0,1515.0,830,1515,830.505283,1516.358103,0.000017,0.002904,-0.000396,0.005650,24.781075,-36.259933,0,96650
1,1,1462,AE,155.725082,-27.535036,358.0,1408.0,358,1408,357.830407,1407.502025,0.000031,0.009018,-0.002230,0.006642,49.001445,-48.187084,1,96650
2,2,1462,CE,161.070068,-29.736625,928.0,1356.0,928,1356,928.354369,1356.372262,-0.000011,-0.001907,0.001393,-0.003376,73.000954,169.571400,2,96650
3,3,1462,CE,156.973832,-28.452969,506.0,1354.0,506,1354,505.897600,1353.012251,-0.000033,-0.008769,0.001546,-0.007521,69.500036,70.832145,3,96650
4,4,1462,AE,159.170505,-29.807511,754.0,1285.0,754,1285,753.364302,1284.532290,0.000022,0.006509,-0.001848,0.004420,64.751400,-23.291117,4,96650
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
412605,22,10650,AE,157.993708,-38.734350,912.0,300.0,912,300,911.755090,299.627733,0.000009,0.003026,-0.000616,0.001248,39.001497,-10.352792,96604,96650
412606,23,10650,CE,157.092189,-39.604423,860.0,180.0,860,180,860.284107,180.170175,-0.000008,-0.000905,-0.000273,-0.002966,59.000500,10.728906,96577,96650
412607,24,10650,AE,151.460338,-38.218269,349.0,158.0,349,158,350.853554,156.210164,0.000033,0.010661,0.001276,0.005981,38.006653,-17.427862,96628,96650
412609,26,10650,AE,156.242892,-40.165033,805.0,95.0,805,95,804.758015,95.239729,0.000011,0.003095,0.001063,0.002337,44.750654,-15.333051,96572,96650


In [4]:
# Clean: keep the columns used downstream, ordered by eddy and then by day
keep_cols = ['Eddy', 'Day', 'Cyc', 'xc', 'yc', 'w', 'Q11', 'Q12', 'Q22', 'Rc', 'psi0']
df_eddies = df_eddies_true[keep_cols].copy().sort_values(by=['Eddy', 'Day'])


def _spans_at_least_21_days(track):
    """True when the track's last Day is at least 21 after its first Day."""
    return track['Day'].max() - track['Day'].min() >= 21


# Keep only mesoscale - older than 3 weeks
df_eddies = df_eddies.groupby('Eddy').filter(_spans_at_least_21_days)
# Relabel the Eddy's 1,2,3..
df_eddies['Eddy'] = df_eddies['Eddy'].rank(method='dense').astype(int)

# Find Lon and Lat values by interpolating the grid fields at each (yc, xc) centre;
# centres outside the grid axes come back as NaN instead of raising
xg, yg = x_grid, y_grid
interp_options = dict(bounds_error=False, fill_value=np.nan)
lon_interp = RegularGridInterpolator((yg, xg), lon_rho.T, **interp_options)
lat_interp = RegularGridInterpolator((yg, xg), lat_rho.T, **interp_options)
points = np.column_stack((df_eddies['yc'].to_numpy(), df_eddies['xc'].to_numpy()))
df_eddies['Lon'] = lon_interp(points)
df_eddies['Lat'] = lat_interp(points)

# Re order columns so that Lon/Lat sit immediately after 'Cyc'
cols = df_eddies.columns.tolist()
base = [c for c in cols if c not in ('Lon', 'Lat')]
i = cols.index('Cyc') + 1
new_order = [*base[:i], 'Lon', 'Lat', *base[i:]]
df_eddies = df_eddies.loc[:, new_order].reset_index(drop=True)
df_eddies


Unnamed: 0,Eddy,Day,Cyc,Lon,Lat,xc,yc,w,Q11,Q12,Q22,Rc,psi0
0,1,1462,CE,156.969773,-28.461100,505.897600,1353.012251,-0.000033,-0.008769,0.001546,-0.007521,69.500036,70.832145
1,1,1463,CE,156.993096,-28.433992,507.149717,1356.633687,-0.000030,-0.007934,0.000890,-0.007072,52.250752,156.388574
2,1,1464,CE,157.097940,-28.381485,515.130000,1365.650728,-0.000028,-0.006920,0.001050,-0.006852,67.000485,56.418837
3,1,1465,CE,157.094334,-28.315658,512.570772,1372.426342,-0.000033,-0.006959,0.002144,-0.009312,52.001786,31.355771
4,1,1466,CE,157.106324,-28.346479,514.728351,1369.599726,-0.000029,-0.005883,0.001179,-0.008429,56.251116,29.524244
...,...,...,...,...,...,...,...,...,...,...,...,...,...
100973,2975,10646,AE,157.825620,-37.030656,850.502045,476.447416,0.000013,0.002921,0.000642,0.003759,72.751791,-76.302293
100974,2975,10647,AE,157.880796,-36.972725,853.608024,484.316677,0.000014,0.002819,0.000983,0.003948,74.250924,-47.064730
100975,2975,10648,AE,157.868516,-36.874363,849.759258,494.400614,0.000014,0.002968,0.000566,0.003910,75.750780,-79.414689
100976,2975,10649,AE,157.753058,-36.882303,840.037603,489.981707,0.000013,0.002804,0.000722,0.003863,76.000006,-92.084211


In [5]:
# Fill in missing eddy data
def interpolate_eddy_tracks(df):
    """Fill day gaps in every eddy track so each track has one row per day.

    For each ``Eddy`` group, a row is created for every integer ``Day`` between
    the group's first and last day. Rows that did not exist get their numeric
    columns by pandas' default (linear) ``Series.interpolate`` and their
    ``Cyc`` label by forward- then backward-fill.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Eddy', 'Day', 'Cyc' and the numeric columns 'Lon', 'Lat',
        'xc', 'yc', 'w', 'Q11', 'Q12', 'Q22', 'Rc', 'psi0'. 'Day' must be
        integer-valued because it is fed to ``range``. The input is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame with columns 'Day', 'Eddy' followed by the remaining input
        columns, sorted by ['Eddy', 'Day']. Columns other than the ones listed
        above stay NaN on the inserted rows. When the input yields no groups
        (e.g. an empty frame) an empty frame with that same column layout is
        returned instead of letting ``pd.concat`` fail on an empty list.

    Raises
    ------
    KeyError
        If a required column is missing.
    """
    numeric_cols = ['Lon', 'Lat', 'xc', 'yc', 'w', 'Q11', 'Q12', 'Q22', 'Rc', 'psi0']
    df_interp = []

    for eddy_id, group in df.groupby('Eddy'):
        # Build a full range of days for this eddy
        full_days = pd.DataFrame({'Day': range(group['Day'].min(), group['Day'].max() + 1)})
        full_days['Eddy'] = eddy_id

        # Left merge: existing rows keep their values, missing days become NaN rows
        merged = pd.merge(full_days, group, on=['Eddy', 'Day'], how='left')

        # Interpolate numerical columns across the inserted days
        for col in numeric_cols:
            merged[col] = merged[col].interpolate()

        # Fill categorical column
        merged['Cyc'] = merged['Cyc'].ffill().bfill()

        df_interp.append(merged)

    if not df_interp:
        # pd.concat([]) raises ValueError; mirror the normal column layout instead
        lead = ['Day', 'Eddy']
        return df.iloc[0:0][lead + [c for c in df.columns if c not in lead]].copy()

    df_result = pd.concat(df_interp, ignore_index=True).sort_values(by=['Eddy', 'Day'])

    return df_result

# Gap-fill every track so each eddy has one row per day
df_eddies = interpolate_eddy_tracks(df_eddies)
# 'Age' holds the number of rows in the eddy's gap-filled track, repeated on every row of that eddy
df_eddies['Age'] = df_eddies.groupby('Eddy')['Eddy'].transform('count')

# Calendar date: 'Day' is treated as a day offset from 1990-01-01
df_eddies['Date'] = pd.Timestamp('1990-01-01') + pd.to_timedelta(df_eddies.Day, unit='D')

# Name of the model output file for each row: file numbers step by 30 starting
# at 01461, and days 1462..1491 map to the first file
fnames = [
    f"/srv/scratch/z3533156/26year_BRAN2020/outer_avg_{1461 + ((day - 1462) // 30) * 30:05}.nc"
    for day in df_eddies['Day']
]
df_eddies['fname'] = fnames

# Find closest grid points
from scipy.spatial import cKDTree
tree = cKDTree(np.column_stack((X_grid.ravel(), Y_grid.ravel())))
xcs = df_eddies['xc'].to_numpy()
ycs = df_eddies['yc'].to_numpy()
valid = np.isfinite(xcs) & np.isfinite(ycs)

# Defaults for rows without a finite centre: -1 flat index, NaN grid indices
idx = np.full(xcs.shape, -1, dtype=int)
ics = np.full(xcs.shape, np.nan)
jcs = np.full(ycs.shape, np.nan)
if valid.any():
    _, ind = tree.query(np.column_stack((xcs[valid], ycs[valid])))
    # The tree was built from the ravelled meshgrid, so the flat index unravels on X_grid.shape
    ii, jj = np.unravel_index(ind, X_grid.shape)
    idx[valid] = ind  # keep for optional debugging
    ics[valid] = ii
    jcs[valid] = jj

# Insert after 'Lat' if present, else at the end.
# (get_indexer_for returns [-1] for a missing label, so its length cannot be
# used as the presence test; check membership explicitly.)
if 'Lat' in df_eddies.columns:
    insert_at = df_eddies.columns.get_loc('Lat') + 1
else:
    insert_at = len(df_eddies.columns)
# Nullable Int64 keeps rows without a grid match as <NA> instead of forcing floats
df_eddies.insert(insert_at, 'ic', pd.Series(ics, index=df_eddies.index).astype('Int64'))
df_eddies.insert(insert_at + 1, 'jc', pd.Series(jcs, index=df_eddies.index).astype('Int64'))
# Put the identifying columns first
df_eddies = df_eddies[['Eddy', 'Day'] + [col for col in df_eddies.columns if col not in ['Eddy', 'Day']]]
df_eddies


Unnamed: 0,Eddy,Day,Cyc,Lon,Lat,ic,jc,xc,yc,w,Q11,Q12,Q22,Rc,psi0,Age,Date,fname
0,1,1462,CE,156.969773,-28.461100,179,274,505.897600,1353.012251,-0.000033,-0.008769,0.001546,-0.007521,69.500036,70.832145,57,1994-01-02,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
1,1,1463,CE,156.993096,-28.433992,179,275,507.149717,1356.633687,-0.000030,-0.007934,0.000890,-0.007072,52.250752,156.388574,57,1994-01-03,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
2,1,1464,CE,157.097940,-28.381485,181,276,515.130000,1365.650728,-0.000028,-0.006920,0.001050,-0.006852,67.000485,56.418837,57,1994-01-04,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
3,1,1465,CE,157.094334,-28.315658,181,278,512.570772,1372.426342,-0.000033,-0.006959,0.002144,-0.009312,52.001786,31.355771,57,1994-01-05,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
4,1,1466,CE,157.106324,-28.346479,181,277,514.728351,1369.599726,-0.000029,-0.005883,0.001179,-0.008429,56.251116,29.524244,57,1994-01-06,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117257,2975,10646,AE,157.825620,-37.030656,248,97,850.502045,476.447416,0.000013,0.002921,0.000642,0.003759,72.751791,-76.302293,24,2019-02-24,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
117258,2975,10647,AE,157.880796,-36.972725,249,98,853.608024,484.316677,0.000014,0.002819,0.000983,0.003948,74.250924,-47.064730,24,2019-02-25,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
117259,2975,10648,AE,157.868516,-36.874363,248,100,849.759258,494.400614,0.000014,0.002968,0.000566,0.003910,75.750780,-79.414689,24,2019-02-26,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
117260,2975,10649,AE,157.753058,-36.882303,246,99,840.037603,489.981707,0.000013,0.002804,0.000722,0.003863,76.000006,-92.084211,24,2019-02-27,/srv/scratch/z3533156/26year_BRAN2020/outer_av...


In [6]:
# Eddies whose mean Rc over the whole track is <= 15. These IDs are the ones
# removed by the mean-Rc filter in the next cell.
# One pass over the groups instead of re-masking the full frame for every eddy;
# sort=False keeps the order in which eddies first appear, as Eddy.unique() did.
small_eddies = [
    eddy
    for eddy, track in df_eddies.groupby('Eddy', sort=False)
    if track.Rc.mean() <= 15
]
small_eddies


[np.int64(65),
 np.int64(76),
 np.int64(78),
 np.int64(81),
 np.int64(86),
 np.int64(87),
 np.int64(88),
 np.int64(90),
 np.int64(91),
 np.int64(94),
 np.int64(108),
 np.int64(204),
 np.int64(207),
 np.int64(209),
 np.int64(286),
 np.int64(671),
 np.int64(768),
 np.int64(786),
 np.int64(788),
 np.int64(794),
 np.int64(797),
 np.int64(801),
 np.int64(923),
 np.int64(927),
 np.int64(1008),
 np.int64(1032),
 np.int64(1113),
 np.int64(1128),
 np.int64(1134),
 np.int64(1159),
 np.int64(1249),
 np.int64(1250),
 np.int64(1374),
 np.int64(1381),
 np.int64(1395),
 np.int64(1476),
 np.int64(1477),
 np.int64(1508),
 np.int64(1515),
 np.int64(1754),
 np.int64(2198),
 np.int64(2292),
 np.int64(2301),
 np.int64(2406),
 np.int64(2436),
 np.int64(2910),
 np.int64(2926),
 np.int64(2934)]

In [7]:
def _mean_radius_above_15(track):
    """True when the track's mean Rc is strictly greater than 15."""
    return track['Rc'].mean() > 15


# Keep only mesoscale - larger than 30km scale
df_eddies = df_eddies.groupby('Eddy').filter(_mean_radius_above_15)
df_eddies = df_eddies.reset_index(drop=True)
# Relabel the Eddy's 1,2,3.. so the IDs are contiguous again after the removal
df_eddies['Eddy'] = df_eddies['Eddy'].rank(method='dense').astype(int)


In [8]:
# Persist the processed table; the file name records the first and last Day it covers
day_first, day_last = df_eddies.Day.min(), df_eddies.Day.max()
processed_path = f'/srv/scratch/z5297792/Chapter2/SEACOFS_26yr_Eddy_Dataset/df_eddies_processed_{day_first}_{day_last}.pkl'
df_eddies.to_pickle(processed_path)


#### Extra code to update old data to take into account small eddy removal

In [9]:
# NOTE(review): `break` outside a loop is a SyntaxError, so this cell always fails.
# Presumably a deliberate guard that stops "Run All" before the one-off
# migration cells below -- confirm before removing or replacing it.
break

SyntaxError: 'break' outside loop (668683560.py, line 1)

In [None]:
# Eddy IDs to drop: the small eddies found before the final relabelling
removed_eddies_updated = small_eddies

# Original eddy IDs ran 1..2975
# NOTE(review): 2975 is hard-coded; confirm it still matches the eddy count
# before the small-eddy removal whenever the input dataset changes.
N_ORIGINAL_EDDIES = 2975
original_eddies = list(range(1, N_ORIGINAL_EDDIES + 1))

# Surviving IDs, in their original ascending order
removed_lookup = set(removed_eddies_updated)
remaining_eddies_updated = [e for e in original_eddies if e not in removed_lookup]

# Mapping old → new: survivors are renumbered consecutively from 1
eddy_mapping_updated = {
    old: new for new, old in enumerate(remaining_eddies_updated, start=1)
}
# eddy_mapping_updated


In [None]:
source_pkl = '/srv/scratch/z5297792/Chapter2/SEACOFS_26yr_Eddy_Dataset/Vertical_Eddy_Dataset/including_small_eddies/dic_vert_eddies_including_meanR_le_15.pkl'
target_pkl = '/srv/scratch/z5297792/Chapter2/SEACOFS_26yr_Eddy_Dataset/Vertical_Eddy_Dataset/dic_vert_eddies.pkl'

# pickle.load can execute arbitrary code; only use it on files you produced yourself
with open(source_pkl, 'rb') as f:
    eddies_dict = pickle.load(f)

# Remove unwanted eddies (keys look like "Eddy<old id>"; absent keys are ignored)
for rm in removed_eddies_updated:
    eddies_dict.pop(f"Eddy{rm}", None)

# Rename keys based on mapping: the digits after the 4-character "Eddy" prefix are the old ID
renamed = {}
for old_key, value in eddies_dict.items():
    old_id = int(old_key[4:])
    renamed[f"Eddy{eddy_mapping_updated[old_id]}"] = value
eddies_dict = renamed

with open(target_pkl, "wb") as f:
    pickle.dump(eddies_dict, f)

