In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle


In [2]:
import netCDF4 as nc
from scipy.interpolate import griddata
import netCDF4 as nc
from scipy.interpolate import RegularGridInterpolator
import time

# Field Data
# Static grid fields are read from a single model output file. The file handle
# is only needed while the arrays are being read, so it is opened in a `with`
# block and closed as soon as everything is in memory (reopen the file if more
# variables are needed later).
fname = '/srv/scratch/z3533156/26year_BRAN2020/outer_avg_01461.nc'

with nc.Dataset(fname) as dataset:
    # Every 2-D field has its two axes swapped, so the first index of the
    # in-memory arrays runs along the file's second dimension.
    lon_rho = np.transpose(dataset.variables['lon_rho'], axes=(1, 0))
    lat_rho = np.transpose(dataset.variables['lat_rho'], axes=(1, 0))
    mask_rho = np.transpose(dataset.variables['mask_rho'], axes=(1, 0))
    h = np.transpose(dataset.variables['h'], axes=(1, 0))
    # Only the [0, 0] element of 'angle' is kept.
    # NOTE(review): presumably the grid rotation is uniform - confirm.
    angle = dataset.variables['angle'][0, 0]

# Pre-computed array loaded from disk; its axes are reordered to (1, 2, 0) and
# only the 1-D profile at index (150, 150) is kept.
# NOTE(review): presumably a representative depth column - confirm the choice of (150, 150).
z_r = np.load('/srv/scratch/z5297792/z_r.npy')
z_r = np.transpose(z_r, (1, 2, 0))[150, 150, :]

def distance(lat1, lon1, lat2, lon2, radius=6357):
    """Great-circle (haversine) distance between points given in degrees.

    Parameters
    ----------
    lat1, lon1 : float or array-like
        Latitude and longitude of the start point(s), in degrees.
    lat2, lon2 : float or array-like
        Latitude and longitude of the end point(s), in degrees. Array inputs
        are combined element-wise following NumPy broadcasting.
    radius : float, optional
        Radius of the sphere; the result is expressed in the same unit.
        Defaults to 6357, the value this notebook has always used.
        NOTE(review): 6357 km is close to Earth's polar radius, while the
        commonly quoted mean radius is about 6371 km. The default is left
        untouched because the grid coordinates derived from it presumably
        have to stay consistent with the stored eddy positions - confirm
        before changing it.

    Returns
    -------
    float or numpy.ndarray
        Distance along the sphere between each pair of points.
    """
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlat, dlon = lat2 - lat1, lon2 - lon1
    a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1]; clipping
    # keeps both square roots real (no NaN for near-antipodal points).
    a = np.clip(a, 0.0, 1.0)
    return radius * 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

# Build a rectilinear (x, y) coordinate system from the lon/lat fields:
# spacing along the first axis is measured on the central column, spacing
# along the second axis on the central row.
i_mid, j_mid = (extent // 2 for extent in lon_rho.shape)

# Distances between consecutive points along the first axis (at j = j_mid).
lat_along_i, lon_along_i = lat_rho[:, j_mid], lon_rho[:, j_mid]
dx = distance(lat_along_i[:-1], lon_along_i[:-1],
              lat_along_i[1:], lon_along_i[1:])

# Distances between consecutive points along the second axis (at i = i_mid).
lat_along_j, lon_along_j = lat_rho[i_mid, :], lon_rho[i_mid, :]
dy = distance(lat_along_j[:-1], lon_along_j[:-1],
              lat_along_j[1:], lon_along_j[1:])

# Running sums of the spacings, with the origin (0) prepended.
x_grid = np.insert(dx.cumsum(), 0, 0.0)
y_grid = np.insert(dy.cumsum(), 0, 0.0)

# 2-D coordinate arrays indexed [i, j], matching lon_rho / lat_rho.
X_grid, Y_grid = np.meshgrid(x_grid, y_grid, indexing='ij')


In [3]:
# Load the raw eddy table from disk and display it.
df_eddies_true = pd.read_pickle(
    "/srv/scratch/z5297792/Chapter2/SEACOFS_26yr_Eddy_Dataset/Eddy_Data/df_eddies_1462_6994.pkl"
)
df_eddies_true


Unnamed: 0,Eddy_idx,Day,Cyc,nLon,nLat,nxc,nyc,nic,njc,xc,...,w,q11,q12,q22,Rc,psi0,A,R,Eddy,next_num
0,0,1462,AE,160.569072,-28.075269,830.0,1515.0,830,1515,830.505283,...,0.000017,0.720417,-0.098272,1.401490,98.027033,-40.680969,0.004234,24.781075,0,66683
1,1,1462,AE,155.725082,-27.535036,358.0,1408.0,358,1408,357.830407,...,0.000031,1.216814,-0.300862,0.896207,82.818108,-45.872592,0.006688,49.001445,1,66683
2,2,1462,CE,161.070068,-29.736625,928.0,1356.0,928,1356,928.354369,...,-0.000011,0.899317,-0.656994,1.591920,102.471873,40.739860,-0.003880,73.000954,2,66683
3,3,1462,CE,156.973832,-28.452969,506.0,1354.0,506,1354,505.897600,...,-0.000033,1.099922,-0.193920,0.943344,104.792967,71.469206,-0.006508,69.500036,3,66683
4,4,1462,AE,159.170505,-29.807511,754.0,1285.0,754,1285,753.364302,...,0.000022,1.292653,-0.367089,0.877850,101.823569,-43.652129,0.004210,64.751400,4,66683
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
250780,33,6994,AE,150.609371,-37.117982,240.0,247.0,240,247,239.955803,...,0.000027,0.504426,0.249467,2.105826,25.283672,-5.048287,0.007897,20.503427,66625,66683
250781,34,6994,AE,157.263745,-39.080598,860.0,241.0,860,241,859.799709,...,0.000009,0.934116,0.134339,1.089851,51.127649,-5.051486,0.001932,61.001612,66681,66683
250782,35,6994,CE,151.568833,-37.626142,338.0,223.0,338,223,338.236030,...,-0.000029,0.665666,-0.414943,1.760910,52.327831,20.634722,-0.007536,37.750457,66682,66683
250783,36,6994,AE,151.007779,-38.616745,325.0,103.0,325,103,325.263518,...,0.000040,1.163492,-0.517956,1.090062,62.568744,-36.106441,0.009223,42.000616,65229,66683


In [4]:
# Clean
def _lasts_three_weeks(track):
    """True when a track's first and last recorded days are at least 21 days apart."""
    return track['Day'].max() - track['Day'].min() >= 21

# Keep only the columns used downstream, ordered by eddy and then by day.
track_columns = ['Eddy', 'Day', 'Cyc', 'xc', 'yc', 'w', 'q11', 'q12', 'q22', 'Rc', 'psi0', 'A', 'R']
df_eddies = df_eddies_true.loc[:, track_columns].copy()
df_eddies = df_eddies.sort_values(by=['Eddy', 'Day'])
df_eddies = df_eddies.groupby('Eddy').filter(_lasts_three_weeks)  # Keep only mesoscale - older than 3 weeks
df_eddies['Eddy'] = df_eddies['Eddy'].rank(method='dense').astype(int)  # Relabel the Eddy's 1,2,3..

# Find Lon and Lat values
# The interpolators are defined on (y, x), hence the transposed fields and the
# (yc, xc) ordering of the query points. Centres outside the grid give NaN.
xg, yg = x_grid, y_grid
interp_options = dict(bounds_error=False, fill_value=np.nan)
lon_interp = RegularGridInterpolator((yg, xg), lon_rho.T, **interp_options)
lat_interp = RegularGridInterpolator((yg, xg), lat_rho.T, **interp_options)
points = df_eddies[['yc', 'xc']].to_numpy()
df_eddies['Lon'] = lon_interp(points)
df_eddies['Lat'] = lat_interp(points)

# Re order columns
# Move the freshly appended Lon/Lat columns to sit right after 'Cyc'.
cols = list(df_eddies.columns)
i = cols.index('Cyc') + 1
base = [name for name in cols if name != 'Lon' and name != 'Lat']
new_order = [*base[:i], 'Lon', 'Lat', *base[i:]]
df_eddies = df_eddies[new_order].reset_index(drop=True)
df_eddies


Unnamed: 0,Eddy,Day,Cyc,Lon,Lat,xc,yc,w,q11,q12,q22,Rc,psi0,A,R
0,1,1462,CE,156.969773,-28.461100,505.897600,1353.012251,-0.000033,1.099922,-0.193920,0.943344,104.792967,71.469206,-0.006508,69.500036
1,1,1463,CE,156.993096,-28.433992,507.149717,1356.633687,-0.000030,1.066764,-0.119714,0.950849,102.305989,70.018058,-0.006690,52.250752
2,1,1464,CE,157.097940,-28.381485,515.130000,1365.650728,-0.000028,1.016800,-0.154298,1.006892,115.245476,75.250400,-0.005666,67.000485
3,1,1465,CE,157.094334,-28.315658,512.570772,1372.426342,-0.000033,0.896874,-0.276325,1.200119,90.907044,55.585304,-0.006726,52.001786
4,1,1466,CE,157.106324,-28.346479,514.728351,1369.599726,-0.000029,0.847358,-0.169788,1.214160,100.425496,58.705551,-0.005821,56.251116
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61643,1824,6989,CE,154.046048,-37.890174,554.565159,269.997103,-0.000007,1.213967,0.044797,0.825399,73.585707,15.235247,-0.002814,53.000908
61644,1824,6991,CE,154.121845,-37.531688,549.670958,309.999231,-0.000008,3.146780,0.016008,0.317867,56.081580,5.657382,-0.001799,42.013350
61645,1824,6992,CE,154.003467,-37.645734,543.275124,294.416933,-0.000006,1.417053,-0.146573,0.720851,72.785024,11.906976,-0.002248,41.251686
61646,1824,6993,CE,153.701377,-37.869010,524.903624,261.812814,-0.000011,0.915386,0.170795,1.124303,79.974749,14.033126,-0.002194,43.250303


In [5]:
# Fill in missing eddy data
def interpolate_eddy_tracks(df):
    """Fill the day gaps inside every eddy track.

    Each eddy is re-indexed onto every integer day between its first and last
    recorded day. On the inserted days the numeric columns are linearly
    interpolated between the neighbouring recorded rows, and ``Cyc`` is copied
    from the previous recorded row (or the next one if there is none before).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Eddy``, ``Day`` (integer), ``Cyc`` and the numeric
        columns ``Lon, Lat, xc, yc, w, q11, q12, q22, Rc, psi0, A, R``.
        The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per eddy per day, sorted by ``Eddy`` then ``Day``, with ``Day``
        and ``Eddy`` as the leading columns. Columns not listed above are
        carried over and left as NaN on the inserted days. If ``df`` holds no
        tracks, an empty copy with the input's columns is returned.
    """
    interpolated_cols = ['Lon', 'Lat', 'xc', 'yc',
                         'w', 'q11', 'q12', 'q22',
                         'Rc', 'psi0', 'A', 'R']

    tracks = []
    for eddy_id, group in df.groupby('Eddy'):
        # Build a full range of days for this eddy
        full_days = pd.DataFrame({'Day': range(group['Day'].min(), group['Day'].max() + 1)})
        full_days['Eddy'] = eddy_id

        # Left-merge so that days without a record become all-NaN rows
        merged = pd.merge(full_days, group, on=['Eddy', 'Day'], how='left')

        # Rows are now exactly one day apart, so the default (linear, by row
        # position) interpolation is linear in time.
        merged[interpolated_cols] = merged[interpolated_cols].interpolate()

        # Fill categorical column
        merged['Cyc'] = merged['Cyc'].ffill().bfill()

        tracks.append(merged)

    # pd.concat raises ValueError on an empty list; a frame without any track
    # simply yields an empty result instead.
    if not tracks:
        return df.iloc[:0].copy()

    return pd.concat(tracks, ignore_index=True).sort_values(by=['Eddy', 'Day'])

# NOTE: this cell overwrites df_eddies and inserts the 'ic'/'jc' columns, so it
# cannot be re-run on its own output (DataFrame.insert refuses an existing
# column); re-run the cleaning cell first.
df_eddies = interpolate_eddy_tracks(df_eddies)

# 'Age' is the number of rows in the eddy's gap-filled track, repeated on every row.
df_eddies['Age'] = df_eddies.groupby('Eddy')['Eddy'].transform('count')

# 'Day' is treated as a day offset from 1990-01-01.
df_eddies['Date'] = pd.Timestamp('1990-01-01') + pd.to_timedelta(df_eddies.Day, unit='D')

# Output file for each row: file numbers advance in steps of 30, with days
# 1462-1491 mapping to 01461, days 1492-1521 to 01491, and so on.
fnames = [
    f"/srv/scratch/z3533156/26year_BRAN2020/outer_avg_{1461 + ((day - 1462) // 30) * 30:05}.nc"
    for day in df_eddies['Day']
]
df_eddies['fname'] = fnames

# Find closest grid points
from scipy.spatial import cKDTree
tree = cKDTree(np.column_stack((X_grid.ravel(), Y_grid.ravel())))
xcs = df_eddies['xc'].to_numpy()
ycs = df_eddies['yc'].to_numpy()
valid = np.isfinite(xcs) & np.isfinite(ycs)

# Rows without a finite centre keep NaN indices (and -1 in idx).
idx = np.full(xcs.shape, -1, dtype=int)
ics = np.full(xcs.shape, np.nan)
jcs = np.full(ycs.shape, np.nan)
if valid.any():
    _, ind = tree.query(np.column_stack((xcs[valid], ycs[valid])))
    # Flat tree index -> (i, j) position in the [i, j]-indexed grid arrays.
    ii, jj = np.unravel_index(ind, X_grid.shape)
    idx[valid] = ind  # keep for optional debugging
    ics[valid] = ii
    jcs[valid] = jj

# Insert after 'Lat' if present, else at the end
# get_indexer_for returns -1 (not an empty array) for a missing label, so the
# position itself has to be tested rather than the length of the result.
pos = df_eddies.columns.get_indexer_for(['Lat'])
insert_at = (int(pos[0]) + 1) if pos[0] >= 0 else len(df_eddies.columns)
# Nullable Int64 keeps integer indices while still allowing missing values.
df_eddies.insert(insert_at, 'ic', pd.Series(ics, index=df_eddies.index).astype('Int64'))
df_eddies.insert(insert_at + 1, 'jc', pd.Series(jcs, index=df_eddies.index).astype('Int64'))
# Put the identifying columns first.
df_eddies = df_eddies[['Eddy', 'Day'] + [col for col in df_eddies.columns if col not in ['Eddy', 'Day']]]
df_eddies


Unnamed: 0,Eddy,Day,Cyc,Lon,Lat,ic,jc,xc,yc,w,q11,q12,q22,Rc,psi0,A,R,Age,Date,fname
0,1,1462,CE,156.969773,-28.461100,179,274,505.897600,1353.012251,-0.000033,1.099922,-0.193920,0.943344,104.792967,71.469206,-0.006508,69.500036,57,1994-01-02,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
1,1,1463,CE,156.993096,-28.433992,179,275,507.149717,1356.633687,-0.000030,1.066764,-0.119714,0.950849,102.305989,70.018058,-0.006690,52.250752,57,1994-01-03,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
2,1,1464,CE,157.097940,-28.381485,181,276,515.130000,1365.650728,-0.000028,1.016800,-0.154298,1.006892,115.245476,75.250400,-0.005666,67.000485,57,1994-01-04,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
3,1,1465,CE,157.094334,-28.315658,181,278,512.570772,1372.426342,-0.000033,0.896874,-0.276325,1.200119,90.907044,55.585304,-0.006726,52.001786,57,1994-01-05,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
4,1,1466,CE,157.106324,-28.346479,181,277,514.728351,1369.599726,-0.000029,0.847358,-0.169788,1.214160,100.425496,58.705551,-0.005821,56.251116,57,1994-01-06,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71733,1824,6990,CE,154.083947,-37.710931,190,59,552.118058,289.998167,-0.000008,2.180373,0.030402,0.571633,64.833644,10.446315,-0.002306,47.507129,23,2009-02-20,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
71734,1824,6991,CE,154.121845,-37.531688,189,63,549.670958,309.999231,-0.000008,3.146780,0.016008,0.317867,56.081580,5.657382,-0.001799,42.013350,23,2009-02-21,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
71735,1824,6992,CE,154.003467,-37.645734,188,60,543.275124,294.416933,-0.000006,1.417053,-0.146573,0.720851,72.785024,11.906976,-0.002248,41.251686,23,2009-02-22,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
71736,1824,6993,CE,153.701377,-37.869010,184,53,524.903624,261.812814,-0.000011,0.915386,0.170795,1.124303,79.974749,14.033126,-0.002194,43.250303,23,2009-02-23,/srv/scratch/z3533156/26year_BRAN2020/outer_av...


In [6]:
# List the eddies whose radius R, averaged over all rows of the track, is at
# most 15. Run this before the size filter below relabels the eddies.
def _mean_radius(eddy_id):
    """Mean of R over every row belonging to one eddy."""
    return df_eddies.loc[df_eddies.Eddy == eddy_id, 'R'].mean()

small_eddies = [
    eddy_id
    for eddy_id in df_eddies.Eddy.unique()
    if _mean_radius(eddy_id) <= 15
]
small_eddies


[np.int64(65),
 np.int64(76),
 np.int64(80),
 np.int64(85),
 np.int64(86),
 np.int64(87),
 np.int64(89),
 np.int64(90),
 np.int64(93),
 np.int64(107),
 np.int64(207),
 np.int64(209),
 np.int64(285),
 np.int64(670),
 np.int64(767),
 np.int64(773),
 np.int64(786),
 np.int64(788),
 np.int64(794),
 np.int64(797),
 np.int64(801),
 np.int64(922),
 np.int64(1007),
 np.int64(1113),
 np.int64(1128),
 np.int64(1134),
 np.int64(1157),
 np.int64(1246),
 np.int64(1251),
 np.int64(1252),
 np.int64(1376),
 np.int64(1383),
 np.int64(1397),
 np.int64(1480),
 np.int64(1481),
 np.int64(1513),
 np.int64(1519),
 np.int64(1760)]

In [7]:
def _mean_radius_above_15(track):
    """True when the track's mean R exceeds 15."""
    return track['R'].mean() > 15

# Keep only mesoscale - larger than 30km scale
df_eddies = df_eddies.groupby('Eddy').filter(_mean_radius_above_15)
df_eddies = df_eddies.reset_index(drop=True)
# Relabel the Eddy's 1,2,3.. so the ids are consecutive again after the removal
df_eddies['Eddy'] = df_eddies['Eddy'].rank(method='dense').astype(int)


In [8]:
# Save the processed table; the file name carries the first and last day it covers.
first_day, last_day = df_eddies.Day.min(), df_eddies.Day.max()
df_eddies.to_pickle(
    '/srv/scratch/z5297792/Chapter2/SEACOFS_26yr_Eddy_Dataset/Eddy_Data/'
    f'df_eddies_processed_{first_day}_{last_day}.pkl'
)


In [9]:
# Display the processed eddy table as it stands after the size filter and relabelling.
df_eddies


Unnamed: 0,Eddy,Day,Cyc,Lon,Lat,ic,jc,xc,yc,w,q11,q12,q22,Rc,psi0,A,R,Age,Date,fname
0,1,1462,CE,156.969773,-28.461100,179,274,505.897600,1353.012251,-0.000033,1.099922,-0.193920,0.943344,104.792967,71.469206,-0.006508,69.500036,57,1994-01-02,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
1,1,1463,CE,156.993096,-28.433992,179,275,507.149717,1356.633687,-0.000030,1.066764,-0.119714,0.950849,102.305989,70.018058,-0.006690,52.250752,57,1994-01-03,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
2,1,1464,CE,157.097940,-28.381485,181,276,515.130000,1365.650728,-0.000028,1.016800,-0.154298,1.006892,115.245476,75.250400,-0.005666,67.000485,57,1994-01-04,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
3,1,1465,CE,157.094334,-28.315658,181,278,512.570772,1372.426342,-0.000033,0.896874,-0.276325,1.200119,90.907044,55.585304,-0.006726,52.001786,57,1994-01-05,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
4,1,1466,CE,157.106324,-28.346479,181,277,514.728351,1369.599726,-0.000029,0.847358,-0.169788,1.214160,100.425496,58.705551,-0.005821,56.251116,57,1994-01-06,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70745,1786,6990,CE,154.083947,-37.710931,190,59,552.118058,289.998167,-0.000008,2.180373,0.030402,0.571633,64.833644,10.446315,-0.002306,47.507129,23,2009-02-20,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
70746,1786,6991,CE,154.121845,-37.531688,189,63,549.670958,309.999231,-0.000008,3.146780,0.016008,0.317867,56.081580,5.657382,-0.001799,42.013350,23,2009-02-21,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
70747,1786,6992,CE,154.003467,-37.645734,188,60,543.275124,294.416933,-0.000006,1.417053,-0.146573,0.720851,72.785024,11.906976,-0.002248,41.251686,23,2009-02-22,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
70748,1786,6993,CE,153.701377,-37.869010,184,53,524.903624,261.812814,-0.000011,0.915386,0.170795,1.124303,79.974749,14.033126,-0.002194,43.250303,23,2009-02-23,/srv/scratch/z3533156/26year_BRAN2020/outer_av...


In [13]:
# Spot-check a single record: the row of eddy 201 on day 2115.
df_eddies[(df_eddies.Eddy==201)&(df_eddies.Day==2115)]


Unnamed: 0,Eddy,Day,Cyc,Lon,Lat,ic,jc,xc,yc,w,q11,q12,q22,Rc,psi0,A,R,Age,Date,fname
8335,201,2115,AE,156.849184,-31.664877,201,205,600.935094,1012.815648,1.1e-05,1.228848,1.645817,1.3905,23.728814,-0.521259,0.000926,6.255711,42,1995-10-17,/srv/scratch/z3533156/26year_BRAN2020/outer_av...


#### Extra code to update old data to take into account small eddy removal

In [10]:
# `break` outside a loop is a SyntaxError, so this cell always fails.
# NOTE(review): presumably a deliberate stopper so that "Run All" does not
# continue into the one-off data-migration cells below - confirm.
break

SyntaxError: 'break' outside loop (668683560.py, line 1)

In [None]:
# Eddy ids that were removed by the mean-radius filter (taken from small_eddies)
removed_eddies_updated = small_eddies

# Original eddy IDs: 1 .. 2975
# NOTE(review): the table shown earlier in this notebook ends at eddy 1824
# before the radius filter, so 2975 presumably comes from an older dataset -
# confirm that this range and small_eddies refer to the same labelling.
original_eddies = list(range(1, 2976))

# Ids that survive the removal, in ascending order
remaining_eddies_updated = list(
    filter(lambda old_id: old_id not in removed_eddies_updated, original_eddies)
)

# Mapping old → new: the survivors are renumbered consecutively from 1
eddy_mapping_updated = dict(
    zip(remaining_eddies_updated, range(1, len(remaining_eddies_updated) + 1))
)
# eddy_mapping_updated


In [None]:
# Re-key the stored vertical-eddy dictionary: drop the removed eddies and
# rename the remaining "Eddy<old id>" keys to "Eddy<new id>".
source_pickle = '/srv/scratch/z5297792/Chapter2/SEACOFS_26yr_Eddy_Dataset/Vertical_Eddy_Dataset/including_small_eddies/dic_vert_eddies_including_meanR_le_15.pkl'
target_pickle = '/srv/scratch/z5297792/Chapter2/SEACOFS_26yr_Eddy_Dataset/Vertical_Eddy_Dataset/dic_vert_eddies.pkl'

# NOTE: unpickling can execute arbitrary code - only load files you trust.
with open(source_pickle, 'rb') as f:
    eddies_dict = pickle.load(f)

# Keys of the unwanted eddies
dropped_keys = {f"Eddy{rm}" for rm in removed_eddies_updated}

# Skip the unwanted eddies and rename the rest in one pass; the numeric part
# of each key (everything after the 4-character "Eddy" prefix) is looked up in
# the old → new mapping.
eddies_dict = {
    f"Eddy{eddy_mapping_updated[int(key[4:])]}": value
    for key, value in eddies_dict.items()
    if key not in dropped_keys
}

with open(target_pickle, "wb") as f:
    pickle.dump(eddies_dict, f)

