In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle


In [2]:
import netCDF4 as nc
from scipy.interpolate import griddata
import netCDF4 as nc
from scipy.interpolate import RegularGridInterpolator
import time

# Field Data
# Static grid fields are read from a single model output file.
fname = '/srv/scratch/z3533156/26year_BRAN2020/outer_avg_01461.nc'

# Every field is pulled into memory inside the `with` block, so the file handle
# is released as soon as the reads finish instead of staying open for the
# rest of the session. `dataset` is closed after this block.
with nc.Dataset(fname) as dataset:
    # 2-D fields are transposed so that the first array axis is the one the
    # rest of the notebook walks along for x (see the dx/x_grid computation).
    lon_rho = np.transpose(dataset.variables['lon_rho'], axes=(1, 0))
    lat_rho = np.transpose(dataset.variables['lat_rho'], axes=(1, 0))
    mask_rho = np.transpose(dataset.variables['mask_rho'], axes=(1, 0))
    h = np.transpose(dataset.variables['h'], axes=(1, 0))
    # Single value taken from the first grid point.
    # NOTE(review): presumably the grid rotation angle is uniform - confirm.
    angle = dataset.variables['angle'][0, 0]

# Pre-computed z_r array; after moving its first axis to the end, keep only
# the profile along the last axis at index (150, 150).
z_r = np.load('/srv/scratch/z5297792/z_r.npy')
z_r = np.transpose(z_r, (1, 2, 0))[150, 150, :]

def distance(lat1, lon1, lat2, lon2, radius=6357):
    """Great-circle (haversine) distance between points given in degrees.

    Parameters
    ----------
    lat1, lon1 : float or array-like
        Latitude / longitude of the first point(s), in degrees.
    lat2, lon2 : float or array-like
        Latitude / longitude of the second point(s), in degrees. Inputs are
        combined element-wise, so arrays must be broadcast-compatible.
    radius : float, optional
        Radius of the sphere; the result is expressed in the same unit.
        Defaults to 6357, the value this notebook has always used.
        NOTE(review): 6357 km is close to Earth's polar radius, while the
        commonly quoted mean radius is about 6371 km. Changing the default
        rescales every distance built from this function - confirm before
        touching it.

    Returns
    -------
    float or numpy.ndarray
        Distance along the sphere surface between each pair of points.
    """
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlat, dlon = lat2 - lat1, lon2 - lon1
    a = np.sin(dlat/2)**2 + np.cos(lat1)*np.cos(lat2)*np.sin(dlon/2)**2
    # Round-off can push `a` marginally outside [0, 1]; clipping keeps the
    # square roots below from returning NaN for (near-)antipodal points.
    a = np.clip(a, 0.0, 1.0)
    return radius * 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

# Indices of the central line along each axis of the transposed rho grid.
i_mid = lon_rho.shape[0] // 2
j_mid = lon_rho.shape[1] // 2

# Coordinates along the two central lines: one running over the first axis
# (at column j_mid) and one running over the second axis (at row i_mid).
_lat_along_x, _lon_along_x = lat_rho[:, j_mid], lon_rho[:, j_mid]
_lat_along_y, _lon_along_y = lat_rho[i_mid, :], lon_rho[i_mid, :]

# Spacing between each pair of neighbouring points on those lines.
dx = distance(_lat_along_x[:-1], _lon_along_x[:-1],
              _lat_along_x[1:], _lon_along_x[1:])
dy = distance(_lat_along_y[:-1], _lon_along_y[:-1],
              _lat_along_y[1:], _lon_along_y[1:])

# Cumulative distance from the first grid point, with a leading 0 for the
# origin, gives one coordinate value per grid index on each axis.
x_grid = np.insert(np.cumsum(dx), 0, 0)
y_grid = np.insert(np.cumsum(dy), 0, 0)

# 2-D coordinate arrays indexed [i, j] to match lon_rho / lat_rho.
X_grid, Y_grid = np.meshgrid(x_grid, y_grid, indexing='ij')


In [3]:
# Raw eddy detections, as written by the detection step.
# NOTE: unpickling can execute arbitrary code - only load files you trust.
_raw_eddy_path = "/srv/scratch/z5297792/Chapter2/SEACOFS_26yr_Eddy_Dataset/Eddy_Data/df_eddies_1462_10650.pkl"
df_eddies_true = pd.read_pickle(_raw_eddy_path)
df_eddies_true


Unnamed: 0,Eddy_idx,Day,Cyc,nLon,nLat,nxc,nyc,nic,njc,xc,...,w,q11,q12,q22,Rc,psi0,A,R,Eddy,next_num
0,0,1462,AE,160.569072,-28.075269,830.0,1515.0,830,1515,830.505283,...,0.000017,0.720417,-0.098272,1.401490,105.813185,-45.393673,0.004054,45.530133,0,109875
1,1,1462,AE,155.725082,-27.535036,358.0,1408.0,358,1408,357.830407,...,0.000031,1.216814,-0.300862,0.896207,82.950121,-45.956139,0.006679,50.251427,1,109875
2,2,1462,CE,161.070068,-29.736625,928.0,1356.0,928,1356,928.354369,...,-0.000011,0.899317,-0.656994,1.591920,101.288877,40.278850,-0.003926,74.500939,2,109875
3,3,1462,CE,156.973832,-28.452969,506.0,1354.0,506,1354,505.897600,...,-0.000033,1.099922,-0.193920,0.943344,104.677927,71.391686,-0.006515,69.250036,3,109875
4,4,1462,AE,159.170505,-29.807511,754.0,1285.0,754,1285,753.364302,...,0.000022,1.292653,-0.367089,0.877850,103.630924,-44.453856,0.004139,66.001363,4,109875
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
412605,22,10650,AE,157.993708,-38.734350,912.0,300.0,912,300,911.755090,...,0.000009,1.641599,-0.334344,0.677258,67.842948,-7.686696,0.001670,39.001479,109816,109875
412606,23,10650,CE,157.092189,-39.604423,860.0,180.0,860,180,860.284107,...,-0.000008,0.560145,0.168820,1.836133,71.094321,9.528792,-0.001885,60.500490,109782,109875
412607,24,10650,AE,151.460338,-38.218269,349.0,158.0,349,158,350.853554,...,0.000033,1.352490,0.161857,0.758747,89.358603,-55.568728,0.006959,38.006653,109844,109875
412609,26,10650,AE,156.242892,-40.165033,805.0,95.0,805,95,804.758015,...,0.000011,1.253003,0.430411,0.945930,53.246635,-8.978789,0.003167,44.750665,109776,109875


In [4]:
# Clean: keep only the columns used downstream, ordered by track then by day
_keep_cols = ['Eddy', 'Day', 'Cyc', 'xc', 'yc', 'w', 'q11', 'q12', 'q22', 'Rc', 'psi0', 'A', 'R']
df_eddies = df_eddies_true[_keep_cols].copy().sort_values(by=['Eddy', 'Day'])

# Keep only mesoscale - tracks whose first and last day are at least 3 weeks (21 days) apart
df_eddies = df_eddies.groupby('Eddy').filter(
    lambda track: track['Day'].max() - track['Day'].min() >= 21
)

# Relabel the surviving eddies 1, 2, 3, ... in order of their old id
df_eddies['Eddy'] = df_eddies['Eddy'].rank(method='dense').astype(int)

# Find Lon and Lat values: interpolate the grid's lon/lat at each eddy centre.
# The interpolators take (y, x) points, hence the transposed fields.
# Centres outside the grid get NaN instead of raising.
xg, yg = x_grid, y_grid
_interp_opts = dict(bounds_error=False, fill_value=np.nan)
lon_interp = RegularGridInterpolator((yg, xg), lon_rho.T, **_interp_opts)
lat_interp = RegularGridInterpolator((yg, xg), lat_rho.T, **_interp_opts)
points = df_eddies[['yc', 'xc']].to_numpy()
df_eddies['Lon'] = lon_interp(points)
df_eddies['Lat'] = lat_interp(points)

# Re order columns so that Lon and Lat sit directly after 'Cyc'
new_order = [c for c in df_eddies.columns if c not in ('Lon', 'Lat')]
_after_cyc = new_order.index('Cyc') + 1
new_order[_after_cyc:_after_cyc] = ['Lon', 'Lat']
df_eddies = df_eddies[new_order].reset_index(drop=True)
df_eddies


Unnamed: 0,Eddy,Day,Cyc,Lon,Lat,xc,yc,w,q11,q12,q22,Rc,psi0,A,R
0,1,1462,CE,156.969773,-28.461100,505.897600,1353.012251,-0.000033,1.099922,-0.193920,0.943344,104.677927,71.391686,-0.006515,69.250036
1,1,1463,CE,156.993096,-28.433992,507.149717,1356.633687,-0.000030,1.066764,-0.119714,0.950849,110.542281,77.351002,-0.006330,62.000626
2,1,1464,CE,157.097940,-28.381485,515.130000,1365.650728,-0.000028,1.016800,-0.154298,1.006892,115.245476,75.250400,-0.005666,67.000485
3,1,1465,CE,157.094334,-28.315658,512.570772,1372.426342,-0.000033,0.896874,-0.276325,1.200119,114.738549,72.789618,-0.005529,66.501582
4,1,1466,CE,157.106324,-28.346479,514.728351,1369.599726,-0.000029,0.847358,-0.169788,1.214160,100.425496,58.705551,-0.005821,56.251116
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
101233,2988,10646,AE,157.825620,-37.030656,850.502045,476.447416,0.000013,0.898473,0.197470,1.156400,114.894173,-41.491437,0.003143,72.751791
101234,2988,10647,AE,157.880796,-36.972725,853.608024,484.316677,0.000014,0.884289,0.308226,1.238286,111.853372,-42.195517,0.003373,73.000933
101235,2988,10648,AE,157.868516,-36.874363,849.759258,494.400614,0.000014,0.883521,0.168561,1.163994,107.180060,-43.102690,0.003752,75.750780
101236,2988,10649,AE,157.753058,-36.882303,840.037603,489.981707,0.000013,0.873263,0.225002,1.203104,100.368133,-37.967048,0.003769,75.750006


In [5]:
# Fill in missing eddy data
def interpolate_eddy_tracks(df):
    """Give every eddy one row per day by filling the gaps in its track.

    For each value of 'Eddy', the days between that eddy's first and last
    'Day' are enumerated; days with no record become new rows. On those rows
    the numeric columns are linearly interpolated between the neighbouring
    records and 'Cyc' is copied from the nearest record (previous one first).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Eddy', 'Day' (integer), 'Cyc' and the numeric columns
        'Lon', 'Lat', 'xc', 'yc', 'w', 'q11', 'q12', 'q22', 'Rc', 'psi0',
        'A', 'R'. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Gap-filled tracks sorted by ('Eddy', 'Day'), with 'Day' and 'Eddy' as
        the first two columns. If `df` contains no tracks, an empty frame
        with the same columns is returned.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    """
    numeric_cols = ('Lon', 'Lat', 'xc', 'yc',
                    'w', 'q11', 'q12', 'q22',
                    'Rc', 'psi0', 'A', 'R')
    tracks = []

    for eddy_id, group in df.groupby('Eddy'):
        # Build a full range of days for this eddy
        full_days = pd.DataFrame({'Day': range(group['Day'].min(), group['Day'].max() + 1)})
        full_days['Eddy'] = eddy_id

        # Left-merge so that days without a record appear as all-NaN rows
        merged = pd.merge(full_days, group, on=['Eddy', 'Day'], how='left')

        # Interpolate numerical columns (linear between the surrounding records)
        for col in numeric_cols:
            merged[col] = merged[col].interpolate()

        # Fill categorical column
        merged['Cyc'] = merged['Cyc'].ffill().bfill()

        tracks.append(merged)

    if not tracks:
        # pd.concat([]) raises ValueError; return an empty frame laid out
        # like the normal result instead.
        lead = ['Day', 'Eddy']
        rest = [c for c in df.columns if c not in lead]
        return df.iloc[0:0][lead + rest].copy()

    df_result = pd.concat(tracks, ignore_index=True).sort_values(by=['Eddy', 'Day'])

    return df_result

# Fill the day gaps so every eddy has one row per day of its track.
df_eddies = interpolate_eddy_tracks(df_eddies)
# 'Age' is the number of rows in the eddy's whole (gap-filled) track, so it
# has the same value on every row of a given eddy.
df_eddies['Age'] = df_eddies.groupby('Eddy')['Eddy'].transform('count')

# Calendar date, counting 'Day' as days elapsed since 1990-01-01.
df_eddies['Date'] = pd.Timestamp('1990-01-01') + pd.to_timedelta(df_eddies.Day, unit='D')

# Model output file for each row. File numbers advance in steps of 30 from
# 01461, so days 1462-1491 map to outer_avg_01461.nc, 1492-1521 to
# outer_avg_01491.nc, and so on.
fnames = [
    f"/srv/scratch/z3533156/26year_BRAN2020/outer_avg_{1461 + ((day - 1462) // 30) * 30:05}.nc"
    for day in df_eddies['Day']
]
df_eddies['fname'] = fnames

# Find closest grid points
from scipy.spatial import cKDTree
tree = cKDTree(np.column_stack((X_grid.ravel(), Y_grid.ravel())))
xcs = df_eddies['xc'].to_numpy()
ycs = df_eddies['yc'].to_numpy()
# Only rows with a finite centre are sent to the nearest-neighbour query.
valid = np.isfinite(xcs) & np.isfinite(ycs)

# Defaults for rows without a usable centre: -1 for the flat index, NaN
# (turned into <NA> below) for the 2-D indices.
idx = np.full(xcs.shape, -1, dtype=int)
ics = np.full(xcs.shape, np.nan)
jcs = np.full(ycs.shape, np.nan)
if valid.any():
    _, ind = tree.query(np.column_stack((xcs[valid], ycs[valid])))
    # Flat index into the ravelled grid -> (i, j) index pair.
    ii, jj = np.unravel_index(ind, X_grid.shape)
    idx[valid] = ind  # keep for optional debugging
    ics[valid] = ii
    jcs[valid] = jj

# Insert after 'Lat' if present, else at the end.
# get_indexer_for returns -1 for a label that is not found (the result is
# never empty), so the position itself has to be checked; testing only its
# length would place the new columns at position 0 when 'Lat' is missing.
pos = df_eddies.columns.get_indexer_for(['Lat'])
insert_at = (pos[0] + 1) if (len(pos) and pos[0] >= 0) else len(df_eddies.columns)
# Nullable integer dtype keeps missing indices as <NA> rather than floats.
df_eddies.insert(insert_at, 'ic', pd.Series(ics, index=df_eddies.index).astype('Int64'))
df_eddies.insert(insert_at + 1, 'jc', pd.Series(jcs, index=df_eddies.index).astype('Int64'))
# Put the identifying columns first.
df_eddies = df_eddies[['Eddy', 'Day'] + [col for col in df_eddies.columns if col not in ['Eddy', 'Day']]]
df_eddies


Unnamed: 0,Eddy,Day,Cyc,Lon,Lat,ic,jc,xc,yc,w,q11,q12,q22,Rc,psi0,A,R,Age,Date,fname
0,1,1462,CE,156.969773,-28.461100,179,274,505.897600,1353.012251,-0.000033,1.099922,-0.193920,0.943344,104.677927,71.391686,-0.006515,69.250036,57,1994-01-02,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
1,1,1463,CE,156.993096,-28.433992,179,275,507.149717,1356.633687,-0.000030,1.066764,-0.119714,0.950849,110.542281,77.351002,-0.006330,62.000626,57,1994-01-03,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
2,1,1464,CE,157.097940,-28.381485,181,276,515.130000,1365.650728,-0.000028,1.016800,-0.154298,1.006892,115.245476,75.250400,-0.005666,67.000485,57,1994-01-04,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
3,1,1465,CE,157.094334,-28.315658,181,278,512.570772,1372.426342,-0.000033,0.896874,-0.276325,1.200119,114.738549,72.789618,-0.005529,66.501582,57,1994-01-05,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
4,1,1466,CE,157.106324,-28.346479,181,277,514.728351,1369.599726,-0.000029,0.847358,-0.169788,1.214160,100.425496,58.705551,-0.005821,56.251116,57,1994-01-06,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117635,2988,10646,AE,157.825620,-37.030656,248,97,850.502045,476.447416,0.000013,0.898473,0.197470,1.156400,114.894173,-41.491437,0.003143,72.751791,24,2019-02-24,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
117636,2988,10647,AE,157.880796,-36.972725,249,98,853.608024,484.316677,0.000014,0.884289,0.308226,1.238286,111.853372,-42.195517,0.003373,73.000933,24,2019-02-25,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
117637,2988,10648,AE,157.868516,-36.874363,248,100,849.759258,494.400614,0.000014,0.883521,0.168561,1.163994,107.180060,-43.102690,0.003752,75.750780,24,2019-02-26,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
117638,2988,10649,AE,157.753058,-36.882303,246,99,840.037603,489.981707,0.000013,0.873263,0.225002,1.203104,100.368133,-37.967048,0.003769,75.750006,24,2019-02-27,/srv/scratch/z3533156/26year_BRAN2020/outer_av...


In [6]:
# Ids of eddies whose mean R over the whole track is at most 15
# (these are the tracks removed by the size filter in the next cell).
small_eddies = [
    eddy_id
    for eddy_id in df_eddies.Eddy.unique()
    if df_eddies.loc[df_eddies.Eddy == eddy_id, 'R'].mean() <= 15
]
small_eddies


[np.int64(65),
 np.int64(78),
 np.int64(81),
 np.int64(86),
 np.int64(87),
 np.int64(88),
 np.int64(90),
 np.int64(91),
 np.int64(94),
 np.int64(108),
 np.int64(205),
 np.int64(208),
 np.int64(210),
 np.int64(670),
 np.int64(766),
 np.int64(772),
 np.int64(785),
 np.int64(787),
 np.int64(793),
 np.int64(796),
 np.int64(800),
 np.int64(922),
 np.int64(1002),
 np.int64(1006),
 np.int64(1031),
 np.int64(1113),
 np.int64(1133),
 np.int64(1158),
 np.int64(1246),
 np.int64(1369),
 np.int64(1376),
 np.int64(1382),
 np.int64(1396),
 np.int64(1512),
 np.int64(1513),
 np.int64(1519),
 np.int64(1758),
 np.int64(2091),
 np.int64(2107),
 np.int64(2170),
 np.int64(2205),
 np.int64(2299),
 np.int64(2308),
 np.int64(2414),
 np.int64(2444),
 np.int64(2714),
 np.int64(2719),
 np.int64(2923),
 np.int64(2939),
 np.int64(2947)]

In [7]:
# Two per-track filters, applied one after the other:
#   1. Keep only mesoscale - larger than 30km scale (track-mean R above 15).
#   2. Keep only tracks where q11*q22 - q12**2 is positive on every day.
#
# A stricter alternative to (2), currently disabled, requires that quantity
# to stay within a tolerance of 1:
# tol = 0.1  # allowed deviation from 1
# .filter(lambda g: np.all(np.isclose(g.q11 * g.q22 - g.q12**2, 1, atol=tol)))
df_eddies = (
    df_eddies
    .groupby('Eddy')
    .filter(lambda track: track['R'].mean() > 15)
    .reset_index(drop=True)
    .groupby("Eddy")
    .filter(lambda track: np.all(track.q11 * track.q22 - track.q12**2 > 0))
    .reset_index(drop=True)
)

# Relabel the Eddy's 1,2,3.. so the ids are contiguous again after filtering
df_eddies = df_eddies.assign(
    Eddy=lambda frame: frame['Eddy'].rank(method='dense').astype(int)
)


In [8]:
# Save the processed table; the file name records the first and last day it covers.
_first_day, _last_day = df_eddies.Day.min(), df_eddies.Day.max()
_processed_path = f'/srv/scratch/z5297792/Chapter2/SEACOFS_26yr_Eddy_Dataset/Eddy_Data/df_eddies_processed_{_first_day}_{_last_day}.pkl'
df_eddies.to_pickle(_processed_path)


In [9]:
# data = df_eddies[(df_eddies.Eddy==1553)&(df_eddies.Day==6308)]
# df_eddies[(df_eddies.Eddy==1553)&(df_eddies.Day==6315)]


In [10]:
# data.q11*data.q22 - data.q12**2


In [11]:
# Display the final table: gap-filled, size- and determinant-filtered, with
# eddy ids relabelled to be contiguous.
df_eddies


Unnamed: 0,Eddy,Day,Cyc,Lon,Lat,ic,jc,xc,yc,w,q11,q12,q22,Rc,psi0,A,R,Age,Date,fname
0,1,1462,CE,156.969773,-28.461100,179,274,505.897600,1353.012251,-0.000033,1.099922,-0.193920,0.943344,104.677927,71.391686,-0.006515,69.250036,57,1994-01-02,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
1,1,1463,CE,156.993096,-28.433992,179,275,507.149717,1356.633687,-0.000030,1.066764,-0.119714,0.950849,110.542281,77.351002,-0.006330,62.000626,57,1994-01-03,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
2,1,1464,CE,157.097940,-28.381485,181,276,515.130000,1365.650728,-0.000028,1.016800,-0.154298,1.006892,115.245476,75.250400,-0.005666,67.000485,57,1994-01-04,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
3,1,1465,CE,157.094334,-28.315658,181,278,512.570772,1372.426342,-0.000033,0.896874,-0.276325,1.200119,114.738549,72.789618,-0.005529,66.501582,57,1994-01-05,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
4,1,1466,CE,157.106324,-28.346479,181,277,514.728351,1369.599726,-0.000029,0.847358,-0.169788,1.214160,100.425496,58.705551,-0.005821,56.251116,57,1994-01-06,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105233,2631,10646,AE,157.825620,-37.030656,248,97,850.502045,476.447416,0.000013,0.898473,0.197470,1.156400,114.894173,-41.491437,0.003143,72.751791,24,2019-02-24,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
105234,2631,10647,AE,157.880796,-36.972725,249,98,853.608024,484.316677,0.000014,0.884289,0.308226,1.238286,111.853372,-42.195517,0.003373,73.000933,24,2019-02-25,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
105235,2631,10648,AE,157.868516,-36.874363,248,100,849.759258,494.400614,0.000014,0.883521,0.168561,1.163994,107.180060,-43.102690,0.003752,75.750780,24,2019-02-26,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
105236,2631,10649,AE,157.753058,-36.882303,246,99,840.037603,489.981707,0.000013,0.873263,0.225002,1.203104,100.368133,-37.967048,0.003769,75.750006,24,2019-02-27,/srv/scratch/z3533156/26year_BRAN2020/outer_av...
