# Detect Eddy Centers

In [1]:
import netCDF4 as nc
import numpy as np
from sklearn.cluster import DBSCAN
import pandas as pd
import math
from math import isnan
import pickle


def detect_eddy_centers(u_surf_t, v_surf_t, ssh_t, ow_surf_t, cur_speed_surf_t):

    def find_local_maxima(matrix):
        local_maxima = []
        for i in range(len(matrix)):
            for j in range(len(matrix[0])):
                if is_local_max(matrix, i, j):
                    local_maxima.append((i, j))
        return local_maxima

    def is_local_max(matrix, row, col):
        current_value = matrix[row][col]
        if row > 0 and current_value <= matrix[row - 1][col]:
            return False
        if row < len(matrix) - 1 and current_value <= matrix[row + 1][col]:
            return False
        if col > 0 and current_value <= matrix[row][col - 1]:
            return False
        if col < len(matrix[0]) - 1 and current_value <= matrix[row][col + 1]:
            return False
        return True

    ssh_t[ssh_t > 5] = 0
    local_ssh_max = set(find_local_maxima(ssh_t))
    local_ssh_min = set(find_local_maxima(-ssh_t))
    ssh_peaks_coord = local_ssh_max.union(local_ssh_min)
    # CRITICAL CURRENT SPEED
    mask = cur_speed_surf_t < 1
    crit_cur_speed_r, crit_cur_speed_c = np.where(mask)
    crit_cur_coord = set(list(zip(crit_cur_speed_r, crit_cur_speed_c)))
    # LARGE NEGATIVE OW
    mask = ow_surf_t < 0
    lrg_ow_surf_r, lrg_ow_surf_c = np.where(mask)
    lrg_ow_surf_coord = set(list(zip(lrg_ow_surf_r, lrg_ow_surf_c)))
    # POTENTIAL CENTERS
    pot = lrg_ow_surf_coord & crit_cur_coord
    pot = pot & ssh_peaks_coord
    if len(pot) != 0:
        pot_list = list(pot)
        pot_r, pot_c = zip(*pot_list)
        X = np.column_stack((list(lon_rho[pot_r,pot_c]),list(lat_rho[pot_r,pot_c])))
        # Set the distance threshold
        DIST_BTW_EDDIES = 0.33
        # Perform DBSCAN clustering
        dbscan = DBSCAN(eps=DIST_BTW_EDDIES, min_samples=1)
        eddy_id = dbscan.fit_predict(X)
        C = []
        for eddy in range(1, np.max(eddy_id) + 1):
            mean_lon = np.mean(X[eddy_id == eddy, 0])
            mean_lat = np.mean(X[eddy_id == eddy, 1])
            C.append((mean_lon, mean_lat))
    else:
        C = [(np.nan, np.nan)]
    return C

def day_dic_making(u_surf, v_surf, ssh, vort_surf, ow_surf, cur_speed_surf, lon_rho, lat_rho):
    daydic = {}

    for TIME in range(30):
        u_surf_t = u_surf[:, :, TIME].squeeze()
        v_surf_t = v_surf[:, :, TIME].squeeze()
        ssh_t = ssh[:, :, TIME].squeeze()
        ow_surf_t = ow_surf[:, :, TIME].squeeze()
        cur_speed_surf_t = cur_speed_surf[:, :, TIME].squeeze()
        vort_surf_t = vort_surf[:, :, TIME].squeeze()
        centers = detect_eddy_centers(u_surf_t, v_surf_t, ssh_t, ow_surf_t, cur_speed_surf_t)

        Clon = list(zip(*centers))[0]
        Clat = list(zip(*centers))[1]

        eVort = []
        for e in range(len(Clon)):
            R2 = (lon_rho - centers[e][0])**2 + (lat_rho - centers[e][1])**2
            ic, jc = np.unravel_index(np.argmin(R2, axis=None), R2.shape)
            eVort.append(vort_surf_t[ic, jc])

        eID = np.arange(1, len(Clon) + 1)

        dsum = {
            'Clon': Clon,
            'Clat': Clat,
            'Vort': eVort,
            'ID': eID
        }

        index_labels = [f'Eddy{n}' for n in range(1, len(Clon) + 1)]
        ddata = pd.DataFrame(dsum, index=index_labels)
        daydic[f'Day{TIME + 1}'] = ddata

    return daydic


In [2]:
fnumbers = [f'{num:05}' for num in range(1461, 1491+1, 30)] # last valid file is 10611 

In [3]:
ROMS_directory = {}

fname = '/srv/scratch/z3533156/26year_BRAN2020/outer_avg_'+str(fnumbers[0])+'.nc'

dataset = nc.Dataset(fname)
angle = dataset.variables['angle']
angle = angle[0,0]
lon_rho  = dataset.variables['lon_rho']
lon_rho = np.transpose(lon_rho, axes=(1, 0))
lat_rho  = dataset.variables['lat_rho']
lat_rho = np.transpose(lat_rho, axes=(1, 0))
def distance(lat1, lon1, lat2, lon2):
    EARTH_RADIUS = 6357000  # in meters
    lat1_rad = np.radians(lat1)
    lon1_rad = np.radians(lon1)
    lat2_rad = np.radians(lat2)
    lon2_rad = np.radians(lon2)
    # Haversine formula
    dlat = lat2_rad - lat1_rad
    dlon = lon2_rad - lon1_rad
    a = np.sin(dlat / 2.0)**2 + np.cos(lat1_rad) * np.cos(lat2_rad) * np.sin(dlon / 2.0)**2
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return EARTH_RADIUS * c
dx = distance(lat_rho[:-1, :], lon_rho[:-1, :], lat_rho[1:, :], lon_rho[1:, :])
dy = distance(lat_rho[:, :-1], lon_rho[:, :-1], lat_rho[:, 1:], lon_rho[:, 1:])
dx = np.repeat(dx[:, :, np.newaxis], 30, axis=2)
dy = np.repeat(dy[:, :, np.newaxis], 30, axis=2)

i_print = 0
for fnumber in fnumbers:

    if i_print % 20 == 0:
        print(fnumber)
    i_print += 1
   
    fname = '/srv/scratch/z3533156/26year_BRAN2020/outer_avg_'+str(fnumber)+'.nc'
    dataset = nc.Dataset(fname)
    u_east = dataset.variables['u_eastward']
    u_east = np.transpose(u_east, axes=(3, 2, 1, 0))
    u_east_surf = u_east[:,:,-1,:].squeeze()
    del u_east 
    v_north = dataset.variables['v_northward']
    v_north = np.transpose(v_north, axes=(3, 2, 1, 0))
    v_north_surf = v_north[:,:,-1,:].squeeze()
    del v_north
    ssh = dataset.variables['zeta']
    ssh = np.transpose(ssh, axes=(2, 1, 0))
    size_u_east_surf = u_east_surf.shape
    size_v_north_surf = v_north_surf.shape
    u_surf = np.full(size_u_east_surf, np.nan)
    v_surf = np.full(size_v_north_surf, np.nan)
    for TIME in range(30):
        u_surf[:,:,TIME] = v_north_surf[:,:,TIME] * np.sin(angle) + u_east_surf[:,:,TIME] * np.cos(angle)
        v_surf[:,:,TIME] = v_north_surf[:,:,TIME] * np.cos(angle) - u_east_surf[:,:,TIME] * np.sin(angle)
    del u_east_surf, v_north_surf
    # CHANGES IN VELOCITIES
    # For u_x
    u_x = (u_surf[2:,:,:] - u_surf[:-2,:,:]) / (dx[:-1,:] + dx[1:,:])
    # For u_y
    u_y = (u_surf[:,2:,:] - u_surf[:,:-2,:]) / (dy[:,:-1] + dy[:,1:])
    # For v_x
    v_x = (v_surf[2:,:,:] - v_surf[:-2,:,:]) / (dx[:-1,:] + dx[1:,:])
    # For v_y
    v_y = (v_surf[:,2:,:] - v_surf[:,:-2,:]) / (dy[:,:-1] + dy[:,1:])
    WIDTH, LENGTH = lat_rho.shape
    TOTAL_NUM_DAYS = 30
    # For u_x
    u_x = np.concatenate((np.zeros((1, LENGTH, TOTAL_NUM_DAYS)), u_x, np.zeros((1, LENGTH, TOTAL_NUM_DAYS))), axis=0)
    # For v_x
    v_x = np.concatenate((np.zeros((1, LENGTH, TOTAL_NUM_DAYS)), v_x, np.zeros((1, LENGTH, TOTAL_NUM_DAYS))), axis=0)
    # For u_y
    u_y = np.concatenate((np.zeros((WIDTH, 1, TOTAL_NUM_DAYS)), u_y, np.zeros((WIDTH, 1, TOTAL_NUM_DAYS))), axis=1)
    # For v_y
    v_y = np.concatenate((np.zeros((WIDTH, 1, TOTAL_NUM_DAYS)), v_y, np.zeros((WIDTH, 1, TOTAL_NUM_DAYS))), axis=1)
    # NORMAL STRAIN
    s_n_surf = u_x - v_y
    # SHEAR STRAIN
    s_s_surf = v_x + u_y
    # VORTICITY
    vort_surf = v_x - u_y
    # OKUBO-WEISS
    ow_surf = s_n_surf**2 + s_s_surf**2 - vort_surf**2
    # Set boundary values to large positive
    ow_surf[0, :] = 10        # Top row
    ow_surf[-1, :] = 10       # Bottom row
    ow_surf[:, 0] = 10        # Left column
    ow_surf[:, -1] = 10       # Right column
    # CURRENT SPEED
    cur_speed_surf = np.sqrt(u_surf**2 + v_surf**2)

    del u_x, v_x, u_y, v_y, s_n_surf, s_s_surf

    ROMS_directory[fnumber] = day_dic_making(u_surf, v_surf, ssh, vort_surf, ow_surf, cur_speed_surf, lon_rho, lat_rho)

del dx, dy

01461


In [4]:
# Relabel days 30, 31, 32, ...
n = 0
for data_file in ROMS_directory.keys():
    ROMS_directory[data_file] = {f'Day{i + 30*n}': v for k, v in ROMS_directory[data_file].items() for i in [int(k[3:])]}
    n += 1

In [5]:
# Combine all days
ROMS_daydic = {}
for d in ROMS_directory.values():
    ROMS_daydic.update(d)

del ROMS_directory

In [6]:
def eddy_tracking(daydic):

    DAYS_TO_LOOK_BACK = 5
    VORT_WEIGHT = .5*1E9

    next_num = max(list(range(1,daydic['Day1'].shape[0])))+1
    for day in range(2, len(daydic)+1):
        ddata_pre = daydic['Day'+str(day-1)]
        ddata_post = daydic['Day'+str(day)]
        pre_C_lon  = ddata_pre.Clon
        pre_C_lat  = ddata_pre.Clat
        post_C_lon  = ddata_post.Clon
        post_C_lat  = ddata_post.Clat
        pre_vort  = ddata_pre.Vort
        post_vort  = ddata_post.Vort
        eID = np.full(len(post_C_lon), np.nan)
        for j in range(len(post_C_lon)):
            for i in range(len(pre_C_lon)):
                R = np.sqrt((pre_C_lon.iloc[i] - post_C_lon.iloc[j])**2 + (pre_C_lat.iloc[i] - post_C_lat.iloc[j])**2 + VORT_WEIGHT*(pre_vort.iloc[i]-post_vort.iloc[j])**2)
                if R < .5 and not any(eID == ddata_pre.iloc[i].ID):
                    eID[j] = int(ddata_pre.iloc[i].ID)
            for back_check in range(2, DAYS_TO_LOOK_BACK):
                if np.isnan(eID[j]) and day > back_check:
                    pre_C_lon = daydic['Day'+str(day-back_check)].Clon
                    pre_C_lat = daydic['Day'+str(day-back_check)].Clat
                    pre_vort = daydic['Day'+str(day-back_check)].Vort
                    for i in range(len(pre_C_lat)):
                        R = np.sqrt((pre_C_lon.iloc[i] - post_C_lon.iloc[j])**2 + (pre_C_lat.iloc[i] - post_C_lat.iloc[j])**2 + VORT_WEIGHT*(pre_vort.iloc[i]-post_vort.iloc[j])**2)
                        if R < .5 and not any(eID == daydic['Day'+str(day-back_check)].iloc[i].ID):
                            eID[j] = int(daydic['Day'+str(day-back_check)].iloc[i].ID)
            ddata_pre = daydic['Day'+str(day-1)]
            pre_C_lon  = ddata_pre.Clon
            pre_C_lat  = ddata_pre.Clat
            pre_vort = ddata_pre.Vort
            if np.isnan(eID[j]):
                eID[j] = next_num
                next_num += 1
        eID = [round(x) for x in eID]
        ddata_post = daydic['Day'+str(day)]
        ddata_post.ID = eID
        ddata_post.index = ['Eddy' + str(i) for i in eID]
    
    return daydic

In [7]:
ROMS_daydic_tracked = eddy_tracking(ROMS_daydic)
del ROMS_daydic

In [8]:
def eddy_dic_making(daydic,meso_age):

    def distance_between_non_nan(lst):
        first_non_nan = None
        last_non_nan = None
        for i in range(len(lst)):
            if not isinstance(lst[i], float) or not math.isnan(lst[i]):
                first_non_nan = i
                break
        for i in range(len(lst) - 1, -1, -1):
            if not isinstance(lst[i], float) or not math.isnan(lst[i]):
                last_non_nan = i
                break
        if first_non_nan is not None and last_non_nan is not None:
            distance = last_non_nan - first_non_nan + 1
            return distance
        else:
            return None  

    max_id = max([max(daydic[key].ID) for key in daydic.keys()])

    eddies = {}

    for e in range(1, max_id + 1):
        eLon = []
        eLat = []
        eVort = []
        eId = []

        idd = 0
        for dayinfo in daydic.values():
            if f'Eddy{e}' in dayinfo.index:
                dayeddyinfo = dayinfo.loc[f'Eddy{e}']
                eLon.append(dayeddyinfo.Clon)
                eLat.append(dayeddyinfo.Clat)
                eVort.append(dayeddyinfo.Vort)
                eId.append(round(dayeddyinfo.ID))
                idd = dayeddyinfo.ID
            else:
                eLon.append(np.nan)
                eLat.append(np.nan)
                eVort.append(np.nan)
                eId.append(np.nan)

        eAge = [distance_between_non_nan(eLon)] * len(daydic)
        # give id even for ghost centers
        eId = [idd] * len(daydic)

        ed = pd.DataFrame({
            'Lon': eLon,
            'Lat': eLat,
            'Vort': eVort,
            'ID': eId,
            'Age': eAge
        }, index=[d for d in daydic.keys()])

        # Interpolate values for Lon, Lat, Vort
        first_non_nan = ed['Lon'].first_valid_index()
        last_non_nan = ed['Lon'].last_valid_index()
        
        ed.loc[first_non_nan:last_non_nan, 'Lon'] = ed.loc[first_non_nan:last_non_nan, 'Lon'].interpolate()
        ed.loc[first_non_nan:last_non_nan, 'Lat'] = ed.loc[first_non_nan:last_non_nan, 'Lat'].interpolate()
        ed.loc[first_non_nan:last_non_nan, 'Vort'] = ed.loc[first_non_nan:last_non_nan, 'Vort'].interpolate()

        def same_sign(vector):

            vector = [x for x in vector if not math.isnan(x)]
            non_zero_values = [x for x in vector if x != 0]
            if not non_zero_values:  
                return True
            first_sign = non_zero_values[0] > 0
            for value in non_zero_values:
                if (value > 0) != first_sign:
                    return False
            return True
            
        if eAge[0] is not None:
            if eAge[0] >= meso_age and same_sign(eVort):

                ed = ed[~ed['Lon'].isna()]
                
                eddies[f'Eddy{e}'] = ed
    
    eddies = {f'Eddy{i+1}': value for i, (key, value) in enumerate(eddies.items())}

    i = 1
    for e in eddies.keys():
        eddies[e].ID = np.ones(eddies[e].ID.shape) * i
        i += 1

    # Remove excess data
    for eddy in eddies.keys():
        eddies[eddy] = eddies[eddy][~eddies[eddy]['Lon'].isna()]

    return eddies

In [9]:
# Gather mesoscale eddies
meso_age = 10
ROMS_eddies = eddy_dic_making(ROMS_daydic_tracked, meso_age)

In [10]:
def clean_daydic(daydic, eddies):
    clean_daydic = {}

    for d in daydic.keys():

        lon = []
        lat = []
        vort = []
        idd = []
        age = []

        for e in eddies.keys():


            if d in eddies[e].index:

                lon.append(eddies[e].loc[d].Lon)
                lat.append(eddies[e].loc[d].Lat)
                vort.append(eddies[e].loc[d].Vort)
                idd.append(round(eddies[e].loc[d].ID))
                age.append(round(eddies[e].loc[d].Age))

        df = pd.DataFrame({
                'Lon': lon,
                'Lat': lat,
                'Vort': vort,
                'ID': idd,
                'Age': age
            }, index=[f'Eddy{i}' for i in idd])

        clean_daydic[d] = df

    return clean_daydic

In [11]:
ROMS_daydic_meso = clean_daydic(ROMS_daydic_tracked, ROMS_eddies)
del ROMS_daydic_tracked

In [12]:
with open('ROMS_26yr_daydic.pkl', 'wb') as pickle_file:
    pickle.dump(ROMS_daydic_meso, pickle_file)

In [13]:
with open('ROMS_26yr_eddies.pkl', 'wb') as pickle_file:
    pickle.dump(ROMS_eddies, pickle_file)

In [14]:
print('Complete!')

Complete!


In [13]:
ROMS_daydic_meso['Day20']

Unnamed: 0,Lon,Lat,Vort,ID,Age
Eddy3,150.963708,-38.552137,3.2e-05,3,39
Eddy4,156.780393,-28.063343,-3.2e-05,4,59
Eddy5,159.178962,-33.322913,3e-05,5,60
Eddy6,155.280093,-37.438839,-1.9e-05,6,33
Eddy7,156.392641,-38.87525,-1e-05,7,35
Eddy8,160.4076,-29.5962,-1.6e-05,8,60
Eddy9,157.561256,-36.005282,-2.2e-05,9,60
Eddy10,157.896934,-39.415513,7e-06,10,26
Eddy11,153.62292,-32.518531,3.8e-05,11,21
Eddy12,153.952761,-37.664709,-2.5e-05,12,60
