In [1]:
import numpy as np
import netCDF4 as nc
import matplotlib.pyplot as plt
import pandas as pd
from scipy.interpolate import griddata
import netCDF4 as nc
from scipy.interpolate import RegularGridInterpolator
import time


In [2]:
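# NOTE: legacy implementation of `tracking`, kept commented out for reference only.
# It is superseded by the active `tracking` definition in the next cell.
# def tracking(df_data, start_ID, next_num, VORT_WEIGHT=15000, R_THRESH=30):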
#     tic = time.perf_counter()

#     df_data = df_data.copy()
#     # Initialize IDs: set first day IDs equal to their Eddy values.
#     min_day = df_data['Day'].min()
#     df_data['ID'] = -1
#     df_data.loc[df_data['Day'] == min_day, 'ID'] = start_ID # for the initial dataset
#     df_data['ID'] = df_data['ID'].astype('Int64')
#     # next_num = df_data['ID'].max() + 1 # for the initial dataset

#     df_R = pd.DataFrame(columns=['D_diff', 'W_diff', 'R'])

#     # Loop through days starting from min_day+1 to max_day.
#     for day in range(min_day + 1, df_data['Day'].max() + 1):
#         pres_day = df_data[df_data['Day'] == day].copy()
#         pres_day = pres_day[~np.isnan(pres_day['x0'])]
#         for e_pres in pres_day['Eddy'].unique():
#             pres_eddy = pres_day[pres_day['Eddy'] == e_pres].iloc[0]
#             assigned = False
#             # Look back up to 4 days.
#             for delta in range(1, 5):
#                 candidate_day = day - delta
#                 if candidate_day < 0:
#                     continue
#                 candidate_prev = df_data[df_data['Day'] == candidate_day].copy()
#                 candidate_prev = candidate_prev[~np.isnan(candidate_prev['x0'])]
#                 for e_prev in candidate_prev['Eddy'].unique():
#                     prev_eddy = candidate_prev[candidate_prev['Eddy'] == e_prev].iloc[0]
#                     R = np.sqrt(
#                         (pres_eddy['x0'] - prev_eddy['x0'])**2 +
#                         (pres_eddy['y0'] - prev_eddy['y0'])**2 +
#                         VORT_WEIGHT * (pres_eddy['w'] - prev_eddy['w'])**2 
#                     )
#                     D_diff = np.hypot(pres_eddy['x0'] - prev_eddy['x0'], pres_eddy['y0'] - prev_eddy['y0'])
#                     W_diff = np.abs(pres_eddy['w'] - prev_eddy['w'])
#                     df_R.loc[len(df_R)] = {'D_diff': D_diff, 'W_diff': W_diff, 'R': R}
#                     if R < R_THRESH and (pres_eddy['Cyc'] == prev_eddy['Cyc']) and not (pres_day['ID'] == prev_eddy['ID']).any():
#                         df_data.loc[(df_data['Day'] == day) & (df_data['Eddy'] == e_pres), 'ID'] = prev_eddy['ID']
#                         assigned = True
#                         break
#                 if assigned:
#                     break
#             if not assigned:
#                 df_data.loc[(df_data['Day'] == day) & (df_data['Eddy'] == e_pres), 'ID'] = next_num
#                 next_num += 1

#         if day % 10 == 0:
#             toc = time.perf_counter()
#             print(f"Elapsed time: {toc - tic:.4f} seconds")
#             print(day)

#     df_data['next_num'] = next_num
                
#     return df_data, df_R
    

In [3]:
def tracking(df_data, start_ID, next_num, VORT_WEIGHT=15000, R_THRESH=30,
             LOOKBACK_DAYS=4, PROGRESS_EVERY=200):
    """Assign a persistent track 'ID' to every eddy detection, day by day.

    Each (Day, Eddy) pair is represented by its first row with a non-NaN
    'x0'. For every such eddy on a given day, the eddies of the previous
    ``LOOKBACK_DAYS`` days are scanned (nearest day first, in row order) and
    the first one satisfying all of the following is taken as the same track:

    * ``R = sqrt(dx**2 + dy**2 + VORT_WEIGHT * dw**2) < R_THRESH``
    * identical 'Cyc' value
    * its ID has not already been given to another eddy on the current day

    If no candidate qualifies, the eddy starts a new track numbered
    ``next_num`` (which is then incremented).

    Parameters
    ----------
    df_data : pandas.DataFrame
        Must provide the columns 'Day', 'Eddy', 'x0', 'y0', 'w' and 'Cyc'.
        'Day' must hold integers (it is fed to ``range``). Not modified.
    start_ID : scalar or pandas.Series
        ID(s) written to the rows of the earliest day. A Series is aligned
        on the index by pandas, so its index must match those rows.
    next_num : int
        First unused track number.
    VORT_WEIGHT : float, optional
        Weight of the squared 'w' difference inside ``R``.
    R_THRESH : float, optional
        Upper bound (exclusive) on ``R`` for two detections to be linked.
    LOOKBACK_DAYS : int, optional
        How many previous days are searched for a match.
    PROGRESS_EVERY : int or None, optional
        Print elapsed time whenever ``day % PROGRESS_EVERY == 0`` on a day
        that contained detections; a falsy value disables printing.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        A copy of ``df_data`` with an 'ID' column (nullable ``Int64``; rows
        that never take part in tracking keep -1) and a constant 'next_num'
        column holding the next unused track number; and a frame with the
        columns 'D_diff', 'W_diff', 'R' for every pair that was compared.
    """
    tic = time.perf_counter()
    r_columns = ['D_diff', 'W_diff', 'R']

    # Work on a copy so the caller's DataFrame is never modified.
    df_data = df_data.copy()
    df_data['ID'] = -1

    # Nothing to track: min()/max() of an empty 'Day' column would be NaN
    # and break range() below.
    if df_data.empty:
        df_data['ID'] = df_data['ID'].astype('Int64')
        df_data['next_num'] = next_num
        return df_data, pd.DataFrame(columns=r_columns)

    # Seed the earliest day with the IDs supplied by the caller.
    min_day = df_data['Day'].min()
    max_day = df_data['Day'].max()
    df_data.loc[df_data['Day'] == min_day, 'ID'] = start_ID
    df_data['ID'] = df_data['ID'].astype('Int64')

    # One representative row per (Day, Eddy): the first row with a valid x0.
    # drop_duplicates keeps the actual first row; GroupBy.first() would
    # instead splice together the first non-null value of each column.
    valid_rows = df_data[df_data['x0'].notna() & df_data['Eddy'].notna()]
    representatives = valid_rows.drop_duplicates(subset=['Day', 'Eddy'], keep='first')
    reps_by_day = {d: rows for d, rows in representatives.groupby('Day', sort=False)}

    # day -> list of (x0, y0, w, Cyc, ID) for eddies whose ID is final.
    # Past days never change, so this replaces re-slicing df_data for every
    # present eddy and every look-back step.
    tracked = {}
    if min_day in reps_by_day:
        tracked[min_day] = [
            (row['x0'], row['y0'], row['w'], row['Cyc'], row['ID'])
            for _, row in reps_by_day[min_day].iterrows()
        ]

    # Collected as a list of dicts and converted to a DataFrame once.
    r_rows = []

    for day in range(min_day + 1, max_day + 1):
        todays_reps = reps_by_day.get(day)
        if todays_reps is None:
            continue  # no valid detections on this day

        day_mask = df_data['Day'] == day
        # Previous-day IDs already inherited today; updated as assignments
        # are made so that two eddies of the same day cannot share a track.
        ids_taken_today = set()
        todays_tracks = []

        for _, pres_eddy in todays_reps.iterrows():
            x0, y0, w, cyc = pres_eddy['x0'], pres_eddy['y0'], pres_eddy['w'], pres_eddy['Cyc']
            track_id = None
            assigned = False

            # Nearest previous day first; days without entries yield nothing.
            for delta in range(1, LOOKBACK_DAYS + 1):
                for prev_x0, prev_y0, prev_w, prev_cyc, prev_id in tracked.get(day - delta, ()):
                    dx = x0 - prev_x0
                    dy = y0 - prev_y0
                    dw = w - prev_w
                    R = np.sqrt(dx ** 2 + dy ** 2 + VORT_WEIGHT * (dw ** 2))
                    r_rows.append({'D_diff': np.hypot(dx, dy), 'W_diff': np.abs(dw), 'R': R})

                    if (R < R_THRESH) and (cyc == prev_cyc) and (prev_id not in ids_taken_today):
                        track_id = prev_id
                        assigned = True
                        break
                if assigned:
                    break

            if assigned:
                ids_taken_today.add(track_id)
            else:
                # No acceptable predecessor: open a new track.
                track_id = next_num
                next_num += 1

            # Every row of this (Day, Eddy) pair receives the ID.
            df_data.loc[day_mask & (df_data['Eddy'] == pres_eddy['Eddy']), 'ID'] = track_id
            todays_tracks.append((x0, y0, w, cyc, track_id))

        tracked[day] = todays_tracks

        # Periodic progress report.
        if PROGRESS_EVERY and day % PROGRESS_EVERY == 0:
            toc = time.perf_counter()
            print(f"Elapsed time: {toc - tic:.4f} seconds")
            print(day)

    df_R = pd.DataFrame(r_rows, columns=r_columns)
    # Stored as a constant column so a later run can resume the numbering.
    df_data['next_num'] = next_num

    return df_data, df_R



In [4]:
# Day boundaries of the tracking segments. Iteration k tracks days
# intervals[k+1]..intervals[k+2] and is seeded from the previously saved
# result for intervals[k]..intervals[k+1].
intervals = [1611, 2211, 5479]

# Scratch directory holding the input and output pickles.
DATA_DIR = "/srv/scratch/z5297792/Chapter2"

# Loop-invariant: load the full dataset once. Rows with any NaN are dropped
# because, if ESPRA didn't work, there is no vorticity, which tracking requires.
df_data_all = pd.read_pickle(f"{DATA_DIR}/df_data_1462_5479.pkl").dropna()

for k in range(len(intervals) - 2):

    start_day, end_day = intervals[k+1], intervals[k+2]

    df_data = df_data_all[(df_data_all['Day'] >= start_day) & (df_data_all['Day'] <= end_day)]

    # Result of the preceding segment; its last day is this segment's first day.
    df_eddies_pre = pd.read_pickle(f"{DATA_DIR}/df_eddies_{intervals[k]}_{start_day}.pkl")

    tic = time.perf_counter()
    # IDs already assigned on start_day; passed as a Series, so they are
    # matched to df_data rows by index label inside tracking().
    start_ID = df_eddies_pre[df_eddies_pre['Day'] == start_day]['ID']
    # 'next_num' is stored as a constant column by tracking().
    next_num = df_eddies_pre.iloc[0]['next_num']
    df_eddies, df_R = tracking(df_data, start_ID, next_num)

    toc = time.perf_counter()
    print(f"Elapsed time: {toc - tic:.4f} seconds")

    df_eddies.to_pickle(f"{DATA_DIR}/df_eddies_{start_day}_{end_day}.pkl")
    df_R.to_pickle(f"{DATA_DIR}/df_R_{start_day}_{end_day}.pkl")


Elapsed time: 2.3863 seconds
2220
Elapsed time: 4.4644 seconds
2230
Elapsed time: 6.8405 seconds
2240
Elapsed time: 8.5141 seconds
2250
Elapsed time: 10.2859 seconds
2260
Elapsed time: 12.2359 seconds
2270
Elapsed time: 13.8195 seconds
2280
Elapsed time: 15.4454 seconds
2290
Elapsed time: 18.0423 seconds
2300
Elapsed time: 20.5009 seconds
2310
Elapsed time: 22.6904 seconds
2320
Elapsed time: 25.7236 seconds
2330
Elapsed time: 28.1031 seconds
2340
Elapsed time: 31.2240 seconds
2350
Elapsed time: 33.8563 seconds
2360
Elapsed time: 37.2073 seconds
2370
Elapsed time: 40.5544 seconds
2380
Elapsed time: 43.8747 seconds
2390
Elapsed time: 46.8877 seconds
2400
Elapsed time: 50.3043 seconds
2410
Elapsed time: 53.5363 seconds
2420
Elapsed time: 58.0464 seconds
2430
Elapsed time: 61.9942 seconds
2440
Elapsed time: 65.1091 seconds
2450
Elapsed time: 68.9638 seconds
2460
Elapsed time: 72.1826 seconds
2470
Elapsed time: 75.5323 seconds
2480
Elapsed time: 78.6618 seconds
2490
Elapsed time: 82.2157 se