In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
model = 'geneva_200m'
params_file = 'swirl_03'
output_folder = r'../Outputs'

lvl1_filename = f'{model}_{params_file}_day1_lvl1.csv'
output_filename = f'{model}_{params_file}_day1_lvl2a.csv'

# Import catalogue lvl1

In [3]:
level1_csv_path = os.path.join(output_folder, lvl1_filename)
level1_data = pd.read_csv(level1_csv_path)

# Create catalogue lvl2

In [4]:
def ranges_intersect(min1, max1, min2, max2):
    return max1 >= min2 and max2 >= min1

In [5]:
def compute_center_distance(df1, df2):
    return np.sqrt((df1['xc_mean'] - df2['xc_mean'])**2 + (df1['yc_mean'] - df2['yc_mean'])**2)

In [6]:
# Function to identify the different depths of an eddy
def track_eddy(level1_data, id_eddy, idx_already_aggregated, dist_threshold, time_threshold):
    mask = (
        (~level1_data['id'].isin(idx_already_aggregated)) &
        level1_data.apply(lambda row: ranges_intersect(row['depth_min_[m]'],row['depth_max_[m]'],level1_data.at[id_eddy, 'depth_min_[m]'],level1_data.at[id_eddy, 'depth_max_[m]']), axis=1) &
        (level1_data['rotation_direction'] == level1_data.at[id_eddy, 'rotation_direction']) &
        (compute_center_distance(level1_data, level1_data.iloc[id_eddy]) < dist_threshold * 3) # First a rough distance threshold
    )
    
    filtered_eddies = level1_data.loc[mask]
    filtered_eddies = filtered_eddies.copy()
    filtered_eddies['parsed_date'] = pd.to_datetime(filtered_eddies['date'])
    
    sorted_by_date = filtered_eddies.sort_values('parsed_date', ascending=True).reset_index(drop=False)
    
    aggregated_eddy = sorted_by_date.iloc[[0]]
    for i in range(1,len(sorted_by_date)):
        distance_criteria = compute_center_distance(sorted_by_date.iloc[i], aggregated_eddy.iloc[-1]) < dist_threshold # and now the refined distance threshold
        time_criteria = sorted_by_date.iloc[i]['time_index'] - sorted_by_date.iloc[i-1]['time_index'] <= time_threshold
        
        if ~time_criteria:     
            break
            
        if distance_criteria:
            aggregated_eddy = pd.concat([aggregated_eddy, sorted_by_date.iloc[[i]]], ignore_index=True)
    
    return aggregated_eddy

In [7]:
# Parameters
dist_threshold = 5 # in number of cells
time_threshold = 2 # in number of timestep
timestep_in_seconds = 3600

# Main loop
eddy_rows_lvl2a= []  # Collect aggregated rows here
id_level2a = 0
idx_already_aggregated = set()
for idx in level1_data['id']: 
    if idx in idx_already_aggregated:
        continue
        
    aggregated_data = track_eddy(level1_data, idx, idx_already_aggregated, dist_threshold, time_threshold)
    lifespan = (timestep_in_seconds + (pd.to_datetime(aggregated_data['date'].iloc[-1]) - pd.to_datetime(aggregated_data['date'].iloc[0])).total_seconds()) / 3600

    row = {
        'id': id_level2a,
        'id_lvl1': aggregated_data['id'].tolist(),
        'time_indices(t)': aggregated_data['time_index'].tolist(),
        'dates(t)': aggregated_data['date'].tolist(),
        'xc(t)': aggregated_data['xc_mean'].tolist(),
        'yc(t)': aggregated_data['yc_mean'].tolist(),
        'depth_min(t)_[m]': aggregated_data['depth_min_[m]'].tolist(),
        'depth_max(t)_[m]': aggregated_data['depth_max_[m]'].tolist(),
        'volume(t)_[m3]': aggregated_data['volume_[m3]'].tolist(),
        'rotation_direction': aggregated_data.at[0, 'rotation_direction'],
        'kinetic_energy(t)_[MJ]': aggregated_data['kinetic_energy_[MJ]'].tolist(),
        'lifespan_[h]': lifespan
    }

    eddy_rows_lvl2a.append(row)
    idx_already_aggregated.update(aggregated_data['id'].tolist())
    id_level2a += 1

# Create the final DataFrame using pd.concat
df_catalogue_level2 = pd.concat([pd.DataFrame([row]) for row in eddy_rows_lvl2a], ignore_index=True)

In [8]:
df_catalogue_level2

Unnamed: 0,id,id_lvl1,time_indices(t),dates(t),xc(t),yc(t),depth_min(t)_[m],depth_max(t)_[m],volume(t)_[m3],rotation_direction,kinetic_energy(t)_[MJ],lifespan_[h]
0,0,"[0, 170, 352, 365]","[0, 1, 2, 2]","[2023-07-01 12:00:00, 2023-07-01 12:59:44, 202...","[280.3964570634618, 278.32495824839344, 275.27...","[45.61825354306952, 47.13970202939328, 49.8649...","[-12.219499588012695, -10.545000076293944, -4....","[-0.25, -0.25, -1.820000052452088, -9.74499988...","[96491120.75, 95150080.5, 16569420.25, 2354000.0]",anticlockwise,"[0.2520422081209131, 0.2265821200684555, 0.015...",3.000000
1,1,"[1, 172, 342, 499, 659]","[0, 1, 2, 3, 4]","[2023-07-01 12:00:00, 2023-07-01 12:59:44, 202...","[256.30551005212214, 256.5520596570136, 255.96...","[78.64036981881361, 80.38491061061292, 80.9231...","[-0.7574999928474426, -2.375999927520752, -4.7...","[-0.25, -0.25, -0.25, -0.25, -0.25]","[2532500.0, 12385920.0, 24083240.0, 22765500.0...",clockwise,"[0.0029149493815165, 0.0187376353153208, 0.039...",5.000000
2,2,"[2, 171, 343, 501, 660, 819, 973, 1133, 1454, ...","[0, 1, 2, 3, 4, 5, 6, 7, 9, 10]","[2023-07-01 12:00:00, 2023-07-01 12:59:44, 202...","[287.15970145372427, 289.91144537568914, 287.2...","[15.894140760404936, 14.699779743636938, 13.94...","[-6.777999877929688, -6.777999877929688, -4.77...","[-0.25, -0.25, -0.25, -0.25, -0.25, -0.25, -0....","[15977500.25, 19057600.25, 11672800.0, 1076702...",clockwise,"[0.0127375426380894, 0.0186177329620792, 0.010...",11.000000
3,3,"[3, 10, 173, 178, 346, 355, 363, 514, 667, 824...","[0, 0, 1, 1, 2, 2, 2, 3, 4, 5, 6, 16, 17, 18, ...","[2023-07-01 12:00:00, 2023-07-01 12:00:00, 202...","[223.0081024376751, 221.91765621731315, 219.27...","[74.59188138628512, 71.7993450227922, 73.49327...","[-13.095499992370604, -8.216500282287598, -16....","[-0.25, -6.777999877929688, -0.25, -6.77799987...","[305285100.0, 61345100.0, 318991740.0, 5979722...",anticlockwise,"[2.072985393398578, 0.494573217868561, 1.73972...",23.995556
4,4,"[4, 341]","[0, 2]","[2023-07-01 12:00:00, 2023-07-01 14:00:00]","[47.29124579124579, 46.02382802382802]","[63.67272727272728, 62.78597945264613]","[-4.14900016784668, -1.280500054359436]","[-0.7574999928474426, -0.25]","[297560.0078125, 737380.0]",anticlockwise,"[0.0, 0.0005479106807669]",3.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
1827,1827,[3664],[23],[2023-07-02 10:59:44],[238.2142857142857],[92.5],[-159.1280059814453],[-159.1280059814453],[837520.0],anticlockwise,[0.0],1.000000
1828,1828,[3666],[23],[2023-07-02 10:59:44],[220.0434782608696],[99.95652173913044],[-181.7250061035156],[-170.08450317382812],[2933420.0],anticlockwise,[0.0],1.000000
1829,1829,[3671],[23],[2023-07-02 10:59:44],[247.5],[88.5],[-200.5644989013672],[-200.5644989013672],[517680.0],anticlockwise,[0.0],1.000000
1830,1830,[3672],[23],[2023-07-02 10:59:44],[277.7640692640693],[29.287878787878785],[-214.1094970703125],[-200.5644989013672],[6195240.0],anticlockwise,[3.0468662777933763e-06],1.000000


# Save

In [13]:
df_catalogue_level2.to_csv(os.path.join(output_folder, output_filename), index=False)