In [None]:
import pandas as pd
import numpy as np
import os

In [None]:
# Run configuration: model grid, parameter set, and the folder holding the catalogues.
model = 'geneva_200m'
params_file = 'swirl_03'
output_folder = r'../Outputs'

# Input (level 1) and output (level 2a) catalogue file names share the same
# "<model>_<params_file>_day1" stem and differ only by their level suffix.
lvl1_filename = '{model}_{params_file}_day1_lvl1.csv'.format(model=model, params_file=params_file)
output_filename = '{model}_{params_file}_day1_lvl2a.csv'.format(model=model, params_file=params_file)

# Import catalogue lvl1

In [None]:
# Read the level-1 catalogue from the configured output folder.
level1_csv_path = os.path.join(output_folder, lvl1_filename)
level1_data = pd.read_csv(filepath_or_buffer=level1_csv_path)

# Create catalogue lvl2a

In [None]:
def ranges_intersect(min1, max1, min2, max2):
    """Return whether the closed ranges [min1, max1] and [min2, max2] overlap.

    Ranges that only touch at an endpoint count as intersecting.
    """
    first_reaches_second = max1 >= min2
    second_reaches_first = max2 >= min1
    return first_reaches_second and second_reaches_first

In [None]:
def compute_center_distance(df1, df2):
    """Euclidean distance between the ('xc_mean', 'yc_mean') centres of df1 and df2.

    Both arguments only need to support lookup by the keys 'xc_mean' and
    'yc_mean'; whatever broadcasting the subtraction of those values performs
    (e.g. a column minus a scalar) carries through to the result.
    """
    delta_x = df1['xc_mean'] - df2['xc_mean']
    delta_y = df1['yc_mean'] - df2['yc_mean']
    squared_distance = delta_x**2 + delta_y**2
    return np.sqrt(squared_distance)

In [None]:
# Function to track one level-1 eddy through time (and across the depth ranges it occupies)
def track_eddy(level1_data, id_eddy, idx_already_aggregated, dist_threshold, time_threshold):
    """Build the track of level-1 detections that belong with the eddy ``id_eddy``.

    Candidate detections are the rows of ``level1_data`` that
      * are not listed in ``idx_already_aggregated``,
      * have a depth range intersecting the reference eddy's depth range,
      * rotate in the same direction as the reference eddy, and
      * have their centre within ``3 * dist_threshold`` of the reference centre
        (a rough pre-filter).
    Candidates are then walked in chronological order. A candidate joins the
    track when its centre is closer than ``dist_threshold`` to the last
    detection already in the track. The walk stops at the first gap larger than
    ``time_threshold`` between the 'time_index' of two consecutive candidates.

    Parameters
    ----------
    level1_data : pandas.DataFrame
        Level-1 catalogue. Columns read here: 'id', 'date', 'time_index',
        'xc_mean', 'yc_mean', 'depth_min_[m]', 'depth_max_[m]' and
        'rotation_direction'.
    id_eddy
        Value of the 'id' column identifying the reference eddy.
    idx_already_aggregated : collection
        Values of 'id' that must not be considered as candidates.
    dist_threshold : number
        Maximum centre-to-centre distance, in the units of 'xc_mean'/'yc_mean'.
    time_threshold : number
        Maximum allowed difference in 'time_index' between consecutive candidates.

    Returns
    -------
    pandas.DataFrame
        The detections of the track in chronological order, indexed 0..n-1.
        It holds the columns of ``level1_data`` plus the former row label
        (added by ``reset_index``) and 'parsed_date'. Empty when no row passes
        the candidate filter.

    Raises
    ------
    KeyError
        If no row of ``level1_data`` has 'id' equal to ``id_eddy``.

    Notes
    -----
    NOTE(review): the track starts from the earliest candidate, which is not
    necessarily ``id_eddy`` itself, so the reference eddy may be absent from the
    returned track. Confirm this is intended.
    NOTE(review): the time gap is measured between consecutive candidates,
    including candidates rejected by the distance test, not against the last
    detection kept in the track. Confirm this is intended.
    """
    # Resolve the reference eddy once, through the 'id' column. The caller passes
    # values of that column, so this avoids mixing label-based and positional
    # lookups that only agree when id == index label == row position.
    id_matches = level1_data['id'] == id_eddy
    if not id_matches.any():
        raise KeyError(f"no level-1 eddy with id {id_eddy!r}")
    reference = level1_data.loc[id_matches].iloc[0]
    ref_depth_min = reference['depth_min_[m]']
    ref_depth_max = reference['depth_max_[m]']

    mask = (
        (~level1_data['id'].isin(idx_already_aggregated)) &
        # Closed-interval overlap test, vectorised over all rows
        (level1_data['depth_max_[m]'] >= ref_depth_min) &
        (level1_data['depth_min_[m]'] <= ref_depth_max) &
        (level1_data['rotation_direction'] == reference['rotation_direction']) &
        (compute_center_distance(level1_data, reference) < dist_threshold * 3) # First a rough distance threshold
    )

    filtered_eddies = level1_data.loc[mask].copy()
    filtered_eddies['parsed_date'] = pd.to_datetime(filtered_eddies['date'])

    sorted_by_date = filtered_eddies.sort_values('parsed_date', ascending=True).reset_index(drop=False)
    if sorted_by_date.empty:
        return sorted_by_date

    # Positions (in sorted_by_date) of the detections kept in the track; the
    # frame is sliced once at the end instead of being re-concatenated per step.
    kept_positions = [0]
    for i in range(1, len(sorted_by_date)):
        candidate = sorted_by_date.iloc[i]
        previous_candidate = sorted_by_date.iloc[i - 1]

        within_time = candidate['time_index'] - previous_candidate['time_index'] <= time_threshold
        # Logical `not`, not bitwise `~`: `~True` is -2 (truthy) for a plain Python bool.
        if not within_time:
            break

        last_kept = sorted_by_date.iloc[kept_positions[-1]]
        if compute_center_distance(candidate, last_kept) < dist_threshold: # and now the refined distance threshold
            kept_positions.append(i)

    return sorted_by_date.iloc[kept_positions].reset_index(drop=True)

In [None]:
# Parameters
dist_threshold = 5 # in number of cells
time_threshold = 2 # in number of timestep
timestep_in_seconds = 3600
SECONDS_PER_HOUR = 3600 # unit conversion for 'lifespan_[h]'; independent of the timestep above

# Main loop: every level-1 eddy that is not yet part of a track seeds a new track.
eddy_rows_lvl2a = []  # One dict per tracked eddy
id_level2a = 0
idx_already_aggregated = set()
for idx in level1_data['id']:
    if idx in idx_already_aggregated:
        continue

    aggregated_data = track_eddy(level1_data, idx, idx_already_aggregated, dist_threshold, time_threshold)

    # Lifespan = time between first and last detection, plus one timestep so
    # that a track with a single detection lasts one timestep rather than zero.
    first_date = pd.to_datetime(aggregated_data['date'].iloc[0])
    last_date = pd.to_datetime(aggregated_data['date'].iloc[-1])
    lifespan = (timestep_in_seconds + (last_date - first_date).total_seconds()) / SECONDS_PER_HOUR

    row = {
        'id': id_level2a,
        'id_lvl1': aggregated_data['id'].tolist(),
        'time_indices(t)': aggregated_data['time_index'].tolist(),
        'dates(t)': aggregated_data['date'].tolist(),
        'xc(t)': aggregated_data['xc_mean'].tolist(),
        'yc(t)': aggregated_data['yc_mean'].tolist(),
        'depth_min(t)_[m]': aggregated_data['depth_min_[m]'].tolist(),
        'depth_max(t)_[m]': aggregated_data['depth_max_[m]'].tolist(),
        'volume(t)_[m3]': aggregated_data['volume_[m3]'].tolist(),
        # Positional access: does not depend on how the track frame is indexed
        'rotation_direction': aggregated_data['rotation_direction'].iloc[0],
        'kinetic_energy(t)_[MJ]': aggregated_data['kinetic_energy_[MJ]'].tolist(),
        'lifespan_[h]': lifespan
    }

    eddy_rows_lvl2a.append(row)
    # NOTE(review): only the ids returned by track_eddy are marked as aggregated.
    # If `idx` is not among them, it is skipped here without being assigned to a
    # track by this iteration. Confirm this is acceptable.
    idx_already_aggregated.update(aggregated_data['id'].tolist())
    id_level2a += 1

# Create the final DataFrame in one go from the collected records
# (also valid when no eddy was found, unlike pd.concat on an empty list).
df_catalogue_level2 = pd.DataFrame(eddy_rows_lvl2a)

In [None]:
# Display the level-2 catalogue (one row per tracked eddy) for a visual check.
df_catalogue_level2

# Save

In [None]:
# Write the catalogue to the configured output folder, without the row index.
level2a_csv_path = os.path.join(output_folder, output_filename)
df_catalogue_level2.to_csv(level2a_csv_path, index=False)