In [1]:
import pandas as pd
import os
import time

import xmitgcm as xm
import swirl

import dask
from dask import delayed, compute
dask.config.set(scheduler='processes', num_workers=20)

<dask.config.set at 0x7fdeaa693390>

In [2]:
datapath = r"/storage/alplakes_test/geneva_200m/run/"
gridpath = r"/storage/alplakes_test/geneva_200m/metadata/"
ref_date = "2023-03-01 0:0:0"
dt_mitgcm_results = 32
endian = '>'
swirl_params_file_name = 'swirl_03'
model = 'geneva_200m'
output_folder = r'./'

In [3]:
def run_swirl(u_plot, v_plot, dx, dy, swirl_params_file):
    vortices = swirl.Identification(v=[u_plot, v_plot],
                                    grid_dx=[dx, dy],
                                    param_file=f'./swirl_params/{swirl_params_file}.param',
                                    verbose=False)
    vortices.run()
    return vortices

In [4]:
# Extract and compute numpy arrays BEFORE passing to dask (improves reading data time)
def load_input_data(ds_mitgcm, time_indices, depth_indices):
    dx = ds_mitgcm.dxC.values[0][0]
    dy = ds_mitgcm.dyC.values[0][0]
    dz = ds_mitgcm.drC.values
    uvel_data = ds_mitgcm['UVEL'].isel(time=time_indices, Z=depth_indices).fillna(0).values
    vvel_data = ds_mitgcm['VVEL'].isel(time=time_indices, Z=depth_indices).fillna(0).values
    wvel_data = ds_mitgcm['WVEL'].isel(time=time_indices, Zl=depth_indices).fillna(0).values

    return dx, dy, dz, uvel_data, vvel_data, wvel_data

In [5]:
def compute_ke_snapshot(uvel, vvel, wvel, dx, dy, dz):
    ke = 0.5 * (uvel ** 2 + vvel ** 2 + wvel ** 2) * dx * dy * dz  # This gives J per cell

    return ke / 1e6  # Convert to MJ

In [6]:
def translate_rotation_direction(eddy_orientation: int):
    return 'clockwise' if eddy_orientation == -1 else 'anticlockwise'

In [13]:
# Helper function to extract eddy info into a row
def extract_eddy_data(id_level0, indices_eddy, eddy, date, depth, dz, ke_grid_megajoules, surface_cell):
    vortex_indices = tuple(eddy.vortex_cells.astype(int))
    ke_eddy = ke_grid_megajoules[vortex_indices[0], vortex_indices[1]].sum()
    surface_area = len(eddy.vortex_cells[0]) * surface_cell

    return {
        'id': id_level0,
        'time_index': indices_eddy[0],
        'depth_index': indices_eddy[1],
        'eddy_index': indices_eddy[2],
        'date': date,
        'depth_[m]': depth,
        'xc': eddy.center[0],
        'yc': eddy.center[1],
        'surface_area_[m2]': float(surface_area),
        'volume_slice_[m3]': float(surface_area * dz),
        'rotation_direction': translate_rotation_direction(eddy.orientation),
        'kinetic_energy_[MJ]': float(ke_eddy),
        'i_eddy_cells': eddy.vortex_cells[0],
        'j_eddy_cells': eddy.vortex_cells[1]
    }

In [18]:
def run_swirl_and_create_lvl0(uvel, vvel, wvel, dx, dy, dz, swirl_params_file, id_level0, date, depth, t_index, d_index):
    eddies = run_swirl(uvel, vvel, dx, dy, swirl_params_file)
    if not eddies:  # empty list
        return pd.DataFrame()  # optionally: return with predefined columns
    
    ke_grid = compute_ke_snapshot(uvel, vvel, wvel, dx, dy, dz)
    eddy_rows = []
    for eddy_index in range(len(eddies)):
        indices_eddy = (t_index, d_index, eddy_index)
        row_data = extract_eddy_data(id_level0, indices_eddy, eddies[eddy_index], date, depth, dz, ke_grid, dx*dy)
        eddy_rows.append(row_data)

    return pd.concat([pd.DataFrame([row]) for row in eddy_rows], ignore_index=True)

In [23]:
def main():
    #---------------------------------
    print('Opening MITgcm results...')
    ds_mitgcm = xm.open_mdsdataset(
        datapath,
        grid_dir=gridpath,
        ref_date=ref_date,
        prefix='3Dsnaps',
        delta_t=dt_mitgcm_results,
        endian=endian)


    # ---------------------------------
    print('Detecting eddies and creating level 0 catalogue...')
    depth_indices = range(len(ds_mitgcm.Z.values))
    time_indices = range(24)

    start_opening_data = time.time()
    dx, dy, dz_array, uvel_data, vvel_data, wvel_data = load_input_data(ds_mitgcm, time_indices, depth_indices)
    print(f"Swirl opening data time: {time.time() - start_opening_data:.6f} seconds")

    tasks = {}
    for ti, t_idx in enumerate(time_indices):
        date = pd.Timestamp(ds_mitgcm.time.values[ti]).to_pydatetime()
        for di, d_idx in enumerate(depth_indices):
            depth = float(ds_mitgcm.Z.values[di])
            dz = dz_array[di]
            uvel = uvel_data[ti, di].T
            vvel = vvel_data[ti, di].T
            wvel = wvel_data[ti, di].T
            tasks[(t_idx, d_idx)] = delayed(run_swirl_and_create_lvl0)(uvel, vvel, wvel, dx, dy, dz, swirl_params_file_name, 0, date, depth, ti, di)

    # Compute all tasks in parallel
    start = time.time()
    results = compute(*tasks.values())
    print(f"Parallel execution time: {time.time() - start:.6f} seconds")

    # Create final DataFrame
    df_catalogue_level0 = pd.concat([row for row in results], ignore_index=True)

    lvl0_output_filename = f'{model}_{swirl_params_file_name}_day{ds_mitgcm.time.values[0]}_lvl0.csv'
    output_path = os.path.join(output_folder, lvl0_output_filename)

    # ---------------------------------
    print(f'Saving catalogue level 0 to {output_path}...')
    df_catalogue_level0.to_csv(output_path, index=False)

In [24]:
if __name__ == "__main__":
    from multiprocessing import freeze_support
    freeze_support()  # optional on Linux/macOS
    main()

Opening MITgcm results...
Detecting eddies and creating level 0 catalogue...


Process SpawnProcess-196:
Process SpawnProcess-195:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/leroquan@eawag.wroot.emp-eaw.ch/miniconda3/envs/swirl_toolbox/lib/python3.11/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/home/leroquan@eawag.wroot.emp-eaw.ch/miniconda3/envs/swirl_toolbox/lib/python3.11/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/leroquan@eawag.wroot.emp-eaw.ch/miniconda3/envs/swirl_toolbox/lib/python3.11/concurrent/futures/process.py", line 249, in _process_worker
    call_item = call_queue.get(block=True)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/leroquan@eawag.wroot.emp-eaw.ch/miniconda3/envs/swirl_toolbox/lib/python3.11/multiprocessing/queues.py", line 102, in get
    with self._rlock:
  File "/home/leroquan@eawag.wroot.emp-eaw.ch/miniconda3/envs/swirl_toolbox/lib/python3.11/multiprocessing/synchronize.py", line 95, in __en

KeyboardInterrupt: 

In [25]:
#---------------------------------
print('Opening MITgcm results...')
ds_mitgcm = xm.open_mdsdataset(
    datapath,
    grid_dir=gridpath,
    ref_date=ref_date,
    prefix='3Dsnaps',
    delta_t=dt_mitgcm_results,
    endian=endian)


# ---------------------------------
print('Detecting eddies and creating level 0 catalogue...')
depth_indices = range(2)
time_indices = range(2)

start_opening_data = time.time()
dx, dy, dz_array, uvel_data, vvel_data, wvel_data = load_input_data(ds_mitgcm, time_indices, depth_indices)
print(f"Swirl opening data time: {time.time() - start_opening_data:.6f} seconds")

tasks = {}
for ti, t_idx in enumerate(time_indices):
    date = pd.Timestamp(ds_mitgcm.time.values[ti]).to_pydatetime()
    for di, d_idx in enumerate(depth_indices):
        depth = float(ds_mitgcm.Z.values[di])
        dz = dz_array[di]
        uvel = uvel_data[ti, di].T
        vvel = vvel_data[ti, di].T
        wvel = wvel_data[ti, di].T
        tasks[(t_idx, d_idx)] = delayed(run_swirl_and_create_lvl0)(uvel, vvel, wvel, dx, dy, dz, swirl_params_file_name, 0, date, depth, ti, di)

# Compute all tasks in parallel
start = time.time()
results = compute(*tasks.values())
print(f"Parallel execution time: {time.time() - start:.6f} seconds")

# Create final DataFrame
df_catalogue_level0 = pd.concat([row for row in results], ignore_index=True)

lvl0_output_filename = f'{model}_{swirl_params_file_name}_day{ds_mitgcm.time.values[0]}_lvl0.csv'
output_path = os.path.join(output_folder, lvl0_output_filename)

# ---------------------------------
print(f'Saving catalogue level 0 to {output_path}...')
df_catalogue_level0.to_csv(output_path, index=False)

Opening MITgcm results...
Detecting eddies and creating level 0 catalogue...
Swirl opening data time: 7.708533 seconds
Parallel execution time: 2.364203 seconds
Saving catalogue level 0 to ./geneva_200m_swirl_03_day2023-07-01T12:00:00.000000000_lvl0.csv...


In [26]:
df_catalogue_level0

Unnamed: 0,id,time_index,depth_index,eddy_index,date,depth_[m],xc,yc,surface_area_[m2],volume_slice_[m3],rotation_direction,kinetic_energy_[MJ],i_eddy_cells,j_eddy_cells
0,0,0,0,0,2023-07-01 12:00:00,-0.25,281.334862,44.655963,6040000.0,1510000.0,anticlockwise,0.002504,"[274.0, 274.0, 274.0, 274.0, 274.0, 275.0, 275...","[42.0, 44.0, 46.0, 47.0, 48.0, 41.0, 43.0, 44...."
1,0,0,0,1,2023-07-01 12:00:00,-0.25,256.322785,78.116034,4040000.0,1010000.0,clockwise,0.001366,"[251.0, 251.0, 251.0, 252.0, 252.0, 252.0, 252...","[74.0, 75.0, 82.0, 73.0, 74.0, 75.0, 76.0, 78...."
2,0,0,0,2,2023-07-01 12:00:00,-0.25,285.687166,15.275401,2720000.0,680000.0,clockwise,0.000716,"[281.0, 281.0, 281.0, 282.0, 282.0, 282.0, 282...","[15.0, 16.0, 17.0, 14.0, 15.0, 16.0, 17.0, 18...."
3,0,0,0,3,2023-07-01 12:00:00,-0.25,223.788452,76.421897,31560000.0,7890000.0,anticlockwise,0.068371,"[205.0, 205.0, 205.0, 205.0, 206.0, 206.0, 206...","[74.0, 75.0, 77.0, 79.0, 70.0, 71.0, 72.0, 73...."
4,0,0,1,0,2023-07-01 12:00:00,-0.7575,47.5,64.2,160000.0,81200.0,anticlockwise,0.0,"[47.0, 47.0, 48.0, 48.0]","[63.0, 64.0, 64.0, 65.0]"
5,0,0,1,1,2023-07-01 12:00:00,-0.7575,280.821918,45.278539,6200000.0,3146500.0,anticlockwise,0.005174,"[273.0, 273.0, 273.0, 274.0, 274.0, 274.0, 274...","[43.0, 46.0, 48.0, 42.0, 43.0, 44.0, 46.0, 47...."
6,0,0,1,2,2023-07-01 12:00:00,-0.7575,285.747126,15.390805,2720000.0,1380400.0,clockwise,0.00156,"[281.0, 281.0, 282.0, 282.0, 282.0, 282.0, 282...","[15.0, 16.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18...."
7,0,0,1,3,2023-07-01 12:00:00,-0.7575,256.288235,79.164706,3000000.0,1522500.0,clockwise,0.001549,"[252.0, 252.0, 252.0, 253.0, 253.0, 253.0, 253...","[76.0, 77.0, 80.0, 75.0, 76.0, 77.0, 78.0, 79...."
8,0,0,1,4,2023-07-01 12:00:00,-0.7575,224.191341,75.592867,32480000.0,16483600.0,anticlockwise,0.138797,"[205.0, 205.0, 205.0, 206.0, 206.0, 206.0, 206...","[74.0, 75.0, 77.0, 70.0, 71.0, 72.0, 73.0, 74...."
9,0,1,0,0,2023-07-01 12:59:44,-0.25,277.87232,46.710526,7880000.0,1970000.0,anticlockwise,0.002787,"[269.0, 269.0, 270.0, 270.0, 270.0, 270.0, 271...","[49.0, 50.0, 48.0, 49.0, 50.0, 51.0, 41.0, 42...."
