# Box Loader Testing

I need to test a few things about the steps in the `argo_box_loader` notebook. There are a few things that might make a difference by decreasing the size of an array, but I want to try here on a box that takes a lot less time to load.

In [1]:
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
import argopy
import scipy.ndimage as filter
import scipy
import matplotlib
import gsw

import argopy
from argopy import DataFetcher as ArgoDataFetcher

argo_loader = ArgoDataFetcher(
    src="gdac", ftp="/swot/SUM05/dbalwada/Argo_sync", progress=True
)

In [2]:
import os
import sys
os.chdir('/home.ufs/amf2288/argo-intern/funcs')
sys.path.append(os.path.abspath('../funcs'))

import processing_funcs as prf

In [13]:
"""
import dask
from dask import delayed
from dask.distributed import Client

client = Client()
"""

from dask.distributed import Client, LocalCluster

# Create a LocalCluster
cluster = LocalCluster(n_workers=64, threads_per_worker=1)

# Connect to the cluster
client = Client(cluster)

# Print the cluster information
print(cluster)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 41738 instead


LocalCluster(8aa8d6a2, 'tcp://127.0.0.1:36133', workers=64, threads=64, memory=0.98 TiB)


In [4]:
#box1 = [-180,-179,0,1,0,2001]
box2 = [-180,-179,1,2,0,2001]
box3 = [-180,-179,2,3,0,2001]
box4 = [-180,-179,3,4,0,2001]
box5 = [-180,-179,4,5,0,2001]

In [5]:
# Example arguments for your function
boxes_list = [
    #(box1,2),
    (box2),
    (box3),
    (box4),
    (box5)
]

In [6]:
@dask.delayed
def get_box_delayed(*args, **kwargs):
    try:
        return prf.get_box(*args, **kwargs)
    except Exception as e:
        return type(e).__name__, str(e)

def get_box_dask(boxes_list, interp_step):
    
    boxes_list = [(box, interp_step) for box in boxes_list]
    
    # Create a list of delayed tasks
    tasks = [get_box_delayed(*args) for args in boxes_list]

    # Compute the results in parallel
    results = dask.compute(*tasks)
    
    errors=[]

    # Process the results as needed
    for n, result in enumerate(results):
        if isinstance(result, tuple) and isinstance(result[0], str):
            error_type, error_message = result
            print("Error in box {}: {} - {}".format(boxes_list[n][0][0],boxes_list[n][0][1],boxes_list[n][0][2],boxes_list[n][0][3], error_type, error_message))
            errors.append([boxes_list[n][0][0],boxes_list[n][0][1],boxes_list[n][0][2],boxes_list[n][0][3], error_type, error_message])
        else:
            result.to_netcdf("/swot/SUM05/amf2288/sync-boxes/atl_basin/lon:({},{})_lat:({},{})_ds_z.nc".format(boxes_list[n][0][0],boxes_list[n][0][1],boxes_list[n][0][2],boxes_list[n][0][3]))
            
    return errors 


In [7]:
#get_box_dask(boxes_list, 2)

In [8]:
from typing import List, Tuple

#def generate_grid(min_lat: float, max_lat: float, min_lon: float, max_lon: float, step: float) -> List[Tuple[float, float, float, float]]:
def generate_grid(box, step):
    grid = []
    lon_min, lon_max, lat_min, lat_max = box[0],box[1],box[2],box[3]
    lat = lat_min
    while lat < lat_max:
        lon = lon_min
        while lon < lon_max:
            # Ensure the box does not exceed the specified bounds
            box_lat_max = min(lat + step, lat_max)
            box_lon_max = min(lon + step, lon_max)
            box = [lat, box_lat_max, lon, box_lon_max, box[4], box[5]]
            grid.append(box)
            lon += step
        lat += step
    return grid

In [9]:
generate_grid([0,90,0,90,0,2001], 20)

[[0, 20, 0, 20, 0, 2001],
 [0, 20, 20, 40, 0, 2001],
 [0, 20, 40, 60, 0, 2001],
 [0, 20, 60, 80, 0, 2001],
 [0, 20, 80, 90, 0, 2001],
 [20, 40, 0, 20, 0, 2001],
 [20, 40, 20, 40, 0, 2001],
 [20, 40, 40, 60, 0, 2001],
 [20, 40, 60, 80, 0, 2001],
 [20, 40, 80, 90, 0, 2001],
 [40, 60, 0, 20, 0, 2001],
 [40, 60, 20, 40, 0, 2001],
 [40, 60, 40, 60, 0, 2001],
 [40, 60, 60, 80, 0, 2001],
 [40, 60, 80, 90, 0, 2001],
 [60, 80, 0, 20, 0, 2001],
 [60, 80, 20, 40, 0, 2001],
 [60, 80, 40, 60, 0, 2001],
 [60, 80, 60, 80, 0, 2001],
 [60, 80, 80, 90, 0, 2001],
 [80, 90, 0, 20, 0, 2001],
 [80, 90, 20, 40, 0, 2001],
 [80, 90, 40, 60, 0, 2001],
 [80, 90, 60, 80, 0, 2001],
 [80, 90, 80, 90, 0, 2001]]

In [10]:
generate_grid([0,20,0,20,0,2001], 5)

[[0, 5, 0, 5, 0, 2001],
 [0, 5, 5, 10, 0, 2001],
 [0, 5, 10, 15, 0, 2001],
 [0, 5, 15, 20, 0, 2001],
 [5, 10, 0, 5, 0, 2001],
 [5, 10, 5, 10, 0, 2001],
 [5, 10, 10, 15, 0, 2001],
 [5, 10, 15, 20, 0, 2001],
 [10, 15, 0, 5, 0, 2001],
 [10, 15, 5, 10, 0, 2001],
 [10, 15, 10, 15, 0, 2001],
 [10, 15, 15, 20, 0, 2001],
 [15, 20, 0, 5, 0, 2001],
 [15, 20, 5, 10, 0, 2001],
 [15, 20, 10, 15, 0, 2001],
 [15, 20, 15, 20, 0, 2001]]

In [None]:
def get_target_box(area, region_step, target_step, interp_step):
    
    regions = generate_grid(area, region_step)
    print('-' * 50)
    print("THE REGIONS ARE {}".format(regions))
    
    errors_list = []
    
    for n,region in enumerate(regions):
        boxes = generate_grid(region, target_step)
        print('-' * 50)
        print("REGION #1{} IS: {}".format(n+1, region))
        print('-' * 50)
        print("THE BOXES IN REGION #{} ARE {}".format(n+1,boxes))
        print('-' * 50)

        errors = get_box_dask(boxes, interp_step)
        errors_list.append([errors])
        
        print('-' * 50)
        print("COMPLETED REGION #{} OUT OF {}".format(n+1,len(regions)))
        print('-' * 50)
        
    return errors_list

Process Dask Worker process (from Nanny):
Traceback (most recent call last):
  File "/home.ufs/amf2288/mambaforge-pypy3/envs/Argo_Nov_24/lib/python3.12/asyncio/runners.py", line 118, in run
    return self._loop.run_until_complete(task)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home.ufs/amf2288/mambaforge-pypy3/envs/Argo_Nov_24/lib/python3.12/asyncio/base_events.py", line 686, in run_until_complete
    return future.result()
           ^^^^^^^^^^^^^^^
  File "/home.ufs/amf2288/mambaforge-pypy3/envs/Argo_Nov_24/lib/python3.12/site-packages/distributed/nanny.py", line 984, in run
    await worker.finished()
  File "/home.ufs/amf2288/mambaforge-pypy3/envs/Argo_Nov_24/lib/python3.12/site-packages/distributed/core.py", line 491, in finished
    await self._event_finished.wait()
  File "/home.ufs/amf2288/mambaforge-pypy3/envs/Argo_Nov_24/lib/python3.12/asyncio/locks.py", line 212, in wait
    await fut
asyncio.exceptions.CancelledError

During handling of the above exception, a

to xarray complete


2025-01-03 09:09:47,916 - distributed.worker - ERROR - 
Traceback (most recent call last):
  File "/home.ufs/amf2288/mambaforge-pypy3/envs/Argo_Nov_24/lib/python3.12/site-packages/distributed/nanny.py", line 984, in run
    await worker.finished()
  File "/home.ufs/amf2288/mambaforge-pypy3/envs/Argo_Nov_24/lib/python3.12/site-packages/distributed/core.py", line 491, in finished
    await self._event_finished.wait()
  File "/home.ufs/amf2288/mambaforge-pypy3/envs/Argo_Nov_24/lib/python3.12/asyncio/locks.py", line 212, in wait
    await fut
asyncio.exceptions.CancelledError

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home.ufs/amf2288/mambaforge-pypy3/envs/Argo_Nov_24/lib/python3.12/site-packages/distributed/utils.py", line 806, in wrapper
    return await func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home.ufs/amf2288/mambaforge-pypy3/envs/Argo_Nov_24/lib/python3.12/site-packages/distributed/work

to xarray complete
point to profile complete
interpolation complete
adding spice complete
adding MLD complete
to xarray complete
point to profile complete
interpolation complete
adding spice complete
adding MLD complete


In [12]:
box = [-75,25,-90,90,0,2000]
region_step = 40
target_step = 5
interp_step = 2
get_target_box(box, region_step, target_step, interp_step)

------------------------------
THE REGIONS ARE [[-90, -50, -75, -35, 0, 2000], [-90, -50, -35, 5, 0, 2000], [-90, -50, 5, 25, 0, 2000], [-50, -10, -75, -35, 0, 2000], [-50, -10, -35, 5, 0, 2000], [-50, -10, 5, 25, 0, 2000], [-10, 30, -75, -35, 0, 2000], [-10, 30, -35, 5, 0, 2000], [-10, 30, 5, 25, 0, 2000], [30, 70, -75, -35, 0, 2000], [30, 70, -35, 5, 0, 2000], [30, 70, 5, 25, 0, 2000], [70, 90, -75, -35, 0, 2000], [70, 90, -35, 5, 0, 2000], [70, 90, 5, 25, 0, 2000]]
------------------------------
THE BOXES IN REGION #1 ARE [[-75, -70, -90, -85, 0, 2000], [-75, -70, -85, -80, 0, 2000], [-75, -70, -80, -75, 0, 2000], [-75, -70, -75, -70, 0, 2000], [-75, -70, -70, -65, 0, 2000], [-75, -70, -65, -60, 0, 2000], [-75, -70, -60, -55, 0, 2000], [-75, -70, -55, -50, 0, 2000], [-70, -65, -90, -85, 0, 2000], [-70, -65, -85, -80, 0, 2000], [-70, -65, -80, -75, 0, 2000], [-70, -65, -75, -70, 0, 2000], [-70, -65, -70, -65, 0, 2000], [-70, -65, -65, -60, 0, 2000], [-70, -65, -60, -55, 0, 2000], [-7

  return bound(*args, **kwds)


KeyboardInterrupt: 

In [None]:
cleint.close()