# Box Loader Testing

I need to test a few things about the steps in the `argo_box_loader` notebook. There are a few things that might make a difference by decreasing the size of an array, but I want to try here on a box that takes a lot less time to load.

In [1]:
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
import argopy
import scipy.ndimage as filter
import scipy
import matplotlib
import gsw
import traceback
from importlib import reload

import argopy
from argopy import DataFetcher as ArgoDataFetcher

argo_loader = ArgoDataFetcher(
    src="gdac", ftp="/swot/SUM05/dbalwada/Argo_sync", progress=True
)

In [2]:
import os
import sys
os.chdir('/home.ufs/amf2288/argo-intern/funcs')
sys.path.append(os.path.abspath('../funcs'))

import processing_funcs as prf

In [3]:
import dask
from dask.distributed import Client, LocalCluster

# Create a LocalCluster
cluster = LocalCluster(n_workers=64, threads_per_worker=1)

# Connect to the cluster
client = Client(cluster)

# Print the cluster information
print(cluster)

LocalCluster(53ba7671, 'tcp://127.0.0.1:38999', workers=64, threads=64, memory=0.98 TiB)


In [11]:
@dask.delayed
def get_box_delayed(*args, **kwargs):
    try:
        return prf.get_box(*args, **kwargs)
    except Exception as e:
        return type(e).__name__, str(e)

def get_box_dask(boxes_list, interp_step):
    
    boxes_list = [(box, interp_step) for box in boxes_list]
    
    # Create a list of delayed tasks
    tasks = [get_box_delayed(*args) for args in boxes_list]

    # Compute the results in parallel
    results = dask.compute(*tasks)
    
    errors=[]

    # Process the results as needed
    for n, result in enumerate(results):
        if isinstance(result, tuple) and isinstance(result[0], str):
            error_type, error_message = result
            print("Error in box {}: {} - {}".format([boxes_list[n][0][0],boxes_list[n][0][1],boxes_list[n][0][2],boxes_list[n][0][3]], error_type, error_message))
            errors.append([boxes_list[n][0][0],boxes_list[n][0][1],boxes_list[n][0][2],boxes_list[n][0][3], error_type, error_message])
        else:
            result.to_netcdf("/swot/SUM05/amf2288/sync-boxes/retry/lon:({},{})_lat:({},{})_ds_z.nc".format(boxes_list[n][0][0],boxes_list[n][0][1],boxes_list[n][0][2],boxes_list[n][0][3]))
            print("Saved box {} of {}".format(n+1, len(results)))
            
    return errors 

In [12]:
from typing import List, Tuple

#def generate_grid(min_lat: float, max_lat: float, min_lon: float, max_lon: float, step: float) -> List[Tuple[float, float, float, float]]:
def generate_grid(box, step):
    grid = []
    lon_min, lon_max, lat_min, lat_max = box[0],box[1],box[2],box[3]
    lat = lat_min
    while lat < lat_max:
        lon = lon_min
        while lon < lon_max:
            # Ensure the box does not exceed the specified bounds
            box_lat_max = min(lat + step, lat_max)
            box_lon_max = min(lon + step, lon_max)
            box = [lat, box_lat_max, lon, box_lon_max, box[4], box[5]]
            grid.append(box)
            lon += step
        lat += step
    return grid

In [13]:
def get_target_area(area, region_step, target_step, interp_step):
    
    regions = generate_grid(area, region_step)
    print('-' * 50)
    print("Cluster: {}".format(cluster))
    print('-' * 50)
    print("THE REGIONS ARE {}".format(regions))
    print('-' * 50)
    
    errors_list = []
    
    for n,region in enumerate(regions):
        boxes = generate_grid(region, target_step)
        print('-' * 50)
        print("REGION #{} OUT OF {} IS: {}".format(n+1, len(regions), region))
        print('-' * 50)
        print("THE BOXES IN REGION #{} ARE {}".format(n+1,boxes))
        print('-' * 50)

        errors = get_box_dask(boxes, interp_step)
        errors_list.append([errors])
        
        print('-' * 50)
        print("COMPLETED REGION #{} OUT OF {}".format(n+1,len(regions)))
        print('-' * 50)
        
    return errors_list

In [14]:
atl1 = [-75,25,-90,0,0,2000]
atl2 = [-75,25,0,90,0,2000]
ind1 = [25,120,-90,30,0,2000]
pac1 = [120,180,-90,0,0,2000]
pac2 = [120,180,0,90,0,2000]
pac3 = [-180,-75,-90,0,0,2000]
pac4 = [-180,-75,0,90,0,2000]

In [15]:
SO_error = [-40,15,-70,0,0,2000]

In [16]:
globe = [-180, 180, -90, 90, 0, 2000]

In [17]:
problem = [-130,-75,-90,10,0,2000]

In [18]:
area = problem
region_step = 50
box_step = 5
interp_step = 2
get_target_area(area, region_step, box_step, interp_step)

--------------------------------------------------
Cluster: LocalCluster(53ba7671, 'tcp://127.0.0.1:38999', workers=64, threads=64, memory=0.98 TiB)
--------------------------------------------------
THE REGIONS ARE [[-90, -40, -130, -80, 0, 2000], [-90, -40, -80, -75, 0, 2000], [-40, 10, -130, -80, 0, 2000], [-40, 10, -80, -75, 0, 2000]]
--------------------------------------------------
--------------------------------------------------
REGION #1 OUT OF 4 IS: [-90, -40, -130, -80, 0, 2000]
--------------------------------------------------
THE BOXES IN REGION #1 ARE [[-130, -125, -90, -85, 0, 2000], [-130, -125, -85, -80, 0, 2000], [-130, -125, -80, -75, 0, 2000], [-130, -125, -75, -70, 0, 2000], [-130, -125, -70, -65, 0, 2000], [-130, -125, -65, -60, 0, 2000], [-130, -125, -60, -55, 0, 2000], [-130, -125, -55, -50, 0, 2000], [-130, -125, -50, -45, 0, 2000], [-130, -125, -45, -40, 0, 2000], [-125, -120, -90, -85, 0, 2000], [-125, -120, -85, -80, 0, 2000], [-125, -120, -80, -75, 0, 20

  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)


Error in box [-130, -125, -90, -85]: DataNotFound - 'No data found for: x=-130.00/-125.00;y=-90.00/-85.00'
Error in box [-130, -125, -85, -80]: DataNotFound - 'No data found for: x=-130.00/-125.00;y=-85.00/-80.00'
Error in box [-130, -125, -80, -75]: DataNotFound - 'No data found for: x=-130.00/-125.00;y=-80.00/-75.00'
Saved box 4 of 100
Saved box 5 of 100
Saved box 6 of 100
Saved box 7 of 100
Saved box 8 of 100
Saved box 9 of 100
Saved box 10 of 100
Error in box [-125, -120, -90, -85]: DataNotFound - 'No data found for: x=-125.00/-120.00;y=-90.00/-85.00'
Error in box [-125, -120, -85, -80]: DataNotFound - 'No data found for: x=-125.00/-120.00;y=-85.00/-80.00'
Error in box [-125, -120, -80, -75]: DataNotFound - 'No data found for: x=-125.00/-120.00;y=-80.00/-75.00'
Saved box 14 of 100
Saved box 15 of 100
Saved box 16 of 100
Saved box 17 of 100
Saved box 18 of 100
Saved box 19 of 100
Saved box 20 of 100
Error in box [-120, -115, -90, -85]: DataNotFound - 'No data found for: x=-120.00/-1



Saved box 1 of 10
Saved box 2 of 10
Saved box 3 of 10
Saved box 4 of 10
Saved box 5 of 10
Saved box 6 of 10
Error in box [-80, -75, -10, -5]: AttributeError - 'NoneType' object has no attribute 'N_PROF'
Error in box [-80, -75, -5, 0]: DataNotFound - 'Empty dataset, no data to transform !'
Saved box 9 of 10
Saved box 10 of 10
--------------------------------------------------
COMPLETED REGION #4 OUT OF 4
--------------------------------------------------


[[[[-130,
    -125,
    -90,
    -85,
    'DataNotFound',
    "'No data found for: x=-130.00/-125.00;y=-90.00/-85.00'"],
   [-130,
    -125,
    -85,
    -80,
    'DataNotFound',
    "'No data found for: x=-130.00/-125.00;y=-85.00/-80.00'"],
   [-130,
    -125,
    -80,
    -75,
    'DataNotFound',
    "'No data found for: x=-130.00/-125.00;y=-80.00/-75.00'"],
   [-125,
    -120,
    -90,
    -85,
    'DataNotFound',
    "'No data found for: x=-125.00/-120.00;y=-90.00/-85.00'"],
   [-125,
    -120,
    -85,
    -80,
    'DataNotFound',
    "'No data found for: x=-125.00/-120.00;y=-85.00/-80.00'"],
   [-125,
    -120,
    -80,
    -75,
    'DataNotFound',
    "'No data found for: x=-125.00/-120.00;y=-80.00/-75.00'"],
   [-120,
    -115,
    -90,
    -85,
    'DataNotFound',
    "'No data found for: x=-120.00/-115.00;y=-90.00/-85.00'"],
   [-120,
    -115,
    -85,
    -80,
    'DataNotFound',
    "'No data found for: x=-120.00/-115.00;y=-85.00/-80.00'"],
   [-120,
    -115,
    -80,
   

# Data Loading Issues

In [None]:
ds = xr.open_dataset('/swot/SUM05/amf2288/sync-boxes/test_error.nc')

In [None]:
fig = plt.figure(figsize=(5, 8))
ax = plt.axes(projection=ccrs.PlateCarree())
ax.coastlines()
ax.gridlines(draw_labels=True)

ax.scatter(ds.LONGITUDE,ds.LATITUDE,transform=ccrs.PlateCarree(),color='tab:blue',s=1)