# Load Argo data
This notebook loads Argo Data from the Labrador Sea in between a specified time frame as an xarray dataset and saves it as an nc-file. The data loaded contains profiles of absolute salinity (SA), conservative temperatur (CT) and potential vorticity (PV) as a function of pressure sorted by depth levels and profile number. First the dataset for the whole period (2002-2023) is created, secondly a subset with different end and start dates is created. This subset uses the time span used in Holte & Straneo (2017) and was used for comparison.

In [39]:
# install argopy libary from github if necessary
!pip install git+https://github.com/euroargodev/argopy.git@master

Collecting git+https://github.com/euroargodev/argopy.git@master
  Cloning https://github.com/euroargodev/argopy.git (to revision master) to c:\users\schul\appdata\local\temp\pip-req-build-96nd3esy
  Resolved https://github.com/euroargodev/argopy.git to commit 31451cffb5c9e140fa3be681f98edf18dc732bd3
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'


  Running command git clone --filter=blob:none --quiet https://github.com/euroargodev/argopy.git 'C:\Users\schul\AppData\Local\Temp\pip-req-build-96nd3esy'


In [40]:
import os
current_directory = os.getcwd()
project_root = os.path.abspath(os.path.join(current_directory, '..', '..'))
datapath = os.path.join(project_root, 'data')

#--------------- import libaries ---------------------------------------------
import xarray as xr
import argopy
argopy.set_options(mode='research') # to only allow for qualitifly high data
from argopy import DataFetcher as ArgoDataFetcher


#--------------- SET VARIABLES -----------------------------------------------
# Set time range
start_year = 2002
end_year   = 2023

# Coordinates of Labrador Sea 
Lon0 = -66
Lon1 = -44
Lat0 = 45
Lat1 = 68
# maximum water depth
max_depth = 2000

# set file name
filename = 'LabSea_Argo_2002_2023.nc'
#-----------------------------------------------------------------------------

# Create an empty xarray dataset to store the data
ag = None

for year in range(start_year, end_year+1):

    print(f"Fetching data for {year}")
    
    for month in range(1, 13):
        month_start = f"{year}-{month:02d}-01"
        month_end = f"{year}-{month:02d}-28" if month == 2 else f"{year}-{month:02d}-30"
        
         # Load Argo data for the current month and region
        fetcher = ArgoDataFetcher(mode='research', timeout=50000)

        ds_month = fetcher.region([Lon0, Lon1, Lat0, Lat1, 0, max_depth, month_start, month_end]).to_xarray()

        if ag is None:
            ag = ds_month.argo.point2profile()
            ag.argo.teos10(['SA', 'CT', 'PV'])
        else:
            ag_points = ds_month.argo.point2profile()
            ag_points.argo.teos10(['SA', 'CT', 'PV'])
            ag = xr.concat([ag, ag_points], dim='N_PROF')

# Print the final dataset to check variables
print(ag)

Fetching data for 2002
Fetching data for 2003
Fetching data for 2004
Fetching data for 2005
Fetching data for 2006
Fetching data for 2007
Fetching data for 2008
Fetching data for 2009
Fetching data for 2010
Fetching data for 2011
Fetching data for 2012
Fetching data for 2013
Fetching data for 2014
Fetching data for 2015
Fetching data for 2016
Fetching data for 2017
Fetching data for 2018
Fetching data for 2019


  N2 = ((g_local**2) / (specvol_mid * db_to_pa * dp))


Fetching data for 2020
Fetching data for 2021
Fetching data for 2022
Fetching data for 2023
<xarray.Dataset> Size: 1GB
Dimensions:          (N_LEVELS: 1123, N_PROF: 23079)
Coordinates:
  * N_LEVELS         (N_LEVELS) int32 4kB 0 1 2 3 4 ... 1118 1119 1120 1121 1122
  * N_PROF           (N_PROF) int32 92kB 0 3 1 4 2 5 0 1 ... 4 8 19 20 21 22 9
    LATITUDE         (N_PROF) float64 185kB 50.82 50.06 51.87 ... 56.68 58.47
    LONGITUDE        (N_PROF) float64 185kB -45.41 -46.42 ... -51.72 -57.24
    TIME             (N_PROF) datetime64[ns] 185kB 2002-01-04T08:57:08 ... 20...
Data variables:
    CYCLE_NUMBER     (N_PROF) int32 92kB 21 21 22 22 23 23 ... 80 10 11 12 13 81
    DIRECTION        (N_PROF) <U1 92kB 'A' 'A' 'A' 'A' 'A' ... 'A' 'A' 'A' 'A'
    PLATFORM_NUMBER  (N_PROF) int32 92kB 4900192 4900193 ... 7901027 6904231
    PRES             (N_PROF, N_LEVELS) float32 104MB 6.7 16.2 26.1 ... nan nan
    PRES_ERROR       (N_PROF, N_LEVELS) float32 104MB 2.4 2.4 2.4 ... 2.4 2.4
    PSAL 

In [42]:
import numpy as np

# Ensure all dataset attributes are compatible with NetCDF
for attr_key, attr_value in ag.attrs.items():
    if not isinstance(attr_value, (str, int, float, np.ndarray, list, tuple)):
        # Convert to string
        ag.attrs[attr_key] = str(attr_value)
        
# save dataset as netcdf file
ag.to_netcdf(datapath + '//' + filename)

# Subdataset

In [53]:
import time
#--------------- SET VARIABLES -----------------------------------------------
# Set time range
# set time to only load dataset for reproducing Holte & Straneo (2017) 
# timespan set to '2002-03-01' to '2016-04-30' below
start_year2 = 2002
end_year2   = 2016

# Coordinates of Labrador Sea 
Lon0 = -66
Lon1 = -44
Lat0 = 45
Lat1 = 68
# maximum water depth
max_depth = 2000

# set file name
filename2 = 'LabSea_Argo_2002_2016.nc'

#-----------------------------------------------------------------------------

def fetch_with_retry(fetcher, region_params, max_retries=5, delay=3):
    """Fetches data with retry handling. (If running into Server timeouts)"""
    for attempt in range(max_retries):
        try:
            return fetcher.region(region_params).to_xarray()
        except Exception as e:
            print(f"Attempt {attempt + 1} failed with error: {e}")
            if attempt < max_retries - 1:
                print(f"Retrying in {delay} seconds...")
                time.sleep(delay)
            else:
                print("Max retries reached. Skipping this request.")
                return None


# Create an empty array
ag2 = None

for year in range(start_year2, end_year2 + 1):
    print(f"Fetching data for {year}")

    for month in range(1, 13):
        # Skip months before March in the start year
        if year == start_year2 and month < 3:
            continue
        # Skip months after April in the end year
        if year == end_year2 and month > 4:
            continue

        # Define start and end dates for each month
        month_start = f"{year}-{month:02d}-01"
        month_end = f"{year}-{month:02d}-28" if month == 2 else f"{year}-{month:02d}-30"

        # Load Argo data for the current month and region
        fetcher = ArgoDataFetcher(mode='research', timeout=500000)
        #ds_month = fetcher.region([Lon0, Lon1, Lat0, Lat1, 0, max_depth, month_start, month_end]).to_xarray()
        ds_month = fetch_with_retry(fetcher, [Lon0, Lon1, Lat0, Lat1, 0, max_depth, month_start, month_end])
        
        # Process and concatenate data
        if ds_month is not None:
            # Process and concatenate data
            if ag2 is None:
                ag2 = ds_month.argo.point2profile()
                ag2.argo.teos10(['SA', 'CT', 'PV'])
            else:
                ag_points = ds_month.argo.point2profile()
                ag_points.argo.teos10(['SA', 'CT', 'PV'])
                ag2 = xr.concat([ag2, ag_points], dim='N_PROF')

# Print the final dataset to check variables

print(ag2)

Fetching data for 2002
Attempt 1 failed with error: Server disconnected
Retrying in 3 seconds...
Attempt 1 failed with error: Server disconnected
Retrying in 3 seconds...
Fetching data for 2003
Attempt 1 failed with error: Server disconnected
Retrying in 3 seconds...
Attempt 1 failed with error: Server disconnected
Retrying in 3 seconds...
Attempt 1 failed with error: Server disconnected
Retrying in 3 seconds...
Attempt 1 failed with error: Server disconnected
Retrying in 3 seconds...
Attempt 1 failed with error: Server disconnected
Retrying in 3 seconds...
Fetching data for 2004
Attempt 1 failed with error: Server disconnected
Retrying in 3 seconds...
Attempt 1 failed with error: Server disconnected
Retrying in 3 seconds...
Fetching data for 2005
Attempt 1 failed with error: Server disconnected
Retrying in 3 seconds...
Fetching data for 2006
Fetching data for 2007
Attempt 1 failed with error: Server disconnected
Retrying in 3 seconds...
Attempt 1 failed with error: Server disconnected

In [None]:
#save dataset as netcdf file

# Ensure all dataset attributes are compatible with NetCDF
for attr_key, attr_value in ag2.attrs.items():
    if not isinstance(attr_value, (str, int, float, np.ndarray, list, tuple)):
        # Convert to string
        ag2.attrs[attr_key] = str(attr_value)
        
ag2.to_netcdf(datapath + '//' + filename2)