# Run through datasets

Run a list of datasets, generating log files and data output files in data/

In [1]:
# This cell imports for development work
import pathlib
import sys
import warnings
# Suppress every warning for the rest of the session: Warning is the base
# class of all warning categories, so this filter hides all of them.
warnings.simplefilter("ignore", category=Warning)
import logging
# Logger for this notebook; no handlers or levels are configured here.
_log = logging.getLogger(__name__)


In [2]:
from seagliderOG1 import readers, writers, plotters, utilities, tools
from seagliderOG1 import convertOG1
from seagliderOG1 import vocabularies
import xarray as xr
import os

## Load Seaglider data in basestation format

Test case built on a file written in 2013 by basestation v2.8 using the NODC format template v0.9.

This is the same process as above (contained in `convertOG1.convert_to_OG1`), but broken out so that the sub-functions can be accessed individually.  This way you can inspect the process as it goes along, and also inspect some of the data which did not make it into the final dataset:

- `sg_cal` - details from `sg_calib_constants.m`, 
- `dc_log` - log events, and 
- `dc_other` - random other variables that were in the basestation file.

In [None]:
# Path().parent.absolute() resolves to the current working directory, so
# parent_dir is the directory one level above wherever the notebook is run
# (presumably the repository root when run from the notebook folder -- verify).
script_dir = pathlib.Path().parent.absolute()
parent_dir = script_dir.parents[0]

# Make parent_dir and the package folder inside it importable. The membership
# check keeps sys.path from growing every time this cell is re-run.
for extra_path in (str(parent_dir), os.path.join(str(parent_dir), 'seagliderOG1')):
    if extra_path not in sys.path:
        sys.path.append(extra_path)
print(parent_dir)
print(sys.path)

# Specify the path for writing datafiles
data_path = os.path.join(parent_dir, 'data')

# Common prefix of every dataset URL; each mission directory sits below it as
# <glider number>/<mission start date>/.
ncei_uw_root = "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/"

# (glider number, mission start date) pairs, in processing order.
missions = [
    # Either Iceland, Faroes or RAPID/MOCHA
    ("005", "20090829"),  # done
    ("005", "20080606"),  # done
    ("005", "20081106"),  # done
    ("012", "20070831"),  # done
    ("014", "20080214"),  # done
    ("014", "20080222"),  # done
    ("016", "20061112"),  # done
    ("016", "20090605"),  # done
    ("016", "20071113"),  # done
    ("016", "20080607"),  # done
    ("033", "20100518"),
    ("033", "20100903"),
    ("101", "20081108"),
    ("101", "20061112"),
    ("101", "20070609"),
    ("102", "20061112"),
    # Labrador Sea
    ("015", "20040924"),
    ("014", "20040924"),
    ("008", "20031002"),
    ("004", "20031002"),
    ("016", "20050406"),
    # RAPID/MOCHA
    ("033", "20100729"),
    ("034", "20110128"),
]

# Full directory URLs, each ending in a trailing slash.
input_locations = [
    f"{ncei_uw_root}{glider}/{start}/" for glider, start in missions
]

# The per-dataset log files are written into data_path, so the directory must
# exist before logging.basicConfig tries to open a file inside it.
os.makedirs(data_path, exist_ok=True)

# Locations whose processing raised an exception; reported after the loop.
failed_locations = []

for input_loc in input_locations:
    # The last two path components are the glider number and the mission
    # start date. Stripping the trailing slash first means this works whether
    # or not the URL ends in '/', and regardless of how deep the URL is.
    sg_num, start_date = input_loc.rstrip('/').split('/')[-2:]
    # Name the log file - note that this is specific to the input locations above!
    logf = 'sg' + sg_num + '_' + start_date + '.log'
    logf_with_path = os.path.join(data_path, logf)

    # Set up logging. force=True removes the handlers installed for the
    # previous dataset so each dataset gets its own freshly truncated log file.
    logging.basicConfig(
        filename=logf_with_path,
        encoding='utf-8',
        format="%(asctime)s %(levelname)-8s %(funcName)s %(message)s",
        filemode="w",
        level=logging.INFO,
        datefmt="%Y%m%dT%H%M%S",
        force=True,
        )
    _log.info('convertOG1.process_and_save_data')
    _log.info('Processing data from: %s', input_loc)
    try:
        ds_all = convertOG1.process_and_save_data(input_loc, output_dir=data_path, save=True,  run_quietly=True)
    except Exception:
        # Record the traceback in this dataset's log and carry on, so one bad
        # dataset does not abort the remainder of the batch.
        _log.exception('Failed processing data from: %s', input_loc)
        failed_locations.append(input_loc)
        continue
    _log.info('Finished processing data from: %s', input_loc)

# Surface failures in the notebook output; details are in the per-dataset logs.
if failed_locations:
    print('Processing failed for:')
    for failed_loc in failed_locations:
        print('  ' + failed_loc)

/Users/eddifying/Cloudfree/gitlab-cloudfree/seagliderOG1
['/Users/eddifying/micromamba/envs/messfern_env/lib/python313.zip', '/Users/eddifying/micromamba/envs/messfern_env/lib/python3.13', '/Users/eddifying/micromamba/envs/messfern_env/lib/python3.13/lib-dynload', '', '/Users/eddifying/Cloudfree/gitlab-cloudfree/seagliderOG1/venv/lib/python3.13/site-packages', '/Users/eddifying/Cloudfree/gitlab-cloudfree/seagliderOG1', '/Users/eddifying/Cloudfree/gitlab-cloudfree/seagliderOG1/seagliderOG1']
Running the directory: https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/033/20100729/
TypeError Invalid value for attr 'calibration_parameters': {'t_g': 0.00435754738, 't_h': 0.000629403181, 't_i': 2.43066775e-05, 't_j': 2.62239863e-06, 'c_g': -10.3320573, 'c_h': 1.20947434, 'c_i': -0.00161928789, 'c_j': 0.000218257457, 'cpcor': -9.57e-08, 'ctcor': 3.25e-06}. For serialization to netCDF files, its value must be of one of the following types: str, Number, ndarray, number, list, tuple, bytes
