# Convert an entire seaglider mission

Given an online location or folder on your computer, process a full mission of basestation netCDF files into a single seagliderOG1 mission file.  

- Provide the input location (directory of `p*.nc` files) and output location (where the netCDF file and log file will be saved)
- Optional: Provide details of contributing authors (e.g., who created the OG1 format file) to be appended to the output file's attributes

In [1]:
# This cell imports for development work
import pathlib
import sys
import warnings
warnings.simplefilter("ignore", category=Warning)

In [2]:
from seagliderOG1 import readers, writers, plotters, utilities, tools
from seagliderOG1 import convertOG1
from seagliderOG1 import vocabularies
import xarray as xr
import os
import datetime
import logging
_log = logging.getLogger(__name__)

## Specify paths for inputs/outputs

In [3]:
script_dir = pathlib.Path().parent.absolute()
parent_dir = script_dir.parents[0]
sys.path.append(str(parent_dir))
sys.path.append(str(parent_dir) + '/seagliderOG1')
print(parent_dir)

# Specify the path for writing datafiles
data_path = os.path.join(parent_dir, 'data')


# Provide a list of input locations
input_locations = [
    # Either Iceland, Faroes or RAPID/MOCHA
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/005/20090829/", # done
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/005/20080606/", # done
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/005/20081106/", # done
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/012/20070831/", # done
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/014/20080214/",  # done
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/014/20080222/", # done
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/016/20061112/",  # done
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/016/20090605/", # done
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/016/20071113/", # done
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/016/20080607/",  # done
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/033/20100518/",
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/033/20100903/",
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/101/20081108/",
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/101/20061112/",
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/101/20070609/",
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/102/20061112/",
    # Labrador Sea
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/015/20040924/",
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/014/20040924/",
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/008/20031002/",
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/004/20031002/",
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/016/20050406/",
    # RAPID/MOCHA
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/033/20100729/",
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/034/20110128/",
    # RAPID/MOCHA
    "/Users/eddifying/Nextcloud/Shared/data-shared/data-whittard-seaglider/dg042_whittard_data"

]

input_locations = [
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/033/20100903/",
]

/Users/eddifying/Cloudfree/gitlab-cloudfree/seagliderOG1


In [4]:
for input_loc in input_locations:
    ds1_base = readers.load_first_basestation_file(input_loc)

    # Create a log file based on the first data file
    platform_id = ds1_base.attrs['platform_id']
    dive_start = ds1_base.attrs['time_coverage_start']
    start_time = datetime.datetime.strptime(dive_start, '%Y-%m-%dT%H:%M:%SZ').strftime('%Y%m%dT%H%M%S')

    log_file = os.path.join(data_path, f"{platform_id}_{start_time}.log")
    logf_with_path = os.path.join(data_path, log_file)

    # Create the log file
    # Note that the use of `force=True` generates a new log file each instance in the loop
    logging.basicConfig(
        filename=logf_with_path, 
        encoding='utf-8',
        format="%(asctime)s %(levelname)-8s %(funcName)s %(message)s",
        filemode="w", # 'w' to overwrite, 'a' to append
        level=logging.INFO,
        datefmt="%Y%m%dT%H%M%S",
        force=True,
        )
    _log.info('convertOG1.process_and_save_data')
    _log.info('Processing data from: %s', input_loc)

    # Process the data
    ds_all = convertOG1.process_and_save_data(input_loc, output_dir=data_path, save=True,  run_quietly=True)

    _log.info('Finished processing data from: %s', input_loc)

sg_cal_calibcomm: SBE s/n 0112 calibration 20apr09
sg_cal_calibcomm_optode: Optode 4330F S/N 182 foil batch 2808F calibrated 09may09
File /Users/eddifying/Cloudfree/gitlab-cloudfree/seagliderOG1/data/sg033_20100903T182416_delayed.nc already exists. Exiting the process.


In [5]:
ds_all

In [6]:
ds1_base

In [8]:
ds1 = readers.load_sample_dataset()
split_ds = tools.split_by_unique_dims(ds1)
sg_cal, dc_log, dc_other = convertOG1.extract_variables(split_ds[()])


In [11]:
sg_cal.mass.values

array(75.997)

In [28]:
tmp = dc_log.log_GPS.values.tobytes().decode('utf-8')

In [29]:
print(tmp)

$GPS,060910,142637,1831.076,-6558.818,31,1.1,34,-12.7


In [26]:
tmp = str1[0:4]

In [27]:
print(tmp)

$GPS


In [37]:
keys_list = [key[0] for key in split_ds.keys()]
print(keys_list)

IndexError: tuple index out of range

In [72]:
ds1 = readers.load_sample_dataset()
divenum = ds1.attrs['dive_number']
split_ds = tools.split_by_unique_dims(ds1)
gps_info = split_ds[('gps_info',)]

key_dims = list(split_ds.keys())
key_dims.sort()
assert key_dims == [(), ('gc_event',), ('gc_state',), ('gps_info',), ('sg_data_point',)]

ds = split_ds[('sg_data_point',)]
dsa = convertOG1.standardise_OG10(ds)

varlist = list(dsa.data_vars)
coordlist = list(dsa.coords)
combined_list = varlist + coordlist
combined_list.sort()
og1_varlist = ['TIME',
            'LATITUDE',
            'LONGITUDE',
            'LATITUDE_GPS',
            'TEMP',
            'DEPTH',
            'TIME_GPS',
            'LONGITUDE_GPS',
            'TRAJECTORY', 
            'PLATFORM_MODEL',
            'PLATFORM_SERIAL_NUMBER'
]
for var in og1_varlist:
    if var in combined_list:
        print(f"{var} is in the list")

ds_new = convertOG1.add_gps_info_to_dataset(dsa, gps_info)

if 'LATITUDE_GPS' in list(ds_new.variables):
    print('LATITUDE_GPS is in the list')

ds_new = tools.assign_profile_number(ds_new)



TIME is in the list
LATITUDE is in the list
LONGITUDE is in the list
TEMP is in the list
DEPTH is in the list
LATITUDE_GPS is in the list


In [74]:
ds_new

In [44]:
var_data.dims

()