# Convert an entire seaglider mission

Given an online location or folder on your computer, process a full mission of basestation netCDF files into a single seagliderOG1 mission file.  

- Provide the input location (directory of `p*.nc` files) and output location (where the netCDF file and log file will be saved)
- Optional: Provide details of contributing authors (e.g., who created the OG1 format file) to be appended to the output file's attributes

In [1]:
import pathlib
import sys
import warnings
warnings.simplefilter("ignore", category=Warning)

from seagliderOG1 import readers, writers, plotters, utilities, tools
from seagliderOG1 import convertOG1
from seagliderOG1 import vocabularies, parsers
import xarray as xr
import os
import importlib
import datetime
import logging
_log = logging.getLogger(__name__)

script_dir = pathlib.Path().parent.absolute()
parent_dir = script_dir.parents[0]
sys.path.append(str(parent_dir))
sys.path.append(str(parent_dir) + '/seagliderOG1')
print(parent_dir)

# Specify the path for writing datafiles
data_path = os.path.join(parent_dir, '../data')
input_locations = [
    "/Users/eddifying/Dropbox/data/GLIDER-data/sampler-seaglider",
]

/Users/eddifying/Cloudfree/gitlab-cloudfree/seagliderOG1/notebooks


## Specify paths for inputs/outputs

In [2]:
file_list = readers.list_files(input_locations[0])

In [5]:
all_attr = set()
all_datavar = set()
all_gpsvar = set()
all_sgcal = set()
all_logvar = set()
all_other = set()

for file in file_list:
    ds1_base = xr.open_dataset(os.path.join(input_locations[0], file))
    split_ds = tools.split_by_unique_dims(ds1_base)
    ds_sgdata = split_ds[('sg_data_point',)]
    ds_gps = split_ds[('gps_info',)]
    ds_sgcal, ds_log, ds_other = convertOG1.extract_variables(split_ds[()])

    attr1 = set(ds1_base.attrs.keys())
    all_attr = all_attr.union(attr1)
    var1 = set(ds_sgdata.variables)
    all_datavar = all_datavar.union(var1)
    var1 = set(ds_gps.variables)
    all_gpsvar = all_gpsvar.union(var1)
    var1 = set(ds_sgcal.variables)
    all_sgcal = all_sgcal.union(var1)
    var1 = set(ds_log.variables)
    all_logvar = all_logvar.union(var1)
    var = set(ds_other.variables)
    all_other = all_other.union(var)

# Convert all_attr and all_var from set to list
all_attr = list(all_attr)
all_datavar = list(all_datavar)
all_gpsvar = list(all_gpsvar)
all_sgcal = list(all_sgcal)
all_logvar = list(all_logvar)
all_other = list(all_other)

all_attr.sort()
all_datavar.sort()
all_gpsvar.sort()
all_sgcal.sort()
all_logvar.sort()
all_other.sort()


In [10]:
# To plan to refactor, I think I need a superset of possible variable names from a range of different files
len(all_datavar)
len(all_attr)

63

In [15]:

all_datavar

['aanderaa4330_dissolved_oxygen',
 'aanderaa4330_dissolved_oxygen_qc',
 'aanderaa4330_instrument_dissolved_oxygen',
 'aanderaa4330_results_time',
 'absolute_salinity',
 'buoyancy',
 'conductivity',
 'conductivity_qc',
 'conductivity_raw',
 'conductivity_raw_qc',
 'conservative_temperature',
 'ctd_depth',
 'ctd_pressure',
 'ctd_time',
 'density',
 'density_insitu',
 'depth',
 'dissolved_oxygen_sat',
 'east_displacement',
 'east_displacement_gsm',
 'eng_GC_phase',
 'eng_aa4330_AirSat',
 'eng_aa4330_CalPhase',
 'eng_aa4330_O2',
 'eng_aa4330_TCPhase',
 'eng_aa4330_Temp',
 'eng_condFreq',
 'eng_depth',
 'eng_elaps_t',
 'eng_elaps_t_0000',
 'eng_head',
 'eng_pitchAng',
 'eng_pitchCtl',
 'eng_press_counts',
 'eng_pressure',
 'eng_rec',
 'eng_rollAng',
 'eng_rollCtl',
 'eng_sbe43_O2Freq',
 'eng_sbect_condFreq',
 'eng_sbect_tempFreq',
 'eng_tempFreq',
 'eng_vbdCC',
 'eng_wlbb2f_VFtemp',
 'eng_wlbb2f_blueCount',
 'eng_wlbb2f_blueRef',
 'eng_wlbb2f_fluorCount',
 'eng_wlbb2f_redCount',
 'eng_wlbb2

In [13]:
datavar_keywords = ['aanderaa4330', 'sbe43', 'wlbb2f', 'wlbb2fl', 'aa4330', 'sbect']
datavar_suffix = ['_qc', '_raw', '_raw_qc', '_gsm']
datavar_derived = ['absolute_salinity', 'conservative_temperature', 'density', 'density_insitu', 'salinity',
                   'sigma_t','sigma_theta','sound_velocity','theta']


['comment', 'contributor_name', 'contributor_role', 'date_issued']


In [16]:
all_sgcal

['A',
 'B',
 'Boc',
 'C',
 'E',
 'Foffset',
 'Pcor',
 'QC_cond_spike_depth',
 'QC_temp_spike_depth',
 'Soc',
 'Tcor',
 'WETLabsCalData_Chlorophyll_calTemperature',
 'WETLabsCalData_Chlorophyll_darkCounts',
 'WETLabsCalData_Chlorophyll_maxOutput',
 'WETLabsCalData_Chlorophyll_resolution',
 'WETLabsCalData_Chlorophyll_scaleFactor',
 'WETLabsCalData_Scatter_470_darkCounts',
 'WETLabsCalData_Scatter_470_resolution',
 'WETLabsCalData_Scatter_470_scaleFactor',
 'WETLabsCalData_Scatter_470_wavelength',
 'WETLabsCalData_Scatter_700_darkCounts',
 'WETLabsCalData_Scatter_700_resolution',
 'WETLabsCalData_Scatter_700_scaleFactor',
 'WETLabsCalData_Scatter_700_wavelength',
 'a',
 'abs_compress',
 'b',
 'c',
 'c_g',
 'c_h',
 'c_i',
 'c_j',
 'calibcomm',
 'calibcomm_optode',
 'calibcomm_oxygen',
 'calibcomm_wetlabs',
 'comm_oxy_type',
 'cond_bias',
 'cpcor',
 'ctcor',
 'hd_a',
 'hd_b',
 'hd_c',
 'hd_s',
 'id_str',
 'mass',
 'mass_comp',
 'mission_title',
 'o_a',
 'o_b',
 'o_c',
 'o_e',
 'optode_Conc

In [None]:
sgcal_keywords = ['WETLabsCalData_Chlorophyll', 'WETLabsCalData_Scatter_470', 'WETLabsCalData_Scatter_700', 
                  'optode', 't_', 'wlbbfl2_sig460nm', 'wlbbfl2_sig697nm', 'wlbbfl2_sig700nm', 'o_', 'c_', 
                  'A', 'B', 'Boc', 'C', 'E', 'Foffset', 'Pcor', 'Soc', 'Tcor']
cal_strings = ['calibcomm','calibcomm_optode','calibcomm_oxygen','calibcomm_wetlabs']

In [17]:
all_logvar

['log_10V_AH',
 'log_24V_AH',
 'log_AD7714Ch0Gain',
 'log_AH0_10V',
 'log_AH0_24V',
 'log_ALTIM_BOTTOM_PING_RANGE',
 'log_ALTIM_BOTTOM_TURN_MARGIN',
 'log_ALTIM_FREQUENCY',
 'log_ALTIM_PING_DELTA',
 'log_ALTIM_PING_DEPTH',
 'log_ALTIM_PULSE',
 'log_ALTIM_SENSITIVITY',
 'log_ALTIM_TOP_MIN_OBSTACLE',
 'log_ALTIM_TOP_PING',
 'log_ALTIM_TOP_PING_RANGE',
 'log_ALTIM_TOP_TURN_MARGIN',
 'log_APOGEE_PITCH',
 'log_CALL_NDIVES',
 'log_CALL_TRIES',
 'log_CALL_WAIT',
 'log_CAPMAXSIZE',
 'log_CAPUPLOAD',
 'log_CAP_FILE_SIZE',
 'log_CF8_MAXERRORS',
 'log_CFSIZE',
 'log_COMM_SEQ',
 'log_COMPASS2_DEVICE',
 'log_COMPASS_DEVICE',
 'log_COMPASS_USE',
 'log_COURSE_BIAS',
 'log_CURRENT',
 'log_C_PITCH',
 'log_C_ROLL_CLIMB',
 'log_C_ROLL_DIVE',
 'log_C_VBD',
 'log_DATA_FILE_SIZE',
 'log_DBDW',
 'log_DEEPGLIDER',
 'log_DEEPGLIDERMB',
 'log_DEVICE1',
 'log_DEVICE2',
 'log_DEVICE3',
 'log_DEVICE4',
 'log_DEVICE5',
 'log_DEVICE6',
 'log_DEVICES',
 'log_DEVICE_MAMPS',
 'log_DEVICE_SECS',
 'log_DIVE',
 'log_D_ABO

In [18]:
all_other

['CTD_qc',
 'GPS1_qc',
 'GPS2_qc',
 'GPSE_qc',
 'SBE43_qc',
 'aa4330',
 'aanderaa4330_qc',
 'avg_latitude',
 'depth_avg_curr_east',
 'depth_avg_curr_east_gsm',
 'depth_avg_curr_error',
 'depth_avg_curr_north',
 'depth_avg_curr_north_gsm',
 'depth_avg_curr_qc',
 'directives',
 'flight_avg_speed_east',
 'flight_avg_speed_east_gsm',
 'flight_avg_speed_north',
 'flight_avg_speed_north_gsm',
 'glider',
 'hdm_qc',
 'latlong_qc',
 'magnetic_variation',
 'magnetometer',
 'processing_error',
 'reviewed',
 'sbe41',
 'sbe43',
 'start_of_climb_time',
 'surface_curr_east',
 'surface_curr_error',
 'surface_curr_north',
 'surface_curr_qc',
 'wlbb2f',
 'wlbb2fl',
 'wlbbfl2']

In [None]:
sensor_other = ['aa4330', 'sbe43', 'sbe41', 'wlbb2f', 'wlbb2fl', 'wlbbfl2']