# Troubleshooting

To help with troubleshooting when the loader or converter breaks, this notebook breaks down some of the wrapper functions into their individual steps.

In [None]:
# This cell imports for development work
import os
import sys
import importlib
import warnings

# Location of a local seagliderOG1 checkout to import from during development.
# Set the SEAGLIDEROG1_DEV_PATH environment variable to point at your own
# checkout; the default is the original author's machine-specific path.
DEV_REPO_PATH = os.environ.get(
    "SEAGLIDEROG1_DEV_PATH",
    '/Users/eddifying/Cloudfree/gitlab-cloudfree/seagliderOG1',
)
# Append only once so that re-running this cell does not keep growing sys.path
if DEV_REPO_PATH not in sys.path:
    sys.path.append(DEV_REPO_PATH)

# Suppress every warning category for the rest of the session (keeps the
# notebook output short, but also hides deprecation and runtime warnings)
warnings.simplefilter("ignore", category=Warning)

In [None]:
from seagliderOG1 import readers, writers, plotters, utilities
from seagliderOG1 import convertOG1
import xarray as xr
import os

## Load Seaglider data in basestation format

Test case built on a file which was written in 2013 by basestation v2.8 into NODC format template v0.9.

This is the same process as the one contained in the wrapper `convertOG1.convert_to_OG1`, but broken out so that the sub-functions can be called individually.  This way you can inspect the process as it goes along, and also inspect some of the data which did not make it into the final dataset:

- `sg_cal` - details from `sg_calib_constants.m`,
- `dc_log` - log events, and
- `dc_other` - random other variables that were in the basestation file.

In [None]:
# --- Configuration
# Specify the server where data are located.
# Set USE_REMOTE to False to read from a local download instead.
USE_REMOTE = True
if USE_REMOTE:
    # Alternative mission: "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/015/20040924/"
    input_loc = "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/033/20100729/"
else:
    input_loc = '/Users/eddifying/Dropbox/data/sg015-ncei-download/'

# Range of profiles to load from the server
start_dive = 1
end_dive = 10

# Extra contributor information passed to update_dataset_attributes.
# NOTE(review): this name was previously undefined in the notebook (NameError
# on a fresh kernel). None is assumed to mean "nothing to append" - confirm
# against convertOG1.update_dataset_attributes, or supply the expected dict.
contrib_to_append = None

# --- List the data
list_datasets = readers.read_basestation(input_loc, start_dive, end_dive)

# Pick one basestation xarray dataset to work with (the second one loaded)
ds1 = list_datasets[1]

# --- Convert the data
# The wrapper convertOG1.convert_to_OG1(list_datasets, contrib_to_append)
# bundles the steps below; here they are run one at a time.
# Convert the dataset and output also variables not included
ds_OG1, attr_warnings, sg_cal, dc_other, dc_log = convertOG1.process_dataset(ds1)

# Create the list of attributes in order
ordered_attributes = convertOG1.update_dataset_attributes(ds1, contrib_to_append)

for key, value in ordered_attributes.items():
    ds_OG1.attrs[key] = value

# Construct the platform serial number from characters 1-3 of the 'id' attribute
PLATFORM_SERIAL_NUMBER = 'sg' + ds_OG1.attrs['id'][1:4]
ds_OG1['PLATFORM_SERIAL_NUMBER'] = PLATFORM_SERIAL_NUMBER
ds_OG1['PLATFORM_SERIAL_NUMBER'].attrs['long_name'] = "glider serial number"

# Construct the unique identifier attribute
# (named dataset_id so that the builtin id() is not shadowed)
dataset_id = f"{PLATFORM_SERIAL_NUMBER}_{ds_OG1.start_date}_delayed"
ds_OG1.attrs['id'] = dataset_id


# --- Output file
# Check a location for the output file
output_file = os.path.join('../data', 'demo_single_test.nc')
# If it's already there, remove it first
if os.path.exists(output_file):
    os.remove(output_file)

# Write the file
# This writer catches errors in data types (DType errors) when using xr.to_netcdf()
# The solution is to convert them to strings, which may be undesired behaviour
writers.save_dataset(ds_OG1, output_file)

In [None]:
# Inspect the attributes of `ds1`, the single basestation dataset picked in the
# loading cell, using the package's plotters.show_attributes helper
plotters.show_attributes(ds1)

In [None]:
# Seaglider mission directories on the NCEI archive, grouped by region.
# The trailing "# done" / "# failed" notes record the outcome of earlier runs.
# NOTE(review): no other cell visible in this notebook reads this list; any one
# of these URLs can be assigned to `input_loc` for troubleshooting.
input_locations = [
    # Either Iceland, Faroes or RAPID/MOCHA
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/005/20090829/", # done
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/005/20080606/", # done
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/005/20081106/", # done
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/012/20070831/", # done
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/014/20080214/",  # done
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/014/20080222/", # done
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/016/20061112/",  # done
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/016/20090605/", # done
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/016/20071113/", # done
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/016/20080607/",  # done
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/033/20100518/", # failed - ok after update to save_dataset
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/033/20100903/",
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/101/20081108/",
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/101/20061112/",
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/101/20070609/",
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/102/20061112/",
    # Labrador Sea
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/015/20040924/",
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/014/20040924/",
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/008/20031002/",
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/004/20031002/",
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/016/20050406/",
    # RAPID/MOCHA
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/033/20100729/",
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/034/20110128/",
]

# Cells below are for troubleshooting the loading

## Break down the loading functions to see where it's going wrong. 

Update the `input_loc` to the directory where the `p*nc` files are stored.

Example of a problematic mission: https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/016/20050406/

In [None]:
# Re-import utilities so that local edits made while debugging are picked up
importlib.reload(utilities)
input_loc =  "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/016/20050406/"
save = True

# Extra contributor information passed to update_dataset_attributes.
# NOTE(review): this name was previously undefined in the notebook (NameError
# on a fresh kernel). None is assumed to mean "nothing to append" - confirm
# against convertOG1.update_dataset_attributes, or supply the expected dict.
contrib_to_append = None

# Troubleshooting - run outside a function
# Load all datasets from the server
list_datasets = readers.read_basestation(input_loc)

datasets = list_datasets
# Convert the list of datasets to OG1, keeping only the ones that converted.
# NOTE(review): the single-file cell earlier in this notebook calls
# convertOG1.process_dataset for the same step - confirm which name the
# installed convertOG1 module provides.
processed_datasets = []
for ds in datasets:
    ds_new, attr_warnings, sg_cal, dc_other, dc_log = convertOG1.convert_to_OG1_dataset(ds)
    if ds_new:
        processed_datasets.append(ds_new)
    else:
        print(f"Failed to convert {ds.attrs['id']} - skipping")

# Fail early with a clear message instead of handing xr.concat an empty list
if not processed_datasets:
    raise ValueError(f"No datasets from {input_loc} could be converted to OG1")

# Join the converted datasets along N_MEASUREMENTS and order them by TIME
concatenated_ds = xr.concat(processed_datasets, dim='N_MEASUREMENTS')
concatenated_ds = concatenated_ds.sortby('TIME')

# Apply attributes (built from the first basestation dataset)
ordered_attributes = convertOG1.update_dataset_attributes(datasets[0], contrib_to_append)
for key, value in ordered_attributes.items():
    concatenated_ds.attrs[key] = value

# Construct the platform serial number from characters 1-3 of the 'id' attribute
PLATFORM_SERIAL_NUMBER = 'sg' + concatenated_ds.attrs['id'][1:4]
print(PLATFORM_SERIAL_NUMBER)
concatenated_ds['PLATFORM_SERIAL_NUMBER'] = PLATFORM_SERIAL_NUMBER
concatenated_ds['PLATFORM_SERIAL_NUMBER'].attrs['long_name'] = "glider serial number"

# Construct the unique identifier attribute
# (named dataset_id so that the builtin id() is not shadowed)
dataset_id = f"{PLATFORM_SERIAL_NUMBER}_{concatenated_ds.start_date}_delayed"
concatenated_ds.attrs['id'] = dataset_id

if save:
    output_file = os.path.join('../data', 'run_mission_test.nc')
    # Save the dataset to a NetCDF file
    writers.save_dataset(concatenated_ds, output_file)

## Dealing with missing coordinates

In [None]:
# `processed_datasets` is the list of xarray datasets, one for each `p*nc` file.
# Map each dataset's position in that list to the names of its coordinates.
coordinates_dict = {i: list(ds.coords) for i, ds in enumerate(processed_datasets)}
# Only the last expression of a cell is displayed, so print intermediate results
print(coordinates_dict)

# Positions of the datasets that lack a LONGITUDE coordinate
missing_longitude = [i for i, coords in coordinates_dict.items() if 'LONGITUDE' not in coords]
print("Datasets missing LONGITUDE:", missing_longitude)

# Positions of the datasets that do have a LONGITUDE coordinate
datasets_with_longitude = [i for i, coords in coordinates_dict.items() if 'LONGITUDE' in coords]
datasets_with_longitude
