# Run a full mission of basestation files


In [None]:
# This cell imports for development work
import sys
import importlib
# Add a local seagliderOG1 checkout to the module search path.
# NOTE(review): this is a machine-specific absolute path; adjust it to your own clone.
sys.path.append('/Users/eddifying/Cloudfree/gitlab-cloudfree/seagliderOG1')
import warnings
# Suppress all warnings (Warning is the base class of every warning category).
warnings.simplefilter("ignore", category=Warning)

In [None]:
from seagliderOG1 import readers
from seagliderOG1 import plotters
from seagliderOG1 import convertOG1
from seagliderOG1 import vocabularies
import xarray as xr
import os

In [None]:
# Re-import convertOG1 so edits made to the module during development are picked up.
importlib.reload(convertOG1)

# Prefix shared by every mission directory listed below.
ncei_seaglider_root = "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/"

# Mission directories to process; trailing comments record the status of earlier runs.
servers = [
    # Either Iceland, Faroes or RAPID/MOCHA
    ncei_seaglider_root + "005/20090829/",  # done
    ncei_seaglider_root + "005/20080606/",  # done
    ncei_seaglider_root + "005/20081106/",  # done
    ncei_seaglider_root + "012/20070831/",  # done
    ncei_seaglider_root + "014/20080214/",  # done
    ncei_seaglider_root + "014/20080222/",  # done
    ncei_seaglider_root + "016/20061112/",  # done
    ncei_seaglider_root + "016/20090605/",  # done
    ncei_seaglider_root + "016/20071113/",  # done
    ncei_seaglider_root + "016/20080607/",  # done
    ncei_seaglider_root + "033/20100518/",  # failed - ok after update to save_dataset
    ncei_seaglider_root + "033/20100903/",
    ncei_seaglider_root + "101/20081108/",
    ncei_seaglider_root + "101/20061112/",
    ncei_seaglider_root + "101/20070609/",
    ncei_seaglider_root + "102/20061112/",
    # Labrador Sea
    ncei_seaglider_root + "015/20040924/",
    ncei_seaglider_root + "014/20040924/",
    ncei_seaglider_root + "008/20031002/",
    ncei_seaglider_root + "004/20031002/",
    ncei_seaglider_root + "016/20050406/",
    # RAPID/MOCHA
    ncei_seaglider_root + "033/20100729/",
    ncei_seaglider_root + "034/20110128/",
]

def process_and_save_data(input_location, save=False, output_dir='../data', overwrite=False):
    """Convert one mission of basestation files to a single OG1 dataset.

    The output file is named ``<id>.nc`` inside ``output_dir``, where ``id``
    is the ``id`` attribute of the converted last dataset. If that file
    already exists and ``overwrite`` is false, it is opened and returned
    instead of re-running the conversion.

    Parameters
    ----------
    input_location : str
        Location passed to ``readers.read_basestation`` (the notebook uses
        URLs of mission directories).
    save : bool, optional
        If true, write the converted dataset with ``convertOG1.save_dataset``.
    output_dir : str, optional
        Directory in which the output file is looked up and written.
    overwrite : bool, optional
        If true, re-run the conversion even when the output file exists; the
        existing file is replaced only when ``save`` is also true.

    Returns
    -------
    The dataset read back from the existing file, or the result of
    ``convertOG1.convert_to_OG1`` for the full list of datasets.
    """
    # Load all datasets from the input location
    list_datasets = readers.read_basestation(input_location)

    # Convert just the last dataset to obtain the id that names the output file
    ds1 = convertOG1.convert_to_OG1(list_datasets[-1])
    output_file = os.path.join(output_dir, ds1.attrs['id'] + '.nc')

    file_exists = os.path.exists(output_file)
    if file_exists and not overwrite:
        # Run quietly: reuse the earlier result rather than prompting the user
        print(f"File {output_file} already exists; loading it. "
              "Pass overwrite=True to re-run the conversion.")
        return xr.open_dataset(output_file)

    if not file_exists:
        print('Running the directory')

    ds_all = convertOG1.convert_to_OG1(list_datasets)
    if save:
        # Remove the stale file only when it is about to be rewritten, so that
        # overwrite=True with save=False never deletes data.
        if file_exists:
            os.remove(output_file)
        convertOG1.save_dataset(ds_all, output_file)

    return ds_all

# Work through every mission directory in turn with saving enabled;
# ds_all is left holding the dataset of the last mission processed.
for input_loc in servers:
    print(input_loc)
    ds_all = process_and_save_data(input_location=input_loc, save=True)

# Cells below are for troubleshooting the loading

In [None]:
## Break down the loading functions to see where it's going wrong. 

Set `input_loc` to the directory (or URL) where the `p*.nc` files are stored.

In [None]:
# Re-import convertOG1 so edits made to the module during development are picked up.
importlib.reload(convertOG1)
input_loc =  "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/016/20050406/"
save = True

# Troubleshooting - run the conversion steps outside a function so that every
# intermediate result stays available for inspection in later cells.
# Load all datasets from the server
list_datasets = readers.read_basestation(input_loc)

datasets = list_datasets
# Convert the datasets to OG1 one at a time; the extra return values of the
# last call remain in scope for inspection.
processed_datasets = []
for ds in datasets:
    ds_new, attr_warnings, sg_cal, dc_other, dc_log = convertOG1.convert_to_OG1_dataset(ds)
    processed_datasets.append(ds_new)

concatenated_ds = xr.concat(processed_datasets, dim='N_MEASUREMENTS')
concatenated_ds = concatenated_ds.sortby('TIME')

# Apply attributes
# NOTE(review): the original cell used the bare names `update_dataset_attributes`
# and `contrib_to_append`, neither of which is defined in this notebook. The
# helper is presumably the one in convertOG1, and None presumably means
# "no extra contributors" -- confirm against the installed seagliderOG1.
contrib_to_append = None
ordered_attributes = convertOG1.update_dataset_attributes(datasets[0], contrib_to_append)
concatenated_ds.attrs.update(ordered_attributes)

# Construct the platform serial number from characters 1-3 of the id attribute
PLATFORM_SERIAL_NUMBER = 'sg' + concatenated_ds.attrs['id'][1:4]
print(PLATFORM_SERIAL_NUMBER)
concatenated_ds['PLATFORM_SERIAL_NUMBER'] = PLATFORM_SERIAL_NUMBER
concatenated_ds['PLATFORM_SERIAL_NUMBER'].attrs['long_name'] = "glider serial number"

# Construct the unique identifier attribute
# (named dataset_id so the builtin id() is not shadowed in later cells)
dataset_id = f"{PLATFORM_SERIAL_NUMBER}_{concatenated_ds.start_date}_delayed"
concatenated_ds.attrs['id'] = dataset_id

if save:
    output_file = os.path.join('../data', 'run_mission_test.nc')
    # Save the dataset to a NetCDF file
    convertOG1.save_dataset(concatenated_ds, output_file)

## Dealing with missing coordinates

In [None]:
# The list of xarray datasets, one for each `p*nc` file
processed_datasets

# Map each dataset's position in the list to the names of its coordinates
coordinates_dict = dict(enumerate(list(dataset.coords) for dataset in processed_datasets))
coordinates_dict

# Positions of the datasets that have no LONGITUDE coordinate
missing_longitude = [
    index for index, coord_names in coordinates_dict.items()
    if 'LONGITUDE' not in coord_names
]
missing_longitude

# Positions of the datasets that do have a LONGITUDE coordinate
datasets_with_longitude = [
    index for index, coord_names in coordinates_dict.items()
    if 'LONGITUDE' in coord_names
]
datasets_with_longitude
