# Demo


The purpose of this notebook is to create a minimum working example of seaglider data in OG1 format. The test case is to convert sg015 data from the Labrador Sea in September 2004.

- OG1 is a newly agreed format (since June 2024) for glider datasets from various platforms (e.g., Seaglider, Slocum, SeaExplorer). The specification lives on GitHub: https://github.com/OceanGlidersCommunity/OG-format-user-manual
- OG1 manual: https://oceangliderscommunity.github.io/OG-format-user-manual/OG_Format.html


In [None]:
import sys
import importlib
sys.path.append('/Users/eddifying/Cloudfree/gitlab-cloudfree/seagliderOG1')
import glidertools as gt
#importlib.reload(fetchers)
#importlib.reload(plotters)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from seagliderOG1 import fetchers
from seagliderOG1 import tools
from seagliderOG1 import plotters
import xarray as xr

#from pathlib import Path

## Load seaglider data in native format

This section shows three ways to load a glider dataset: a hosted sample file, files in a local directory, or files on the NCEI server.

Load an example dataset using `seagliderOG1.fetchers.load_sample_dataset`

Alternatively, use your own with e.g. `ds = xr.open_dataset('/path/to/yourfile.nc')`

### Load a sample dataset

These data are currently hosted on Dropbox.

In [None]:
# Names of the available sample files
file_list = [
    'p0150500_20050213.nc',
    'p0150501_20050213.nc',
    'p0150502_20050214.nc',
    'p0150503_20050214.nc',
    'p0150504_20050215.nc',
]

# Fetch the default sample dataset (p0150500_20050213.nc) and draw its
# depth-against-time quick-look plot
ds = fetchers.load_sample_dataset()
plotters.plot_profile_depth(ds)

### Load dataset from local machine

- For local data in the directory `input_dir`
- Creates a plot of ctd_depth against ctd_time.


In [None]:
# Directory on the local machine that holds the basestation files.
# NOTE: this is a machine-specific absolute path; edit it to point at your own copy.
input_dir = "/Users/eddifying/Dropbox/data/sg015-ncei-download"

# Read and concatenate the files found in input_dir; the optional
# start_profile / end_profile arguments restrict the range of profiles loaded
ds_all = fetchers.load_dataset_from_directory(
    input_dir,
    start_profile=500,
    end_profile=501,
)

# Quick-look plot of depth against time
plotters.plot_profile_depth(ds_all)

### Load data from the NCEI server

- Data from the sg015 mission in the Labrador Sea (https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:0111844), dataset identifier gov.noaa.nodc:0111844.


In [None]:
# URL of the NCEI directory that holds the sg015 files
server = "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/015/20040924/"

# One-off steps that were needed to build the initial file list (kept for reference):
# mylist = fetchers.list_files_in_https_server(server)
# fetchers.create_pooch_registry_from_directory("/Users/eddifying/Dropbox/data/sg015-ncei-download/")

# Read and concatenate the files from the server; the optional
# start_profile / end_profile arguments restrict the range of profiles loaded
ds_all = fetchers.load_dataset_from_online(
    server,
    start_profile=500,
    end_profile=504,
)

# Quick-look plot of depth against time
plotters.plot_profile_depth(ds_all)

## Start to think about renaming variables

### Check what variables are in the file

- Here, I found it easier to use glidertools (https://glidertools.readthedocs.io/en/latest/loading.html#working-with-seaglider-base-station-files) to get a quick look at what's in there.

In [None]:
# Build the full path to the first file in the list and show which file is inspected
filenames = f"{input_dir}/{file_list[0]}"
print(filenames)

# Let glidertools list the variables stored in that file
gt.load.seaglider_show_variables(filenames)

### Check the attributes in the basestation file

There was no equivalent in glidertools, so I wrote one and put it in `seagliderOG1.plotters`

In [None]:
# Re-declare the file names so this cell can be run on its own
file_list = [
    'p0150500_20050213.nc',
    'p0150501_20050213.nc',
    'p0150502_20050214.nc',
    'p0150503_20050214.nc',
    'p0150504_20050215.nc',
]

# Show the attributes of the first file
filenames = f"{input_dir}/{file_list[0]}"
plotters.show_attributes(filenames)

In [None]:
# Import the module (not the class): other cells in this notebook call
# datetime.datetime / datetime.UTC, so `datetime` must stay bound to the module
# even if this cell is re-run out of order.
import datetime


def _compact_timestamp(timestamp):
    """Strip the separators from a timestamp string.

    Removes '-', '_' and ':' and any trailing 'Z', e.g.
    '2005-02-13T18:09:41Z' -> '20050213T180941'.
    """
    for separator in ('-', '_', ':'):
        timestamp = timestamp.replace(separator, '')
    return timestamp.rstrip('Z')


# --- Candidate global attributes, built from the attributes of ds_all ---
title = "OceanGliders trajectory file"
platform = "sub-surface gliders"
platform_vocabulary = "https://vocab.nerc.ac.uk/collection/L06/current/27/"
time_str = _compact_timestamp(ds_all.time_coverage_start)
# NOTE: `id` shadows the built-in id() for the rest of the session; the name is
# kept because it matches the attribute being assembled.
id = ds_all.platform_id + '_' + time_str + '_delayed'
naming_authority = ds_all.naming_authority
institution = ds_all.institution
time_coverage_start = time_str
time_coverage_end = _compact_timestamp(ds_all.time_coverage_end)
site = ds_all.summary
project = ds_all.project
contributor_name = ds_all.creator_name + ', ' + ds_all.contributor_name
contributor_email = ds_all.creator_email
contributor_role = "PI, " + ds_all.contributor_role
contributor_role_vocabulary = "http://vocab.nerc.ac.uk/search_nvs/W08/"
uri = ds_all.uuid
web_link = "https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:0111844"
comment = "history: " + ds_all.history
start_date = time_coverage_start
# Use UTC so the creation stamp does not depend on the local time zone of the
# machine running the notebook (the coverage stamps have their 'Z' stripped).
date_created = datetime.datetime.now(datetime.timezone.utc).strftime('%Y%m%dT%H%M%S')
featureType = "trajectory"
Conventions = "CF-1.10,OG-1.0"

# Attribute names listed for copying without modification
attr_as_is = [
    "geospatial_lat_min",
    "geospatial_lat_max",
    "geospatial_lon_min",
    "geospatial_lon_max",
    "geospatial_vertical_min",
    "geospatial_vertical_max",
]

# GPS-related variable names in the basestation file
GPS_variables = [
    "log_gps_lat",
    "log_gps_lon",
    "log_gps_time"
]

# Variable names of interest in the basestation file
names = [
    'ctd_depth',
    'ctd_time',
    'ctd_pressure',
    'salinity',
    'temperature',
    'salinity_qc',
    'temperature_qc',
    'pressure',
    'vert_speed',
    'horz_speed'
]



In [None]:
import datetime
# Lookup tables used for renaming: each key is a name in the basestation
# file, each value is the name it is renamed to.

# Dimensions
dims_rename_dict = dict(sg_data_point='N_MEASUREMENTS')

# Coordinate variables
coords_rename_dict = dict(
    longitude='LONGITUDE',
    latitude='LATITUDE',
    ctd_time='TIME',
    ctd_depth='DEPTH',
)

# Data variables
vars_rename_dict = dict(
    conductivity='CNDC',
    temperature='TEMP',
    salinity='PSAL',  # after thermal lag correction
    conductivity_qc='CNDC_QC',
    salinity_qc='PSAL_QC',
    temperature_qc='TEMP_QC',
    vert_speed='VERT_GLIDER_SPEED',  # This is using the hdm
    horz_speed='HORZ_GLIDER_SPEED',  # This is using the hdm
    density='DENSITY',
)

def create_renamed_dataset(ds, dims_rename_dict, coords_rename_dict, vars_rename_dict):
    """Rename the dimensions and variables of ``ds``.

    Parameters
    ----------
    ds
        Object providing ``rename_dims`` and ``rename_vars``.
    dims_rename_dict : dict
        Mapping passed to ``rename_dims``.
    coords_rename_dict : dict
        Mapping passed to the first ``rename_vars`` call.
    vars_rename_dict : dict
        Mapping passed to the second ``rename_vars`` call.

    Returns
    -------
    The result of the final ``rename_vars`` call.
    """
    return (
        ds.rename_dims(dims_rename_dict)
        .rename_vars(coords_rename_dict)
        .rename_vars(vars_rename_dict)
    )

# Apply the renaming to the concatenated dataset and print the result
ds_renamed = create_renamed_dataset(
    ds=ds_all,
    dims_rename_dict=dims_rename_dict,
    coords_rename_dict=coords_rename_dict,
    vars_rename_dict=vars_rename_dict,
)
print(ds_renamed)

In [None]:


def process_dataset(file_list, dims_rename_dict, coords_rename_dict, vars_rename_dict):
    """Open the first file of ``file_list`` and reduce it to the renamed variables.

    Only ``file_list[0]`` is read; the remaining entries are ignored.

    Parameters
    ----------
    file_list : sequence
        Paths of the files; the first entry is passed to ``xr.open_dataset``.
    dims_rename_dict : dict
        Mapping of old to new dimension names.
    coords_rename_dict : dict
        Mapping of old to new coordinate variable names.
    vars_rename_dict : dict
        Mapping of old to new data variable names; its values also define
        which variables are kept.

    Returns
    -------
    Dataset holding the variables named in ``vars_rename_dict.values()``, a
    ``TRAJECTORY`` coordinate, and a scalar ``DEPLOYMENT_TIME`` variable taken
    from the ``start_time`` attribute.
    """
    ds_single = xr.open_dataset(file_list[0])

    # Apply renaming using the dictionaries
    ds_single = (
        ds_single.rename_dims(dims_rename_dict)
        .rename_vars(coords_rename_dict)
        .rename_vars(vars_rename_dict)
    )

    # Keep only the renamed data variables. A list (not a set) keeps the
    # variable order deterministic and equal to the order of vars_rename_dict.
    vars_to_keep = list(vars_rename_dict.values())
    ds_renamed = ds_single[vars_to_keep]

    # Assign the trajectory coordinate under its new name. After the subsetting
    # above, 'trajectory' may no longer be part of ds_renamed, so dropping it
    # must not fail when it is absent.
    ds_renamed = ds_renamed.assign_coords(TRAJECTORY=ds_single['trajectory'])
    ds_renamed = ds_renamed.drop_vars('trajectory', errors='ignore')

    # start_time is used directly as epoch seconds (see the units written
    # below); converting it to a datetime and back would return the same number.
    deployment_time_float = float(ds_renamed.attrs['start_time'])

    # Create a new variable DEPLOYMENT_TIME and describe it
    ds_renamed['DEPLOYMENT_TIME'] = deployment_time_float
    ds_renamed['DEPLOYMENT_TIME'].attrs.update({
        'long_name': "date of deployment",
        'standard_name': "time",
        'calendar': "gregorian",
        'units': "seconds since 1970-01-01T00:00:00Z",
    })

    return ds_renamed

# Example usage
# file_list holds bare file names; prefix them with input_dir (as the cells
# that build `filenames` do) so the files are found regardless of the current
# working directory.
file_paths = [input_dir + "/" + name for name in file_list]
ds_renamed = process_dataset(file_paths, dims_rename_dict, coords_rename_dict, vars_rename_dict)
print(ds_renamed)


In [None]:
def plot_speeds(ds):
    """Plot horizontal and vertical speed against ``ctd_time``.

    Draws two stacked panels sharing the x axis: ``horz_speed`` (blue) on top
    and ``vert_speed`` (red) below, then shows the figure.

    Parameters
    ----------
    ds
        Dataset-like object indexable by 'ctd_time', 'horz_speed' and
        'vert_speed'.
    """
    fig, ax = plt.subplots(2, 1, figsize=(12, 8), sharex=True)

    # (axes, variable name, label, line colour) for each panel, top to bottom
    panels = (
        (ax[0], 'horz_speed', 'Horizontal Speed', 'b'),
        (ax[1], 'vert_speed', 'Vertical Speed', 'r'),
    )
    for axis, variable, label, colour in panels:
        axis.plot(ds['ctd_time'], ds[variable], label=label, color=colour)
        axis.set_ylabel(f'{label} (m/s)')
        axis.legend()
        axis.grid(True)

    # Only the bottom panel carries the shared x-axis label
    ax[1].set_xlabel('Time')

    plt.tight_layout()
    plt.show()

# Example usage
plot_speeds(ds_all)

In [None]:
def preprocess(ds):
    """Rename and subset one dataset before it is combined with others.

    Uses the notebook-level ``dims_rename_dict``, ``coords_rename_dict`` and
    ``vars_rename_dict``. Takes a single argument so that it can be passed as
    the ``preprocess`` callback of ``xr.open_mfdataset``.

    Parameters
    ----------
    ds
        Dataset as opened from a single file.

    Returns
    -------
    Dataset holding the variables named in ``vars_rename_dict.values()`` plus
    a scalar ``DEPLOYMENT_TIME`` variable taken from the ``start_time``
    attribute.
    """
    # Apply renaming using the dictionaries
    ds = (
        ds.rename_dims(dims_rename_dict)
        .rename_vars(coords_rename_dict)
        .rename_vars(vars_rename_dict)
    )

    # Keep only the renamed data variables. A list (not a set) keeps the
    # variable order deterministic and equal to the order of vars_rename_dict.
    vars_to_keep = list(vars_rename_dict.values())
    ds = ds[vars_to_keep]

    # The trajectory coordinate is not assigned here (that step was disabled
    # in the original cell).

    # start_time is used directly as epoch seconds (see the units written
    # below); converting it to a datetime and back would return the same number.
    deployment_time_float = float(ds.attrs['start_time'])

    # Create a new variable DEPLOYMENT_TIME and describe it
    ds['DEPLOYMENT_TIME'] = deployment_time_float
    ds['DEPLOYMENT_TIME'].attrs.update({
        'long_name': "date of deployment",
        'standard_name': "time",
        'calendar': "gregorian",
        'units': "seconds since 1970-01-01T00:00:00Z",
    })

    return ds

# Use xr.open_mfdataset to open multiple files and concatenate them end to end.
# file_list holds bare file names, so prefix them with input_dir (as the cells
# that build `filenames` do).
file_paths = [input_dir + "/" + name for name in file_list]

# After preprocess() the measurement dimension is the renamed 'sg_data_point'
# dimension; 'TIME' is a variable along it, not a dimension, so the files are
# concatenated along the renamed dimension instead.
ds_combined = xr.open_mfdataset(
    file_paths,
    combine='nested',
    concat_dim=dims_rename_dict['sg_data_point'],
    preprocess=preprocess,
)
print(ds_combined)



# Basic statistics of dataset

In [None]:
# Basic plot of the location of the dataset in space/time
#tools.plot_glider_track(ds)


In [None]:
# Basic diagnostics of the gridding in the dataset
#tools.plot_grid_spacing_histograms(ds)

In [None]:
# Basic diagnostics of the watermass properties
#tools.plot_ts_histograms(ds)


### Check basic data and water column structure first

In [None]:
#tools.plot_basic_vars(ds,v_res=1, start_prof=1, end_prof=int(ds.PROFILE_NUMBER.max()))