In [1]:
import os, gc, sys
import json
import yaml
import numpy as np
import pandas as pd
import xarray as xr
import re
import warnings
warnings.filterwarnings("ignore")

In [2]:
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# Import OOI M2M tools
sys.path.append("/home/andrew/Documents/OOI-CGSN/ooinet/ooinet/")
from m2m import M2M

#### Set OOINet API access
In order to access and download data from OOINet, you need an OOINet API username and access token. Those can be found on your profile after logging in to OOINet. Your username and access token should NOT be stored in this notebook/python script (for security). They should be stored in a yaml file, kept in the same directory, named user_info.yaml.

In [4]:
# Import user info for connecting to OOINet via M2M.
# The credentials file is opened in a context manager so the file handle is
# closed as soon as the YAML has been parsed.
with open("../../../../QAQC_Sandbox/user_info.yaml") as user_file:
    userinfo = yaml.load(user_file, Loader=yaml.FullLoader)
username = userinfo["apiname"]
token = userinfo["apikey"]

#### Connect to OOINet

In [5]:
# Create the M2M client from the API username and token read from user_info.yaml
OOINet = M2M(username, token)

---
## Datasets

Identify all of the OOI-CGSN datasets for the **```PCO2W```**, **```PCO2A```**, and **```PHSEN```** instruments that are located at the Global Irminger Array. This is done by querying OOINet and iteratively walking through all of the API endpoints. The results are saved into a csv file so this step doesn't have to be repeated each time.

Check to see if the reference designators have already been identified. If they haven't been previously downloaded, we can use the ```OOINet.search_datasets``` function to search for the datasets associated with each instrument.

#### PCO2W

In [6]:
try:
    # If the reference designators were previously identified and downloaded
    pco2w_datasets = pd.read_csv("../data/pco2w_datasets.csv")
except FileNotFoundError:
    # No cached table yet: search for PCO2W datasets, asking for English names.
    # Only a missing cache file triggers the search; any other error is raised.
    pco2w_datasets = OOINet.search_datasets(instrument="PCO2W", English_names=True)

    # Save the datasets locally to speed up future runs
    os.makedirs("../data", exist_ok=True)
    pco2w_datasets.to_csv("../data/pco2w_datasets.csv", index=False)

# Print out the head
pco2w_datasets.head()

Unnamed: 0,array,array_name,node,node_name,instrument,instrument_name,refdes,url,deployments
0,RS03AXPS,Cabled Axial Seamount Axial Base Shallow Profi...,SF03A,Shallow Profiler (SF03A),4F-PCO2WA301,pCO2 Water,RS03AXPS-SF03A-4F-PCO2WA301,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7]"
1,RS01SBPS,Cabled Continental Margin Oregon Slope Base Sh...,SF01A,Shallow Profiler (SF01A),4F-PCO2WA101,pCO2 Water,RS01SBPS-SF01A-4F-PCO2WA101,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7, 8, 9]"
2,GS01SUMO,Global Southern Ocean Apex Surface Mooring,RII11,Mooring Riser,02-PCO2WC053,pCO2 Water (130 meters),GS01SUMO-RII11-02-PCO2WC053,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4]"
3,GS01SUMO,Global Southern Ocean Apex Surface Mooring,RII11,Mooring Riser,02-PCO2WC052,pCO2 Water (80 meters),GS01SUMO-RII11-02-PCO2WC052,https://ooinet.oceanobservatories.org/api/m2m/...,"[2, 3, 4]"
4,GS01SUMO,Global Southern Ocean Apex Surface Mooring,RII11,Mooring Riser,02-PCO2WC051,pCO2 Water (40 meters),GS01SUMO-RII11-02-PCO2WC051,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4]"


#### PHSEN

In [7]:
try:
    # If the reference designators were previously identified and downloaded
    phsen_datasets = pd.read_csv("../data/phsen_datasets.csv")
except FileNotFoundError:
    # No cached table yet: search for PHSEN datasets, asking for full English names.
    # Only a missing cache file triggers the search; any other error is raised.
    phsen_datasets = OOINet.search_datasets(instrument="PHSEN", English_names=True)

    # Save the datasets locally to speed up future runs
    os.makedirs("../data", exist_ok=True)
    phsen_datasets.to_csv("../data/phsen_datasets.csv", index=False)

# Print out the head
phsen_datasets.head()

Unnamed: 0,array,array_name,node,node_name,instrument,instrument_name,refdes,url,deployments
0,RS03AXPS,Cabled Axial Seamount Axial Base Shallow Profi...,SF03A,Shallow Profiler (SF03A),2D-PHSENA301,Seawater pH,RS03AXPS-SF03A-2D-PHSENA301,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7]"
1,RS03AXPS,Cabled Axial Seamount Axial Base Shallow Profi...,PC03A,200m Platform (PC03A),4B-PHSENA302,Seawater pH,RS03AXPS-PC03A-4B-PHSENA302,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7]"
2,RS01SBPS,Cabled Continental Margin Oregon Slope Base Sh...,SF01A,Shallow Profiler (SF01A),2D-PHSENA101,Seawater pH,RS01SBPS-SF01A-2D-PHSENA101,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7, 8, 9]"
3,RS01SBPS,Cabled Continental Margin Oregon Slope Base Sh...,PC01A,200m Platform (PC01A),4B-PHSENA102,Seawater pH,RS01SBPS-PC01A-4B-PHSENA102,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7, 8]"
4,GS03FLMB,Global Southern Ocean Flanking Subsurface Moor...,RIS01,Mooring Riser,04-PHSENF000,Seawater pH,GS03FLMB-RIS01-04-PHSENF000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3]"


#### PCO2A

In [8]:
try:
    # If the reference designators were previously identified and downloaded
    pco2a_datasets = pd.read_csv("../data/pco2a_datasets.csv")
except FileNotFoundError:
    # No cached table yet: search for PCO2A datasets, asking for English names.
    # Only a missing cache file triggers the search; any other error is raised.
    pco2a_datasets = OOINet.search_datasets(instrument="PCO2A", English_names=True)

    # Save the datasets locally to speed up future runs
    os.makedirs("../data", exist_ok=True)
    pco2a_datasets.to_csv("../data/pco2a_datasets.csv", index=False)

# Print out the head
pco2a_datasets.head()

Unnamed: 0,array,array_name,node,node_name,instrument,instrument_name,refdes,url,deployments
0,GS01SUMO,Global Southern Ocean Apex Surface Mooring,SBD12,Surface Buoy,04-PCO2AA000,pCO2 Air-Sea,GS01SUMO-SBD12-04-PCO2AA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4]"
1,GS01SUMO,Global Southern Ocean Apex Surface Mooring,SBD11,Surface Buoy,03-PCO2AA000,pCO2 Air-Sea,GS01SUMO-SBD11-03-PCO2AA000,https://ooinet.oceanobservatories.org/api/m2m/...,[4]
2,GI01SUMO,Global Irminger Sea Apex Surface Mooring,SBD12,Surface Buoy,04-PCO2AA000,pCO2 Air-Sea,GI01SUMO-SBD12-04-PCO2AA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7]"
3,GA01SUMO,Global Argentine Basin Apex Surface Mooring,SBD12,Surface Buoy,04-PCO2AA000,pCO2 Air-Sea,GA01SUMO-SBD12-04-PCO2AA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3]"
4,CP04OSSM,Coastal Pioneer Offshore Surface Mooring,SBD12,Surface Buoy,04-PCO2AA000,pCO2 Air-Sea,CP04OSSM-SBD12-04-PCO2AA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]"


Filter the datasets for the Irminger Array datasets, which start with the prefix "GI" for Global Irminger

In [9]:
# Keep only the Global Irminger rows: their array code starts with "GI".
# The vectorized .str accessor replaces the row-by-row apply/lambda.

# PCO2A
mask = pco2a_datasets["array"].str.startswith("GI")
pco2a_datasets = pco2a_datasets[mask]

# PCO2W
mask = pco2w_datasets["array"].str.startswith("GI")
pco2w_datasets = pco2w_datasets[mask]

# PHSEN
mask = phsen_datasets["array"].str.startswith("GI")
phsen_datasets = phsen_datasets[mask]

#### CTD & METBK
We will also need the temperature, salinity, and pressure data associated with the carbon system datasets. So we will also identify all the **```CTD```** datasets located at the Global Irminger Array as well as the **```METBK```** dataset for the surface mooring, which has the surface temperature and salinity.

In [10]:
try:
    # If the reference designators were previously identified and downloaded
    ctd_datasets = pd.read_csv("../data/ctd_datasets.csv")
except FileNotFoundError:
    # No cached table yet: search for CTD datasets, asking for English names.
    # Only a missing cache file triggers the search; any other error is raised.
    ctd_datasets = OOINet.search_datasets(instrument="CTD", English_names=True)

    # Save the datasets locally to speed up future runs
    os.makedirs("../data", exist_ok=True)
    ctd_datasets.to_csv("../data/ctd_datasets.csv", index=False)

# Print out the head
ctd_datasets.head()

Unnamed: 0,array,array_name,node,node_name,instrument,instrument_name,refdes,url,deployments
0,RS03ECAL,Cabled Axial Seamount Eastern Caldera,MJ03E,Medium-Power JBox (MJ03E),12-CTDPFB306,CTD,RS03ECAL-MJ03E-12-CTDPFB306,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2]"
1,RS03AXPS,Cabled Axial Seamount Axial Base Shallow Profi...,SF03A,Shallow Profiler (SF03A),2A-CTDPFA302,CTD,RS03AXPS-SF03A-2A-CTDPFA302,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7]"
2,RS03AXPS,Cabled Axial Seamount Axial Base Shallow Profi...,PC03A,200m Platform (PC03A),4A-CTDPFA303,CTD,RS03AXPS-PC03A-4A-CTDPFA303,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7]"
3,RS03AXPD,Cabled Axial Seamount Axial Base Deep Profiler...,DP03A,Wire-Following Profiler (DP03A),01-CTDPFL304,CTD,RS03AXPD-DP03A-01-CTDPFL304,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7]"
4,RS03AXBS,Cabled Axial Seamount Axial Base Seafloor,LJ03A,Low-Power JBox (LJ03A),12-CTDPFB301,CTD,RS03AXBS-LJ03A-12-CTDPFB301,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7, 8]"


Filter the datasets for the Irminger Array datasets, which start with the prefix "GI" for Global Irminger. For the **```CTD```** datasets, we also need to remove the Mobile Asset and Profiler datasets.

In [11]:
# Identify the Global Irminger Array datasets (array code starts with "GI")
mask = ctd_datasets["array"].str.startswith("GI")
ctd_datasets = ctd_datasets[mask]

# Remove datasets which are either Glider, AUV, or Profiler Mooring datasets:
# drop every refdes that contains "MOAS" or "CTDPF" (regex alternation)
mask = ~ctd_datasets["refdes"].str.contains("MOAS|CTDPF")
ctd_datasets = ctd_datasets[mask]

In [12]:
try:
    # If the reference designators were previously identified and downloaded
    metbk_datasets = pd.read_csv("../data/metbk_datasets.csv")
except FileNotFoundError:
    # No cached table yet: search for METBK datasets, asking for English names.
    # Only a missing cache file triggers the search; any other error is raised.
    metbk_datasets = OOINet.search_datasets(instrument="METBK", English_names=True)

    # Save the datasets locally to speed up future runs
    os.makedirs("../data", exist_ok=True)
    metbk_datasets.to_csv("../data/metbk_datasets.csv", index=False)

# Print out the head
metbk_datasets.head()

Unnamed: 0,array,array_name,node,node_name,instrument,instrument_name,refdes,url,deployments
0,GS01SUMO,Global Southern Ocean Apex Surface Mooring,SBD12,Surface Buoy,06-METBKA000,Bulk Meteorology Instrument Package,GS01SUMO-SBD12-06-METBKA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4]"
1,GS01SUMO,Global Southern Ocean Apex Surface Mooring,SBD11,Surface Buoy,06-METBKA000,Bulk Meteorology Instrument Package,GS01SUMO-SBD11-06-METBKA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4]"
2,GI01SUMO,Global Irminger Sea Apex Surface Mooring,SBD12,Surface Buoy,06-METBKA000,Bulk Meteorology Instrument Package,GI01SUMO-SBD12-06-METBKA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7]"
3,GI01SUMO,Global Irminger Sea Apex Surface Mooring,SBD11,Surface Buoy,06-METBKA000,Bulk Meteorology Instrument Package,GI01SUMO-SBD11-06-METBKA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7]"
4,GA01SUMO,Global Argentine Basin Apex Surface Mooring,SBD12,Surface Buoy,06-METBKA000,Bulk Meteorology Instrument Package,GA01SUMO-SBD12-06-METBKA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3]"


Filter the datasets for the Global Irminger dataset

In [13]:
# Keep only the Global Irminger rows (array code starts with "GI")
mask = metbk_datasets["array"].str.startswith("GI")
metbk_datasets = metbk_datasets[mask]

---
## Download Datasets
Now, download the PCO2A, PHSEN, and PCO2W datasets along with their associated CTD datasets from OOINet and save locally for ease of access. We can scroll through the available datasets to identify which CTD datasets are associated with each carbon system instrument; the pairings are listed below.

### Irminger Array
* GI01SUMO: Apex Surface Mooring
    * SBD12: Surface Buoy
        * PCO2AA: pCO2 Air-Sea (refdes = GI01SUMO-SBD12-04-PCO2AA000)
        * METBKA: Bulk Meteorology Instrument Package (refdes = GI01SUMO-SBD12-06-METBKA000)
    * RID16: Near-Surface Instrument Frame
        * PCO2WB: pCO2 Water (refdes = GI01SUMO-RID16-05-PCO2WB000)
        * CTDBPF: CTD (refdes = GI01SUMO-RID16-03-CTDBPF000)
    * RII11: Mooring Riser
        * PCO2WC: pCO2 Water (40 meters) (refdes = GI01SUMO-RII11-02-PCO2WC051)
        * CTDMOQ: CTD (40 meters) (refdes = GI01SUMO-RII11-02-CTDMOQ031)
        * PCO2WC: pCO2 Water (80 meters) (refdes = GI01SUMO-RII11-02-PCO2WC052)
        * CTDBPP: CTD (80 meters) (refdes = GI01SUMO-RII11-02-CTDBPP032)
        * PCO2WC: pCO2 Water (130 meters) (refdes = GI01SUMO-RII11-02-PCO2WC053)
        * CTDBPP: CTD (130 meters) (refdes = GI01SUMO-RII11-02-CTDBPP033)
        * PHSENE: Seawater pH (20 meters) (refdes = GI01SUMO-RII11-02-PHSENE041)
        * CTDMOQ: CTD (20 meters) (refdes = GI01SUMO-RII11-02-CTDMOQ011)
        * PHSENE: Seawater pH (100 meters) (refdes = GI01SUMO-RII11-02-PHSENE042)
        * CTDMOQ: CTD (100 meters) (refdes = GI01SUMO-RII11-02-CTDMOQ013)
* GI03FLMA: Flanking Subsurface Mooring A
    * RIS01: Mooring Riser
        * PHSENF: Seawater pH (refdes = GI03FLMA-RIS01-04-PHSENF000)
        * CTDMOG: CTD (30 meters) (refdes = GI03FLMA-RIM01-02-CTDMOG040)
* GI03FLMB: Flanking Subsurface Mooring B
    * RIS01: Mooring Riser
        * PHSENF: Seawater pH (refdes = GI03FLMB-RIS01-04-PHSENF000)
        * CTDMOG: CTD (30 meters) (refdes = GI03FLMB-RIM01-02-CTDMOG060)

### Surface Buoy: PCO2A (GI01SUMO-SBD12-04-PCO2AA000)

In [209]:
# Reference designator of the instrument processed by the cells below.
# NOTE(review): the section heading names the PCO2A surface-buoy instrument,
# but this value (and the saved outputs) is the Flanking Mooring B PHSEN —
# confirm which instrument this section is meant to show.
refdes = "GI03FLMB-RIS01-04-PHSENF000"

In [210]:
# Collapse the metadata to one row per (refdes, method, stream). Every other
# column becomes the list of its unique values, and a list holding a single
# value is unwrapped to that bare value.
metadata = (
    OOINet.get_metadata(refdes)
    .groupby(by=["refdes","method","stream"])
    .agg(lambda column: pd.unique(column.values.ravel()).tolist())
    .reset_index()
    .applymap(lambda cell: cell[0] if len(cell) == 1 else cell)
)
metadata

Unnamed: 0,refdes,method,stream,pdId,particleKey,type,shape,units,fillValue,unsigned,count,beginTime,endTime
0,GI03FLMB-RIS01-04-PHSENF000,recovered_host,phsen_abcdef_imodem_instrument_recovered,"[PD7, PD10, PD11, PD12, PD16, PD353, PD355, PD...","[time, port_timestamp, driver_timestamp, inter...","[DOUBLE, STRING, UBYTE, UINT, SHORT, FLOAT, BYTE]","[SCALAR, ARRAY1D, FUNCTION]","[seconds since 1900-01-01, 1, seconds since 19...","[-9999999, empty, 0, -9999, -99]","[False, True]",14056,2014-09-16T18:00:00.000Z,2021-07-05T17:58:31.000Z
1,GI03FLMB-RIS01-04-PHSENF000,recovered_inst,phsen_abcdef_instrument,"[PD7, PD10, PD11, PD12, PD16, PD355, PD356, PD...","[time, port_timestamp, driver_timestamp, inter...","[DOUBLE, STRING, UBYTE, UINT, SHORT, FLOAT]","[SCALAR, ARRAY1D, FUNCTION]","[seconds since 1900-01-01, 1, seconds since 19...","[-9999999, empty, 0, -9999]","[False, True]",30344,2014-09-16T18:00:00.000Z,2021-08-18T13:58:31.000Z
2,GI03FLMB-RIS01-04-PHSENF000,recovered_inst,phsen_abcdef_metadata,"[PD7, PD10, PD11, PD12, PD16, PD355, PD356, PD...","[time, port_timestamp, driver_timestamp, inter...","[DOUBLE, STRING, UBYTE, UINT, SHORT, BYTE, INT]",SCALAR,"[seconds since 1900-01-01, 1, seconds since 19...","[-9999999, empty, 0, -9999, -99]","[False, True]",7,2014-09-16T17:59:59.000Z,2020-08-20T23:58:31.000Z
3,GI03FLMB-RIS01-04-PHSENF000,telemetered,phsen_abcdef_sio_mule_instrument,"[PD7, PD10, PD11, PD12, PD16, PD353, PD355, PD...","[time, port_timestamp, driver_timestamp, inter...","[DOUBLE, STRING, UBYTE, UINT, SHORT, FLOAT, BYTE]","[SCALAR, ARRAY1D, FUNCTION]","[seconds since 1900-01-01, 1, seconds since 19...","[-9999999, empty, 0, -9999, 9999999, -99]","[False, True]",403,2014-09-18T00:00:01.000Z,2021-09-14T00:00:48.000Z
4,GI03FLMB-RIS01-04-PHSENF000,telemetered,phsen_abcdef_sio_mule_metadata,"[PD7, PD10, PD11, PD12, PD16, PD353, PD355, PD...","[time, port_timestamp, driver_timestamp, inter...","[DOUBLE, STRING, UBYTE, UINT, SHORT, BYTE, INT]",SCALAR,"[seconds since 1900-01-01, 1, seconds since 19...","[-9999999, empty, 0, -9999, -99, 9999999]","[False, True]",7,2015-08-21T18:00:03.000Z,2021-08-18T00:00:55.000Z


For the METBK data streams, we don't want the hourly datastreams, which are computed flux products. We want the normal METBK data with the temperature, salinity, air pressure, etc.

In [211]:
# Fetch the table of data streams (method / stream pairs) for this reference designator
datastreams = OOINet.get_datastreams(refdes)

# For METBK only: uncomment the next two lines to drop the hourly data streams
#mask = datastreams["stream"].apply(lambda x: True if "hourly" not in x else False)
#datastreams = datastreams[mask]
datastreams

Unnamed: 0,refdes,method,stream
0,GI03FLMB-RIS01-04-PHSENF000,recovered_host,phsen_abcdef_imodem_instrument_recovered
1,GI03FLMB-RIS01-04-PHSENF000,recovered_inst,phsen_abcdef_instrument
2,GI03FLMB-RIS01-04-PHSENF000,recovered_inst,phsen_abcdef_metadata
3,GI03FLMB-RIS01-04-PHSENF000,telemetered,phsen_abcdef_sio_mule_instrument
4,GI03FLMB-RIS01-04-PHSENF000,telemetered,phsen_abcdef_sio_mule_metadata


Download the datasets

In [212]:
def clean_catalog(catalog, stream):
    """Filter a netCDF catalog down to the non-empty files of one data stream.

    Parameters
    ----------
    catalog : iterable of str
        Paths or URLs of netCDF files. The last "_"-separated piece of each
        matching entry is expected to look like "<start>-<end>.nc".
    stream : str
        Data stream name; an entry is kept only if this text occurs in its
        file name (the part after the final "/").

    Returns
    -------
    list of str
        The matching entries, in their original order, whose start and end
        timestamps differ.

    Raises
    ------
    ValueError
        If a matching entry's trailing piece does not split into exactly two
        "-"-separated timestamps.
    """
    # Pass 1: keep entries whose file name mentions the requested stream
    stream_files = [path for path in catalog if stream in path.split("/")[-1]]

    # Pass 2: a file whose start and end timestamps are identical is treated
    # as empty and left out
    kept = []
    for path in stream_files:
        time_range = path.split("_")[-1].replace(".nc","")
        start, end = time_range.split("-")
        if start != end:
            kept.append(path)

    return kept

In [213]:
# Store the active reference designator on the client object.
# NOTE(review): presumably read by the M2M methods called below — confirm in m2m.py
OOINet.REFDES = refdes

In [214]:
# Make sure the output directory exists before anything is written to it;
# to_netcdf does not create missing directories.
saveDir = f"../data/{refdes}/"
os.makedirs(saveDir, exist_ok=True)

for ind in metadata.index:
    row = metadata.loc[ind]
    method, stream = row["method"], row["stream"]
    # Skip streams whose names mark them as power, blank, metadata, or control records
    if any(key in stream for key in ("power", "blank", "metadata", "control")):
        continue

    # Get the thredds url
    thredds_url = OOINet.get_thredds_url(refdes, method, stream, goldCopy=True)
    print(thredds_url + "\n")
    # Access the catalog
    catalog = OOINet.get_thredds_catalog(thredds_url)
    # Parse the catalog for relevant netCDF files
    catalog = OOINet.parse_catalog(catalog, exclude=["gps", "blank"])
    catalog = sorted(catalog)
    # Clean the catalog
    catalog = clean_catalog(catalog, stream)

    # Open the data
    data = OOINet.load_netCDF_datasets(catalog, goldCopy=True)

    # Eliminate unneeded timestamps: collect the names first, then drop them in
    # a single call instead of rebuilding the dataset once per variable
    extra_time_vars = [name for name in data.variables if "time" in name and name != "time"]
    data = data.drop_vars(extra_time_vars)

    # Download and add annotations
    # NOTE(review): get_annotations(refdes) does not depend on the loop variables;
    # it could be hoisted above the loop if add_annotation_qc_flag is confirmed
    # not to modify the annotations it is given.
    annotations = OOINet.get_annotations(refdes)
    data = OOINet.add_annotation_qc_flag(data, annotations)

    # Save the dataset
    filename = f"{refdes}-{method}-{stream}.nc"
    data.to_netcdf(saveDir+filename, engine="h5netcdf")

https://thredds.dataexplorer.oceanobservatories.org/thredds/catalog/ooigoldcopy/public/GI03FLMB-RIS01-04-PHSENF000-recovered_host-phsen_abcdef_imodem_instrument_recovered/catalog.html

Checking and removing bad files: 
[########################################] | 100% Completed | 10.4s

Loading netCDF_files for GI03FLMB-RIS01-04-PHSENF000:
[########################################] | 100% Completed | 12.5s
https://thredds.dataexplorer.oceanobservatories.org/thredds/catalog/ooigoldcopy/public/GI03FLMB-RIS01-04-PHSENF000-recovered_inst-phsen_abcdef_instrument/catalog.html

Checking and removing bad files: 
[########################################] | 100% Completed | 15.9s

Loading netCDF_files for GI03FLMB-RIS01-04-PHSENF000:
[########################################] | 100% Completed | 18.1s
https://thredds.dataexplorer.oceanobservatories.org/thredds/catalog/ooigoldcopy/public/GI03FLMB-RIS01-04-PHSENF000-telemetered-phsen_abcdef_sio_mule_instrument/catalog.html

Checking and removing b

### Combine Datasets

Now, we need to merge the data. First, we iterate through the data variables for each dataset, identify any which are unique to a given dataset, and broadcast them to the other datasets. This step is necessary to allow the datasets to combine. Once each dataset has the same data variables, we utilize ```xarray.Dataset.combine_first``` to combine the datasets. We assume that the instrument record, if available, is the best and most complete dataset and utilize the telemetered and recovered_host datasets to fill in the gaps.

In [215]:
# Directory that holds the per-stream netCDF files for this reference designator
saveDir = f"../data/{refdes}/"

# Show which netCDF files are available to combine
for file in os.listdir(saveDir):
    if not file.endswith(".nc"):
        continue
    print(file)

GI03FLMB-RIS01-04-PHSENF000-recovered_host-phsen_abcdef_imodem_instrument_recovered.nc
GI03FLMB-RIS01-04-PHSENF000-telemetered-phsen_abcdef_sio_mule_instrument.nc
GI03FLMB-RIS01-04-PHSENF000-recovered_inst-phsen_abcdef_instrument.nc


In [216]:
# Build the file names from refdes so they match the "{refdes}-{method}-{stream}.nc"
# names written by the download step and stay in sync with saveDir, which is
# also derived from refdes.
tele_data = xr.open_dataset(saveDir + f"{refdes}-telemetered-phsen_abcdef_sio_mule_instrument.nc")
host_data = xr.open_dataset(saveDir + f"{refdes}-recovered_host-phsen_abcdef_imodem_instrument_recovered.nc")
inst_data = xr.open_dataset(saveDir + f"{refdes}-recovered_inst-phsen_abcdef_instrument.nc")

In [217]:
# Drop records whose timestamp repeats an earlier one, in each of the three datasets
tele_data, host_data, inst_data = [
    dset.sel(time=~dset.get_index("time").duplicated())
    for dset in (tele_data, host_data, inst_data)
]

In [218]:
# Need to make sure each dataset has the same variables before they are combined.
# Pass 1: any variable found only in the telemetered dataset is added to the
# recovered-host dataset, broadcast against the host "time" coordinate.
for var in tele_data.variables:
    if var not in host_data.variables:
        host_data[var] = tele_data[var].broadcast_like(host_data["time"])
        
# Pass 2: the reverse direction — variables found only in the recovered-host
# dataset are added to the telemetered dataset.
for var in host_data.variables:
    if var not in tele_data.variables:
        tele_data[var] = host_data[var].broadcast_like(tele_data["time"])

In [219]:
# Merge the telemetered dataset and host_dataset: telemetered values take
# priority and recovered-host values fill in where telemetered data is missing
tele_host = tele_data.combine_first(host_data)


In [220]:
# Give the instrument dataset and the merged telemetered/host dataset the same
# set of variables before they are combined.
# Pass 1: variables found only in tele_host are added to inst_data, broadcast
# against the instrument "time" coordinate.
for var in tele_host.variables:
    if var not in inst_data.variables:
        inst_data[var] = tele_host[var].broadcast_like(inst_data["time"])

# Pass 2: variables found only in inst_data are added to tele_host.
for var in inst_data.variables:
    if var not in tele_host.variables:
        tele_host[var] = inst_data[var].broadcast_like(tele_host["time"])

In [221]:
# Merge the instrument dataset with the combined telemetered-recovered_host dataset:
# instrument values take priority and tele_host values fill in the gaps
data = inst_data.combine_first(tele_host)

#### Save the results
With the merged datasets, we can save the results locally as a netCDF file. However, some data variables contain improperly formatted datetimes and timestamps which will cause an error when saving. Generally, these data variables do not contain particularly useful information for a science-user and can be dropped before saving.

In [222]:
# Write the merged dataset to ../data/<refdes>_combined.nc with the h5netcdf engine.
# NOTE(review): the text above says variables with badly formatted datetimes can be
# dropped before saving, but nothing is dropped in this cell — confirm whether a
# drop step is still needed.
data.to_netcdf(f"../data/{refdes}_combined.nc", engine="h5netcdf")