In [1]:
import os, gc, sys
import json
import yaml
import numpy as np
import pandas as pd
import xarray as xr
import re

In [2]:
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# Import OOI M2M tools
# NOTE(review): this is an absolute, machine-specific path; the import below only
# works where the ooinet package is checked out at exactly this location.
sys.path.append("/home/andrew/Documents/OOI-CGSN/ooinet/ooinet/")
from m2m import M2M

#### Set OOINet API access
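In order to access and download data from OOINet, you need an OOINet API username and access token. Both can be found on your profile after logging in to OOINet. For security, your username and access token should NOT be stored in this notebook/python script. Instead, store them in a yaml file named user_info.yaml under the keys `apiname` and `apikey`; the cell below reads that file from `../../../../QAQC_Sandbox/user_info.yaml`, so adjust the path if you keep the file elsewhere (e.g. in the same directory as this notebook).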

In [4]:
# Import user info for connecting to OOINet via M2M.
# The context manager closes the credentials file deterministically instead of
# leaving the handle returned by a bare open(...) to the garbage collector.
# NOTE(review): the file is only read for two plain string keys, so
# yaml.safe_load would likely suffice - confirm before switching loaders.
with open("../../../../QAQC_Sandbox/user_info.yaml") as user_info_file:
    userinfo = yaml.load(user_info_file, Loader=yaml.FullLoader)
username = userinfo["apiname"]
token = userinfo["apikey"]

#### Connect to OOINet

In [5]:
# Instantiate the M2M client with the OOINet credentials; every OOINet.* call
# later in this notebook goes through this object.
OOINet = M2M(username, token)

---
## Datasets

Identify all of the OOI-CGSN **```PCO2W```**, **```PCO2A```**, and **```PHSEN```** datasets that are located at the Global Irminger Array. This is done by querying OOINet and iteratively walking through all of the API endpoints. The results are saved into a csv file so this step doesn't have to be repeated each time.

Check to see if the reference designators have already been identified. If they haven't been previously downloaded, we can use the ```OOINet.search_datasets``` function to search for the datasets associated with each instrument.

#### PCO2W

In [6]:
try:
    # If the reference designators were previously identified and downloaded
    pco2w_datasets = pd.read_csv("../data/pco2w_datasets.csv")
except (FileNotFoundError, pd.errors.EmptyDataError):
    # No usable cache yet (missing or empty file): search for PCO2W datasets,
    # asking for English names. Any other error (e.g. a keyboard interrupt or a
    # permission problem) is allowed to surface instead of being swallowed.
    pco2w_datasets = OOINet.search_datasets(instrument="PCO2W", English_names=True)

    # Save the datasets locally to speed up future runs
    os.makedirs("../data", exist_ok=True)
    pco2w_datasets.to_csv("../data/pco2w_datasets.csv", index=False)

# Print out the head
pco2w_datasets.head()

Unnamed: 0,array,array_name,node,node_name,instrument,instrument_name,refdes,url,deployments
0,RS03AXPS,Cabled Axial Seamount Axial Base Shallow Profi...,SF03A,Shallow Profiler (SF03A),4F-PCO2WA301,pCO2 Water,RS03AXPS-SF03A-4F-PCO2WA301,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7]"
1,RS01SBPS,Cabled Continental Margin Oregon Slope Base Sh...,SF01A,Shallow Profiler (SF01A),4F-PCO2WA101,pCO2 Water,RS01SBPS-SF01A-4F-PCO2WA101,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7, 8, 9]"
2,GS01SUMO,Global Southern Ocean Apex Surface Mooring,RII11,Mooring Riser,02-PCO2WC053,pCO2 Water (130 meters),GS01SUMO-RII11-02-PCO2WC053,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4]"
3,GS01SUMO,Global Southern Ocean Apex Surface Mooring,RII11,Mooring Riser,02-PCO2WC052,pCO2 Water (80 meters),GS01SUMO-RII11-02-PCO2WC052,https://ooinet.oceanobservatories.org/api/m2m/...,"[2, 3, 4]"
4,GS01SUMO,Global Southern Ocean Apex Surface Mooring,RII11,Mooring Riser,02-PCO2WC051,pCO2 Water (40 meters),GS01SUMO-RII11-02-PCO2WC051,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4]"


#### PHSEN

In [7]:
try:
    # If the reference designators were previously identified and downloaded
    phsen_datasets = pd.read_csv("../data/phsen_datasets.csv")
except (FileNotFoundError, pd.errors.EmptyDataError):
    # No usable cache yet (missing or empty file): search for PHSEN datasets,
    # asking for full English names. Any other error is allowed to surface
    # instead of being swallowed.
    phsen_datasets = OOINet.search_datasets(instrument="PHSEN", English_names=True)

    # Save the datasets locally to speed up future runs
    os.makedirs("../data", exist_ok=True)
    phsen_datasets.to_csv("../data/phsen_datasets.csv", index=False)

# Print out the head
phsen_datasets.head()

Unnamed: 0,array,array_name,node,node_name,instrument,instrument_name,refdes,url,deployments
0,RS03AXPS,Cabled Axial Seamount Axial Base Shallow Profi...,SF03A,Shallow Profiler (SF03A),2D-PHSENA301,Seawater pH,RS03AXPS-SF03A-2D-PHSENA301,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7]"
1,RS03AXPS,Cabled Axial Seamount Axial Base Shallow Profi...,PC03A,200m Platform (PC03A),4B-PHSENA302,Seawater pH,RS03AXPS-PC03A-4B-PHSENA302,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7]"
2,RS01SBPS,Cabled Continental Margin Oregon Slope Base Sh...,SF01A,Shallow Profiler (SF01A),2D-PHSENA101,Seawater pH,RS01SBPS-SF01A-2D-PHSENA101,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7, 8, 9]"
3,RS01SBPS,Cabled Continental Margin Oregon Slope Base Sh...,PC01A,200m Platform (PC01A),4B-PHSENA102,Seawater pH,RS01SBPS-PC01A-4B-PHSENA102,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7, 8]"
4,GS03FLMB,Global Southern Ocean Flanking Subsurface Moor...,RIS01,Mooring Riser,04-PHSENF000,Seawater pH,GS03FLMB-RIS01-04-PHSENF000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3]"


#### PCO2A

In [8]:
try:
    # If the reference designators were previously identified and downloaded
    pco2a_datasets = pd.read_csv("../data/pco2a_datasets.csv")
except (FileNotFoundError, pd.errors.EmptyDataError):
    # No usable cache yet (missing or empty file): search for PCO2A datasets,
    # asking for English names. Any other error is allowed to surface instead
    # of being swallowed.
    pco2a_datasets = OOINet.search_datasets(instrument="PCO2A", English_names=True)

    # Save the datasets locally to speed up future runs
    os.makedirs("../data", exist_ok=True)
    pco2a_datasets.to_csv("../data/pco2a_datasets.csv", index=False)

# Print out the head
pco2a_datasets.head()

Unnamed: 0,array,array_name,node,node_name,instrument,instrument_name,refdes,url,deployments
0,GS01SUMO,Global Southern Ocean Apex Surface Mooring,SBD12,Surface Buoy,04-PCO2AA000,pCO2 Air-Sea,GS01SUMO-SBD12-04-PCO2AA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4]"
1,GS01SUMO,Global Southern Ocean Apex Surface Mooring,SBD11,Surface Buoy,03-PCO2AA000,pCO2 Air-Sea,GS01SUMO-SBD11-03-PCO2AA000,https://ooinet.oceanobservatories.org/api/m2m/...,[4]
2,GI01SUMO,Global Irminger Sea Apex Surface Mooring,SBD12,Surface Buoy,04-PCO2AA000,pCO2 Air-Sea,GI01SUMO-SBD12-04-PCO2AA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7]"
3,GA01SUMO,Global Argentine Basin Apex Surface Mooring,SBD12,Surface Buoy,04-PCO2AA000,pCO2 Air-Sea,GA01SUMO-SBD12-04-PCO2AA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3]"
4,CP04OSSM,Coastal Pioneer Offshore Surface Mooring,SBD12,Surface Buoy,04-PCO2AA000,pCO2 Air-Sea,CP04OSSM-SBD12-04-PCO2AA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]"


Filter the datasets for the Irminger Array datasets, which start with the prefix "GI" for Global Irminger

In [9]:
# Keep only the Global Irminger Array entries: their array code starts with "GI".
# The vectorized .str accessor replaces the per-row lambda, and na=False treats a
# missing array code as "not Irminger" instead of raising on it.

# PCO2A
mask = pco2a_datasets["array"].str.startswith("GI", na=False)
pco2a_datasets = pco2a_datasets[mask]

# PCO2W
mask = pco2w_datasets["array"].str.startswith("GI", na=False)
pco2w_datasets = pco2w_datasets[mask]

# PHSEN
mask = phsen_datasets["array"].str.startswith("GI", na=False)
phsen_datasets = phsen_datasets[mask]

#### CTD & METBK
We will also need the temperature, salinity, and pressure data associated with the carbon system datasets. So we will also identify all the **```CTD```** datasets located at the Global Irminger Array as well as the **```METBK```** dataset for the surface mooring, which has the surface temperature and salinity.

In [10]:
try:
    # If the reference designators were previously identified and downloaded
    ctd_datasets = pd.read_csv("../data/ctd_datasets.csv")
except (FileNotFoundError, pd.errors.EmptyDataError):
    # No usable cache yet (missing or empty file): search for CTD datasets,
    # asking for English names. Any other error is allowed to surface instead
    # of being swallowed.
    ctd_datasets = OOINet.search_datasets(instrument="CTD", English_names=True)

    # Save the datasets locally to speed up future runs
    os.makedirs("../data", exist_ok=True)
    ctd_datasets.to_csv("../data/ctd_datasets.csv", index=False)

# Print out the head
ctd_datasets.head()

Unnamed: 0,array,array_name,node,node_name,instrument,instrument_name,refdes,url,deployments
0,RS03ECAL,Cabled Axial Seamount Eastern Caldera,MJ03E,Medium-Power JBox (MJ03E),12-CTDPFB306,CTD,RS03ECAL-MJ03E-12-CTDPFB306,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2]"
1,RS03AXPS,Cabled Axial Seamount Axial Base Shallow Profi...,SF03A,Shallow Profiler (SF03A),2A-CTDPFA302,CTD,RS03AXPS-SF03A-2A-CTDPFA302,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7]"
2,RS03AXPS,Cabled Axial Seamount Axial Base Shallow Profi...,PC03A,200m Platform (PC03A),4A-CTDPFA303,CTD,RS03AXPS-PC03A-4A-CTDPFA303,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7]"
3,RS03AXPD,Cabled Axial Seamount Axial Base Deep Profiler...,DP03A,Wire-Following Profiler (DP03A),01-CTDPFL304,CTD,RS03AXPD-DP03A-01-CTDPFL304,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7]"
4,RS03AXBS,Cabled Axial Seamount Axial Base Seafloor,LJ03A,Low-Power JBox (LJ03A),12-CTDPFB301,CTD,RS03AXBS-LJ03A-12-CTDPFB301,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7, 8]"


Filter the datasets for the Irminger Array datasets, which start with the prefix "GI" for Global Irminger. For the **```CTD```** datasets, we also need to remove the Mobile Asset and Profiler datasets.

In [11]:
# Identify the Global Irminger Array datasets (array code starts with "GI").
# na=False treats a missing value as a non-match instead of raising on it.
mask = ctd_datasets["array"].str.startswith("GI", na=False)
ctd_datasets = ctd_datasets[mask]

# Remove datasets which are either Glider, AUV, or Profiler Mooring datasets,
# i.e. any refdes containing "MOAS" or "CTDPF"; mask is True for rows to keep.
mask = ~ctd_datasets["refdes"].str.contains("MOAS|CTDPF", regex=True, na=False)
ctd_datasets = ctd_datasets[mask]

In [12]:
try:
    # If the reference designators were previously identified and downloaded
    metbk_datasets = pd.read_csv("../data/metbk_datasets.csv")
except (FileNotFoundError, pd.errors.EmptyDataError):
    # No usable cache yet (missing or empty file): search for METBK datasets,
    # asking for English names. Any other error is allowed to surface instead
    # of being swallowed.
    metbk_datasets = OOINet.search_datasets(instrument="METBK", English_names=True)

    # Save the datasets locally to speed up future runs
    os.makedirs("../data", exist_ok=True)
    metbk_datasets.to_csv("../data/metbk_datasets.csv", index=False)

# Print out the head
metbk_datasets.head()

Unnamed: 0,array,array_name,node,node_name,instrument,instrument_name,refdes,url,deployments
0,GS01SUMO,Global Southern Ocean Apex Surface Mooring,SBD12,Surface Buoy,06-METBKA000,Bulk Meteorology Instrument Package,GS01SUMO-SBD12-06-METBKA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4]"
1,GS01SUMO,Global Southern Ocean Apex Surface Mooring,SBD11,Surface Buoy,06-METBKA000,Bulk Meteorology Instrument Package,GS01SUMO-SBD11-06-METBKA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4]"
2,GI01SUMO,Global Irminger Sea Apex Surface Mooring,SBD12,Surface Buoy,06-METBKA000,Bulk Meteorology Instrument Package,GI01SUMO-SBD12-06-METBKA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7]"
3,GI01SUMO,Global Irminger Sea Apex Surface Mooring,SBD11,Surface Buoy,06-METBKA000,Bulk Meteorology Instrument Package,GI01SUMO-SBD11-06-METBKA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7]"
4,GA01SUMO,Global Argentine Basin Apex Surface Mooring,SBD12,Surface Buoy,06-METBKA000,Bulk Meteorology Instrument Package,GA01SUMO-SBD12-06-METBKA000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3]"


Filter the datasets for the Global Irminger dataset

In [13]:
# Keep only the Global Irminger Array entries (array code starts with "GI").
# The vectorized .str accessor replaces the per-row lambda, and na=False treats
# a missing array code as a non-match instead of raising on it.
mask = metbk_datasets["array"].str.startswith("GI", na=False)
metbk_datasets = metbk_datasets[mask]

---
## Download Datasets
Now, download the PCO2A, PHSEN, and PCO2W datasets along with their associated CTD datasets from OOINet and save locally for ease of access. We can scroll through the available datasets to identify which CTD datasets are associated with each carbon system instrument; the resulting pairings are listed below.

### Irminger Array
* GI01SUMO: Apex Surface Mooring
    * SBD12: Surface Buoy
        * PCO2AA: pCO2 Air-Sea (refdes = GI01SUMO-SBD12-04-PCO2AA000)
        * METBKA: Bulk Meteorology Instrument Package (refdes = GI01SUMO-SBD12-06-METBKA000)
    * RID16: Near-Surface Instrument Frame
        * PCO2WB: pCO2 Water (refdes = GI01SUMO-RID16-05-PCO2WB000)
        * CTDBPF: CTD (refdes = GI01SUMO-RID16-03-CTDBPF000)
    * RII11: Mooring Riser
        * PCO2WC: pCO2 Water (40 meters) (refdes = GI01SUMO-RII11-02-PCO2WC051)
        * CTDMOQ: CTD (40 meters) (refdes = GI01SUMO-RII11-02-CTDMOQ031)
        * PCO2WC: pCO2 Water (80 meters) (refdes = GI01SUMO-RII11-02-PCO2WC052)
        * CTDBPP: CTD (80 meters) (refdes = GI01SUMO-RII11-02-CTDBPP032)
        * PCO2WC: pCO2 Water (130 meters) (refdes = GI01SUMO-RII11-02-PCO2WC053)
        * CTDBPP: CTD (130 meters) (refdes = GI01SUMO-RII11-02-CTDBPP033)
        * PHSENE: Seawater pH (20 meters) (refdes = GI01SUMO-RII11-02-PHSENE041)
        * CTDMOQ: CTD (20 meters) (refdes = GI01SUMO-RII11-02-CTDMOQ011)
        * PHSENE: Seawater pH (100 meters) (refdes = GI01SUMO-RII11-02-PHSENE042)
        * CTDMOQ: CTD (100 meters) (refdes = GI01SUMO-RII11-02-CTDMOQ013)
* GI03FLMA: Flanking Subsurface Mooring A
    * RIS01: Mooring Riser
        * PHSENF: Seawater pH (refdes = GI03FLMA-RIS01-04-PHSENF000)
        * CTDMOG: CTD (30 meters) (refdes = GI03FLMA-RIM01-02-CTDMOG040)
* GI03FLMB: Flanking Subsurface Mooring B
    * RIS01: Mooring Riser
        * PHSENF: Seawater pH (refdes = GI03FLMB-RIS01-04-PHSENF000)
        * CTDMOG: CTD (30 meters) (refdes = GI03FLMB-RIM01-02-CTDMOG060)

### Surface Buoy: PCO2A (GI01SUMO-SBD12-04-PCO2AA000)

In [None]:
# Reference designator of the surface-buoy PCO2A instrument; the cells in this
# section read it to query metadata, streams and data.
refdes = "GI01SUMO-SBD12-04-PCO2AA000"

In [None]:
# Summarize the instrument metadata as one row per (refdes, method, stream):
#   1. group the metadata rows by those three keys,
#   2. reduce every remaining column to the list of its unique values,
#   3. unwrap values of length one so single entries are shown bare.
metadata = (
    OOINet.get_metadata(refdes)
    .groupby(by=["refdes", "method", "stream"])
    .agg(lambda column: pd.unique(column.values.ravel()).tolist())
    .reset_index()
    .applymap(lambda cell: cell[0] if len(cell) == 1 else cell)
)
metadata

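When this workflow is used for the METBK data streams (rather than the PCO2A streams requested in this section), we don't want the hourly datastreams, which are computed flux products. We want the normal METBK data with the temperature, salinity, air pressure, etc. The corresponding filter is left commented out in the cell below.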

In [None]:
# Query OOINet for the datastreams of the current refdes
# (presumably the method/stream values chosen further down are picked from this table - confirm)
datastreams = OOINet.get_datastreams(refdes)

# For METBK: Drop the hourly data streams
# NOTE(review): the two lines below are commented out, so nothing is dropped here;
# refdes in this section is the PCO2A instrument, not METBK.
#mask = datastreams["stream"].apply(lambda x: True if "hourly" not in x else False)
#datastreams = datastreams[mask]
datastreams

Download the datasets

In [None]:
# Delivery method and stream name to request for the current refdes; both are
# reused below for the THREDDS request, the file-name filter and the save path.
method = "recovered_host"
stream = "pco2a_a_dcl_instrument_water_recovered"

Get the THREDDS url

In [None]:
# Ask OOINet for the THREDDS url of the selected refdes/method/stream and display it
thredds_url = OOINet.get_thredds_url(refdes, method, stream)
thredds_url

Get the catalog for the THREDDS requested data

In [None]:
# Fetch the THREDDS catalog, parse it for the relevant netCDF files while
# excluding the "gps" and "blank" entries, and sort the result so the file
# list is shown in a stable order.
catalog = sorted(
    OOINet.parse_catalog(
        OOINet.get_thredds_catalog(thredds_url),
        exclude=["gps", "blank"],
    )
)
catalog

In [None]:
# Keep only the catalog entries whose file name (the part after the last "/")
# contains the requested stream name
netCDF_datasets = [
    dset_path for dset_path in catalog if stream in dset_path.split("/")[-1]
]
netCDF_datasets

In [None]:
# NOTE(review): load_netCDF is not defined or imported anywhere in the visible part
# of this notebook, so this cell raises NameError on a fresh kernel unless the
# function is provided elsewhere - confirm where it is supposed to come from.
data = load_netCDF(netCDF_datasets)
data            

In [None]:
# Build the output name from the current selection instead of repeating it as a
# literal, so the file name cannot drift out of sync with refdes/method/stream.
# For the values set in this section this resolves to
# ../data/GI01SUMO-SBD12-04-PCO2AA000-recovered_host-pco2a_a_dcl_instrument_water_recovered.nc
data.to_netcdf(f"../data/{refdes}-{method}-{stream}.nc")

In [None]:
# Drop every variable whose name contains "time", except the "time" variable
# itself, in a single drop_vars call
data = data.drop_vars(
    [name for name in data.variables if "time" in name and name != "time"]
)

In [None]:
data

In [None]:
# Set up the directory to save the datasets to.
# exist_ok=True makes the call a no-op when the directory already exists, which
# replaces the isdir()/pass/else check and its check-then-create race.
savedir = f"../data/{refdes}/{method}/{stream}"
os.makedirs(savedir, exist_ok=True)

# Download the datasets
OOINet.download_netCDF_files(netCDF_datasets, save_dir=savedir)

# Get the annotations for the associated refdes, method, stream and store them
# next to the downloaded files
annotations = OOINet.get_annotations(refdes, method=method, stream=stream)
annotations.to_csv(f"{savedir}/annotations.csv", index=False)

### Near-Surface Instrument Frame

#### PCO2WB  (GI01SUMO-RID16-05-PCO2WB000)

In [None]:
# Reference designator of the PCO2WB instrument on the near-surface instrument
# frame; the cells in this subsection read it.
refdes = "GI01SUMO-RID16-05-PCO2WB000"

In [None]:
# Summarize the instrument metadata as one row per (refdes, method, stream):
#   1. group the metadata rows by those three keys,
#   2. reduce every remaining column to the list of its unique values,
#   3. unwrap values of length one so single entries are shown bare.
metadata = (
    OOINet.get_metadata(refdes)
    .groupby(by=["refdes", "method", "stream"])
    .agg(lambda column: pd.unique(column.values.ravel()).tolist())
    .reset_index()
    .applymap(lambda cell: cell[0] if len(cell) == 1 else cell)
)
metadata

In [None]:
# Query OOINet for the datastreams of the current refdes and display them
# (presumably the method/stream values chosen in the next cell are picked from this table - confirm)
datastreams = OOINet.get_datastreams(refdes)
datastreams

Download the datasets

In [None]:
# Delivery method and stream name to request for the current refdes; both are
# reused below for the THREDDS request, the file-name filter and the save path.
method = "telemetered"
stream = "pco2w_abc_dcl_instrument"

In [None]:
# Ask OOINet for the THREDDS url of the selected refdes/method/stream and display it
thredds_url = OOINet.get_thredds_url(refdes, method, stream)
thredds_url

In [None]:
# Fetch the THREDDS catalog, parse it for the relevant netCDF files while
# excluding the "gps" and "blank" entries, and sort the result so the file
# list is shown in a stable order.
catalog = sorted(
    OOINet.parse_catalog(
        OOINet.get_thredds_catalog(thredds_url),
        exclude=["gps", "blank"],
    )
)
catalog

In [None]:
# Keep only the catalog entries whose file name (the part after the last "/")
# contains the requested stream name
netCDF_datasets = [
    dset_path for dset_path in catalog if stream in dset_path.split("/")[-1]
]
netCDF_datasets

In [None]:
# Set up the directory to save the datasets to.
# exist_ok=True makes the call a no-op when the directory already exists, which
# replaces the isdir()/pass/else check and its check-then-create race.
savedir = f"../data/{refdes}/{method}/{stream}"
os.makedirs(savedir, exist_ok=True)

# Download the datasets
OOINet.download_netCDF_files(netCDF_datasets, save_dir=savedir)

# Get the annotations for the associated refdes, method, stream and store them
# next to the downloaded files
annotations = OOINet.get_annotations(refdes, method=method, stream=stream)
annotations.to_csv(f"{savedir}/annotations.csv", index=False)

#### CTDBPF (GI01SUMO-RID16-03-CTDBPF000)

In [None]:
# Reference designator of the CTDBPF instrument on the near-surface instrument
# frame; the cells in this subsection read it.
refdes = "GI01SUMO-RID16-03-CTDBPF000"

In [None]:
# Summarize the instrument metadata as one row per (refdes, method, stream):
#   1. group the metadata rows by those three keys,
#   2. reduce every remaining column to the list of its unique values,
#   3. unwrap values of length one so single entries are shown bare.
metadata = (
    OOINet.get_metadata(refdes)
    .groupby(by=["refdes", "method", "stream"])
    .agg(lambda column: pd.unique(column.values.ravel()).tolist())
    .reset_index()
    .applymap(lambda cell: cell[0] if len(cell) == 1 else cell)
)
metadata

In [None]:
# Query OOINet for the datastreams of the current refdes and display them
# (presumably the method/stream values chosen in the next cell are picked from this table - confirm)
datastreams = OOINet.get_datastreams(refdes)
datastreams

In [None]:
# Delivery method and stream name to request for the current refdes; both are
# reused below for the THREDDS request, the file-name filter and the save path.
method = "telemetered"
stream = "ctdbp_cdef_dcl_instrument"

In [None]:
# Ask OOINet for the THREDDS url of the selected refdes/method/stream and display it
thredds_url = OOINet.get_thredds_url(refdes, method, stream)
thredds_url

In [None]:
# Fetch the THREDDS catalog, parse it for the relevant netCDF files while
# excluding the "gps" and "blank" entries, and sort the result so the file
# list is shown in a stable order.
catalog = sorted(
    OOINet.parse_catalog(
        OOINet.get_thredds_catalog(thredds_url),
        exclude=["gps", "blank"],
    )
)
catalog

In [None]:
# Keep only the catalog entries whose file name (the part after the last "/")
# contains the requested stream name
netCDF_datasets = [
    dset_path for dset_path in catalog if stream in dset_path.split("/")[-1]
]
netCDF_datasets

In [None]:
# Set up the directory to save the datasets to.
# exist_ok=True makes the call a no-op when the directory already exists, which
# replaces the isdir()/pass/else check and its check-then-create race.
savedir = f"../data/{refdes}/{method}/{stream}"
os.makedirs(savedir, exist_ok=True)

# Download the datasets
OOINet.download_netCDF_files(netCDF_datasets, save_dir=savedir)

# Get the annotations for the associated refdes, method, stream and store them
# next to the downloaded files
annotations = OOINet.get_annotations(refdes, method=method, stream=stream)
annotations.to_csv(f"{savedir}/annotations.csv", index=False)