# Example Notebook:
This notebook provides an example of how to use the OOINet download tool to perform the following functions:
* Search for datasets
* Identify desired reference designator
* Get the associated metadata for a given reference designator
* Request netCDF datasets for a reference designator
* Download the netCDF dataset to your local machine

The key parameter which the OOINet API requires is the "reference designator." A reference designator may be thought of as a type of instrument located at a fixed location and depth. For example, below we use **CP01CNSM-RID27-03-CTDBPC000**, which is the CTD located at 7 meters depth on the Pioneer Array Central Surface Mooring at approximately (latitude, longitude) of (40.14, -70.7783).

In [5]:
# This is necessary if not installed as a package
import sys
sys.path.append("../ooinet/")

In [4]:
import m2m as M2M

In [25]:
# What libraries are needed for OOINet object?
import re
import os
import time
import requests
import datetime
import numpy as np
import pandas as pd
import xarray as xr
from xml.dom import minidom
from urllib.request import urlopen
from urllib.request import urlretrieve

In [7]:
class OOINet():

    def __init__(self, USERNAME, TOKEN):

        self.username = USERNAME
        self.token = TOKEN
        self.urls = {
            'data': 'https://ooinet.oceanobservatories.org/api/m2m/12576/sensor/inv',
            'anno': 'https://ooinet.oceanobservatories.org/api/m2m/12580/anno/find',
            'vocab': 'https://ooinet.oceanobservatories.org/api/m2m/12586/vocab/inv',
            'asset': 'https://ooinet.oceanobservatories.org/api/m2m/12587',
            'deploy': 'https://ooinet.oceanobservatories.org/api/m2m/12587/events/deployment/inv',
            'preload': 'https://ooinet.oceanobservatories.org/api/m2m/12575/parameter',
            'cal': 'https://ooinet.oceanobservatories.org/api/m2m/12587/asset/cal'
        }

    def _get_api(self, url):
        """Request the given url from OOINet."""
        r = requests.get(url, auth=(self.username, self.token))
        data = r.json()
        return data

    def _ntp_seconds_to_datetime(self, ntp_seconds):
        """Convert OOINet timestamps to unix-convertable timestamps."""
        # Specify some constant needed for timestamp conversions
        ntp_epoch = datetime.datetime(1900, 1, 1)
        unix_epoch = datetime.datetime(1970, 1, 1)
        ntp_delta = (unix_epoch - ntp_epoch).total_seconds()

        return datetime.datetime.utcfromtimestamp(ntp_seconds - ntp_delta)

    def _convert_time(self, ms):
        if ms is None:
            return None
        else:
            return datetime.datetime.utcfromtimestamp(ms/1000)

    def get_metadata(self, refdes):
        """
        Get the OOI Metadata for a specific instrument specified by its
        associated reference designator.

            Args:
                refdes (str): OOINet standardized reference designator in the
                    form of <array>-<node>-<instrument>.

            Returns:
                results (pandas.DataFrame): A dataframe with the relevant
                    metadata of the given reference designator.
        """
        # First, construct the metadata request url
        array, node, instrument = refdes.split("-", 2)
        metadata_request_url = "/".join((self.urls["data"], array, node,
                                         instrument, "metadata"))

        # Request the metadata
        metadata = self._get_api(metadata_request_url)

        # Parse the metadata
        metadata = self.parse_metadata(metadata)

        # Add in the reference designator
        metadata["refdes"] = refdes

        # Return the metadata
        return metadata

    def parse_metadata(self, metadata):
        """
        Parse the metadata dictionary for an instrument returned by OOI into
        a pandas dataframe.
        """
        # Put the two keys into separate dataframes
        metadata_times = pd.DataFrame(metadata["times"])
        metadata_parameters = pd.DataFrame(metadata["parameters"])

        # Merge the two into a single dataframe
        results = metadata_parameters.merge(metadata_times, left_on="stream",
                                            right_on="stream")
        results.drop_duplicates(inplace=True)

        # Return the results
        return results

    def get_deployments(self, refdes, deploy_num="-1", results=pd.DataFrame()):
        """
        Get the deployment information for an instrument. Defaults to all
        deployments for a given instrument (reference designator) unless one is
        supplied.

        Args:
            refdes (str): The reference designator for the instrument for which
                to request deployment information.
            deploy_num (str): Optional to include a specific deployment number.
                Otherwise defaults to -1 which is all deployments.
            results (pandas.DataFrame): Optional. Useful for recursive
                applications for gathering deployment information for multiple
                instruments.

        Returns:
            results (pandas.DataFrame): A table of the deployment information
                for the given instrument (reference designator) with deployment
                number, deployed water depth, latitude, longitude, start of
                deployment, end of deployment, and cruise IDs for the
                deployment and recovery.

        """

        # First, build the request
        array, node, instrument = refdes.split("-", 2)
        deploy_url = "/".join((self.urls["deploy"], array, node, instrument,
                               deploy_num))

        # Next, get the deployments from the deploy url. The API returns a list
        # of dictionary objects with the deployment data.
        deployments = self._get_api(deploy_url)

        # Now, iterate over the deployment list and get the associated data for
        # each individual deployment
        while len(deployments) > 0:
            # Get a single deployment
            deployment = deployments.pop()

            # Process the dictionary data
            # Deployment Number
            deploymentNumber = deployment.get("deploymentNumber")

            # Location info
            location = deployment.get("location")
            depth = location["depth"]
            lat = location["latitude"]
            lon = location["longitude"]

            # Start and end times of the deployments
            startTime = self._convert_time(deployment.get("eventStartTime"))
            stopTime = self._convert_time(deployment.get("eventStopTime"))

            # Cruise IDs of the deployment and recover cruises
            deployCruiseInfo = deployment.get("deployCruiseInfo")
            recoverCruiseInfo = deployment.get("recoverCruiseInfo")
            if deployCruiseInfo is not None:
                deployID = deployCruiseInfo["uniqueCruiseIdentifier"]
            else:
                deployID = None
            if recoverCruiseInfo is not None:
                recoverID = recoverCruiseInfo["uniqueCruiseIdentifier"]
            else:
                recoverID = None

            # Put the data into a pandas dataframe
            data = np.array([[deploymentNumber, lat, lon, depth, startTime,
                            stopTime, deployID, recoverID]])
            columns = ["deploymentNumber", "latitude", "longitude", "depth",
                       "deployStart", "deployEnd", "deployCruise",
                       "recoverCruise"]
            df = pd.DataFrame(data=data, columns=columns)

            #
            results = results.append(df)

        return results

    def get_vocab(self, refdes):
        """
        Return the OOI vocabulary for a given url endpoint. The vocab results
            contains info about the reference designator, names of the

        Args:
            refdes (str): The reference designator for the instrument for which
                to request vocab information.

        Returns:
            results (pandas.DataFrame): A table of the vocab information for
                the given reference designator.

        """
        # First, construct the vocab request url
        array, node, instrument = refdes.split("-", 2)
        vocab_url = "/".join((self.urls["vocab"], array, node, instrument))

        # Next, get the vocab data
        data = self._get_api(vocab_url)

        # Put the returned vocab data into a pandas dataframe
        vocab = pd.DataFrame()
        vocab = vocab.append(data)

        # Finally, return the results
        return vocab

    def get_datasets(self, search_url, datasets=pd.DataFrame(), **kwargs):
        """Search OOINet for available datasets for a url."""
        # Check if the method is attached to the url
        flag = ("inv" == search_url.split("/")[-4])
        # inst = re.search("[0-9]{2}-[023A-Z]{6}[0-9]{3}", search_url)
        # inst = re.search("[0-9]{2}-", search_url)

        # This means you are at the end-point
        if flag is True:
            # Get the reference designator info
            array, node, instrument = search_url.split("/")[-3:]
            refdes = "-".join((array, node, instrument))

            # Get the available deployments
            deploy_url = "/".join((self.urls["deploy"], array, node,
                                  instrument))
            deployments = self._get_api(deploy_url)

            # Put the data into a dictionary
            info = pd.DataFrame(data=np.array([[array, node, instrument,
                                refdes, search_url, deployments]]),
                                columns=["array", "node", "instrument",
                                         "refdes", "url", "deployments"])
            # add the dictionary to the dataframe
            datasets = datasets.append(info, ignore_index=True)

        else:
            endpoints = self._get_api(search_url)

            while len(endpoints) > 0:

                # Get one endpoint
                new_endpoint = endpoints.pop()

                # Build the new request url
                new_search_url = "/".join((search_url, new_endpoint))

                # Get the datasets for the new given endpoint
                datasets = self.get_datasets(new_search_url, datasets)

        # Once recursion is done, return the datasets
        return datasets

    def search_datasets(self, array=None, node=None, instrument=None,
                        English_names=False):
        """
        Wrapper around get_datasets to make the construction of the
        url simpler. Eventual goal is to use this as a search tool.

            Args:
                array (str): OOI abbreviation for a particular buoy on an array
                    (e.g. Pioneer Central Surface Mooring = CP01CNSM)
                node (str): Partial or full OOI abbreviation for a node on a
                    buoy to search for (e.g. Multi-Function Node = MFD)
                instrument (str): Partial or full OOI abbreviation for a
                    particular instrument type to search for (e.g. CTD)
                English_names (bool): Set to True if the descriptive names
                    associated with the given array/node/instrument are wanted.

            Returns:
                datasets (pandas.DataFrame): A dataframe of all the OOI
                    datasets which match the given search terms. If no search
                    terms are entered, will return every dataset available in
                    OOINet (slow).
        """

        # Build the request url
        dataset_url = f'{self.urls["data"]}/{array}/{node}/{instrument}'

        # Truncate the url at the first "none"
        dataset_url = dataset_url[:dataset_url.find("None")-1]

        print(dataset_url)
        # Get the datasets
        datasets = self.get_datasets(dataset_url)

        # Now, it node is not None, can filter on that
        if node is not None:
            mask = datasets["node"].apply(lambda x: True if node
                                          in x else False)
            datasets = datasets[mask]

        # If instrument is not None
        if instrument is not None:
            mask = datasets["instrument"].apply(lambda x: True if instrument
                                                in x else False)
            datasets = datasets[mask]

        # Check if they want the English names for the associated datasets
        if English_names:
            vocab = {
                "refdes": [],
                "array_name": [],
                "node_name": [],
                "instrument_name": []
            }

            # Iterate through the given reference designators
            for refdes in datasets["refdes"]:
                # Request the vocab for the given reference designator
                refdes_vocab = self.get_vocab(refdes)

                # Check if it returns an empty dataframe - then fill with NaNs
                if len(refdes_vocab) == 0:
                    vocab["refdes"].append(refdes)
                    vocab["array_name"].append(None)
                    vocab["node_name"].append(None)
                    vocab["instrument_name"].append(
                        refdes_vocab["instrument"].iloc[0])

                # Parse the refdes-specific vocab
                vocab["refdes"].append(refdes)
                vocab["array_name"].append(refdes_vocab["tocL1"].iloc[0] + " "
                                           + refdes_vocab["tocL2"].iloc[0])
                vocab["node_name"].append(refdes_vocab["tocL3"].iloc[0])
                vocab["instrument_name"].append(
                                            refdes_vocab["instrument"].iloc[0])

            # Merge the results with the datasets
            vocab = pd.DataFrame(vocab)
            datasets = datasets.merge(vocab, left_on="refdes",
                                      right_on="refdes")
            # Sort the datasets
            columns = ["array", "array_name", "node", "node_name", "instrument",
                       "instrument_name", "refdes", "url", "deployments"]
            datasets = datasets[columns]

        return datasets

    def get_datastreams(self, refdes):
        """Retrieve methods and data streams for a reference designator."""
        # Build the url
        array, node, instrument = refdes.split("-", 2)
        method_url = "/".join((self.urls["data"], array, node, instrument))

        # Build a table linking the reference designators, methods, and data
        # streams
        stream_df = pd.DataFrame(columns=["refdes", "method", "stream"])
        methods = self._get_api(method_url)
        for method in methods:
            if "bad" in method:
                continue
            stream_url = "/".join((method_url, method))
            streams = self._get_api(stream_url)
            stream_df = stream_df.append({
                "refdes": refdes,
                "method": method,
                "stream": streams
            }, ignore_index=True)

        # Expand so that each row of the dataframe is unique
        stream_df = stream_df.explode('stream').reset_index(drop=True)

        # Return the results
        return stream_df

    def get_parameter_data_levels(self, metadata):
        """
        Get the data levels associated with the parameters for a given
        reference designator.

            Args:
                metadata (pandas.DataFrame): a dataframe which contains the
                    metadata for a given reference designator.

            Returns:
                pid_dict (dict): a dictionary with the data levels for each
                    parameter id (Pid)
        """

        pdIds = np.unique(metadata["pdId"])
        pid_dict = {}
        for pid in pdIds:
            # Build the preload url
            preload_url = "/".join((self.urls["preload"], pid.strip("PD")))
            # Query the preload data
            preload_data = self._get_api(preload_url)
            data_level = preload_data.get("data_level")
            # Update the results dictionary
            pid_dict.update({pid: data_level})

        return pid_dict

    def filter_parameter_ids(self, pdId, pid_dict):
        """Filter for processed data products."""
        # Check if pdId should be kept
        data_level = pid_dict.get(pdId)
        if data_level == 1:
            return True
        else:
            return False

    def get_thredds_url(self, refdes, method, stream, **kwargs):
        """
        Return the url for the THREDDS server for the desired dataset(s).

            Args:
                refdes (str): reference designator for the instrument
                method (str): the method (i.e. telemetered) for the given
                              reference designator
                stream (str): the stream associated with the reference
                              designator and method

            Kwargs: optional parameters to pass to OOINet API to limit the
                    results of the query
                beginDT (str): limit the data request to only data after this
                    date.
                endDT (str): limit the data request to only data before this
                    date.
                format (str): e.g. "application/netcdf" (the default)
                include_provenance (str): 'true' returns a text file with the
                    provenance information
                include_annotations (str): 'true' returns a separate text file
                    with annotations for the date range

            Returns:
                thredds_url (str): a url to the OOI Thredds server which
                    contains the desired datasets
        """
        # Build the data request url
        array, node, instrument = refdes.split("-", 2)
        data_request_url = "/".join((self.urls["data"], array, node,
                                     instrument, method, stream))

        # Ensure proper datetime format for the request
        if 'beginDT' in kwargs.keys():
            kwargs['beginDT'] = pd.to_datetime(kwargs['beginDT']).strftime(
                                '%Y-%m-%dT%H:%M:%S.%fZ')
        if 'endDT' in kwargs.keys():
            kwargs['endDT'] = pd.to_datetime(kwargs['endDT']).strftime(
                                '%Y-%m-%dT%H:%M:%S.%fZ')

        # Build the query
        params = kwargs

        # Request the data
        r = requests.get(data_request_url, params=params, auth=(self.username,
                         self.token))
        if r.status_code == 200:
            data_urls = r.json()
        else:
            print(r.reason)
            return None

        # The asynchronous data request is contained in the 'allURLs' key,
        # in which we want to find the url to the thredds server
        for d in data_urls['allURLs']:
            if 'thredds' in d:
                thredds_url = d

        return thredds_url

    def _get_elements(self, url, tag_name, attribute_name):
        """Get elements from an XML file."""
        usock = urlopen(url)
        xmldoc = minidom.parse(usock)
        usock.close()
        tags = xmldoc.getElementsByTagName(tag_name)
        attributes = []
        for tag in tags:
            attribute = tag.getAttribute(attribute_name)
            attributes.append(attribute)
        return attributes

    def get_thredds_catalog(self, thredds_url):
        """
        Get the dataset catalog for the requested data stream.

            Args:
                thredds_url (str): the THREDDS server url for the
                    requested data stream

            Returns:
                catalog (list): the THREDDS catalog of datasets for
                    the requested data stream
        """
        # ==========================================================
        # Parse out the dataset_id from the thredds url
        server_url = 'https://opendap.oceanobservatories.org/thredds/'
        dataset_id = re.findall(r'(ooi/.*)/catalog', thredds_url)[0]

        # Check the status of the request until the datasets are ready
        # Will timeout if request takes longer than 10 mins
        status_url = thredds_url + '?dataset=' + dataset_id + '/status.txt'
        status = requests.get(status_url)
        start_time = time.time()
        while status.status_code != requests.codes.ok:
            elapsed_time = time.time() - start_time
            status = requests.get(status_url)
            if elapsed_time > 10*60:
                print(f'Request time out for {thredds_url}')
                return None
            time.sleep(5)

        # Parse the datasets from the catalog for the requests url
        catalog_url = server_url + dataset_id + '/catalog.xml'
        catalog = self._get_elements(catalog_url, 'dataset', 'urlPath')

        return catalog

    def parse_catalog(self, catalog, exclude=[]):
        """
        Parses the THREDDS catalog for the netCDF files. The exclude
        argument takes in a list of strings to check a given catalog
        item against and, if in the item, not return it.

        Args:
            catalog (list): the THREDDS catalog of datasets for
                the requested data stream
            exclude (list): keywords to filter files out of the THEDDS catalog

        Returns:
            datasets (list): a list of netCDF datasets which contain the
                associated .nc datasets
        """
        datasets = [citem for citem in catalog if citem.endswith('.nc')]
        if type(exclude) is not list:
            raise ValueError('arg exclude must be a list')
        for ex in exclude:
            if type(ex) is not str:
                raise ValueError(f'Element {ex} of exclude must be a string.')
            datasets = [dset for dset in datasets if ex not in dset]
        return datasets

    def download_netCDF_files(self, datasets, save_dir=None):
        """
        Download netCDF files for given netCDF datasets. If no path
        is specified for the save directory, will download the files to
        the current working directory.

            Args:
                datasets (list): the netCDF datasets to download
                save_dir (str): the path to the directory in which to save
                    the downloaded netCDF files
        """
        # Specify the server url
        server_url = 'https://opendap.oceanobservatories.org/thredds/'

        # Specify and make the relevant save directory
        if save_dir is not None:
            # Make the save directory if it doesn't exists
            if not os.path.exists(save_dir):
                os.makedirs(save_dir)
        else:
            save_dir = os.getcwd()

        # Download and save the netCDF files from the HTTPServer
        # to the save directory
        count = 0
        for dset in datasets:
            # Check that the datasets are netCDF
            if not dset.endswith('.nc'):
                raise ValueError(f'Dataset {dset} not netCDF.')
            count += 1
            file_url = server_url + 'fileServer/' + dset
            filename = file_url.split('/')[-1]
            print(f'Downloading file {count} of {len(datasets)}: {dset} \n')
            a = urlretrieve(file_url, '/'.join((save_dir, filename)))

    def load_netCDF_files(self, netCDF_datasets):
        """
        Open netCDF datasets directly from the THREDDS OpenDAP server as a
        single xarray dataset indexed and sorted by time.

        Args:
            netCDF_datasets (list): dataset paths relative to the OpenDAP
                server root.

        Returns:
            ds (xarray.Dataset): the combined dataset, with a "Location_name"
                attribute built from the vocab of its reference designator.
        """
        opendap_url = "https://opendap.oceanobservatories.org/thredds/dodsC"

        # Turn every dataset name into a full OpenDAP url.
        # Note: latest version of xarray and netcdf-c libraries enforce strict
        # fillvalue match, which causes an error with the implemented OpenDAP
        # data mapping. Requires appending #fillmismatch to open the data
        remote_urls = ["/".join((opendap_url, dset)) + "#fillmismatch"
                       for dset in netCDF_datasets]

        # Open everything as one dataset, make time the main dimension,
        # and sort.
        # NOTE(review): ds is still used after this with-block has exited;
        # confirm the data remains readable at that point.
        with xr.open_mfdataset(remote_urls) as ds:
            ds = ds.swap_dims({"obs": "time"})
            ds = ds.sortby("time")

        # The reference designator is the first four hyphen-separated fields
        # of the dataset id
        id_fields = ds.attrs["id"].split("-")
        refdes = "-".join(id_fields[:4])

        # Add in the English name of the dataset
        vocab = self.get_vocab(refdes)
        name_parts = (vocab["tocL1"].iloc[0],
                      vocab["tocL2"].iloc[0],
                      vocab["tocL3"].iloc[0])
        ds.attrs["Location_name"] = " ".join(name_parts)

        return ds

## Initialize the Tool
In order to utilize the OOINet tool, it needs to be initialized with your OOINet **username** and **token**. These may be found by logging onto ooinet.oceanobservatories.org and looking under your profile. If you have not registered with OOI, you cannot query OOINet via M2M, since it requires authentication.

Personally, I store my OOI username and token locally in a yaml file which is excluded from git tracking.

In [8]:
import yaml
import warnings
warnings.filterwarnings("ignore")
# Import user info for accessing UFrame. The credentials live in a local yaml
# file with the keys "apiname" and "apikey".
# safe_load is used because yaml.load without an explicit Loader is rejected
# by recent PyYAML releases; the with-block makes sure the file gets closed.
with open('../user_info.yaml') as user_file:
    userinfo = yaml.safe_load(user_file)
username = userinfo['apiname']
token = userinfo['apikey']

In [9]:
# Create the OOINet client; every request in the rest of the notebook goes
# through this object
OOI = OOINet(username, token)

In [10]:
# Confirm both credentials were loaded without echoing the secrets into the
# saved notebook output (credentials that were ever displayed in a shared
# notebook should be regenerated).
bool(OOI.username), bool(OOI.token)

(True, True)

---
## Search Datasets
First, we can search the available OOI Reference Designators (i.e. "refdes" for short) on the following keys: **array**, **node**, **instrument**. Additionally, we can request "**English_names**", which will return the descriptive name for the associated array, node, and instrument. Below, we will search for the available CTD instruments on the Pioneer Array Central Surface Mooring.

The major caveat with the search is, similar to searching on ERDDAP datasets, the search terms must be partial or full match based on OOI nomenclature. For example, we have to search for "CTD", "CTDBP", or the full instrument name "03-CTDBPC000". We can't search "conductivity", "temperature" or other CTD-related instrument terms.

In [11]:
# Partial-match search on OOI nomenclature: array "CP01CNSM" and instruments
# containing "CTD"; English_names=True adds the descriptive *_name columns
datasets = OOI.search_datasets(array="CP01CNSM", instrument="CTD", English_names=True)
datasets

https://ooinet.oceanobservatories.org/api/m2m/12576/sensor/inv/CP01CNSM


Unnamed: 0,array,array_name,node,node_name,instrument,instrument_name,refdes,url,deployments
0,CP01CNSM,Coastal Pioneer Central Surface Mooring,RID27,Near Surface Instrument Frame,03-CTDBPC000,CTD,CP01CNSM-RID27-03-CTDBPC000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]"
1,CP01CNSM,Coastal Pioneer Central Surface Mooring,MFD37,Seafloor Multi-Function Node (MFN),03-CTDBPD000,CTD,CP01CNSM-MFD37-03-CTDBPD000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12]"


From the above datasets, we're going to select the CTDBP instrument on the Pioneer Array Central Surface Mooring Near-Surface Instrument Frame (located at 7m depth), which has a reference designator **CP01CNSM-RID27-03-CTDBPC000**. 

In [12]:
# Reference designator of the near-surface (7 m) CTD picked from the search
# results; it is the key for all of the following requests
refdes = "CP01CNSM-RID27-03-CTDBPC000"

---
## Metadata
Next, we can query OOINet for the metadata associated with the selected reference designator. The metadata contains valuable information such as the available methods and streams (which are required to download the data), the particleKeys (the data variable names), and the associated units.

In [14]:
# Table of the parameters (particleKey) for this reference designator, with
# the stream, method, units and begin/end times of each
metadata = OOI.get_metadata(refdes=refdes)
metadata

Unnamed: 0,type,stream,units,fillValue,pdId,particleKey,shape,unsigned,method,count,endTime,beginTime,refdes
0,FLOAT,ctdbp_cdef_dcl_instrument,S m-1,-9999999,PD1,conductivity,SCALAR,False,telemetered,1862862,2020-08-03T16:03:10.348Z,2013-11-21T18:16:00.000Z,CP01CNSM-RID27-03-CTDBPC000
1,FLOAT,ctdbp_cdef_dcl_instrument,dbar,-9999999,PD2,pressure,SCALAR,False,telemetered,1862862,2020-08-03T16:03:10.348Z,2013-11-21T18:16:00.000Z,CP01CNSM-RID27-03-CTDBPC000
2,FLOAT,ctdbp_cdef_dcl_instrument,ºC,-9999999,PD6,temp,SCALAR,False,telemetered,1862862,2020-08-03T16:03:10.348Z,2013-11-21T18:16:00.000Z,CP01CNSM-RID27-03-CTDBPC000
3,DOUBLE,ctdbp_cdef_dcl_instrument,seconds since 1900-01-01,-9999999,PD7,time,SCALAR,False,telemetered,1862862,2020-08-03T16:03:10.348Z,2013-11-21T18:16:00.000Z,CP01CNSM-RID27-03-CTDBPC000
4,DOUBLE,ctdbp_cdef_dcl_instrument,seconds since 1900-01-01,-9999999,PD10,port_timestamp,SCALAR,False,telemetered,1862862,2020-08-03T16:03:10.348Z,2013-11-21T18:16:00.000Z,CP01CNSM-RID27-03-CTDBPC000
5,DOUBLE,ctdbp_cdef_dcl_instrument,seconds since 1900-01-01,-9999999,PD11,driver_timestamp,SCALAR,False,telemetered,1862862,2020-08-03T16:03:10.348Z,2013-11-21T18:16:00.000Z,CP01CNSM-RID27-03-CTDBPC000
6,DOUBLE,ctdbp_cdef_dcl_instrument,seconds since 1900-01-01,-9999999,PD12,internal_timestamp,SCALAR,False,telemetered,1862862,2020-08-03T16:03:10.348Z,2013-11-21T18:16:00.000Z,CP01CNSM-RID27-03-CTDBPC000
7,STRING,ctdbp_cdef_dcl_instrument,1,empty,PD16,preferred_timestamp,SCALAR,False,telemetered,1862862,2020-08-03T16:03:10.348Z,2013-11-21T18:16:00.000Z,CP01CNSM-RID27-03-CTDBPC000
8,STRING,ctdbp_cdef_dcl_instrument,1,empty,PD93,date_time_string,SCALAR,False,telemetered,1862862,2020-08-03T16:03:10.348Z,2013-11-21T18:16:00.000Z,CP01CNSM-RID27-03-CTDBPC000
9,DOUBLE,ctdbp_cdef_dcl_instrument,seconds since 1900-01-01,-9999,PD863,ingestion_timestamp,SCALAR,False,telemetered,1862862,2020-08-03T16:03:10.348Z,2013-11-21T18:16:00.000Z,CP01CNSM-RID27-03-CTDBPC000


---
## Deployment Information
When we searched for CTD datasets on the Pioneer Central Surface Mooring, it returned a table which listed the available deployment numbers for each of the datasets. We can get much more detailed information on the deployments for a particular reference designator by requesting the deployment information from OOINet.

In [15]:
# Table of deployments: number, position, depth, start/end times and the
# deploy/recover cruises
deployments = OOI.get_deployments(refdes=refdes)
deployments

Unnamed: 0,deploymentNumber,latitude,longitude,depth,deployStart,deployEnd,deployCruise,recoverCruise
0,12,40.1332,-70.7783,7,2019-09-27 18:30:00,,AR-39,
0,11,40.1401,-70.7714,7,2019-04-06 14:35:00,2019-09-26 17:15:00,AR-34,AR-39
0,10,40.1334,-70.7777,7,2018-10-30 01:48:00,2019-04-07 18:08:00,AR-31,AR-34
0,9,40.1397,-70.7713,7,2018-03-24 21:32:00,2018-10-29 12:31:00,AR-28,AR-31
0,8,40.1334,-70.7783,7,2017-10-29 14:15:00,2018-03-29 19:37:00,AR-24,AR-28
0,7,40.1398,-70.7712,7,2017-06-09 14:24:00,2017-11-01 20:33:00,AR-18,AR-24
0,6,40.1334,-70.7785,7,2016-10-13 18:36:00,2017-06-09 16:05:00,AR-08,AR-18
0,5,40.1404,-70.7713,7,2016-05-13 13:50:00,2016-10-13 19:34:00,AR-04,AR-08
0,4,40.1332,-70.7784,7,2015-10-23 18:49:00,2016-04-04 12:03:00,AT-31,AR1-07
0,3,40.1402,-70.7713,7,2015-05-07 17:34:00,2015-10-23 19:40:00,AT-27,AT-31


---
## Vocab Information
Additionally, if we are interested in more detailed information on the location that the reference designator is assigned to, we can request the vocab information for the given reference designator. The vocab information includes some of the "**English_names**" info we requested when searching for datasets, as well as instrument model, manufacturer, and the descriptive names for the reference designator location.

In [16]:
# Vocab record: instrument model, manufacturer, depth range and the
# descriptive location names (tocL1/tocL2/tocL3)
vocab = OOI.get_vocab(refdes=refdes)
vocab

Unnamed: 0,@class,model,vocabId,refdes,instrument,tocL1,tocL2,tocL3,mindepth,maxdepth,manufacturer
0,.VocabRecord,SBE 16plusV2,472,CP01CNSM-RID27-03-CTDBPC000,CTD,Coastal Pioneer,Central Surface Mooring,Near Surface Instrument Frame,7.0,7.0,Sea-Bird


---
## Download Datasets
The ultimate goal of the queries above was to identify which data stream(s) we are interested in requesting data from to download. Now we want to be able to request those data streams and get the associated netCDF files. This process involves the following steps:
1. Identify the methods and data streams for the selected reference designator
2. Request the THREDDS server url for the data sets
3. Get the catalog of datasets on the THREDDS server
4. Parse the catalog for the desired netCDF files
5. Download the identified netCDF files to a local directory

**1.** Get the methods and data streams associated with the given reference designator:

In [17]:
# List the (method, stream) pairs available for this reference designator;
# one pair is needed to request data
streams = OOI.get_datastreams(refdes)
streams

Unnamed: 0,refdes,method,stream
0,CP01CNSM-RID27-03-CTDBPC000,recovered_host,ctdbp_cdef_dcl_instrument_recovered
1,CP01CNSM-RID27-03-CTDBPC000,recovered_inst,ctdbp_cdef_instrument_recovered
2,CP01CNSM-RID27-03-CTDBPC000,telemetered,ctdbp_cdef_dcl_instrument


**2.** Now, we request the THREDDS server url from OOINet. At a minimum, this requires the reference designator, method, and stream as inputs. This will request the datasets for _all_ deployments.

If we want to further limit the request to a specific deployment or a specific time period, we can do that by passing the arguments **beginDT** (begin datetime) and **endDT** (end datetime). 

Additionally, we can input some optional arguments that will return diagnostic information. The **include_provenance** will return a separate text file with information on the provenance of the data, such as the calibration coefficients applied. The **include_annotations** returns a separate text file of annotations, which are descriptions of issues and information associated with the given dataset.

In [19]:
# Choose one (method, stream) pair from the datastreams table: here the
# instrument-recovered CTD stream
method = "recovered_inst"
stream = "ctdbp_cdef_instrument_recovered"

In [20]:
# No beginDT/endDT is passed, so the request covers all deployments; the
# result is the url of the THREDDS catalog page for this request
thredds_url = OOI.get_thredds_url(refdes, method, stream)
thredds_url

'https://opendap.oceanobservatories.org/thredds/catalog/ooi/areed@whoi.edu/20200803T183219273Z-CP01CNSM-RID27-03-CTDBPC000-recovered_inst-ctdbp_cdef_instrument_recovered/catalog.html'

**3.** With the appropriate THREDDS url, we can query the THREDDS catalog to identify the files available for the request:

In [21]:
# List of file paths in the catalog (status files, provenance json, .ncml
# and the .nc data files)
catalog = OOI.get_thredds_catalog(thredds_url)
catalog

['',
 'ooi/areed@whoi.edu/20200803T183219273Z-CP01CNSM-RID27-03-CTDBPC000-recovered_inst-ctdbp_cdef_instrument_recovered/status.txt',
 'ooi/areed@whoi.edu/20200803T183219273Z-CP01CNSM-RID27-03-CTDBPC000-recovered_inst-ctdbp_cdef_instrument_recovered/status.json',
 'ooi/areed@whoi.edu/20200803T183219273Z-CP01CNSM-RID27-03-CTDBPC000-recovered_inst-ctdbp_cdef_instrument_recovered/deployment0011_CP01CNSM-RID27-03-CTDBPC000-recovered_inst-ctdbp_cdef_instrument_recovered_aggregate_provenance.json',
 'ooi/areed@whoi.edu/20200803T183219273Z-CP01CNSM-RID27-03-CTDBPC000-recovered_inst-ctdbp_cdef_instrument_recovered/deployment0011_CP01CNSM-RID27-03-CTDBPC000-recovered_inst-ctdbp_cdef_instrument_recovered_20190406T144517-20190926T161808.nc',
 'ooi/areed@whoi.edu/20200803T183219273Z-CP01CNSM-RID27-03-CTDBPC000-recovered_inst-ctdbp_cdef_instrument_recovered/deployment0011_CP01CNSM-RID27-03-CTDBPC000-recovered_inst-ctdbp_cdef_instrument_recovered.ncml',
 'ooi/areed@whoi.edu/20200803T183219273Z-CP01C

**4.** Next, we want to parse the THREDDS catalog for the netCDF datasets which contain the relevant data. We can pass to the **exclude** keyword netCDF datasets which may be delivered as part of the THREDDS request, but which we aren't interested in. For example, Glider datasets frequently return engineering (ENG) datastreams which contain engineering files but no relevant data.

In [22]:
# Keep only the netCDF data files from the catalog, leaving out the unwanted
# "ENG" and "gps" datasets (presumably matched against the file name --
# verify in parse_catalog)
netCDF_datasets = OOI.parse_catalog(catalog, exclude=["ENG", "gps"])
netCDF_datasets

['ooi/areed@whoi.edu/20200803T183219273Z-CP01CNSM-RID27-03-CTDBPC000-recovered_inst-ctdbp_cdef_instrument_recovered/deployment0011_CP01CNSM-RID27-03-CTDBPC000-recovered_inst-ctdbp_cdef_instrument_recovered_20190406T144517-20190926T161808.nc',
 'ooi/areed@whoi.edu/20200803T183219273Z-CP01CNSM-RID27-03-CTDBPC000-recovered_inst-ctdbp_cdef_instrument_recovered/deployment0008_CP01CNSM-RID27-03-CTDBPC000-recovered_inst-ctdbp_cdef_instrument_recovered_20171029T141519-20180329T190320.nc',
 'ooi/areed@whoi.edu/20200803T183219273Z-CP01CNSM-RID27-03-CTDBPC000-recovered_inst-ctdbp_cdef_instrument_recovered/deployment0007_CP01CNSM-RID27-03-CTDBPC000-recovered_inst-ctdbp_cdef_instrument_recovered_20170609T142931-20171101T202931.nc',
 'ooi/areed@whoi.edu/20200803T183219273Z-CP01CNSM-RID27-03-CTDBPC000-recovered_inst-ctdbp_cdef_instrument_recovered/deployment0006_CP01CNSM-RID27-03-CTDBPC000-recovered_inst-ctdbp_cdef_instrument_recovered_20161013T183731-20161122T190001.nc',
 'ooi/areed@whoi.edu/2020080

**5.** Finally, we can download the netCDF files to a specified save directory (**save_dir**):

In [23]:
# Save into a directory relative to the notebook instead of a user-specific
# absolute path so the example runs on any machine; create it if missing.
save_dir = "Testing"
os.makedirs(save_dir, exist_ok=True)
OOI.download_netCDF_files(netCDF_datasets, save_dir=save_dir)

Downloading file 1 of 7: ooi/areed@whoi.edu/20200803T183219273Z-CP01CNSM-RID27-03-CTDBPC000-recovered_inst-ctdbp_cdef_instrument_recovered/deployment0011_CP01CNSM-RID27-03-CTDBPC000-recovered_inst-ctdbp_cdef_instrument_recovered_20190406T144517-20190926T161808.nc 

Downloading file 2 of 7: ooi/areed@whoi.edu/20200803T183219273Z-CP01CNSM-RID27-03-CTDBPC000-recovered_inst-ctdbp_cdef_instrument_recovered/deployment0008_CP01CNSM-RID27-03-CTDBPC000-recovered_inst-ctdbp_cdef_instrument_recovered_20171029T141519-20180329T190320.nc 

Downloading file 3 of 7: ooi/areed@whoi.edu/20200803T183219273Z-CP01CNSM-RID27-03-CTDBPC000-recovered_inst-ctdbp_cdef_instrument_recovered/deployment0007_CP01CNSM-RID27-03-CTDBPC000-recovered_inst-ctdbp_cdef_instrument_recovered_20170609T142931-20171101T202931.nc 

Downloading file 4 of 7: ooi/areed@whoi.edu/20200803T183219273Z-CP01CNSM-RID27-03-CTDBPC000-recovered_inst-ctdbp_cdef_instrument_recovered/deployment0006_CP01CNSM-RID27-03-CTDBPC000-recovered_inst-ctdbp

## Open netCDF Datasets
Instead of downloading netCDF datasets to a local directory, another option is to load the netCDF datasets right from the THREDDS OpenDAP server into an xarray dataset.

In [26]:
# Open the datasets over OpenDAP as a single xarray dataset instead of
# downloading the files first
ctd = OOI.load_netCDF_files(netCDF_datasets)

In [27]:
# Show the dataset summary (variables, shapes and chunking)
ctd

Unnamed: 0,Array,Chunk
Bytes,9.41 MB,2.40 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 9.41 MB 2.40 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type float64 numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,9.41 MB,2.40 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 4.70 MB 1.20 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type float32 numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.41 MB,2.40 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 9.41 MB 2.40 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type float64 numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,9.41 MB,2.40 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.41 MB,2.40 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 9.41 MB 2.40 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type float64 numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,9.41 MB,2.40 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.41 MB,2.40 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,datetime64[ns],numpy.ndarray
"Array Chunk Bytes 9.41 MB 2.40 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type datetime64[ns] numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,9.41 MB,2.40 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,datetime64[ns],numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 4.70 MB 1.20 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type float32 numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.41 MB,2.40 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 9.41 MB 2.40 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type float64 numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,9.41 MB,2.40 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,75.27 MB,19.17 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,|S64,numpy.ndarray
"Array Chunk Bytes 75.27 MB 19.17 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type |S64 numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,75.27 MB,19.17 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,|S64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.41 MB,2.40 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 9.41 MB 2.40 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type float64 numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,9.41 MB,2.40 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.41 MB,2.40 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,datetime64[ns],numpy.ndarray
"Array Chunk Bytes 9.41 MB 2.40 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type datetime64[ns] numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,9.41 MB,2.40 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,datetime64[ns],numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 4.70 MB 1.20 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type float32 numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.41 MB,2.40 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 9.41 MB 2.40 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type float64 numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,9.41 MB,2.40 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.41 MB,2.40 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 9.41 MB 2.40 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type float64 numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,9.41 MB,2.40 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,75.27 MB,19.17 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,|S64,numpy.ndarray
"Array Chunk Bytes 75.27 MB 19.17 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type |S64 numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,75.27 MB,19.17 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,|S64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 4.70 MB 1.20 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type float32 numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 4.70 MB 1.20 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type float32 numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.41 MB,2.40 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 9.41 MB 2.40 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type float64 numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,9.41 MB,2.40 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.41 MB,2.40 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,datetime64[ns],numpy.ndarray
"Array Chunk Bytes 9.41 MB 2.40 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type datetime64[ns] numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,9.41 MB,2.40 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,datetime64[ns],numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 4.70 MB 1.20 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type float32 numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 4.70 MB 1.20 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type float32 numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.41 MB,2.40 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,datetime64[ns],numpy.ndarray
"Array Chunk Bytes 9.41 MB 2.40 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type datetime64[ns] numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,9.41 MB,2.40 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,datetime64[ns],numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.41 MB,2.40 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,datetime64[ns],numpy.ndarray
"Array Chunk Bytes 9.41 MB 2.40 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type datetime64[ns] numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,9.41 MB,2.40 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,datetime64[ns],numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.41 MB,2.40 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 9.41 MB 2.40 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type float64 numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,9.41 MB,2.40 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 4.70 MB 1.20 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type float32 numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 4.70 MB 1.20 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type float32 numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 4.70 MB 1.20 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type float32 numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,int32,numpy.ndarray
"Array Chunk Bytes 4.70 MB 1.20 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type int32 numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,int32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 4.70 MB 1.20 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type float32 numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 4.70 MB 1.20 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type float32 numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 4.70 MB 1.20 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type float32 numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.41 MB,2.40 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 9.41 MB 2.40 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type object numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,9.41 MB,2.40 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,object,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 4.70 MB 1.20 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type float32 numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 4.70 MB 1.20 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type float32 numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 4.70 MB 1.20 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type float32 numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 4.70 MB 1.20 MB Shape (1176060,) (299491,) Count 670 Tasks 649 Chunks Type float32 numpy.ndarray",1176060  1,

Unnamed: 0,Array,Chunk
Bytes,4.70 MB,1.20 MB
Shape,"(1176060,)","(299491,)"
Count,670 Tasks,649 Chunks
Type,float32,numpy.ndarray


Add some basic plotting tools for working with datasets

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Deployment numbers present in the loaded dataset. The dataset was loaded
# into `ctd`; the name `ds` is never defined at notebook level.
np.unique(ctd.deployment)

In [None]:
# Keep only the samples from deployment 11; drop=True removes the other
# samples instead of masking them. The dataset was loaded into `ctd` (the
# name `ds` is never defined at notebook level).
D11 = ctd.where(ctd.deployment==11, drop=True)
D11

In [None]:
# First, grab the dataset id to use as the plot title
title = D11.attrs["id"]
title

In [None]:
# Show the deployment 11 subset again to look up its variable names
D11

In [None]:
# Variables to plot: practical salinity (y) against time (x) for deployment 11
x = D11.time
y = D11.practical_salinity

In [None]:
# Inspect the variable's metadata; its "standard_name" is used as an axis
# label in the plot
D11.practical_salinity.attrs

In [None]:
# Calculate limits for plotting
ymean = np.mean(y.values)
ystd = np.std(y.values)
ymin, ymax = ymean-5*ystd, ymean+5*ystd
ymin, ymax

In [None]:
# Scatter the selected variable against x on a gridded axis
fig, ax = plt.subplots(figsize=(12,8))
ax.scatter(x, y, s=2)
ax.grid()

# Label the axes from each variable's own metadata and use the dataset id
# as the title
ax.set_ylabel(y.attrs["standard_name"], fontsize=12)
x_name = x.attrs["standard_name"]
ax.set_xlabel(x_name, fontsize=12)
ax.set_title(D11.attrs["id"], fontsize=12)

# Tighten the y-axis only on the side(s) where the automatic limits extend
# beyond the (ymin, ymax) range computed earlier
auto_bottom, auto_top = ax.get_ylim()
if auto_bottom < ymin:
    ax.set_ylim(bottom=ymin)
if auto_top > ymax:
    ax.set_ylim(top=ymax)

# Let matplotlib format the date tick labels when the x-axis is time
if x_name == "time":
    fig.autofmt_xdate()


In [None]:
# Check the y-axis limits that ended up on the plot
ax.get_ylim()