In [1]:
import itertools
import os

import cartopy.crs as ccrs
import cartopy.feature as cfeature
import glob2 as glob
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np
import pandas as pd
import validators
import xarray as xr
from cartopy.mpl.gridliner import LATITUDE_FORMATTER, LONGITUDE_FORMATTER
from erddapy import ERDDAP
from siphon.catalog import TDSCatalog

In [None]:
def load_moana_bb(
        source: str = "http://131.203.52.77:6443/thredds/catalog/moana/NZB/moana_backbone_v2.2/avg/catalog.html",
        start_date: np.datetime64 = start_date,
        end_date: np.datetime64 = end_date,
) -> tuple[pd.DataFrame, dict, list]:
    """Load public Mangōpare data between ``start_date`` and ``end_date``.

    Files are taken either from a THREDDS catalog (when ``source`` validates
    as a URL) or from a local glob pattern.  For every file that falls in the
    date range, samples with ``QC_FLAG`` below 4 and ``TIME`` inside the range
    are kept, and per-deployment statistics (number of measurements, maximum
    depth, duration) are collected.

    Notes
    -----
    * The default values of ``start_date`` and ``end_date`` are read from the
      module-level names ``start_date`` / ``end_date`` *when this cell is
      executed*, so those names must already exist at that point; later
      changes to them do not affect the defaults.
    * A file is pre-selected using characters 6-13 of its file name, parsed as
      ``YYYYMMDD``.  NOTE(review): confirm that this matches the naming
      convention of the files behind ``source``.

    Parameters
    ----------
    source : str, optional
        THREDDS server url, by default "http://131.203.52.77:6443/thredds/catalog/moana/NZB/moana_backbone_v2.2/avg/catalog.html"
        or a glob pattern of local files, e.g., '/path_to_files/*.nc'
    start_date : np.datetime64, optional
        Start of desired date range, by default start_date
    end_date : np.datetime64, optional
        End of desired date range, by default end_date

    Returns
    -------
    tuple[pd.DataFrame, dict, list]
        A dataframe with the first latitude, longitude and (UTC-localised)
        time of each deployment; a dictionary with the keys
        ``"num_measurements"``, ``"max_depths"`` and ``"durations"`` (one
        entry per deployment); and a list with the time of every retained
        measurement.
    """
    # Decide once whether we are talking to a catalog or reading local files.
    is_remote = bool(validators.url(source))
    if is_remote:
        cat = TDSCatalog(source)
        filelist = sorted(cat.datasets)
    else:
        # Sorted so the output order is reproducible, like the remote branch.
        filelist = sorted(glob.glob(source))

    # Per-deployment accumulators.
    lat = []
    lon = []
    deploy_time = []
    num_measurements = []
    max_depths = []
    durations = []
    # Time stamps of every retained measurement, across all deployments.
    all_times = []

    for file in filelist:
        # The date stamp lives in the file *name*; strip any directory part so
        # that patterns such as '/path_to_files/*.nc' are sliced correctly.
        # Catalog dataset names without a directory part are left unchanged.
        file_name = os.path.basename(file)
        sdn = pd.to_datetime(file_name[6:14], format="%Y%m%d").to_numpy()
        if (sdn < start_date) or (sdn > end_date):
            continue

        if is_remote:
            raw = cat.datasets[file].remote_access(use_xarray=True)
        else:
            raw = xr.open_dataset(file)

        # Keep a handle on the dataset that was actually opened: the objects
        # returned by ``where`` below are new datasets, and the opened one has
        # to be closed even if something in the body raises.
        try:
            ds = raw.where(raw["QC_FLAG"] < 4, drop=True)
            ds = ds.where(ds["TIME"] >= start_date, drop=True)
            ds = ds.where(ds["TIME"] <= end_date, drop=True)

            # Nothing left after filtering: skip this deployment.
            if len(ds.LATITUDE) < 1:
                continue

            lat.append(float(ds.LATITUDE[0]))
            lon.append(float(ds.LONGITUDE[0]))
            deploy_time.append(ds.TIME[0].values)
            all_times.extend(ds.TIME.values)

            num_measurements.append(len(ds.TIME.values))
            max_depths.append(np.nanmax(ds.DEPTH.values))
            durations.append(np.nanmax(ds.TIME) - np.nanmin(ds.TIME))
        finally:
            raw.close()

    moana_df = pd.DataFrame({"lat": lat, "lon": lon, "time": deploy_time}).dropna()
    # Coerce to datetime first: with no matching deployments the column has
    # object dtype and the ``.dt`` accessor would raise.
    moana_df["time"] = pd.to_datetime(moana_df["time"]).dt.tz_localize("UTC")

    stats_moana = {
        "num_measurements": num_measurements,
        "max_depths": max_depths,
        "durations": durations,
    }

    return moana_df, stats_moana, all_times

In [2]:
# Catalog page of the "avg" branch of the Moana backbone v2.2 THREDDS tree.
url = (
    "http://131.203.52.77:6443/thredds/catalog/"
    "moana/NZB/moana_backbone_v2.2/avg/catalog.html"
)

In [4]:
import xarray as xr
from siphon.catalog import TDSCatalog

# Catalog page of the "his" branch of the Moana backbone v2.2 THREDDS tree.
url = (
    "http://131.203.52.77:6443/thredds/catalog/"
    "moana/NZB/moana_backbone_v2.2/his/catalog.html"
)
cat = TDSCatalog(url)
# Sorted names of the datasets listed by the catalog.
filelist = sorted(cat.datasets)
# Open entry 1 of the catalog's own dataset collection (indexed on
# ``cat.datasets``, not on the sorted ``filelist``) with xarray access enabled.
ds = cat.datasets[1].remote_access(use_xarray=True)



In [5]:
# Display the full list of dataset names held in ``filelist``.
# NOTE(review): this prints every entry; consider a slice for a shorter preview.
filelist

['nz5km_avg_199302.nc',
 'nz5km_avg_199303.nc',
 'nz5km_avg_199304.nc',
 'nz5km_avg_199305.nc',
 'nz5km_avg_199306.nc',
 'nz5km_avg_199307.nc',
 'nz5km_avg_199308.nc',
 'nz5km_avg_199309.nc',
 'nz5km_avg_199310.nc',
 'nz5km_avg_199311.nc',
 'nz5km_avg_199312.nc',
 'nz5km_avg_199401.nc',
 'nz5km_avg_199402.nc',
 'nz5km_avg_199403.nc',
 'nz5km_avg_199404.nc',
 'nz5km_avg_199405.nc',
 'nz5km_avg_199406.nc',
 'nz5km_avg_199407.nc',
 'nz5km_avg_199408.nc',
 'nz5km_avg_199409.nc',
 'nz5km_avg_199410.nc',
 'nz5km_avg_199411.nc',
 'nz5km_avg_199412.nc',
 'nz5km_avg_199501.nc',
 'nz5km_avg_199502.nc',
 'nz5km_avg_199503.nc',
 'nz5km_avg_199504.nc',
 'nz5km_avg_199505.nc',
 'nz5km_avg_199506.nc',
 'nz5km_avg_199507.nc',
 'nz5km_avg_199508.nc',
 'nz5km_avg_199509.nc',
 'nz5km_avg_199510.nc',
 'nz5km_avg_199511.nc',
 'nz5km_avg_199512.nc',
 'nz5km_avg_199601.nc',
 'nz5km_avg_199602.nc',
 'nz5km_avg_199603.nc',
 'nz5km_avg_199604.nc',
 'nz5km_avg_199605.nc',
 'nz5km_avg_199606.nc',
 'nz5km_avg_1996

In [11]:
# Open entry 1 of the catalog's dataset collection with xarray access enabled.
# Relies on ``cat`` from an earlier cell still being defined in the kernel.
ds = cat.datasets[1].remote_access(use_xarray=True)
# Bare last expression: render the dataset's rich repr as the cell output.
ds

  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
