In [None]:
import sys

# Make the in-repo gdutils package importable from the parent directory.
# We'll use this only until we package gdutils.
# Guarded so that re-running this cell does not keep adding duplicate entries.
if ".." not in sys.path:
    sys.path.append("..")

Example using `GdacClient` to search for datasets by time and geospatial bounds and save the results to a CSV file.

Search parameters (the widget layout below is rough and will be tidied up later)

In [None]:
import ipydatetime
from datetime import datetime
from ipywidgets import VBox, HBox, Label, BoundedFloatText


def _bounded_field(label, default, limit):
    """Return a float input labelled `label`, clamped to [-limit, limit], starting at `default`."""
    return BoundedFloatText(
        value=default,
        min=-limit,
        max=limit,
        description=label,
    )


# Time window pickers; the defaults span 2021-04-01 00:00:00 through 2021-06-30 23:59:59
dt0 = ipydatetime.NaiveDatetimePicker(value=datetime(2021, 4, 1))
dt1 = ipydatetime.NaiveDatetimePicker(value=datetime(2021, 6, 30, 23, 59, 59))

# Latitude bounds (defaults cover -90..90)
south = _bounded_field('min lat:', -90, 90)
north = _bounded_field('max lat:', 90, 90)

# Longitude bounds (defaults cover -180..180)
west = _bounded_field('min lon:', -180, 180)
east = _bounded_field('max lon:', 180, 180)

# Show the two datetime pickers stacked vertically
VBox([dt0, dt1])

In [None]:
# Compass-style layout for the bounding box inputs: north on top, south at the
# bottom, and west ("min lon") to the left of east ("max lon") in the middle row.
VBox(
    [
        north,
        HBox([west, east]),
        south,
    ],
)

In [None]:
# Read the current selections out of the widgets.
# NOTE: each name is rebound from its widget to the widget's plain value, so
# this cell can only be run once after the widget-construction cell; re-run
# that cell first if the selections need to be read again.
dt0, dt1 = dt0.value, dt1.value
south, north = south.value, north.value
west, east = west.value, east.value

# Search parameters handed to the client
params = dict(
    min_time=dt0,
    max_time=dt1,
    min_lat=south,
    max_lat=north,
    min_lon=west,
    max_lon=east,
)

In [None]:
from gdutils import GdacClient


# Create the client used for the search and for all later metadata lookups
client = GdacClient()

# Run the search with the time/bounding-box parameters built above
client.search_datasets(params=params)
# Display the client's datasets table (presumably populated by the search above)
client.datasets

In [None]:
# Label slice covering the selected dt0..dt1 time window
time_window = slice(dt0, dt1)

# Count the total number of deployments (rows in the search results)
num_deployments = client.datasets.shape[0]

# Sum the glider days within the dt0:dt1 time window
glider_days = client.glider_days_per_yyyymmdd.loc[time_window].sum()

# Sum the profile counts within the dt0:dt1 time window
profile_count = client.profiles_per_yyyymmdd.loc[time_window].sum()

# Work on a copy so the columns added later do not modify the client's own table
datasets = client.datasets.copy()

Loop through the datasets, fetch each dataset's ERDDAP metadata, and pull out the sea name and funding acknowledgment attributes.

In [None]:
import warnings


# Placeholder recorded whenever an attribute (or the whole metadata record) is missing
UNKNOWN = "unknown"

# Exactly one entry is appended per row of `datasets`, in index order, so both
# lists can later be assigned to the frame as columns without a length mismatch.
sea_names = []
funding_sources = []
for dataset_id in datasets.index:

    # Fetch the dataset description from ERDDAP
    info = client.get_dataset_metadata(dataset_id)

    if info.empty:
        # No metadata at all: still record placeholders so the lists stay
        # aligned with the rows of `datasets`.
        warnings.warn(f"{dataset_id}: no metadata returned")
        sea_names.append(UNKNOWN)
        funding_sources.append(UNKNOWN)
        continue

    # Find all global NetCDF attributes
    nc_globals = info.loc[info["Variable Name"] == "NC_GLOBAL"]
    attr_names = nc_globals["Attribute Name"]

    # Find the sea_name global attribute
    sea_name_attr = nc_globals.loc[attr_names == "sea_name"]
    if sea_name_attr.empty:
        warnings.warn(f"{dataset_id}: sea_name NC_GLOBAL not found")
        sea_name = UNKNOWN
    else:
        # An empty value is treated the same as a missing one
        sea_name = sea_name_attr.Value.iloc[0] or UNKNOWN

    # Find all global attributes that begin with "acknowledg" as this attribute
    # typically contains the funding sources. na=False keeps the mask boolean
    # even if an attribute name is missing.
    funding_attr = nc_globals.loc[attr_names.str.startswith("acknowledg", na=False)]
    if funding_attr.empty:
        warnings.warn(f"{dataset_id}: acknowledgment NC_GLOBAL not found")
        funding = UNKNOWN
    else:
        # Only the first matching attribute is used
        funding = funding_attr.Value.iloc[0] or UNKNOWN

    sea_names.append(sea_name)
    funding_sources.append(funding)

In [None]:
# Output file; the name reflects the default search window (2021 Q2) and is
# not updated if a different time window is selected in the widgets.
csv_path = "2021_Q2_datasets.csv"

# Columns to write to the csv, in output order
cols = [
    "glider", "wmo_id",
    "start_date", "end_date",
    "num_profiles", "days",
    "institution",
    "deployment_area", "funding",
]

# Attach the two attribute lists collected from the dataset metadata
for column, values in (
    ("deployment_area", sea_names),
    ("funding", funding_sources),
):
    datasets[column] = values

datasets.to_csv(csv_path, columns=cols)

In [None]:
# Display the datasets table, including the deployment_area and funding columns added above
datasets