# Finding datasets of interest

In [None]:
import datetime
import pandas as pd
import numpy as np
from erddapy import ERDDAP
from tqdm.notebook import tqdm

We can use ERDDAP's built-in search function to find datasets that match a keyword.

### Find drifter datasets from OSMC

In [None]:
# Client for the OSMC ERDDAP server, configured for tabular (tabledap) data.
e = ERDDAP(server="https://osmc.noaa.gov/erddap", protocol="tabledap")

# URL of a search for the keyword "drifter", with results requested as CSV.
url = e.get_search_url(response="csv", search_for="drifter")

# Download the search results and show only the ID and title of each match.
df = pd.read_csv(url)
df.loc[:, ["Dataset ID", "Title"]]

### Find ADCP data on the IOOS sensors ERDDAP

In [None]:
# Client for the IOOS sensors ERDDAP server, configured for tabledap data.
e = ERDDAP(server="https://erddap.sensors.ioos.us/erddap", protocol="tabledap")

# URL of a search for the keyword "adcp", with results requested as CSV.
url = e.get_search_url(response="csv", search_for="adcp")

# Download the search results and show only the ID and title of each match.
df = pd.read_csv(url)
df.loc[:, ["Dataset ID", "Title"]]

### Find Argo floats with chlorophyll and CDOM data

In [None]:
# Client for the Ifremer ERDDAP server. Unlike the other searches in this
# notebook, no protocol is passed to the constructor here.
e = ERDDAP(server="https://erddap.ifremer.fr/erddap")

# URL of a search for both terms "chlorophyll" and "CDOM", returned as CSV.
url = e.get_search_url(response="csv", search_for="chlorophyll CDOM")

# Download the search results and show only the ID and title of each match.
df = pd.read_csv(url)
df.loc[:, ["Dataset ID", "Title"]]

### Search with allDatasets

For a more sophisticated search, we can use the `allDatasets` dataset from ERDDAP to examine the primary metadata of all datasets. 

### Get metadata for IOOS glider datasets

In [None]:
# Client for the IOOS glider ERDDAP server, configured for tabledap data.
e = ERDDAP(server="https://gliders.ioos.us/erddap", protocol="tabledap")

# Download the "allDatasets" table, which holds summary metadata per dataset.
e.dataset_id = "allDatasets"
df_datasets = e.to_pandas()

# Index the table by datasetID for easier reading, and discard the row that
# describes "allDatasets" itself.
df_datasets = df_datasets.set_index("datasetID").drop("allDatasets")

# Parse the time bounds into datetime columns. The last character of each
# timestamp string is dropped before parsing (presumably a trailing "Z" UTC
# designator, which would leave the parsed values timezone-naive -- confirm).
df_datasets = df_datasets.assign(
    minTime=pd.to_datetime(df_datasets["minTime (UTC)"].str[:-1]),
    maxTime=pd.to_datetime(df_datasets["maxTime (UTC)"].str[:-1]),
)

# Keep a subset of useful columns: institution plus the spatial, vertical and
# temporal extent of each dataset.
cols = [
    "institution",
    "minLongitude (degrees_east)",
    "maxLongitude (degrees_east)",
    "minLatitude (degrees_north)",
    "maxLatitude (degrees_north)",
    "minAltitude (m)",
    "maxAltitude (m)",
    "minTime",
    "maxTime",
]
df_datasets = df_datasets[cols]

print(f"Found {len(df_datasets)} datasets")

In [None]:
# Display the trimmed metadata table (one row per dataset, indexed by datasetID).
df_datasets

### Which gliders have uploaded data in the last 6 hours?

In [None]:
# Time elapsed between the current time and the latest timestamp (maxTime) of
# each dataset.
time_since_update = np.datetime64("now") - df_datasets["maxTime"]

# Keep only the datasets whose latest timestamp is less than 6 hours old.
df_datasets[time_since_update < datetime.timedelta(hours=6)]

### Find datasets from deployments that lasted longer than 90 days

In [None]:
# Minimum deployment duration of interest, in days.
min_days = 90

# Duration of each deployment: the span between its first and last timestamps.
df_datasets["endurance"] = df_datasets.maxTime - df_datasets.minTime

# Show the endurance of every deployment that lasted longer than the threshold.
long_missions = df_datasets["endurance"] > datetime.timedelta(days=min_days)
df_datasets.loc[long_missions, "endurance"]

### Which glider missions extended to a depth greater than 150 m in seas north of 70° N?

In [None]:
# Thresholds: altitude is negative below the surface, so 150 m depth is -150 m.
min_altitude = -150  # note the sign!
min_northing = 70

# One boolean array per criterion.
# NOTE(review): the depth test reads the "maxAltitude (m)" column although the
# threshold is named min_altitude -- confirm this is the intended column.
below_threshold = df_datasets["maxAltitude (m)"].lt(min_altitude).to_numpy()
north_of_limit = df_datasets["maxLatitude (degrees_north)"].gt(min_northing).to_numpy()

# A dataset is selected only when both criteria hold.
mask = below_threshold & north_of_limit

df_datasets[mask]

In [None]:
# Client for the IOOS glider ERDDAP server, configured for tabledap data.
e = ERDDAP(server="https://gliders.ioos.us/erddap", protocol="tabledap")

# Select a single glider deployment and restrict the request to data with
# timestamps on or after 2018-09-20.
e.dataset_id = "unit_595-20180804T0000-delayed"
e.constraints = {"time>=": "2018-09-20"}

# Download the selected data as an xarray object.
ds = e.to_xarray()

In [None]:
# Plot the "u" variable of the downloaded dataset
# (presumably a velocity component -- confirm against the dataset metadata).
ds["u"].plot()

---------------------------
### References

IOOS ERDDAP https://gliders.ioos.us/erddap

More info on using ERDDAP's built-in search: https://ioos.github.io/erddapy/01b-tabledap-output.html