In [1]:
# Setup directories
from pathlib import Path

basedir = Path().absolute()
libdir = basedir.parent.parent.parent

from datetime import datetime

import numpy as np

# Other imports
import pandas as pd
from bokeh import plotting
from bokeh.io import output_notebook
from ioos_qc.plotting import bokeh_plot_collected_results

In [2]:
# Install QC library
#!pip install git+git://github.com/ioos/ioos_qc.git

# # Alternative installation (install specific branch):
!pip uninstall -y ioos_qc
!pip install git+git://github.com/ioos/ioos_qc.git@new_configs

# # Alternative installation (run with local updates):
!pip uninstall -y ioos_qc
# import sys
# sys.path.append(str(libdir))

## Configuration

In [3]:
erddap_server = "https://ferret.pmel.noaa.gov/pmel/erddap"
dataset_id = "sd1055"

## Get data from ERDDAP as an xarray object

In [4]:
from erddapy import ERDDAP

e = ERDDAP(
    server=erddap_server,
    protocol="tabledap",
)
e.response = "csv"
e.dataset_id = dataset_id

In [5]:
ds = e.to_xarray()
ds

## Generate a QC configuration for each variable

In [6]:
# Dataset level metadata to drive climatology extraction
min_t = str(ds.time.min().dt.floor("D").dt.strftime("%Y-%m-%d").data)
max_t = str(ds.time.max().dt.ceil("D").dt.strftime("%Y-%m-%d").data)
min_x = float(ds.longitude.min().data)
min_y = float(ds.latitude.min().data)
max_x = float(ds.longitude.max().data)
max_y = float(ds.latitude.max().data)
bbox = [min_x, min_y, max_x, max_y]

In [7]:
time

CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 3.81 µs


In [8]:
# Configure how each variable's config will be generated
default_config = {
    "bbox": bbox,
    "start_time": min_t,
    "end_time": max_t,
    "tests": {
        "spike_test": {"suspect_threshold": "1", "fail_threshold": "2"},
        "gross_range_test": {
            "suspect_min": "min - std * 2",
            "suspect_max": "max + std / 2",
            "fail_min": "mean / std",
            "fail_max": "mean * std",
        },
    },
}

# For any variable name or standard_name you can define a custom config
custom_config = {
    "air_temperature": {"variable": "air"},
    "air_pressure": {"variable": "pres"},
    "relative_humidity": {"variable": "rhum"},
    "sea_water_temperature": {"variable": "temperature"},
    "sea_water_practical_salinity": {"variable": "salinity"},
    "eastward_wind": {"variable": "uwnd"},
    "northward_wind": {"variable": "vwnd"},
}

In [9]:
# Generate climatology configs
from ioos_qc.config_creator import (
    QC_CONFIG_CREATOR_SCHEMA,
    CreatorConfig,
    QcConfigCreator,
    QcVariableConfig,
)

creator_config = {
    "datasets": [
        {
            "name": "ocean_atlas",
            "file_path": "../../../resources/ocean_atlas.nc",
            "variables": {"o2": "o_an", "salinity": "s_an", "temperature": "t_an"},
            "3d": "depth",
        },
        {
            "name": "narr",
            "file_path": "../../../resources/narr.nc",
            "variables": {
                "air": "air",
                "pres": "slp",
                "rhum": "rhum",
                "uwnd": "uwnd",
                "vwnd": "vwnd",
            },
        },
    ]
}
cc = CreatorConfig(creator_config)
qccc = QcConfigCreator(cc)

In [10]:
# Break down variable by standard name
def not_stddev(v):
    return v and not v.endswith(" SD")


# air_temp_vars = ds.filter_by_attrs(long_name=not_stddev, standard_name='air_temperature')
# pressure_vars = ds.filter_by_attrs(long_name=not_stddev, standard_name='air_pressure')
# humidity_vars = ds.filter_by_attrs(long_name=not_stddev, standard_name='relative_humidity')
# water_temp_vars = ds.filter_by_attrs(long_name=not_stddev, standard_name='sea_water_temperature')
# salinity_vars = ds.filter_by_attrs(long_name=not_stddev, standard_name='sea_water_practical_salinity')
# uwind_vars = ds.filter_by_attrs(long_name=not_stddev, standard_name='eastward_wind')
# vwind_vars = ds.filter_by_attrs(long_name=not_stddev, standard_name='northward_wind')
# all_vars = [air_temp_vars, pressure_vars, humidity_vars, water_temp_vars, salinity_vars, uwind_vars, vwind_vars]
# all_vars

air_temp = ["air_temperature"]
pressure = ["air_pressure"]
humidity = ["relative_humidity"]
water_temp = ["sea_water_temperature"]
salt = ["sea_water_practical_salinity"]
u = ["eastward_wind"]
v = ["northward_wind"]

run_tests = air_temp + pressure + humidity + water_temp + salt + u + v
final_config = {}

for v in ds:
    da = ds[v]

    # Don't run tests for unknown variables
    if "standard_name" not in da.attrs or da.attrs["standard_name"] not in run_tests:
        continue

    # The standard names are identical for the mean and the stddev
    # so ignore the stddev version of the variable
    if v.endswith("_STDDEV"):
        continue

    config = default_config.copy()

    min_t = str(da.time.min().dt.floor("D").dt.strftime("%Y-%m-%d").data)
    max_t = str(da.time.max().dt.ceil("D").dt.strftime("%Y-%m-%d").data)
    min_x = float(da.longitude.min().data)
    min_y = float(da.latitude.min().data)
    max_x = float(da.longitude.max().data)
    max_y = float(da.latitude.max().data)
    bbox = [min_x, min_y, max_x, max_y]

    config["bbox"] = bbox
    config["start_time"] = min_t
    config["end_time"] = max_t

    # Allow custom overrides on a variable name basis
    if v in custom_config:
        config.update(custom_config[v])

    # Allow custom overrides on a standard_name name basis
    if da.attrs["standard_name"] in custom_config:
        config.update(custom_config[da.attrs["standard_name"]])

    # Generate the ioos_qc Config object
    qc_var = QcVariableConfig(config)
    qc_config = qccc.create_config(qc_var)

    # Strip off the variable that create_config added
    qc_config = list(qc_config.values())[0]

    # Add it to the final config
    final_config[v] = qc_config

In [11]:
final_config

{'UWND_MEAN': {'qartod': {'spike_test': {'suspect_threshold': 1.0,
    'fail_threshold': 2.0},
   'gross_range_test': {'suspect_span': [-5.5300699248268215,
     0.25323436074757943],
    'fail_span': [-1.5473811397940986, -1.168704748875281]}}},
 'VWND_MEAN': {'qartod': {'spike_test': {'suspect_threshold': 1.0,
    'fail_threshold': 2.0},
   'gross_range_test': {'suspect_span': [-4.139668258934844,
     0.9204235357318391],
    'fail_span': [-0.09592285696581983, -0.05400437780570839]}}},
 'TEMP_AIR_MEAN': {'qartod': {'spike_test': {'suspect_threshold': 1.0,
    'fail_threshold': 2.0},
   'gross_range_test': {'suspect_span': [-13.512635229998683,
     7.08925221886197],
    'fail_span': [0.2729885537903321, 2.9035131901931153]}}},
 'RH_MEAN': {'qartod': {'spike_test': {'suspect_threshold': 1.0,
    'fail_threshold': 2.0},
   'gross_range_test': {'suspect_span': [70.42998901322298, 91.15288960950033],
    'fail_span': [27.659869272320268, 262.69408211067594]}}},
 'BARO_PRES_MEAN': {'qa

In [12]:
from ioos_qc.config import Config
from ioos_qc.results import collect_results
from ioos_qc.stores import NetcdfStore
from ioos_qc.streams import XarrayStream

c = Config(final_config)
xs = XarrayStream(ds, time="time", lat="latitude", lon="longitude")
qc_results = xs.run(c)
list_results = collect_results(qc_results, how="list")
list_results

Could not run "qartod.gross_range_test: Suspect Span(minv=-5.5300699248268215, maxv=0.25323436074757943) must fall within the Fail Span(minv=-1.5473811397940986, maxv=-1.168704748875281)
Could not run "qartod.gross_range_test: Suspect Span(minv=-4.139668258934844, maxv=0.9204235357318391) must fall within the Fail Span(minv=-0.09592285696581983, maxv=-0.05400437780570839)
Could not run "qartod.gross_range_test: Suspect Span(minv=-13.512635229998683, maxv=7.08925221886197) must fall within the Fail Span(minv=0.2729885537903321, maxv=2.9035131901931153)
Could not run "qartod.gross_range_test: Suspect Span(minv=-3.240692452818513, maxv=9.711827661747101) must fall within the Fail Span(minv=2.232390583284701, maxv=6.764526997704617)
Could not run "qartod.gross_range_test: Suspect Span(minv=-3.240692452818513, maxv=9.711827661747101) must fall within the Fail Span(minv=2.232390583284701, maxv=6.764526997704617)


[<CollectedResult stream_id=UWND_MEAN package=qartod test=spike_test>,
 <CollectedResult stream_id=VWND_MEAN package=qartod test=spike_test>,
 <CollectedResult stream_id=TEMP_AIR_MEAN package=qartod test=spike_test>,
 <CollectedResult stream_id=RH_MEAN package=qartod test=spike_test>,
 <CollectedResult stream_id=RH_MEAN package=qartod test=gross_range_test>,
 <CollectedResult stream_id=BARO_PRES_MEAN package=qartod test=spike_test>,
 <CollectedResult stream_id=BARO_PRES_MEAN package=qartod test=gross_range_test>,
 <CollectedResult stream_id=TEMP_SBE37_MEAN package=qartod test=spike_test>,
 <CollectedResult stream_id=SAL_SBE37_MEAN package=qartod test=spike_test>,
 <CollectedResult stream_id=SAL_SBE37_MEAN package=qartod test=gross_range_test>,
 <CollectedResult stream_id=TEMP_CTD_RBR_MEAN package=qartod test=spike_test>]

In [13]:
# output_notebook()
# plot = bokeh_plot_collected_results(list_results)
# plotting.show(plot)