# Unit Validation with Pint

This example demonstrates the three types of unit validation available in xarray-validate:

1. **`value`**: Exact string match (no unit parsing) - use for CF Convention time units or non-physical units
2. **`units`**: Exact unit match (tolerates abbreviations/spellings) - use for specific units
3. **`units_compatible`**: Compatible units (allows conversions) - most flexible

The schema has been streamlined so that each validation pattern is covered exactly once.

In [None]:
# Basic imports
import numpy as np
import xarray as xr

from xarray_validate import DatasetSchema, SchemaError

# Load the dataset schema from a YAML file.
# NOTE(review): "schema.yaml" is a relative path, presumably resolved against
# the current working directory - run this from the example's folder; confirm.
schema = DatasetSchema.from_yaml("schema.yaml")

## Create a Dataset Matching the Schema

We'll create a dataset with the exact units specified in the schema.

In [None]:
print("Creating scientific dataset with unit validation...\n")

# Grid size along each dimension
n_time = 10
n_lat = 5
n_lon = 8

# Seeded generator: the example produces the same synthetic data on every run
rng = np.random.default_rng(42)

# Dimension names and shape shared by all three data variables
grid_dims = ("time", "lat", "lon")
grid_shape = (n_time, n_lat, n_lon)

# Create a Dataset that matches the schema
ds = xr.Dataset(
    data_vars={
        # Pattern 1: Exact string match
        "temperature": (
            grid_dims,
            # ~20 degC expressed in kelvin, with float32 Gaussian noise
            273.15 + 20.0 + 5.0 * rng.standard_normal(grid_shape, dtype=np.float32),
            {
                "long_name": "Air Temperature",
                "units": "kelvin",  # Must be exactly "kelvin"
            },
        ),
        # Pattern 2: Exact unit match
        "pressure": (
            grid_dims,
            101325.0 + 1000.0 * rng.standard_normal(grid_shape, dtype=np.float32),
            {
                "long_name": "Atmospheric Pressure",
                "units": "Pa",  # Will accept Pa, pascal, pascals
            },
        ),
        # Pattern 3: Compatible units
        "solar_radiation": (
            grid_dims,
            800.0 + 100.0 * rng.standard_normal(grid_shape, dtype=np.float32),
            {
                "long_name": "Solar Radiation",
                "units": "W/m^2",  # Schema expects watt/meter**2-compatible
            },
        ),
    },
    coords={
        # Exact string match - CF Convention time reference
        "time": (
            "time",
            np.arange(n_time, dtype=np.int64),
            {
                "long_name": "Time",
                "units": "days since 2024-01-01",  # Must be exactly this string
                "calendar": "gregorian",
            },
        ),
        # Exact unit - latitude
        "lat": (
            "lat",
            np.linspace(-40, 40, n_lat, dtype=np.float32),
            {
                "long_name": "Latitude",
                "units": "degrees",  # Schema expects degree (accepts degrees/deg)
            },
        ),
        # Exact unit - longitude
        "lon": (
            "lon",
            np.linspace(-80, 80, n_lon, dtype=np.float32),
            {
                "long_name": "Longitude",
                "units": "deg",  # Schema expects degree (accepts degrees/deg)
            },
        ),
    },
    attrs={
        "title": "Unit Validation Example Dataset",
        "Conventions": "CF-1.8",
    },
)

print("Dataset created successfully!")
print(f"Variables: {list(ds.data_vars)}")
print(f"Coordinates: {list(ds.coords)}\n")

In [None]:
# Check the dataset against the schema and report the outcome
try:
    schema.validate(ds)
except SchemaError as err:
    # Report the schema violation instead of aborting the notebook
    print(f"Validation failed: {err}\n")
else:
    print("Validation succeeded")

In [None]:
# Demonstrate that alternative spellings and compatible units also validate
# (the data values are not converted; only the "units" attributes change)

# Start from a deep copy of the original dataset
ds_alternative = ds.copy(deep=True)

# Pattern 1 (value): Cannot change - must be exactly "kelvin"
# No change to make

# Pattern 2 (units): Can use different spelling
ds_alternative["pressure"].attrs["units"] = "pascals"  # Alternative spelling

# Pattern 3 (units_compatible): Can use completely different compatible units
ds_alternative["solar_radiation"].attrs["units"] = "kW/cm²"  # Different units

# Validate with alternative units
try:
    schema.validate(ds_alternative)
    print("Validation succeeded")
except SchemaError as e:
    print(f"Validation failed: {e}\n")