In [1]:
from __future__ import annotations

%load_ext jupyter_black

In [3]:
from mesoscaler import ERA5, URMA, Coordinates, Dimensions, IndependentVariables, DependentVariables

This application uses Enums to define Independent (Coordinates and Dimensions) and Dependent Variables (Variables).
In order to process multiple datasets a standard Coordinate and Dimension naming convention is required. Much of the
scheme was taken from the Climate and Forecast Metadata Conventions (CF).

# Coordinates and Dimensions (Independent Variables)
The enums are string subclasses, so each member's `.value` is a true `LiteralString` and the `EnumMember` is a subclass
of that. `Enum.__call__` can be called with the `member.name`, the `member.value`, the member itself, or any of its aliases.


In [4]:
# Show both independent-variable enums, then the attributes of the `vertical` coordinate.
print(
    Coordinates,
    Dimensions,
    f"""\
{Coordinates.vertical.name = }
{Coordinates.vertical.value = }
{Coordinates.vertical.axis = }
{Coordinates.vertical.aliases = }
{Coordinates(Coordinates.vertical.aliases) = }
{Coordinates('height') = }
{Coordinates('level') = }
""",
    sep="\n\n",
)
# Alias lookups, the member itself and the raw string must all compare equal.
assert Coordinates("level") == Coordinates.vertical
assert Coordinates.vertical == Coordinates("height")
assert Coordinates("height") == "vertical"
# The vertical coordinate's axis is the single Z dimension.
assert Coordinates.vertical.axis == (Dimensions.Z,)
# Both enums derive from the shared IndependentVariables base.
assert issubclass(Coordinates, IndependentVariables) and issubclass(Dimensions, IndependentVariables)

Coordinates:
-      time: time
-  vertical: vertical
-  latitude: latitude
- longitude: longitude

Dimensions:
- T: T
- Z: Z
- Y: Y
- X: X

Coordinates.vertical.name = 'vertical'
Coordinates.vertical.value = 'vertical'
Coordinates.vertical.axis = (Z,)
Coordinates.vertical.aliases = ['level', 'height']
Coordinates(Coordinates.vertical.aliases) = [vertical]
Coordinates('height') = vertical
Coordinates('level') = vertical



# ERA5 and URMA (Dependent Variables)

The two datasets used initially are ERA5 and URMA; there are some scripts to download and process the data
into `.zarr` files.















In [5]:
# List the members of both dependent-variable enums.
print(ERA5, URMA, sep="\n")
# Lookup by upper- or lower-case name and by value returns the very same member object,
# and the member compares equal to its string value.
assert ERA5("Z") is ERA5("z") is ERA5.Z is ERA5("geopotential") and ERA5.Z == "geopotential"
assert ERA5("z") is ERA5.Z
assert ERA5("z") == ERA5.Z
# Call syntax, item access and `.loc` are interchangeable for a single member.
assert (
    ERA5("u") is ERA5["U"] is ERA5.loc["U"] is ERA5.U is ERA5("u_component_of_wind")
    and ERA5.U == "u_component_of_wind"
)

# The enum-level set helpers agree with the plain `set` operations on resolved members.
assert set(ERA5).difference(ERA5(["u", "v"])) == ERA5.difference(["u", "v"]) == {ERA5.Q, ERA5.T, ERA5.W, ERA5.Z}
assert set(ERA5).intersection(ERA5(["u", "v"])) == ERA5.intersection(["u", "v"]) == {ERA5.U, ERA5.V}
# Check BOTH dataset enums; the original tuple listed ERA5 twice and never tested URMA.
assert all(issubclass(x, DependentVariables) for x in (ERA5, URMA))

ERA5:
- Z: geopotential
- Q: specific_humidity
- T: temperature
- U: u_component_of_wind
- V: v_component_of_wind
- W: vertical_velocity
URMA:
-    TCC: total_cloud_cover
-   CEIL: ceiling
-    U10: u_wind_component_10m
-    V10: v_wind_component_10m
-   SI10: wind_speed_10m
-   GUST: wind_speed_gust
- WDIR10: wind_direction_10m
-    T2M: temperature_2m
-    D2M: dewpoint_temperature_2m
-    SH2: specific_humidity_2m
-     SP: surface_pressure
-    VIS: visibility
-   OROG: orography


In [None]:
# `.loc` takes either a list of names (unpacked here into two members) or a single name.
u, v = ERA5.loc[["U", "V"]]
z = ERA5.loc["Z"]

(u, v, z)

In [None]:
# Select the members named "u" and "v"; expected to compare equal to {ERA5.U, ERA5.V}.
ERA5.intersection(["u", "v"])

In [None]:
# The crs is loaded lazily: the "crs" metadata key is absent until `ERA5.crs` is first accessed.
assert "crs" not in ERA5.metadata
print(f"{ERA5.crs!r}")
# Accessing the attribute above has now populated the metadata entry.
assert "crs" in ERA5.metadata

In [None]:
# Print the enum-level metadata, one member's metadata and the `__metadata__` attribute, one per line.
print("\n".join(str(view) for view in (ERA5.metadata, ERA5.Z.metadata, ERA5.__metadata__)))

In [None]:
# Exclude t/u/v first by name and then by resolved members, and finally show the resolved members themselves.
print(
    *(
        ERA5.difference(["t", "u", "v"]),
        ERA5.difference(ERA5(["t", "u", "v"])),
        ERA5(["t", "u", "v"]),
    ),
    sep="\n",
)

In [None]:
# Naming every coordinate selects the whole enum.
assert Coordinates.intersection(["vertical", "time", "latitude", "longitude"]) == set(Coordinates)
# Removing every member leaves nothing, via the enum helper or via plain `set.difference`.
assert Coordinates.difference([*Coordinates]) == set()
assert set() == set(Coordinates).difference(iter(Coordinates))
# Dimensions resolves coordinate names too; these three yield the T, Y and X members.
assert Dimensions.intersection(["time", "latitude", "longitude"]) == {Dimensions.T, Dimensions.Y, Dimensions.X}

In [14]:
import mesoscaler as ms
import numpy as np

# NumPy builds a range from timedelta64 bounds without complaint
# (the result is not the cell's last expression, so it is not displayed) ...
np.arange(np.timedelta64(1, "h"), np.timedelta64(5, "h"))
try:
    # ... but plain date strings combined with an integer step raise a TypeError.
    np.arange("2022-01-01", "2022-01-06", 6)
except TypeError:  # the exception object is not needed, so it is not bound to a name
    print('not supported: np.arange("2022-01-01", "2022-01-06", 6)')

# `ms.hours.arange` accepts those same arguments; the recorded output is a
# datetime64[h] array stepping every 6 hours from 2022-01-01T00 to 2022-01-05T18.
time_array = ms.hours.arange("2022-01-01", "2022-01-06", 6)
time_array

not supported: np.arange("2022-01-01", "2022-01-06", 6)


array(['2022-01-01T00', '2022-01-01T06', '2022-01-01T12', '2022-01-01T18',
       '2022-01-02T00', '2022-01-02T06', '2022-01-02T12', '2022-01-02T18',
       '2022-01-03T00', '2022-01-03T06', '2022-01-03T12', '2022-01-03T18',
       '2022-01-04T00', '2022-01-04T06', '2022-01-04T12', '2022-01-04T18',
       '2022-01-05T00', '2022-01-05T06', '2022-01-05T12', '2022-01-05T18'],
      dtype='datetime64[h]')