Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Minimally invasive workflow for xarray transition #275

Draft
wants to merge 11 commits into
base: RC_v1.5.x
Choose a base branch
from
Draft
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
tobac - Tracking and Object-based Analysis of Clouds
======
[![Documentation Status](https://readthedocs.org/projects/tobac/badge/?version=latest)](https://tobac.readthedocs.io/en/latest/?badge=latest)[![Download Counter](https://anaconda.org/conda-forge/tobac/badges/downloads.svg)](https://anaconda.org/conda-forge/tobac/)
[![Release Version](https://img.shields.io/conda/vn/conda-forge/tobac.svg)](https://anaconda.org/conda-forge/tobac)[![Download Counter](https://img.shields.io/conda/dn/conda-forge/tobac.svg)](https://anaconda.org/conda-forge/tobac)[![Documentation Status](https://readthedocs.org/projects/tobac/badge/?version=latest)](https://tobac.readthedocs.io/en/latest/?badge=latest)

What is it?
-----------

*tobac* is a Python package for identifiying, tracking and analysing of clouds and other meteorological phenomena in different types of gridded datasets. *tobac* is unique in its ability to track phenomena using **any** variable on **any** grid, including radar data, satellite observations, and numerical model output. *tobac* has been used in a variety of peer-reviewed [publications](https://tobac.readthedocs.io/en/rc_v1.4.0/publications.html) and is an international, multi-institutional collaboration.
*tobac* is a Python package for identifiying, tracking and analysing of clouds and other meteorological phenomena in different types of gridded datasets. *tobac* is unique in its ability to track phenomena using **any** variable on **any** grid, including radar data, satellite observations, and numerical model output. *tobac* has been used in a variety of peer-reviewed [publications](https://tobac.readthedocs.io/en/latest/publications.html) and is an international, multi-institutional collaboration.

Documentation
-------------
Expand Down
17 changes: 14 additions & 3 deletions tobac/feature_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,14 @@
import pandas as pd
from .utils import internal as internal_utils
from tobac.utils.general import spectral_filtering
from tobac.utils.internal import (
xarray_to_iris,
iris_to_xarray,
xarray_to_irispandas,
irispandas_to_xarray,
)


import warnings


Expand Down Expand Up @@ -266,7 +274,7 @@ def feature_detection_threshold(

Parameters
----------
data_i : iris.cube.Cube
data_i : numpy array
2D field to perform the feature detection (single timestep) on.

i_time : int
Expand Down Expand Up @@ -523,6 +531,7 @@ def feature_detection_threshold(
return features_threshold, regions


@irispandas_to_xarray
def feature_detection_multithreshold_timestep(
data_i,
i_time,
Expand All @@ -548,7 +557,7 @@ def feature_detection_multithreshold_timestep(
Parameters
----------

data_i : iris.cube.Cube
data_i : xarray.Dataset (or for legacy: iris.cube.Cube)
2D field to perform the feature detection (single timestep) on.

threshold : float, optional
Expand Down Expand Up @@ -611,7 +620,8 @@ def feature_detection_multithreshold_timestep(
)

# get actual numpy array
track_data = data_i.core_data()
# track_data = data_i.core_data()
track_data = data_i.data

track_data = gaussian_filter(
track_data, sigma=sigma_threshold
Expand Down Expand Up @@ -713,6 +723,7 @@ def feature_detection_multithreshold_timestep(
return features_thresholds


@xarray_to_iris
def feature_detection_multithreshold(
field_in,
dxy=None,
Expand Down
74 changes: 50 additions & 24 deletions tobac/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,45 +516,71 @@ def make_dataset_from_arr(
"""

import xarray as xr
from iris.cube import Cube
import iris

has_time = time_dim_num is not None

# dimension handling
has_time = time_dim_num is not None
is_3D = z_dim_num is not None
output_arr = xr.DataArray(in_arr)

dims = []
for idim in range(in_arr.ndim):
dims += [f"dim{idim}"]

t_dim_name = "time"

if has_time:
dims[time_dim_num] = t_dim_name

if is_3D:
dims[z_dim_num] = z_dim_name

# coordinates handling
if is_3D:
z_max = in_arr.shape[z_dim_num]
z_coordinate = np.arange(0, z_max)
z_attrs = dict(standard_name=z_dim_name)

if has_time:
time_min = datetime.datetime(2022, 1, 1)
time_num = in_arr.shape[time_dim_num]
time_coordinate = (
pd.date_range(start=time_min, periods=time_num)
.values.astype("datetime64[s]")
.astype(int)
)
time_attrs = dict(
standard_name=t_dim_name,
units="seconds since epoch",
)

# setup data structures
sample_data = Cube( in_arr, )

# out_arr_iris = xr.DataArray(data=in_arr).to_iris()

if is_3D:
sample_data.add_dim_coord(
iris.coords.DimCoord(z_coordinate, **z_attrs),
z_dim_num,
)
if has_time:
sample_data.add_dim_coord(
iris.coords.DimCoord(time_coordinate, **time_attrs),
time_dim_num,
)

if data_type == "xarray":
return output_arr
elif data_type == "iris":
out_arr_iris = output_arr.to_iris()

if is_3D:
out_arr_iris.add_dim_coord(
iris.coords.DimCoord(np.arange(0, z_max), standard_name=z_dim_name),
z_dim_num,
)
if has_time:
out_arr_iris.add_dim_coord(
iris.coords.DimCoord(
pd.date_range(start=time_min, periods=time_num)
.values.astype("datetime64[s]")
.astype(int),
standard_name="time",
units="seconds since epoch",
),
time_dim_num,
)
return out_arr_iris
else:
sample_data = DataArray.from_iris(sample_data)

elif not data_type == "iris":
raise ValueError("data_type must be 'xarray' or 'iris'")


return sample_data


def make_feature_blob(
in_arr,
h1_loc,
Expand Down
51 changes: 33 additions & 18 deletions tobac/tests/test_feature_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,21 @@


@pytest.mark.parametrize(
"test_threshs, n_min_threshold, dxy, wavelength_filtering",
"test_threshs, n_min_threshold, dxy, wavelength_filtering, data_type",
[
([1.5], 2, -1, None),
([1, 1.5, 2], 2, 10000, (100 * 1000, 500 * 1000)),
([1, 2, 1.5], [3, 1, 2], -1, None),
([1, 1.5, 2], {1.5: 2, 1: 3, 2: 1}, -1, None),
([1.5], 2, -1, None, "iris"),
([1.5], 2, -1, None, "xarray"),
([1, 1.5, 2], 2, 10000, (100 * 1000, 500 * 1000), "iris"),
([1, 2, 1.5], [3, 1, 2], -1, None, "iris"),
([1, 1.5, 2], {1.5: 2, 1: 3, 2: 1}, -1, None, "iris"),
],
)
def test_feature_detection_multithreshold_timestep(
test_threshs, n_min_threshold, dxy, wavelength_filtering
test_threshs,
n_min_threshold,
dxy,
wavelength_filtering,
data_type,
):
"""
Tests ```tobac.feature_detection.feature_detection_multithreshold_timestep```
Expand All @@ -40,7 +45,7 @@ def test_feature_detection_multithreshold_timestep(
h2_size=test_hdim_2_sz,
amplitude=test_amp,
)
test_data_iris = tbtest.make_dataset_from_arr(test_data, data_type="iris")
test_data_iris = tbtest.make_dataset_from_arr(test_data, data_type)
fd_output = feat_detect.feature_detection_multithreshold_timestep(
test_data_iris,
0,
Expand Down Expand Up @@ -370,17 +375,26 @@ def test_filter_min_distance(

@pytest.mark.parametrize(
"test_dset_size, vertical_axis_num, "
"vertical_coord_name,"
" vertical_coord_opt, expected_raise",
"vertical_coord_name, "
"vertical_coord_opt, expected_raise, "
"data_type",
[
((1, 20, 30, 40), 1, "altitude", "auto", False),
((1, 20, 30, 40), 2, "altitude", "auto", False),
((1, 20, 30, 40), 3, "altitude", "auto", False),
((1, 20, 30, 40), 1, "air_pressure", "air_pressure", False),
((1, 20, 30, 40), 1, "air_pressure", "auto", True),
((1, 20, 30, 40), 1, "model_level_number", "auto", False),
((1, 20, 30, 40), 1, "altitude", "auto", False),
((1, 20, 30, 40), 1, "geopotential_height", "auto", False),
((1, 20, 30, 40), 1, "altitude", "auto", False, "iris"),
((1, 20, 30, 40), 2, "altitude", "auto", False, "iris"),
((1, 20, 30, 40), 3, "altitude", "auto", False, "iris"),
((1, 20, 30, 40), 1, "air_pressure", "air_pressure", False, "iris"),
((1, 20, 30, 40), 1, "air_pressure", "auto", True, "iris"),
((1, 20, 30, 40), 1, "model_level_number", "auto", False, "iris"),
((1, 20, 30, 40), 1, "altitude", "auto", False, "iris"),
((1, 20, 30, 40), 1, "geopotential_height", "auto", False, "iris"),
((1, 20, 30, 40), 1, "altitude", "auto", False, "xarray"),
((1, 20, 30, 40), 2, "altitude", "auto", False, "xarray"),
((1, 20, 30, 40), 3, "altitude", "auto", False, "xarray"),
((1, 20, 30, 40), 1, "air_pressure", "air_pressure", False, "xarray"),
((1, 20, 30, 40), 1, "air_pressure", "auto", True, "xarray"),
((1, 20, 30, 40), 1, "model_level_number", "auto", False, "xarray"),
((1, 20, 30, 40), 1, "altitude", "auto", False, "xarray"),
((1, 20, 30, 40), 1, "geopotential_height", "auto", False, "xarray"),
],
)
def test_feature_detection_multiple_z_coords(
Expand All @@ -389,6 +403,7 @@ def test_feature_detection_multiple_z_coords(
vertical_coord_name,
vertical_coord_opt,
expected_raise,
data_type,
):
"""Tests ```tobac.feature_detection.feature_detection_multithreshold```
with different axes
Expand Down Expand Up @@ -417,7 +432,7 @@ def test_feature_detection_multiple_z_coords(
test_data[0, 0:5, 0:5, 0:5] = 3
common_dset_opts = {
"in_arr": test_data,
"data_type": "iris",
"data_type": data_type,
"z_dim_name": vertical_coord_name,
}
if vertical_axis_num == 1:
Expand Down