Skip to content

Commit

Permalink
Merge pull request #321 from roocs/docstring-linting-cleanup
Browse files Browse the repository at this point in the history
Docstring linting cleanup
  • Loading branch information
Zeitsperre committed Feb 9, 2024
2 parents 640dbc3 + c262b52 commit f46b44f
Show file tree
Hide file tree
Showing 13 changed files with 202 additions and 159 deletions.
9 changes: 7 additions & 2 deletions .pre-commit-config.yaml
Expand Up @@ -39,7 +39,7 @@ repos:
# - id: pydocstyle
# args: ["--convention=numpy"]
- repo: https://github.com/kynan/nbstripout
rev: 0.6.1
rev: 0.7.1
hooks:
- id: nbstripout
files: ".ipynb"
Expand All @@ -48,8 +48,13 @@ repos:
hooks:
- id: blackdoc
additional_dependencies: [ 'black==24.1.1' ]
- repo: https://github.com/codespell-project/codespell
rev: v2.2.6
hooks:
- id: codespell
additional_dependencies: [ 'tomli' ]
- repo: https://github.com/python-jsonschema/check-jsonschema
rev: 0.27.3
rev: 0.28.0
hooks:
- id: check-github-workflows
- id: check-readthedocs
Expand Down
4 changes: 4 additions & 0 deletions HISTORY.rst
Expand Up @@ -11,10 +11,14 @@ Bug Fixes
* Raising KeyError for temporal subsetting by components when no time steps match the selection criteria (#316).
* Coordinate detection for remapping operator via standard_name if detection via cf-xarray fails / is ambiguous (#316).
* Remove encoding settings with regards to compression for string variables to avoid netCDF write errors with newer netcdf-c versions (>4.9.0) (#319).
* Fixed a few docstrings, specified some class methods as static methods (#321).
* Renamed a few internal variables for clarity, rephrased a few sentences for grammar/spelling (#321).

Other Changes
^^^^^^^^^^^^^
* The compression level is capped at 1 to reduce write times (#319).
* Updated pre-commit hooks, pinned linting tools to their pre-commit equivalents (#321).
* Added a pre-commit hook as well as a configuration for `codespell` (#321).

v0.12.2 (2024-01-03)
--------------------
Expand Down
2 changes: 1 addition & 1 deletion clisops/core/regrid.py
Expand Up @@ -373,7 +373,7 @@ def _grid_from_instructor(self, grid_instructor: tuple | float | int):
@require_xesmf
def _grid_from_ds_adaptive(self, ds: xr.Dataset | xr.DataArray):
"""Create Grid of similar extent and resolution of input dataset."""
# TODO: dachar/daops to deal with missing values occuring in the coordinate variables
# TODO: dachar/daops to deal with missing values occurring in the coordinate variables
# while no _FillValue/missing_value attribute is set
# -> FillValues else might get selected as minimum/maximum lat/lon value
# since they are not masked
Expand Down
5 changes: 2 additions & 3 deletions clisops/ops/average.py
Expand Up @@ -51,7 +51,7 @@ def average_over_dims(
split_method: str = "time:auto",
file_namer: str = "standard",
) -> List[Union[xr.Dataset, str]]:
"""
"""Calculate an average over given dimensions.
Parameters
----------
Expand Down Expand Up @@ -126,7 +126,7 @@ def average_shape(
split_method: str = "time:auto",
file_namer: str = "standard",
) -> List[Union[xr.Dataset, str]]:
"""
"""Calculate a spatial average over a given shape.
Parameters
----------
Expand Down Expand Up @@ -158,7 +158,6 @@ def average_shape(
| output_type: "netcdf"
| split_method: "time:auto"
| file_namer: "standard"
"""
op = AverageShape(**locals())
return op.process()
Expand Down
84 changes: 51 additions & 33 deletions clisops/ops/base_operation.py
@@ -1,6 +1,6 @@
from collections import ChainMap
from pathlib import Path
from typing import List, Union
from typing import List, Optional, Union

import xarray as xr
from loguru import logger
Expand All @@ -17,17 +17,16 @@ class Operation:
def __init__(
self,
ds,
file_namer="standard",
split_method="time:auto",
output_dir=None,
output_type="netcdf",
file_namer: str = "standard",
split_method: str = "time:auto",
output_dir: Optional[Union[str, Path]] = None,
output_type: str = "netcdf",
**params,
):
"""
Constructor for each operation.
"""Constructor for each operation.
Sets common input parameters as attributes.
Parameters that are specific to each operation are handled in:
self._resolve_params()
Parameters that are specific to each operation are handled in `self._resolve_params()`
"""
self._file_namer = file_namer
self._split_method = split_method
Expand All @@ -47,7 +46,7 @@ def _resolve_dsets(self, ds):

self.ds = ds

def _resolve_params(self, **params):
def _resolve_params(self, **params) -> None:
"""
Resolve the operation-specific input parameters to `self.params`.
"""
Expand All @@ -62,7 +61,7 @@ def _get_file_namer(self):

def _calculate(self):
"""The `_calculate()` method is implemented within each operation subclass."""
raise NotImplementedError
raise NotImplementedError()

def _remove_str_compression(self, ds):
"""
Expand Down Expand Up @@ -117,19 +116,26 @@ def _cap_deflate_level(self, ds):

return ds

def _remove_redundant_fill_values(self, ds):
"""
Get coordinate and data variables and remove fill values added by xarray
(CF conventions say that coordinate variables cannot have missing values).
@staticmethod
def _remove_redundant_fill_values(ds):
"""Get coordinate and data variables and remove fill values added by xarray.
CF-conventions say that coordinate variables cannot have missing values.
See issue: https://github.com/roocs/clisops/issues/224
See Also
--------
https://github.com/roocs/clisops/issues/224
"""
if isinstance(ds, xr.Dataset):
varlist = list(ds.coords) + list(ds.data_vars)
var_list = list(ds.coords) + list(ds.data_vars)
elif isinstance(ds, xr.DataArray):
varlist = list(ds.coords)
var_list = list(ds.coords)
else:
raise ValueError(
f"Expected xarray.Dataset or xarray.DataArray, got {type(ds)}"
)

for var in varlist:
for var in var_list:
fval = ChainMap(ds[var].attrs, ds[var].encoding).get("_FillValue", None)
mval = ChainMap(ds[var].attrs, ds[var].encoding).get("missing_value", None)
if not fval and not mval:
Expand All @@ -154,30 +160,42 @@ def _remove_redundant_fill_values(self, ds):
)
return ds

def _remove_redundant_coordinates_attr(self, ds):
"""
This method removes the coordinates attribute added by xarray, example:
@staticmethod
def _remove_redundant_coordinates_attr(ds):
"""This method removes the coordinates attribute added by xarray.
double time_bnds(time, bnds) ;
time_bnds:coordinates = "height" ;
Example
-------
.. code-block:: cpp
double time_bnds(time, bnds);
time_bnds:coordinates = "height";
Programs like cdo will complain about this:
Programs like `cdo` will complain about this:
.. code-block:: shell
Warning (cdf_set_var): Inconsistent variable definition for time_bnds!
See issue: https://github.com/roocs/clisops/issues/224
See Also
--------
https://github.com/roocs/clisops/issues/224
"""
if isinstance(ds, xr.Dataset):
varlist = list(ds.coords) + list(ds.data_vars)
var_list = list(ds.coords) + list(ds.data_vars)
elif isinstance(ds, xr.DataArray):
varlist = list(ds.coords)

for var in varlist:
cattr = ChainMap(ds[var].attrs, ds[var].encoding).get("coordinates", None)
if not cattr:
var_list = list(ds.coords)
else:
raise ValueError(
f"Expected xarray.Dataset or xarray.DataArray, got {type(ds)}"
)

for var in var_list:
c_attr = ChainMap(ds[var].attrs, ds[var].encoding).get("coordinates", None)
if not c_attr:
ds[var].encoding["coordinates"] = None
else:
ds[var].encoding["coordinates"] = cattr
ds[var].encoding["coordinates"] = c_attr
ds[var].attrs.pop("coordinates", None)
return ds

Expand Down
53 changes: 28 additions & 25 deletions clisops/ops/regrid.py
Expand Up @@ -21,8 +21,8 @@
class Regrid(Operation):
"""Class for regridding operation, extends clisops.ops.base_operation.Operation."""

@staticmethod
def _get_grid_in(
self,
grid_desc: Union[xr.Dataset, xr.DataArray],
compute_bounds: bool,
):
Expand All @@ -41,11 +41,12 @@ def _get_grid_out(
self,
grid_desc: Union[xr.Dataset, xr.DataArray, int, float, tuple, str],
compute_bounds: bool,
):
"""
Create clisops.core.regrid.Grid object as target grid of the regridding operation.
) -> Grid:
"""Create clisops.core.regrid.Grid object as target grid of the regridding operation.
Returns the Grid object
Returns
-------
Grid
"""
if isinstance(grid_desc, str):
if grid_desc in ["auto", "adaptive"]:
Expand All @@ -62,17 +63,20 @@ def _get_grid_out(
# clisops.core.regrid.Grid will raise the exception
return Grid()

def _get_weights(self, grid_in: Grid, grid_out: Grid, method: str):
"""
Generate the remapping weights using clisops.core.regrid.Weights.
@staticmethod
def _get_weights(grid_in: Grid, grid_out: Grid, method: str):
"""Generate the remapping weights using clisops.core.regrid.Weights.
Returns the Weights object.
Returns
-------
Weights
An instance of the Weights object.
"""
return Weights(grid_in=grid_in, grid_out=grid_out, method=method)

def _resolve_params(self, **params):
def _resolve_params(self, **params) -> None:
"""Generate a dictionary of regrid parameters."""
# all regrid specific paramterers should be passed in via **params
# all regrid specific parameters should be passed in via **params
# this is where we resolve them and set self.params as a dict or as separate attributes
# this would be where you make use of your other methods/ attributes e.g.
# get_grid_in(), get_grid_out() and get_weights() to generate the regridder
Expand Down Expand Up @@ -121,8 +125,8 @@ def _resolve_params(self, **params):
# Input grid / Dataset
self.ds = self.params.get("grid_in").ds

# Theres no __str__() method for the Regridder object, so I used its filename attribute,
# which specifies a default filename (which has but not much to do with the filename we would give the weight file).
# There is no __str__() method for the Regridder object, so I used its filename attribute,
# which specifies a default filename (does not correspond with the filename we would give the weight file).
# todo: Better option might be to have the Weights class extend the Regridder class or to define
# a __str__() method for the Weights class.
logger.debug(
Expand All @@ -133,7 +137,7 @@ def _resolve_params(self, **params):
)
)

def _get_file_namer(self):
def _get_file_namer(self) -> object:
"""Return the appropriate file namer object."""
# "extra" is what will go at the end of the file name before .nc
extra = "_regrid-{}-{}".format(
Expand Down Expand Up @@ -176,20 +180,19 @@ def regrid(
file_namer: Optional[str] = "standard",
keep_attrs: Optional[Union[bool, str]] = True,
) -> List[Union[xr.Dataset, str]]:
"""
Regrid specified input file or xarray object.
"""Regrid specified input file or xarray object.
Parameters
----------
ds: Union[xr.Dataset, str]
method="nearest_s2d",
adaptive_masking_threshold=0.5,
grid="adaptive",
output_dir: Optional[Union[str, Path]] = None
output_type: {"netcdf", "nc", "zarr", "xarray"}
split_method: {"time:auto"}
file_namer: {"standard", "simple"}
keep_attrs: {True, False, "target"}
ds : Union[xr.Dataset, str]
method : {"nearest_s2d", "conservative", "patch", "bilinear"}
adaptive_masking_threshold : Optional[Union[int, float]]
grid : Union[xr.Dataset, xr.DataArray, int, float, tuple, str]
output_dir : Optional[Union[str, Path]] = None
output_type : {"netcdf", "nc", "zarr", "xarray"}
split_method : {"time:auto"}
file_namer : {"standard", "simple"}
keep_attrs : {True, False, "target"}
Returns
-------
Expand Down
2 changes: 1 addition & 1 deletion clisops/utils/common.py
Expand Up @@ -92,7 +92,7 @@ def _list_ten(list1d):
Returns
-------
str
String containing the comma separated 5 first and last elements of the list, with "..." inbetween.
String containing the comma separated 5 first and last elements of the list, with "..." in between.
For example "1, 2, 3, 4, 5 ... , 20, 21, 22, 23, 24, 25".
"""
if len(list1d) < 11:
Expand Down
21 changes: 16 additions & 5 deletions clisops/utils/file_namers.py
@@ -1,3 +1,6 @@
from typing import Optional, Union

import xarray as xr
from roocs_utils.project_utils import get_project_name
from roocs_utils.xarray_utils import xarray_utils as xu

Expand All @@ -7,9 +10,9 @@

def get_file_namer(name):
"""Returns the correct filenamer from the provided name."""
namers = {"standard": StandardFileNamer, "simple": SimpleFileNamer}
file_namer = {"standard": StandardFileNamer, "simple": SimpleFileNamer}

return namers.get(name, StandardFileNamer)
return file_namer.get(name, StandardFileNamer)


class _BaseFileNamer:
Expand Down Expand Up @@ -42,7 +45,8 @@ class StandardFileNamer(SimpleFileNamer):
Generates file names based on input dataset.
"""

def _get_project(self, ds):
@staticmethod
def _get_project(ds):
"""Gets the project name from the input dataset"""

return get_project_name(ds)
Expand Down Expand Up @@ -73,7 +77,13 @@ def _get_template(self, ds):
except KeyError:
return None

def _resolve_derived_attrs(self, ds, attrs, template, fmt=None):
def _resolve_derived_attrs(
self,
ds: Union[xr.DataArray, xr.Dataset],
attrs: dict,
template: dict,
fmt: Optional[str] = None,
) -> None:
"""Finds var_id, time_range and format_extension of dataset and output to generate output file name."""
if "__derive__var_id" in template:
attrs["__derive__var_id"] = xu.get_main_variable(ds)
Expand All @@ -91,7 +101,8 @@ def _resolve_derived_attrs(self, ds, attrs, template, fmt=None):
for key, value in self._replace.items():
attrs[key] = value

def _get_time_range(self, da):
@staticmethod
def _get_time_range(da: Union[xr.DataArray, xr.Dataset]) -> str:
"""Finds the time range of the data in the output."""
try:
times = da.time.values
Expand Down

0 comments on commit f46b44f

Please sign in to comment.