Skip to content

Commit

Permalink
Merge pull request #321 from roocs/docstring-linting-cleanup
Browse files Browse the repository at this point in the history
Docstring linting cleanup
  • Loading branch information
Zeitsperre committed Feb 9, 2024
2 parents 640dbc3 + c262b52 commit f46b44f
Show file tree
Hide file tree
Showing 13 changed files with 202 additions and 159 deletions.
9 changes: 7 additions & 2 deletions .pre-commit-config.yaml
Expand Up @@ -39,7 +39,7 @@ repos:
# - id: pydocstyle
# args: ["--convention=numpy"]
- repo: https://github.com/kynan/nbstripout
rev: 0.6.1
rev: 0.7.1
hooks:
- id: nbstripout
files: ".ipynb"
Expand All @@ -48,8 +48,13 @@ repos:
hooks:
- id: blackdoc
additional_dependencies: [ 'black==24.1.1' ]
- repo: https://github.com/codespell-project/codespell
rev: v2.2.6
hooks:
- id: codespell
additional_dependencies: [ 'tomli' ]
- repo: https://github.com/python-jsonschema/check-jsonschema
rev: 0.27.3
rev: 0.28.0
hooks:
- id: check-github-workflows
- id: check-readthedocs
Expand Down
4 changes: 4 additions & 0 deletions HISTORY.rst
Expand Up @@ -11,10 +11,14 @@ Bug Fixes
* Raising KeyError for temporal subsetting by components when no time steps match the selection criteria (#316).
* Coordinate detection for remapping operator via standard_name if detection via cf-xarray fails / is ambiguous (#316).
* Remove encoding settings with regards to compression for string variables to avoid netCDF write errors with newer netcdf-c versions (>4.9.0) (#319).
* Fixed a few docstrings, specified some class methods as static methods (#321).
* Renamed a few internal variables for clarity, rephrased a few sentences for grammar/spelling (#321).

Other Changes
^^^^^^^^^^^^^
* The compression level is capped at 1 to reduce write times (#319).
* Updated pre-commit hooks, pinned linting tools to their pre-commit equivalents (#321).
* Added a pre-commit hook as well as a configuration for `codespell` (#321).

v0.12.2 (2024-01-03)
--------------------
Expand Down
2 changes: 1 addition & 1 deletion clisops/core/regrid.py
Expand Up @@ -373,7 +373,7 @@ def _grid_from_instructor(self, grid_instructor: tuple | float | int):
@require_xesmf
def _grid_from_ds_adaptive(self, ds: xr.Dataset | xr.DataArray):
"""Create Grid of similar extent and resolution of input dataset."""
# TODO: dachar/daops to deal with missing values occuring in the coordinate variables
# TODO: dachar/daops to deal with missing values occurring in the coordinate variables
# while no _FillValue/missing_value attribute is set
# -> FillValues else might get selected as minimum/maximum lat/lon value
# since they are not masked
Expand Down
5 changes: 2 additions & 3 deletions clisops/ops/average.py
Expand Up @@ -51,7 +51,7 @@ def average_over_dims(
split_method: str = "time:auto",
file_namer: str = "standard",
) -> List[Union[xr.Dataset, str]]:
"""
"""Calculate an average over given dimensions.
Parameters
----------
Expand Down Expand Up @@ -126,7 +126,7 @@ def average_shape(
split_method: str = "time:auto",
file_namer: str = "standard",
) -> List[Union[xr.Dataset, str]]:
"""
"""Calculate a spatial average over a given shape.
Parameters
----------
Expand Down Expand Up @@ -158,7 +158,6 @@ def average_shape(
| output_type: "netcdf"
| split_method: "time:auto"
| file_namer: "standard"
"""
op = AverageShape(**locals())
return op.process()
Expand Down
84 changes: 51 additions & 33 deletions clisops/ops/base_operation.py
@@ -1,6 +1,6 @@
from collections import ChainMap
from pathlib import Path
from typing import List, Union
from typing import List, Optional, Union

import xarray as xr
from loguru import logger
Expand All @@ -17,17 +17,16 @@ class Operation:
def __init__(
self,
ds,
file_namer="standard",
split_method="time:auto",
output_dir=None,
output_type="netcdf",
file_namer: str = "standard",
split_method: str = "time:auto",
output_dir: Optional[Union[str, Path]] = None,
output_type: str = "netcdf",
**params,
):
"""
Constructor for each operation.
"""Constructor for each operation.
Sets common input parameters as attributes.
Parameters that are specific to each operation are handled in:
self._resolve_params()
Parameters that are specific to each operation are handled in `self._resolve_params()`
"""
self._file_namer = file_namer
self._split_method = split_method
Expand All @@ -47,7 +46,7 @@ def _resolve_dsets(self, ds):

self.ds = ds

def _resolve_params(self, **params):
def _resolve_params(self, **params) -> None:
"""
Resolve the operation-specific input parameters to `self.params`.
"""
Expand All @@ -62,7 +61,7 @@ def _get_file_namer(self):

def _calculate(self):
"""The `_calculate()` method is implemented within each operation subclass."""
raise NotImplementedError
raise NotImplementedError()

def _remove_str_compression(self, ds):
"""
Expand Down Expand Up @@ -117,19 +116,26 @@ def _cap_deflate_level(self, ds):

return ds

def _remove_redundant_fill_values(self, ds):
"""
Get coordinate and data variables and remove fill values added by xarray
(CF conventions say that coordinate variables cannot have missing values).
@staticmethod
def _remove_redundant_fill_values(ds):
"""Get coordinate and data variables and remove fill values added by xarray.
CF-conventions say that coordinate variables cannot have missing values.
See issue: https://github.com/roocs/clisops/issues/224
See Also
--------
https://github.com/roocs/clisops/issues/224
"""
if isinstance(ds, xr.Dataset):
varlist = list(ds.coords) + list(ds.data_vars)
var_list = list(ds.coords) + list(ds.data_vars)
elif isinstance(ds, xr.DataArray):
varlist = list(ds.coords)
var_list = list(ds.coords)
else:
raise ValueError(
f"Expected xarray.Dataset or xarray.DataArray, got {type(ds)}"
)

for var in varlist:
for var in var_list:
fval = ChainMap(ds[var].attrs, ds[var].encoding).get("_FillValue", None)
mval = ChainMap(ds[var].attrs, ds[var].encoding).get("missing_value", None)
if not fval and not mval:
Expand All @@ -154,30 +160,42 @@ def _remove_redundant_fill_values(self, ds):
)
return ds

def _remove_redundant_coordinates_attr(self, ds):
"""
This method removes the coordinates attribute added by xarray, example:
@staticmethod
def _remove_redundant_coordinates_attr(ds):
"""This method removes the coordinates attribute added by xarray.
double time_bnds(time, bnds) ;
time_bnds:coordinates = "height" ;
Example
-------
.. code-block:: cpp
double time_bnds(time, bnds);
time_bnds:coordinates = "height";
Programs like cdo will complain about this:
Programs like `cdo` will complain about this:
.. code-block:: shell
Warning (cdf_set_var): Inconsistent variable definition for time_bnds!
See issue: https://github.com/roocs/clisops/issues/224
See Also
--------
https://github.com/roocs/clisops/issues/224
"""
if isinstance(ds, xr.Dataset):
varlist = list(ds.coords) + list(ds.data_vars)
var_list = list(ds.coords) + list(ds.data_vars)
elif isinstance(ds, xr.DataArray):
varlist = list(ds.coords)

for var in varlist:
cattr = ChainMap(ds[var].attrs, ds[var].encoding).get("coordinates", None)
if not cattr:
var_list = list(ds.coords)
else:
raise ValueError(
f"Expected xarray.Dataset or xarray.DataArray, got {type(ds)}"
)

for var in var_list:
c_attr = ChainMap(ds[var].attrs, ds[var].encoding).get("coordinates", None)
if not c_attr:
ds[var].encoding["coordinates"] = None
else:
ds[var].encoding["coordinates"] = cattr
ds[var].encoding["coordinates"] = c_attr
ds[var].attrs.pop("coordinates", None)
return ds

Expand Down
53 changes: 28 additions & 25 deletions clisops/ops/regrid.py
Expand Up @@ -21,8 +21,8 @@
class Regrid(Operation):
"""Class for regridding operation, extends clisops.ops.base_operation.Operation."""

@staticmethod
def _get_grid_in(
self,
grid_desc: Union[xr.Dataset, xr.DataArray],
compute_bounds: bool,
):
Expand All @@ -41,11 +41,12 @@ def _get_grid_out(
self,
grid_desc: Union[xr.Dataset, xr.DataArray, int, float, tuple, str],
compute_bounds: bool,
):
"""
Create clisops.core.regrid.Grid object as target grid of the regridding operation.
) -> Grid:
"""Create clisops.core.regrid.Grid object as target grid of the regridding operation.
Returns the Grid object
Returns
-------
Grid
"""
if isinstance(grid_desc, str):
if grid_desc in ["auto", "adaptive"]:
Expand All @@ -62,17 +63,20 @@ def _get_grid_out(
# clisops.core.regrid.Grid will raise the exception
return Grid()

def _get_weights(self, grid_in: Grid, grid_out: Grid, method: str):
"""
Generate the remapping weights using clisops.core.regrid.Weights.
@staticmethod
def _get_weights(grid_in: Grid, grid_out: Grid, method: str):
"""Generate the remapping weights using clisops.core.regrid.Weights.
Returns the Weights object.
Returns
-------
Weights
An instance of the Weights object.
"""
return Weights(grid_in=grid_in, grid_out=grid_out, method=method)

def _resolve_params(self, **params):
def _resolve_params(self, **params) -> None:
"""Generate a dictionary of regrid parameters."""
# all regrid specific paramterers should be passed in via **params
# all regrid specific parameters should be passed in via **params
# this is where we resolve them and set self.params as a dict or as separate attributes
# this would be where you make use of your other methods/ attributes e.g.
# get_grid_in(), get_grid_out() and get_weights() to generate the regridder
Expand Down Expand Up @@ -121,8 +125,8 @@ def _resolve_params(self, **params):
# Input grid / Dataset
self.ds = self.params.get("grid_in").ds

# Theres no __str__() method for the Regridder object, so I used its filename attribute,
# which specifies a default filename (which has but not much to do with the filename we would give the weight file).
# There is no __str__() method for the Regridder object, so I used its filename attribute,
# which specifies a default filename (does not correspond with the filename we would give the weight file).
# todo: Better option might be to have the Weights class extend the Regridder class or to define
# a __str__() method for the Weights class.
logger.debug(
Expand All @@ -133,7 +137,7 @@ def _resolve_params(self, **params):
)
)

def _get_file_namer(self):
def _get_file_namer(self) -> object:
"""Return the appropriate file namer object."""
# "extra" is what will go at the end of the file name before .nc
extra = "_regrid-{}-{}".format(
Expand Down Expand Up @@ -176,20 +180,19 @@ def regrid(
file_namer: Optional[str] = "standard",
keep_attrs: Optional[Union[bool, str]] = True,
) -> List[Union[xr.Dataset, str]]:
"""
Regrid specified input file or xarray object.
"""Regrid specified input file or xarray object.
Parameters
----------
ds: Union[xr.Dataset, str]
method="nearest_s2d",
adaptive_masking_threshold=0.5,
grid="adaptive",
output_dir: Optional[Union[str, Path]] = None
output_type: {"netcdf", "nc", "zarr", "xarray"}
split_method: {"time:auto"}
file_namer: {"standard", "simple"}
keep_attrs: {True, False, "target"}
ds : Union[xr.Dataset, str]
method : {"nearest_s2d", "conservative", "patch", "bilinear"}
adaptive_masking_threshold : Optional[Union[int, float]]
grid : Union[xr.Dataset, xr.DataArray, int, float, tuple, str]
output_dir : Optional[Union[str, Path]] = None
output_type : {"netcdf", "nc", "zarr", "xarray"}
split_method : {"time:auto"}
file_namer : {"standard", "simple"}
keep_attrs : {True, False, "target"}
Returns
-------
Expand Down
2 changes: 1 addition & 1 deletion clisops/utils/common.py
Expand Up @@ -92,7 +92,7 @@ def _list_ten(list1d):
Returns
-------
str
String containing the comma separated 5 first and last elements of the list, with "..." inbetween.
String containing the comma separated 5 first and last elements of the list, with "..." in between.
For example "1, 2, 3, 4, 5 ... , 20, 21, 22, 23, 24, 25".
"""
if len(list1d) < 11:
Expand Down
21 changes: 16 additions & 5 deletions clisops/utils/file_namers.py
@@ -1,3 +1,6 @@
from typing import Optional, Union

import xarray as xr
from roocs_utils.project_utils import get_project_name
from roocs_utils.xarray_utils import xarray_utils as xu

Expand All @@ -7,9 +10,9 @@

def get_file_namer(name):
"""Returns the correct filenamer from the provided name."""
namers = {"standard": StandardFileNamer, "simple": SimpleFileNamer}
file_namer = {"standard": StandardFileNamer, "simple": SimpleFileNamer}

return namers.get(name, StandardFileNamer)
return file_namer.get(name, StandardFileNamer)


class _BaseFileNamer:
Expand Down Expand Up @@ -42,7 +45,8 @@ class StandardFileNamer(SimpleFileNamer):
Generates file names based on input dataset.
"""

def _get_project(self, ds):
@staticmethod
def _get_project(ds):
"""Gets the project name from the input dataset"""

return get_project_name(ds)
Expand Down Expand Up @@ -73,7 +77,13 @@ def _get_template(self, ds):
except KeyError:
return None

def _resolve_derived_attrs(self, ds, attrs, template, fmt=None):
def _resolve_derived_attrs(
self,
ds: Union[xr.DataArray, xr.Dataset],
attrs: dict,
template: dict,
fmt: Optional[str] = None,
) -> None:
"""Finds var_id, time_range and format_extension of dataset and output to generate output file name."""
if "__derive__var_id" in template:
attrs["__derive__var_id"] = xu.get_main_variable(ds)
Expand All @@ -91,7 +101,8 @@ def _resolve_derived_attrs(self, ds, attrs, template, fmt=None):
for key, value in self._replace.items():
attrs[key] = value

def _get_time_range(self, da):
@staticmethod
def _get_time_range(da: Union[xr.DataArray, xr.Dataset]) -> str:
"""Finds the time range of the data in the output."""
try:
times = da.time.values
Expand Down

0 comments on commit f46b44f

Please sign in to comment.