diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 970a76bc..b95a3a5b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,6 +2,11 @@ default_language_version: python: python3 repos: +- repo: https://github.com/asottile/pyupgrade + rev: v3.3.1 + hooks: + - id: pyupgrade + args: [ '--py38-plus' ] - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.4.0 hooks: @@ -13,7 +18,7 @@ repos: - id: debug-statements - id: mixed-line-ending - repo: https://github.com/psf/black - rev: 23.1.0 + rev: 23.3.0 hooks: - id: black args: ["--target-version", "py38"] @@ -32,20 +37,15 @@ repos: # hooks: # - id: pydocstyle # args: ["--convention=numpy"] -- repo: https://github.com/asottile/pyupgrade - rev: v3.3.1 - hooks: - - id: pyupgrade - args: ['--py38-plus'] -- repo: meta - hooks: - - id: check-hooks-apply - - id: check-useless-excludes - repo: https://github.com/kynan/nbstripout rev: 0.6.1 hooks: - id: nbstripout files: ".ipynb" +- repo: meta + hooks: + - id: check-hooks-apply + - id: check-useless-excludes ci: autofix_commit_msg: | @@ -55,5 +55,5 @@ ci: autoupdate_branch: '' autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate' autoupdate_schedule: weekly - skip: [] + skip: [ ] submodules: false diff --git a/clisops/core/subset.py b/clisops/core/subset.py index 2b08d04e..ff775560 100644 --- a/clisops/core/subset.py +++ b/clisops/core/subset.py @@ -11,6 +11,7 @@ import numpy as np import xarray from packaging import version +from pandas import DataFrame from pandas.api.types import is_integer_dtype # noqa from pyproj import Geod from pyproj.crs import CRS @@ -414,6 +415,8 @@ def func_checker(*args, **kwargs): UserWarning, stacklevel=4, ) + + split_features = dict() split_flag = False for index, feature in poly.iterrows(): if (feature.geometry.bounds[0] < 0) and ( @@ -439,13 +442,19 @@ def func_checker(*args, **kwargs): feat.difference(buffered) for feat in split_polygons.geoms ] - # Cannot assign iterable with `at` (pydata/pandas#26333) so a small hack: - # Load split features into a new GeoDataFrame with WGS84 CRS - split_gdf = gpd.GeoDataFrame( - geometry=[unary_union(buffered_split_polygons)], - crs=CRS(4326), - ) - poly.at[[index], "geometry"] = split_gdf.geometry.values + split_features[index] = [unary_union(buffered_split_polygons)] + + if split_flag: + split_df = DataFrame.from_dict( + split_features, + orient="index", + columns=["geometry"], + ) + split_gdf = gpd.GeoDataFrame(split_df, geometry=split_df.geometry) + poly.update(split_gdf) + + # Set CRS on polygon for correct reprojection + poly = poly.set_crs(CRS(4326)) # Reproject features in WGS84 CSR to use 0 to 360 as longitudinal values wrapped_lons = CRS.from_string( diff --git a/environment.yml b/environment.yml index e9afe3ec..3379f817 100644 --- a/environment.yml +++ b/environment.yml @@ -1,29 +1,27 @@ name: clisops channels: - conda-forge - - defaults dependencies: - python >=3.8 - pip - - bottleneck>=1.3.1,<1.4 - - cf_xarray>=0.7.0 - - cftime>=1.4.1 - - dask>=2.6.0 - - gdal<3.5 - - geopandas>=0.7 - - loguru>=0.5.3 - - netCDF4>=1.4 + - bottleneck >=1.3.1 + - cf_xarray >=0.7.0 + - cftime >=1.4.1 + - dask >=2.6.0 + - gdal >=3.0 + - geopandas >=0.11 + - loguru >=0.5.3 + - netCDF4 >=1.4 - numba # needed for xesmf v0.6.3, see: https://github.com/conda-forge/xesmf-feedstock/pull/24 - - numpy>=1.16 + - numpy >=1.16 - packaging - - pandas>=1.0.3,<1.4 - - poppler>=0.67 - - pygeos>=0.9 - - pyproj>=2.5 + - pandas >=1.0.3 + - poppler >=0.67 + - pyproj >=3.3.0 - pooch - requests>=2.0 - roocs-utils>=0.6.4,<0.7 - - shapely>=1.6 - - sparse>=0.8.0 # needed for xesmf v0.6.3, see: https://github.com/conda-forge/xesmf-feedstock/pull/24 - - xarray>=0.15 - - xesmf>=0.6.2 + - shapely >=1.9 + - sparse >=0.8.0 # needed for xesmf v0.6.3, see: https://github.com/conda-forge/xesmf-feedstock/pull/24 + - xarray >=0.21 + - xesmf >=0.6.3 diff --git a/requirements.txt b/requirements.txt index a036e052..c07a1331 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,18 +1,18 @@ -numpy>=1.16 -xarray>=0.15 -pandas>=1.0.3,<1.4 +bottleneck>=1.3.1 +cf-xarray>=0.7.0 +# cf-xarray @ git+https://github.com/xarray-contrib/cf-xarray/@main#egg=cf-xarray cftime>=1.4.1 -netCDF4>=1.4 -shapely>=1.6 -geopandas>=0.7 dask[complete]>=2.6 +geopandas>=0.11 +loguru>=0.5.3 +netCDF4>=1.4 +numpy>=1.16 packaging -pyproj>=2.5 +pandas>=1.0.3 pooch -cf-xarray>=0.7.0 -#cf-xarray @ git+https://github.com/xarray-contrib/cf-xarray/@main#egg=cf-xarray -bottleneck>=1.3.1 +pyproj>=3.3.0 requests>=2.0 roocs-utils>=0.6.4,<0.7 # roocs-utils @ git+https://github.com/roocs/roocs-utils.git@master#egg=roocs-utils -loguru>=0.5.3 +shapely>=1.9 +xarray>=0.21 diff --git a/requirements_dev.txt b/requirements_dev.txt index 35a811c7..7e81fec3 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,20 +1,20 @@ -pip +gitpython>=3.1.30 +sphinx +black>=23.3.0 bumpversion -wheel -watchdog flake8 -tox~=3.0 -pytest-cov -Sphinx -sphinx-rtd-theme>=1.0 -twine +ipython +jinja2>=2.11 +nbconvert +nbsphinx +pip +pre-commit>=3.0.0 pytest +pytest-cov pytest-loguru>=0.2.0 pytest-runner -pre-commit>=2.9.0 -black>=22.12 -nbsphinx -nbconvert -ipython -jinja2>=2.11 -GitPython==3.1.30 +sphinx-rtd-theme>=1.0 +tox>=4.0 +twine +watchdog +wheel diff --git a/tests/conftest.py b/tests/conftest.py index d8488de8..cd03e26c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -155,22 +155,6 @@ def ndq_series(): ) -# @pytest.fixture -# def per_doy(): -# def _per_doy(values, calendar="standard", units="kg m-2 s-1"): -# n = calendars[calendar] -# if len(values) != n: -# raise ValueError( -# "Values must be same length as number of days in calendar." -# ) -# coords = xr.IndexVariable("dayofyear", np.arange(1, n + 1)) -# return xr.DataArray( -# values, coords=[coords], attrs={"calendar": calendar, "units": units} -# ) -# -# return _per_doy - - @pytest.fixture def areacella(): """Return a rectangular grid of grid cell area.""" @@ -287,7 +271,7 @@ def cmip6_o3(): # Fixture to load mini-esgf-data repository used by roocs tests -@pytest.fixture +@pytest.fixture(scope="session", autouse=True) def load_esgf_test_data(): """ This fixture ensures that the required test data repository diff --git a/tests/core/test_subset.py b/tests/core/test_subset.py index 0e13309e..1f95861b 100644 --- a/tests/core/test_subset.py +++ b/tests/core/test_subset.py @@ -844,8 +844,8 @@ def test_mask_multiregions(self, toggle_pygeos): x_dim=ds.lon, y_dim=ds.lat, poly=regions, wrap_lons=True ) vals, counts = np.unique(mask.values[mask.notnull()], return_counts=True) - assert all(vals == [0, 1, 2]) - assert all(counts == [58, 250, 22]) + np.testing.assert_array_equal(vals, [0, 1, 2]) + np.testing.assert_array_equal(counts, [58, 250, 22]) @pytest.mark.skipif( xesmf is None, reason="xESMF >= 0.6.2 is needed for average_shape." diff --git a/tests/ops/test_subset.py b/tests/ops/test_subset.py index 67d4b1bc..6e66f928 100644 --- a/tests/ops/test_subset.py +++ b/tests/ops/test_subset.py @@ -186,7 +186,7 @@ def test_subset_with_time_and_area(cmip5_tas_file, tmpdir): assert ds.lat.values.tolist() == [35] -def test_subset_4D_data_all_argument_permutations(load_esgf_test_data, tmpdir): +def test_subset_4D_data_all_argument_permutations(tmpdir): """Tests clisops subset function with: - no args (collection only) - time only @@ -253,7 +253,7 @@ def test_subset_4D_data_all_argument_permutations(load_esgf_test_data, tmpdir): assert ds.ta.shape == tuple(expected_shape) -def test_subset_with_multiple_files_tas(load_esgf_test_data, tmpdir): +def test_subset_with_multiple_files_tas(tmpdir): """Tests with multiple tas files""" result = subset( ds=CMIP5_TAS, @@ -266,7 +266,7 @@ def test_subset_with_multiple_files_tas(load_esgf_test_data, tmpdir): _check_output_nc(result) -def test_subset_with_multiple_files_zostoga(load_esgf_test_data, tmpdir): +def test_subset_with_multiple_files_zostoga(tmpdir): """Tests with multiple zostoga files""" result = subset( ds=CMIP5_ZOSTOGA, @@ -278,7 +278,7 @@ def test_subset_with_multiple_files_zostoga(load_esgf_test_data, tmpdir): _check_output_nc(result) -def test_subset_with_multiple_files_rh(load_esgf_test_data, tmpdir): +def test_subset_with_multiple_files_rh(tmpdir): """Tests with multiple rh files""" result = subset( ds=CMIP5_RH, @@ -602,7 +602,7 @@ def test_coord_variables_subsetted_rlat_rlon(): assert np.all(out.lat.values[mask1.values] <= area[3]) -def test_time_invariant_subset_standard_name(load_esgf_test_data, tmpdir): +def test_time_invariant_subset_standard_name(tmpdir): result = subset( ds=CMIP6_MRSOFC, area=(5.0, 10.0, 360.0, 90.0), @@ -614,7 +614,7 @@ def test_time_invariant_subset_standard_name(load_esgf_test_data, tmpdir): _check_output_nc(result, fname="mrsofc_fx_IPSL-CM6A-LR_ssp119_r1i1p1f1_gr.nc") -def test_longitude_and_latitude_coords_only(load_esgf_test_data, tmpdir): +def test_longitude_and_latitude_coords_only(tmpdir): """Test subset suceeds when latitude and longitude are coordinates not dims and are not called lat/lon""" result = subset( @@ -630,7 +630,7 @@ def test_longitude_and_latitude_coords_only(load_esgf_test_data, tmpdir): ) -def test_time_invariant_subset_simple_name(load_esgf_test_data, tmpdir): +def test_time_invariant_subset_simple_name(tmpdir): result = subset( ds=CMIP6_MRSOFC, area=(5.0, 10.0, 360.0, 90.0), diff --git a/tests/ops/test_subset_cordex.py b/tests/ops/test_subset_cordex.py index a3488891..0c53cd8c 100644 --- a/tests/ops/test_subset_cordex.py +++ b/tests/ops/test_subset_cordex.py @@ -11,7 +11,7 @@ ) -def test_subset_cordex_afr(load_esgf_test_data, tmpdir): +def test_subset_cordex_afr(tmpdir): """Test subset on cordex data AFR domain""" result = subset( @@ -29,7 +29,7 @@ def test_subset_cordex_afr(load_esgf_test_data, tmpdir): ) -def test_subset_cordex_nam(load_esgf_test_data, tmpdir): +def test_subset_cordex_nam(tmpdir): """Test subset on cordex data NAM domain""" result = subset( @@ -47,7 +47,7 @@ def test_subset_cordex_nam(load_esgf_test_data, tmpdir): ) -def test_subset_cordex_eur(load_esgf_test_data, tmpdir): +def test_subset_cordex_eur(tmpdir): """Test subset on cordex data EUR domain""" result = subset( @@ -65,7 +65,7 @@ def test_subset_cordex_eur(load_esgf_test_data, tmpdir): ) -def test_subset_cordex_ant(load_esgf_test_data, tmpdir): +def test_subset_cordex_ant(tmpdir): """Test subset on cordex data ANT domain""" result = subset( diff --git a/tests/test_file_namers.py b/tests/test_file_namers.py index 2c36700d..6a33d154 100644 --- a/tests/test_file_namers.py +++ b/tests/test_file_namers.py @@ -31,7 +31,7 @@ def test_SimpleFileNamer_no_fmt(): s.get_file_name(*args) -def test_SimpleFileNamer_with_chunking(load_esgf_test_data, tmpdir): +def test_SimpleFileNamer_with_chunking(tmpdir): start_time, end_time = "2001-01-01T00:00:00", "2200-12-30T00:00:00" area = (0.0, 10.0, 175.0, 90.0) @@ -68,7 +68,7 @@ class Thing: s.get_file_name(mock_ds) -def test_StandardFileNamer_cmip5(load_esgf_test_data): +def test_StandardFileNamer_cmip5(): s = get_file_namer("standard")() _ds = xr.open_mfdataset( diff --git a/tox.ini b/tox.ini index 207b91ea..d390877e 100644 --- a/tox.ini +++ b/tox.ini @@ -1,25 +1,30 @@ [tox] -envlist = py{38,39,310}, black, docs -requires = pip >= 21.0 +min_version = 4.0 +envlist = + py{38,39,310} + black + docs +requires = + pip >= 21.0 opts = -v [testenv:black] skip_install = True basepython = python deps = - flake8 - black + flake8 + black commands = - flake8 clisops tests - black --check --target-version py38 clisops tests --exclude tests/mini-esgf-data + flake8 clisops tests + black --check --target-version py38 clisops tests --exclude tests/mini-esgf-data [testenv:docs] extras = docs deps = commands = - make --directory=docs clean html + make --directory=docs clean html whitelist_externals = - make + make [testenv] setenv =