Skip to content

Commit

Permalink
Merge pull request #28 from nens/casper-warning
Browse files Browse the repository at this point in the history
Suppress warning
  • Loading branch information
arjanverkerk committed Feb 3, 2020
2 parents 81a90e8 + 53275e7 commit abae3fa
Show file tree
Hide file tree
Showing 10 changed files with 45 additions and 18 deletions.
5 changes: 5 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ Changelog of dask-geomodeling
2.2.1 (unreleased)
------------------

- Suppressed "invalid value encountered in greater than" warning in
ClassifyFromColumns.

- Compatibility fixes for pandas 1.0.0.

- Implemented raster.RasterizeWKT


Expand Down
5 changes: 2 additions & 3 deletions dask_geomodeling/geometry/aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,9 +416,8 @@ def process(geom_data, raster_data, process_kwargs):

# if there is a threshold, generate a raster with thresholds
if threshold_name:
thresholds = features.loc[
labels.ravel(), threshold_name
].values.reshape(labels.shape)
thresholds = features[threshold_name].reindex(labels.ravel())\
.values.reshape(labels.shape)
else:
thresholds = None

Expand Down
2 changes: 1 addition & 1 deletion dask_geomodeling/geometry/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ def source(self):
@staticmethod
def process(data, name):
if "features" not in data or name not in data["features"].columns:
return pd.Series([])
return pd.Series([], dtype=float)
return data["features"][name]


Expand Down
11 changes: 6 additions & 5 deletions dask_geomodeling/geometry/field_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ def right(self):
@staticmethod
def process(data, value_column, bin_columns, labels, right):
if "features" not in data or len(data["features"]) == 0:
return pd.Series([])
return pd.Series([], dtype=float)
features = data["features"]
values = features[value_column].values
bins = features[bin_columns].values
Expand All @@ -180,10 +180,11 @@ def process(data, value_column, bin_columns, labels, right):
# Check in which bin every value is. because bins may be different for
# each value, searchsorted is not an option. We assume that bins are
# sorted in increasing order. Checking that would be costly.
if right:
indices = np.sum(values[:, np.newaxis] > bins, axis=1)
else:
indices = np.sum(values[:, np.newaxis] >= bins, axis=1)
with np.errstate(invalid='ignore'): # comparison to NaN is OK here
if right:
indices = np.sum(values[:, np.newaxis] > bins, axis=1)
else:
indices = np.sum(values[:, np.newaxis] >= bins, axis=1)

# If we have e.g. 2 labels and 3 bins, the outside intervals are closed
# any index that is 0 or 3 should become -1 (unclassified).
Expand Down
2 changes: 1 addition & 1 deletion dask_geomodeling/geometry/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def parser(description):
# Align the generated dataframe with the original. Pandas versions
# later than 0.19 have a pd.align that could be used also.
try:
extra_columns_aligned = extra_columns.loc[column.cat.codes]
extra_columns_aligned = extra_columns.reindex(column.cat.codes)
extra_columns_aligned.index = f.index
except KeyError:
extra_columns_aligned = pd.DataFrame([], columns=key_mapping.values())
Expand Down
18 changes: 13 additions & 5 deletions dask_geomodeling/raster/temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import numpy as np
import pandas as pd
import warnings

from dask_geomodeling.utils import (
get_dtype_max,
Expand Down Expand Up @@ -251,7 +252,7 @@ def _ts_to_dt(timestamp, timezone):
timestamp = timestamp.tz_localize(timezone)
except TypeError:
pass
return timestamp.tz_convert("UTC").tz_localize(None).to_pydatetime()
return timestamp.tz_convert("UTC").tz_localize(None).to_pydatetime(warn=False)


def _get_bin_label(dt, frequency, closed, label, timezone):
Expand Down Expand Up @@ -298,7 +299,7 @@ def _get_closest_label(dt, frequency, closed, label, timezone, side="both"):
differences = differences[differences >= pd.Timedelta(0)]
elif side == "left":
differences = differences[differences <= pd.Timedelta(0)]
result = differences.abs().argmin()
result = differences.abs().idxmin()
return _ts_to_dt(result, timezone)


Expand Down Expand Up @@ -578,7 +579,9 @@ def process(process_kwargs, time_data=None, data=None):
times = time_data["time"]

# convert times to a pandas series
series = pd.Series(index=times).tz_localize("UTC").tz_convert(timezone)
series = (
pd.Series(index=times, dtype=float).tz_localize("UTC").tz_convert(timezone)
)

# localize the labels so we can use it as an index
labels = labels.tz_localize("UTC").tz_convert(timezone)
Expand Down Expand Up @@ -632,7 +635,10 @@ def process(process_kwargs, time_data=None, data=None):
inds = indices[timestamp]
if len(inds) == 0:
continue
aggregated = agg_func(values[inds], axis=0)
with warnings.catch_warnings():
# the agg_func could give us 'All-NaN slice encountered'
warnings.simplefilter("ignore", category=RuntimeWarning)
aggregated = agg_func(values[inds], axis=0)
# keep track of NaN or inf values before casting to target dtype
no_data_mask = ~np.isfinite(aggregated)
# cast to target dtype
Expand Down Expand Up @@ -797,7 +803,9 @@ def process(process_kwargs, time_data=None, data=None):
closed = process_kwargs["closed"]
label = process_kwargs["label"]
times = (
pd.Series(index=time_data["time"]).tz_localize("UTC").tz_convert(timezone)
pd.Series(index=time_data["time"], dtype=float)
.tz_localize("UTC")
.tz_convert(timezone)
)

if frequency is None:
Expand Down
6 changes: 5 additions & 1 deletion dask_geomodeling/tests/test_geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

from numpy.testing import assert_almost_equal
from osgeo import ogr
from pandas.util.testing import assert_series_equal
from shapely.geometry import box, Point, Polygon
import geopandas as gpd
import numpy as np
Expand All @@ -30,6 +29,11 @@
from dask_geomodeling.geometry import text
from dask_geomodeling import geometry

try:
from pandas.testing import assert_series_equal
except ImportError:
from pandas.util.testing import assert_series_equal


def create_geojson(abspath, polygons=10, bbox=None, ndim=2, projection="EPSG:4326"):
"""Create random triangle polygons inside bbox"""
Expand Down
5 changes: 4 additions & 1 deletion dask_geomodeling/tests/test_geometry_sinks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

import geopandas as gpd
import pytest
from pandas.util.testing import assert_frame_equal
from shapely.geometry import box

from dask_geomodeling.geometry import parallelize, sinks
Expand All @@ -12,6 +11,10 @@
setup_temp_root,
teardown_temp_root,
)
try:
from pandas.testing import assert_frame_equal
except ImportError:
from pandas.util.testing import assert_frame_equal


class TestGeometryFileSink(unittest.TestCase):
Expand Down
2 changes: 1 addition & 1 deletion dask_geomodeling/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -610,7 +610,7 @@ def rasterize_geoseries(geoseries, bbox, projection, height, width, values=None)
values = None # discard values
elif str(values.dtype) == "category":
# transform pandas Categorical dtype to normal dtype
values = pd.Series(values.get_values(), index=values.index)
values = pd.Series(np.asarray(values), index=values.index)

if values is not None:
if np.issubdtype(values.dtype, np.floating):
Expand Down
7 changes: 7 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[tool:pytest]
filterwarnings =
error:::dask_geomodeling[.*]
error:::numpy[.*]
error:::scipy[.*]
error:::pandas[.*]
error:::dask[.*]

0 comments on commit abae3fa

Please sign in to comment.