fix #577 improve validate_series (#578)

Co-authored-by: Martin Vonk <vonk.mart@gmail.com>
Co-authored-by: Martin Vonk <66305055+martinvonk@users.noreply.github.com>
pastas · May 11, 2023 · b5ccbd5 · b5ccbd5
1 parent 1938128
commit b5ccbd5
Show file tree

Hide file tree

Showing 5 changed files with 62 additions and 62 deletions.
diff --git a/doc/benchmarks/autocorrelation.ipynb b/doc/benchmarks/autocorrelation.ipynb
diff --git a/pastas/modelcompare.py b/pastas/modelcompare.py
@@ -496,8 +496,8 @@ def plot_noise(self, axn: str = "res") -> None:
             axs = self.axes
 
         for i, ml in enumerate(self.models):
-            noise = ml.noise()
-            if noise is not None:
+            if ml.settings["noise"]:
+                noise = ml.noise()
                 axs[axn].plot(
                     noise.index,
                     noise.values,

diff --git a/pastas/timeseries.py b/pastas/timeseries.py
@@ -595,9 +595,10 @@ def validate_oseries(series: Series):
     1. Make sure the values are floats
     2. Make sure the index is a DatetimeIndex
     3. Make sure the indices are datetime64
-    4. Make sure the index is monotonically increasing
-    5. Make sure there are no duplicate indices
-    6. Make sure the time series has no nan-values
+    4. Make sure the index has no NaT-values
+    5. Make sure the index is monotonically increasing
+    6. Make sure there are no duplicate indices
+    7. Make sure the time series has no nan-values
 
     If any of these checks are not passed the method will throw an error that needs
     to be fixed by the user.
@@ -631,6 +632,11 @@ def _validate_series(series: Series, equidistant: bool = True):
     if isinstance(series, pd.DataFrame):
         if len(series.columns) == 1:
             series = series.iloc[:, 0]
+        elif len(series.columns) > 1:
+            # helpful specific message for multi-column DataFrames
+            msg = "DataFrame with multiple columns. Please select one."
+            logger.error(msg)
+            raise ValueError(msg)
 
     # 0. Make sure it is a Series and not something else (e.g., DataFrame)
     if not isinstance(series, pd.Series):
@@ -658,7 +664,16 @@ def _validate_series(series: Series, equidistant: bool = True):
         logger.error(msg)
         raise ValueError(msg)
 
-    # 4. Make sure the index is monotonically increasing
+    # 4. Make sure there are no NaT in index
+    if series.index.hasnans:
+        msg = (
+            f"The index of series {name} contains NaT-values. "
+            "Try to remove these with `series.loc[series.index.dropna()]`."
+        )
+        logger.error(msg)
+        raise ValueError(msg)
+
+    # 5. Make sure the index is monotonically increasing
     if not series.index.is_monotonic_increasing:
         msg = (
             f"The time-indices of series {name} are not monotonically increasing. Try "
@@ -667,25 +682,26 @@ def _validate_series(series: Series, equidistant: bool = True):
         logger.error(msg)
         raise ValueError(msg)
 
-    # 5. Make sure there are no duplicate indices
+    # 6. Make sure there are no duplicate indices
     if not series.index.is_unique:
         msg = (
             f"duplicate time-indexes were found in the time series {name}. Make sure "
-            f"there are no duplicate indices. For example by "
-            f"`grouped = series.groupby(level=0); series = grouped.mean()`"
+            "there are no duplicate indices. For example by "
+            "`grouped = series.groupby(level=0); series = grouped.mean()`"
+            " or `series = series.loc[~series.index.duplicated(keep='first/last')]`"
         )
         logger.error(msg)
         raise ValueError(msg)
 
-    # 6. Make sure the time series has no nan-values
+    # 7. Make sure the time series has no nan-values
     if series.hasnans:
         msg = (
             "The time series %s has nan-values. Pastas will use the fill_nan "
             "settings to fill up the nan-values."
         )
         logger.warning(msg, name)
 
-    # 7. Make sure the time series has equidistant time steps
+    # 8. Make sure the time series has equidistant time steps
     if equidistant:
         if not pd.infer_freq(series.index):
             msg = (

diff --git a/pastas/transform.py b/pastas/transform.py
@@ -18,10 +18,10 @@ class ThresholdTransform:
     Parameters
     ----------
     value : float, optional
-        The starting value above which the simulation is lowered.
+        The initial starting value above which the simulation is lowered.
     vmin : float, optional
         The minimum value above which the simulation is lowered.
-    vmin : float, optional
+    vmax : float, optional
         The maximum value above which the simulation is lowered.
     name: str, optional
         Name of the transform.

diff --git a/pyproject.toml b/pyproject.toml
@@ -2,7 +2,6 @@
 requires = ["setuptools>=64.0.0", "wheel"]
 build-backend = "setuptools.build_meta"
 
-
 [project]
 name = "pastas"
 dynamic = ["version"]
@@ -54,14 +53,15 @@ formatting = ["isort", "black[jupyter]"]
 linting = ["flake8"]
 pytesting = ["pytest>=7", "pytest-cov", "pytest-sugar"]
 ci = [
-    "pastas[solvers,pytesting]",
+    "pastas[pytesting,solvers]",
     "jupyter",
     "coverage",
     "corner",
     "emcee",
     "tqdm",
 ]
 rtd = [
+    "pastas[solvers]",
     "nbsphinx",
     "Ipython",
     "ipykernel",
@@ -75,7 +75,6 @@ rtd = [
 dev = ["tox", "pastas[formatting,linting,ci,rtd]"]
 numbascipy = ["numba-scipy >= 0.3.1"]
 
-
 [tool.setuptools.dynamic]
 version = { attr = "pastas.version.__version__" }