Skip to content

Commit

Permalink
fix #577 improve validate_series (#578)
Browse files: browse the repository at this point in the history
Co-authored-by: Martin Vonk <vonk.mart@gmail.com>
Co-authored-by: Martin Vonk <66305055+martinvonk@users.noreply.github.com>
  • Loading branch information
3 people committed May 11, 2023
1 parent 1938128 commit b5ccbd5
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 62 deletions.
77 changes: 31 additions & 46 deletions doc/benchmarks/autocorrelation.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pastas/modelcompare.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,8 +496,8 @@ def plot_noise(self, axn: str = "res") -> None:
axs = self.axes

for i, ml in enumerate(self.models):
noise = ml.noise()
if noise is not None:
if ml.settings["noise"]:
noise = ml.noise()
axs[axn].plot(
noise.index,
noise.values,
Expand Down
34 changes: 25 additions & 9 deletions pastas/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -595,9 +595,10 @@ def validate_oseries(series: Series):
1. Make sure the values are floats
2. Make sure the index is a DatetimeIndex
3. Make sure the indices are datetime64
4. Make sure the index is monotonically increasing
5. Make sure there are no duplicate indices
6. Make sure the time series has no nan-values
4. Make sure the index has no NaT-values
5. Make sure the index is monotonically increasing
6. Make sure there are no duplicate indices
7. Make sure the time series has no nan-values
If any of these checks fails, the method raises an error that needs
to be fixed by the user.
Expand Down Expand Up @@ -631,6 +632,11 @@ def _validate_series(series: Series, equidistant: bool = True):
if isinstance(series, pd.DataFrame):
if len(series.columns) == 1:
series = series.iloc[:, 0]
elif len(series.columns) > 1:
# helpful specific message for multi-column DataFrames
msg = "DataFrame with multiple columns. Please select one."
logger.error(msg)
raise ValueError(msg)

# 0. Make sure it is a Series and not something else (e.g., DataFrame)
if not isinstance(series, pd.Series):
Expand Down Expand Up @@ -658,7 +664,16 @@ def _validate_series(series: Series, equidistant: bool = True):
logger.error(msg)
raise ValueError(msg)

# 4. Make sure the index is monotonically increasing
# 4. Make sure there are no NaT-values in the index
if series.index.hasnans:
msg = (
f"The index of series {name} contains NaNs. "
"Try to remove these with `series.loc[series.index.dropna()]`."
)
logger.error(msg)
raise ValueError(msg)

# 5. Make sure the index is monotonically increasing
if not series.index.is_monotonic_increasing:
msg = (
f"The time-indices of series {name} are not monotonically increasing. Try "
Expand All @@ -667,25 +682,26 @@ def _validate_series(series: Series, equidistant: bool = True):
logger.error(msg)
raise ValueError(msg)

# 5. Make sure there are no duplicate indices
# 6. Make sure there are no duplicate indices
if not series.index.is_unique:
msg = (
f"duplicate time-indexes were found in the time series {name}. Make sure "
f"there are no duplicate indices. For example by "
f"`grouped = series.groupby(level=0); series = grouped.mean()`"
"there are no duplicate indices. For example by "
"`grouped = series.groupby(level=0); series = grouped.mean()`"
"or `series = series.loc[~series.index.duplicated(keep='first/last')]`"
)
logger.error(msg)
raise ValueError(msg)

# 6. Make sure the time series has no nan-values
# 7. Make sure the time series has no nan-values
if series.hasnans:
msg = (
"The time series %s has nan-values. Pastas will use the fill_nan "
"settings to fill up the nan-values."
)
logger.warning(msg, name)

# 7. Make sure the time series has equidistant time steps
# 8. Make sure the time series has equidistant time steps
if equidistant:
if not pd.infer_freq(series.index):
msg = (
Expand Down
4 changes: 2 additions & 2 deletions pastas/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ class ThresholdTransform:
Parameters
----------
value : float, optional
The starting value above which the simulation is lowered.
The initial value above which the simulation is lowered.
vmin : float, optional
The minimum value above which the simulation is lowered.
vmin : float, optional
vmax : float, optional
The maximum value above which the simulation is lowered.
name: str, optional
Name of the transform.
Expand Down
5 changes: 2 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
requires = ["setuptools>=64.0.0", "wheel"]
build-backend = "setuptools.build_meta"


[project]
name = "pastas"
dynamic = ["version"]
Expand Down Expand Up @@ -54,14 +53,15 @@ formatting = ["isort", "black[jupyter]"]
linting = ["flake8"]
pytesting = ["pytest>=7", "pytest-cov", "pytest-sugar"]
ci = [
"pastas[solvers,pytesting]",
"pastas[pytesting,solvers]",
"jupyter",
"coverage",
"corner",
"emcee",
"tqdm",
]
rtd = [
"pastas[solvers]",
"nbsphinx",
"Ipython",
"ipykernel",
Expand All @@ -75,7 +75,6 @@ rtd = [
dev = ["tox", "pastas[formatting,linting,ci,rtd]"]
numbascipy = ["numba-scipy >= 0.3.1"]


[tool.setuptools.dynamic]
version = { attr = "pastas.version.__version__" }

Expand Down

0 comments on commit b5ccbd5

Please sign in to comment.