Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix #577 improve validate_series #578

Merged
merged 16 commits into from
May 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 31 additions & 46 deletions doc/benchmarks/autocorrelation.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pastas/modelcompare.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,8 +496,8 @@ def plot_noise(self, axn: str = "res") -> None:
axs = self.axes

for i, ml in enumerate(self.models):
noise = ml.noise()
if noise is not None:
if ml.settings["noise"]:
noise = ml.noise()
axs[axn].plot(
noise.index,
noise.values,
Expand Down
34 changes: 25 additions & 9 deletions pastas/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -595,9 +595,10 @@ def validate_oseries(series: Series):
1. Make sure the values are floats
2. Make sure the index is a DatetimeIndex
3. Make sure the indices are datetime64
4. Make sure the index is monotonically increasing
5. Make sure there are no duplicate indices
6. Make sure the time series has no nan-values
4. Make sure the index has no NaT-values
5. Make sure the index is monotonically increasing
6. Make sure there are no duplicate indices
7. Make sure the time series has no nan-values

If any of these checks are not passed the method will throw an error that needs
to be fixed by the user.
Expand Down Expand Up @@ -631,6 +632,11 @@ def _validate_series(series: Series, equidistant: bool = True):
if isinstance(series, pd.DataFrame):
if len(series.columns) == 1:
series = series.iloc[:, 0]
elif len(series.columns) > 1:
# helpful specific message for multi-column DataFrames
msg = "DataFrame with multiple columns. Please select one."
logger.error(msg)
raise ValueError(msg)

# 0. Make sure it is a Series and not something else (e.g., DataFrame)
if not isinstance(series, pd.Series):
Expand Down Expand Up @@ -658,7 +664,16 @@ def _validate_series(series: Series, equidistant: bool = True):
logger.error(msg)
raise ValueError(msg)

# 4. Make sure the index is monotonically increasing
# 4. Make sure there are no NaT values in the index
if series.index.hasnans:
msg = (
f"The index of series {name} contains NaNs. "
"Try to remove these with `series.loc[series.index.dropna()]`."
)
logger.error(msg)
raise ValueError(msg)

# 5. Make sure the index is monotonically increasing
if not series.index.is_monotonic_increasing:
msg = (
f"The time-indices of series {name} are not monotonically increasing. Try "
Expand All @@ -667,25 +682,26 @@ def _validate_series(series: Series, equidistant: bool = True):
logger.error(msg)
raise ValueError(msg)

# 5. Make sure there are no duplicate indices
# 6. Make sure there are no duplicate indices
if not series.index.is_unique:
msg = (
f"duplicate time-indexes were found in the time series {name}. Make sure "
f"there are no duplicate indices. For example by "
f"`grouped = series.groupby(level=0); series = grouped.mean()`"
"there are no duplicate indices. For example by "
"`grouped = series.groupby(level=0); series = grouped.mean()`"
"or `series = series.loc[~series.index.duplicated(keep='first/last')]`"
)
logger.error(msg)
raise ValueError(msg)

# 6. Make sure the time series has no nan-values
# 7. Make sure the time series has no nan-values
if series.hasnans:
msg = (
"The time series %s has nan-values. Pastas will use the fill_nan "
"settings to fill up the nan-values."
)
logger.warning(msg, name)

# 7. Make sure the time series has equidistant time steps
# 8. Make sure the time series has equidistant time steps
if equidistant:
if not pd.infer_freq(series.index):
msg = (
Expand Down
4 changes: 2 additions & 2 deletions pastas/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ class ThresholdTransform:
Parameters
----------
value : float, optional
The starting value above which the simulation is lowered.
The initial value above which the simulation is lowered.
vmin : float, optional
The minimum value above which the simulation is lowered.
vmin : float, optional
vmax : float, optional
The maximum value above which the simulation is lowered.
name: str, optional
Name of the transform.
Expand Down
5 changes: 2 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
requires = ["setuptools>=64.0.0", "wheel"]
build-backend = "setuptools.build_meta"


[project]
name = "pastas"
dynamic = ["version"]
Expand Down Expand Up @@ -54,14 +53,15 @@ formatting = ["isort", "black[jupyter]"]
linting = ["flake8"]
pytesting = ["pytest>=7", "pytest-cov", "pytest-sugar"]
ci = [
"pastas[solvers,pytesting]",
"pastas[pytesting,solvers]",
"jupyter",
"coverage",
"corner",
"emcee",
"tqdm",
]
rtd = [
"pastas[solvers]",
"nbsphinx",
"Ipython",
"ipykernel",
Expand All @@ -75,7 +75,6 @@ rtd = [
dev = ["tox", "pastas[formatting,linting,ci,rtd]"]
numbascipy = ["numba-scipy >= 0.3.1"]


[tool.setuptools.dynamic]
version = { attr = "pastas.version.__version__" }

Expand Down