Skip to content

Commit

Permalink
Make SGI applicable to DataFrame and add Dropna (#760)
Browse files Browse the repository at this point in the history
  • Loading branch information
raoulcollenteur committed May 31, 2024
2 parents 20d03d0 + 31febec commit ff1199b
Showing 1 changed file with 26 additions and 13 deletions.
39 changes: 26 additions & 13 deletions pastas/stats/sgi.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""This module contains methods to compute the Standardized Groundwater Index."""

from numpy import linspace
from pandas import Series
from pandas import DataFrame, Series
from scipy.stats import norm


Expand All @@ -11,22 +11,35 @@ def sgi(series: Series) -> Series:
Parameters
----------
series: pandas.Series
series: pandas.Series or pandas.DataFrame
Returns
-------
sgi_series: pandas.Series
sgi_series: pandas.Series or pandas.DataFrame
Pandas time series of the groundwater levels. Time series index should be a
pandas DatetimeIndex.
Notes
-----
The Standardized Groundwater Index (SGI) is a non-parametric method to
standardize groundwater levels. The SGI is calculated for each month
separately. The SGI is a dimensionless index and is used to compare
groundwater levels across different wells. It is generally recommended to resample
the data to a monthly time series before computing the SGI.
"""
series = series.copy() # Create a copy to ensure series is untouched.

# Loop over the months
for month in range(1, 13):
data = series[series.index.month == month]
n = data.size # Number of observations
pmin = 1 / (2 * n)
pmax = 1 - pmin
sgi_values = norm.ppf(linspace(pmin, pmax, n))
series.loc[data.sort_values().index] = sgi_values
if isinstance(series, DataFrame):
series = series.apply(sgi)
elif isinstance(series, Series):
series = series.dropna().copy() # Create a copy to ensure series is untouched.

# Loop over the months
for month in range(1, 13):
data = series[series.index.month == month]
n = data.size # Number of observations
pmin = 1 / (2 * n)
pmax = 1 - pmin
sgi_values = norm.ppf(linspace(pmin, pmax, n))
series.loc[data.sort_values().index] = sgi_values

return series

0 comments on commit ff1199b

Please sign in to comment.