Skip to content

Commit

Permalink
[ENH] Change GGS to inherit from BaseSeriesAnnotator (#5315)
Browse files Browse the repository at this point in the history
Updates `GreedyGaussianSegmentation` to inherit from
`BaseSeriesAnnotator`, following the `HMM` class as a guide.


#### What should a reviewer concentrate their feedback on?

<!-- This section is particularly useful if you have a pull request that
is still in development. You can guide the reviews to focus on the parts
that are ready for their comments. We suggest using bullets (indicated
by * or -) and filled checkboxes [x] here -->

* Currently, type checking and conversion are done in the `_predict` method.
It would be good to refactor this out into the `BaseSeriesAnnotator`
class, but I think that is a job for another PR.
* This PR makes no attempt to convert the underlying `GGS` class (the adaptee
wrapped by `GreedyGaussianSegmentation`) to inherit from the
`BaseSeriesAnnotator` class. It would be good for this to be done, but
again I think this is a job for another PR.

I had to add the type conversions in the `_predict` method to get
the `annotation` tests to pass locally.
  • Loading branch information
Alex-JG3 committed Nov 17, 2023
1 parent e547c5e commit b031687
Showing 1 changed file with 60 additions and 66 deletions.
126 changes: 60 additions & 66 deletions sktime/annotation/ggs.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,15 @@

import logging
import math
from dataclasses import asdict, dataclass, field
from typing import Dict, List, Tuple
from dataclasses import dataclass, field
from typing import List, Tuple

import numpy as np
import numpy.typing as npt
import pandas as pd
from sklearn.utils.validation import check_random_state

from sktime.base import BaseEstimator
from sktime.annotation.base._base import BaseSeriesAnnotator
from sktime.utils.validation._dependencies import _check_estimator_deps

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -367,7 +368,7 @@ def find_change_points(self, data: npt.ArrayLike) -> List[int]:
return change_points


class GreedyGaussianSegmentation(BaseEstimator):
class GreedyGaussianSegmentation(BaseSeriesAnnotator):
"""Greedy Gaussian Segmentation Estimator.
The method approximates solutions for the problem of breaking a
Expand Down Expand Up @@ -408,10 +409,6 @@ class GreedyGaussianSegmentation(BaseEstimator):
change_points_: array_like, default=[]
Locations of change points as integer indexes. By convention change points
include the identity segmentation, i.e. first and last index + 1 values.
_intermediate_change_points: List[List[int]], default=[]
Intermediate values of change points for each value of k = 1...k_max
_intermediate_ll: List[float], default=[]
Intermediate values for log-likelihood for each value of k = 1...k_max
Notes
-----
Expand All @@ -428,6 +425,8 @@ class GreedyGaussianSegmentation(BaseEstimator):
https://doi.org/10.1007/s11634-018-0335-0
"""

_tags = {"fit_is_empty": True}

def __init__(
self,
k_max: int = 10,
Expand All @@ -444,7 +443,7 @@ def __init__(
self.random_state = random_state

_check_estimator_deps(self)
super().__init__()
super().__init__(fmt="dense", labels="int_label")

self._adaptee = GGS(
k_max=k_max,
Expand All @@ -454,33 +453,47 @@ def __init__(
random_state=random_state,
)

def fit(self, X: npt.ArrayLike, y: npt.ArrayLike = None):
"""Fit method for compatibility with sklearn-type estimator interface.
@property
def _intermediate_change_points(self) -> List[List[int]]:
"""Intermediate values of change points for each value of k = 1...k_max.
Default value is an empty list.
"""
return self._adaptee._intermediate_change_points

It sets the internal state of the estimator and returns the initialized
instance.
@property
def _intermediate_ll(self) -> List[float]:
"""Intermediate values for log-likelihood for each value of k = 1...k_max.
Default value is an empty list.
"""
return self._adaptee._intermediate_ll

def _fit(self, X, Y=None):
"""Fit method for compatibility with sklearn-type estimator interface.
Parameters
----------
X: array_like
2D `array_like` representing time series with sequence index along
the first dimension and value series as columns.
X: array_like (1D or 2D), pd.Series, or pd.DataFrame
1D array of timeseries values, or 2D array with index along the first
dimension and columns representing features of the timeseries. If pd.Series,
the values of the timeseries are the values of the series. If pd.DataFrame,
each column represents a feature of the timeseries.
y: array_like
Placeholder for compatibility with sklearn-api, not used, default=None.
"""
self._adaptee.initialize_intermediates()
return self

def predict(self, X: npt.ArrayLike, y: npt.ArrayLike = None) -> npt.ArrayLike:
def _predict(self, X) -> npt.ArrayLike:
"""Perform segmentation.
Parameters
----------
X: array_like
2D `array_like` representing time series with sequence index along
the first dimension and value series as columns.
y: array_like
Placeholder for compatibility with sklearn-api, not used, default=None.
X: array_like (1D or 2D), pd.Series, or pd.DataFrame
1D array of timeseries values, or 2D array with index along the first
dimension and columns representing features of the timeseries. If pd.Series,
the values of the timeseries are the values of the series. If pd.DataFrame,
each column represents a feature of the timeseries.
Returns
-------
Expand All @@ -489,6 +502,15 @@ def predict(self, X: npt.ArrayLike, y: npt.ArrayLike = None) -> npt.ArrayLike:
dimension of X. The numerical values represent distinct segments
labels for each of the data points.
"""
if isinstance(X, pd.Series):
X = X.values[:, np.newaxis]
elif isinstance(X, pd.DataFrame):
X = X.values
elif len(X.shape) == 1:
X = X[:, np.newaxis]
elif len(X.shape) > 2:
raise ValueError("X must not have more than two dimensions.")
self._adaptee.initialize_intermediates()
self.change_points_ = self._adaptee.find_change_points(X)

labels = np.zeros(X.shape[0], dtype=np.int32)
Expand All @@ -498,16 +520,16 @@ def predict(self, X: npt.ArrayLike, y: npt.ArrayLike = None) -> npt.ArrayLike:
labels[start:stop] = i
return labels

def fit_predict(self, X: npt.ArrayLike, y: npt.ArrayLike = None) -> npt.ArrayLike:
def fit_predict(self, X) -> npt.ArrayLike:
"""Perform segmentation.
Parameters
----------
X: array_like
2D `array_like` representing time series with sequence index along
the first dimension and value series as columns.
y: array_like
Placeholder for compatibility with sklearn-api, not used, default=None.
X: array_like (1D or 2D), pd.Series, or pd.DataFrame
1D array of timeseries values, or 2D array with index along the first
dimension and columns representing features of the timeseries. If pd.Series,
the values of the timeseries are the values of the series. If pd.DataFrame,
each column represents a feature of the timeseries.
Returns
-------
Expand All @@ -516,49 +538,21 @@ def fit_predict(self, X: npt.ArrayLike, y: npt.ArrayLike = None) -> npt.ArrayLik
dimension of X. The numerical values represent distinct segments
labels for each of the data points.
"""
return self.fit(X, y).predict(X, y)
return self.fit(X, None).predict(X)

def get_params(self, deep: bool = True) -> Dict:
"""Return initialization parameters.
@classmethod
def get_test_params(cls, parameter_set="default"):
"""Return testing parameter settings for the estimator.
Parameters
----------
deep: bool
Dummy argument for compatibility with sklearn-api, not used.
parameter_set : str, default="default"
Name of the set of test parameters to return, for use in tests. If no
special parameters are defined for a value, will return `"default"` set.
Returns
-------
params: dict
Dictionary with the estimator's initialization parameters, with
keys being argument names and values being argument values.
params : dict or list of dict
"""
attrs_to_ignore = [
"change_points_",
"_intermediate_change_points",
"_intermediate_ll",
]
params = asdict(self._adaptee)
params = {
key: value for key, value in params.items() if key not in attrs_to_ignore
}
params = {"k_max": 10, "lamb": 1.0}
return params

def set_params(self, **parameters):
"""Set the parameters of this object.
Parameters
----------
parameters : dict
Initialization parameters for th estimator.
Returns
-------
self : reference to self (after parameters have been set)
"""
for key, value in parameters.items():
setattr(self._adaptee, key, value)
return self

def __repr__(self) -> str:
"""Return a string representation of the estimator."""
return self._adaptee.__repr__()

0 comments on commit b031687

Please sign in to comment.