From 7c4849413a7712c4bfba0bd4224e76987a93dd66 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Tue, 11 Jul 2023 01:07:55 -0400 Subject: [PATCH 1/5] add test function --- tests/test_stimp.py | 79 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/tests/test_stimp.py b/tests/test_stimp.py index 0793f2f3c..eddee2c84 100644 --- a/tests/test_stimp.py +++ b/tests/test_stimp.py @@ -1,3 +1,5 @@ +import functools + import naive import numpy as np import numpy.testing as npt @@ -223,3 +225,80 @@ def test_stimped(T, dask_cluster): naive.replace_inf(cmp_pan) npt.assert_almost_equal(ref_pan, cmp_pan) + + +def test_stimp_1_percent_with_isconstant(): + T = np.random.uniform(-1, 1, [64]) + isconstant_func = functools.partial( + naive.isconstant_func_stddev_threshold, stddev_threshold=0.5 + ) + + threshold = 0.2 + percentage = 0.01 + min_m = 3 + n = T.shape[0] - min_m + 1 + + seed = np.random.randint(100000) + + np.random.seed(seed) + pan = stimp( + T, + min_m=min_m, + max_m=None, + step=1, + percentage=percentage, + pre_scrump=True, + # normalize=True, + T_subseq_isconstant_func=isconstant_func, + ) + + for i in range(n): + pan.update() + + ref_PAN = np.full((pan.M_.shape[0], T.shape[0]), fill_value=np.inf) + + np.random.seed(seed) + for idx, m in enumerate(pan.M_[:n]): + zone = int(np.ceil(m / 4)) + s = zone + tmp_P, tmp_I = naive.prescrump( + T, + m, + T, + s=s, + exclusion_zone=zone, + T_A_subseq_isconstant=isconstant_func, + T_B_subseq_isconstant=isconstant_func, + ) + ref_P, ref_I, _, _ = naive.scrump( + T, + m, + T, + percentage, + zone, + True, + s, + T_A_subseq_isconstant=isconstant_func, + T_B_subseq_isconstant=isconstant_func, + ) + naive.merge_topk_PI(ref_P, tmp_P, ref_I, tmp_I) + ref_PAN[pan._bfs_indices[idx], : ref_P.shape[0]] = ref_P + + # Compare raw pan + cmp_PAN = pan._PAN + + naive.replace_inf(ref_PAN) + naive.replace_inf(cmp_PAN) + + npt.assert_almost_equal(ref_PAN, cmp_PAN) + + # Compare transformed pan + cmp_pan = 
pan.PAN_ + ref_pan = naive.transform_pan( + pan._PAN, pan._M, threshold, pan._bfs_indices, pan._n_processed + ) + + naive.replace_inf(ref_pan) + naive.replace_inf(cmp_pan) + + npt.assert_almost_equal(ref_pan, cmp_pan) From 132d1a4ee248207278d3ea5619b925f625bb71eb Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Tue, 11 Jul 2023 01:08:36 -0400 Subject: [PATCH 2/5] add param isconstant to stimp --- stumpy/stimp.py | 69 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/stumpy/stimp.py b/stumpy/stimp.py index 0d40a93ae..7c5dab2c5 100644 --- a/stumpy/stimp.py +++ b/stumpy/stimp.py @@ -87,6 +87,14 @@ class _stimp: mp_func : function, default stump The matrix profile function to use when `percentage = 1.0` + T_subseq_isconstant_func : function, default None + A custom, user-defined function that returns a boolean array that indicates + whether a subsequence in `T` is constant (True). The function must only take + two arguments, `a`, a 1-D array, and `w`, the window size, while additional + arguments may be specified by currying the user-defined function using + `functools.partial`. Any subsequence with at least one np.nan/np.inf will + automatically have its corresponding value set to False in this boolean array. + Attributes ---------- PAN_ : numpy.ndarray @@ -122,6 +130,7 @@ def __init__( client=None, device_id=None, mp_func=stump, + T_subseq_isconstant_func=None, ): """ Initialize the `stimp` object and compute the Pan Matrix Profile @@ -167,6 +176,15 @@ def __init__( mp_func : function, default stump The matrix profile function to use when `percentage = 1.0` + + T_subseq_isconstant_func : function, default None + A custom, user-defined function that returns a boolean array that indicates + whether a subsequence in `T` is constant (True). 
The function must only take + two arguments, `a`, a 1-D array, and `w`, the window size, while additional + arguments may be specified by currying the user-defined function using + `functools.partial`. Any subsequence with at least one np.nan/np.inf will + automatically have its corresponding value set to False in this boolean + array. """ self._T = T.copy() if max_m is None: @@ -189,6 +207,16 @@ def __init__( mp_func, client=client, device_id=device_id ) + if T_subseq_isconstant_func is None: + T_subseq_isconstant_func = core._rolling_isconstant + if not callable(T_subseq_isconstant_func): # pragma: no cover + msg = ( + "`T_subseq_isconstant_func` was expected to be a callable function " + + f"but {type(T_subseq_isconstant_func)} was found." + ) + raise ValueError(msg) + self._T_subseq_isconstant_func = T_subseq_isconstant_func + self._PAN = np.full( (self._M.shape[0], self._T.shape[0]), fill_value=np.inf, dtype=np.float64 ) @@ -223,6 +251,7 @@ def update(self): percentage=self._percentage, pre_scrump=self._pre_scrump, k=1, + T_A_subseq_isconstant=self._T_subseq_isconstant_func, ) approx.update() self._PAN[ @@ -233,6 +262,7 @@ def update(self): self._T, m, ignore_trivial=True, + T_A_subseq_isconstant=self._T_subseq_isconstant_func, ) self._PAN[ self._bfs_indices[self._n_processed], : out[:, 0].shape[0] @@ -393,6 +423,14 @@ class stimp(_stimp): The p-norm to apply for computing the Minkowski distance. This parameter is ignored when `normalize == True`. + T_subseq_isconstant_func : function, default None + A custom, user-defined function that returns a boolean array that indicates + whether a subsequence in `T` is constant (True). The function must only take + two arguments, `a`, a 1-D array, and `w`, the window size, while additional + arguments may be specified by currying the user-defined function using + `functools.partial`. Any subsequence with at least one np.nan/np.inf will + automatically have its corresponding value set to False in this boolean array. 
+ Attributes ---------- PAN_ : numpy.ndarray @@ -442,6 +480,7 @@ def __init__( pre_scrump=True, normalize=True, p=2.0, + T_subseq_isconstant_func=None, ): """ Initialize the `stimp` object and compute the Pan Matrix Profile @@ -483,6 +522,15 @@ def __init__( p : float, default 2.0 The p-norm to apply for computing the Minkowski distance. This parameter is ignored when `normalize == True`. + + T_subseq_isconstant_func : function, default None + A custom, user-defined function that returns a boolean array that indicates + whether a subsequence in `T` is constant (True). The function must only take + two arguments, `a`, a 1-D array, and `w`, the window size, while additional + arguments may be specified by currying the user-defined function using + `functools.partial`. Any subsequence with at least one np.nan/np.inf will + automatically have its corresponding value set to False in this boolean + array. """ super().__init__( T, @@ -492,6 +540,7 @@ def __init__( percentage=percentage, pre_scrump=pre_scrump, mp_func=stump, + T_subseq_isconstant_func=T_subseq_isconstant_func, ) @@ -537,6 +586,15 @@ class stimped(_stimp): The p-norm to apply for computing the Minkowski distance. This parameter is ignored when `normalize == True`. + T_subseq_isconstant_func : function, default None + A custom, user-defined function that returns a boolean array that indicates + whether a subsequence in `T` is constant (True). The function must only take + two arguments, `a`, a 1-D array, and `w`, the window size, while additional + arguments may be specified by currying the user-defined function using + `functools.partial`. Any subsequence with at least one np.nan/np.inf will + automatically have its corresponding value set to False in this boolean + array. 
+ Attributes ---------- PAN_ : numpy.ndarray @@ -590,6 +648,7 @@ def __init__( step=1, normalize=True, p=2.0, + T_subseq_isconstant_func=None, ): """ Initialize the `stimp` object and compute the Pan Matrix Profile @@ -625,6 +684,15 @@ def __init__( p : float, default 2.0 The p-norm to apply for computing the Minkowski distance. This parameter is ignored when `normalize == True`. + + T_subseq_isconstant_func : function, default None + A custom, user-defined function that returns a boolean array that indicates + whether a subsequence in `T` is constant (True). The function must only take + two arguments, `a`, a 1-D array, and `w`, the window size, while additional + arguments may be specified by currying the user-defined function using + `functools.partial`. Any subsequence with at least one np.nan/np.inf will + automatically have its corresponding value set to False in this boolean + array. """ super().__init__( T, @@ -635,4 +703,5 @@ def __init__( pre_scrump=False, client=client, mp_func=stumped, + T_subseq_isconstant_func=T_subseq_isconstant_func, ) From c4852388d499c166f2a573300774bffd404dbb6d Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Tue, 11 Jul 2023 01:20:54 -0400 Subject: [PATCH 3/5] add test function for stimped --- tests/test_stimp.py | 58 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/tests/test_stimp.py b/tests/test_stimp.py index eddee2c84..258d03737 100644 --- a/tests/test_stimp.py +++ b/tests/test_stimp.py @@ -302,3 +302,61 @@ def test_stimp_1_percent_with_isconstant(): naive.replace_inf(cmp_pan) npt.assert_almost_equal(ref_pan, cmp_pan) + + +@pytest.mark.filterwarnings("ignore:\\s+Port 8787 is already in use:UserWarning") +def test_stimped_with_isconstant(dask_cluster): + T = np.random.uniform(-1, 1, [64]) + isconstant_func = functools.partial( + naive.isconstant_func_stddev_threshold, stddev_threshold=0.5 + ) + + with Client(dask_cluster) as dask_client: + threshold = 0.2 + min_m = 3 + n = 
T.shape[0] - min_m + 1 + + pan = stimped( + dask_client, + T, + min_m=min_m, + max_m=None, + step=1, + # normalize=True, + T_subseq_isconstant_func=isconstant_func, + ) + + for i in range(n): + pan.update() + + ref_PAN = np.full((pan.M_.shape[0], T.shape[0]), fill_value=np.inf) + + for idx, m in enumerate(pan.M_[:n]): + zone = int(np.ceil(m / 4)) + ref_mp = naive.stump( + T, + m, + T_B=None, + exclusion_zone=zone, + T_A_subseq_isconstant=isconstant_func, + ) + ref_PAN[pan._bfs_indices[idx], : ref_mp.shape[0]] = ref_mp[:, 0] + + # Compare raw pan + cmp_PAN = pan._PAN + + naive.replace_inf(ref_PAN) + naive.replace_inf(cmp_PAN) + + npt.assert_almost_equal(ref_PAN, cmp_PAN) + + # Compare transformed pan + cmp_pan = pan.PAN_ + ref_pan = naive.transform_pan( + pan._PAN, pan._M, threshold, pan._bfs_indices, pan._n_processed + ) + + naive.replace_inf(ref_pan) + naive.replace_inf(cmp_pan) + + npt.assert_almost_equal(ref_pan, cmp_pan) From 59d185e5f2d919ae74ffb04bb7d1cb180c7f8675 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Tue, 11 Jul 2023 01:25:29 -0400 Subject: [PATCH 4/5] minor fixes --- stumpy/stimp.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/stumpy/stimp.py b/stumpy/stimp.py index 7c5dab2c5..820b971a4 100644 --- a/stumpy/stimp.py +++ b/stumpy/stimp.py @@ -50,16 +50,16 @@ class _stimp: T : numpy.ndarray The time series or sequence for which to compute the pan matrix profile - m_start : int, default 3 + min_m : int, default 3 The starting (or minimum) subsequence window size for which a matrix profile may be computed - m_stop : int, default None + max_m : int, default None The stopping (or maximum) subsequence window size for which a matrix profile - may be computed. When `m_stop = Non`, this is set to the maximum allowable + may be computed. 
When `max_m = None`, this is set to the maximum allowable subsequence window size - m_step : int, default 1 + step : int, default 1 The step between subsequence window sizes percentage : float, default 0.01 @@ -391,16 +391,16 @@ class stimp(_stimp): T : numpy.ndarray The time series or sequence for which to compute the pan matrix profile - m_start : int, default 3 + min_m : int, default 3 The starting (or minimum) subsequence window size for which a matrix profile may be computed - m_stop : int, default None + max_m : int, default None The stopping (or maximum) subsequence window size for which a matrix profile - may be computed. When `m_stop = Non`, this is set to the maximum allowable + may be computed. When `max_m = None`, this is set to the maximum allowable subsequence window size - m_step : int, default 1 + step : int, default 1 The step between subsequence window sizes percentage : float, default 0.01 @@ -565,16 +565,16 @@ class stimped(_stimp): T : numpy.ndarray The time series or sequence for which to compute the pan matrix profile - m_start : int, default 3 + min_m : int, default 3 The starting (or minimum) subsequence window size for which a matrix profile may be computed - m_stop : int, default None + max_m : int, default None The stopping (or maximum) subsequence window size for which a matrix profile - may be computed. When `m_stop = Non`, this is set to the maximum allowable + may be computed. 
When `max_m = None`, this is set to the maximum allowable subsequence window size - m_step : int, default 1 + step : int, default 1 The step between subsequence window sizes normalize : bool, default True From eac1b9065ed97ff2a367385052d9bab3a76374d1 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Tue, 11 Jul 2023 06:31:39 -0400 Subject: [PATCH 5/5] fix non-normalized --- stumpy/stimp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stumpy/stimp.py b/stumpy/stimp.py index 820b971a4..75a5d43ed 100644 --- a/stumpy/stimp.py +++ b/stumpy/stimp.py @@ -377,7 +377,7 @@ def M_(self): @core.non_normalized( aamp_stimp, - exclude=["pre_scrump", "normalize", "p", "pre_scraamp"], + exclude=["pre_scrump", "normalize", "p", "T_subseq_isconstant_func", "pre_scraamp"], replace={"pre_scrump": "pre_scraamp"}, ) class stimp(_stimp): @@ -546,7 +546,7 @@ def __init__( @core.non_normalized( aamp_stimped, - exclude=["pre_scrump", "normalize", "p", "pre_scraamp"], + exclude=["pre_scrump", "normalize", "p", "T_subseq_isconstant_func", "pre_scraamp"], replace={"pre_scrump": "pre_scraamp"}, ) class stimped(_stimp):