From efb6f73a06f2cb80271e0d5c7b892a469b24117a Mon Sep 17 00:00:00 2001 From: Sean Law Date: Mon, 7 Mar 2022 21:38:21 -0500 Subject: [PATCH 1/2] Made excl_zone optional in _multi_distance_profile funcs --- stumpy/maamp.py | 15 ++++++++------- stumpy/mstump.py | 25 ++++++++++++++++++------- tests/naive.py | 3 --- 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/stumpy/maamp.py b/stumpy/maamp.py index a00980c3a..c2fc91ae1 100644 --- a/stumpy/maamp.py +++ b/stumpy/maamp.py @@ -303,11 +303,11 @@ def _maamp_multi_distance_profile( T_A, T_B, m, - excl_zone, T_B_subseq_isfinite, p=2.0, include=None, discords=False, + excl_zone=None, ): """ Multi-dimensional wrapper to compute the multi-dimensional non-normalized (i.e., @@ -330,9 +330,6 @@ def _maamp_multi_distance_profile( m : int Window size - excl_zone : int - The half width for the exclusion zone relative to the `query_idx`. - T_B_subseq_isfinite : numpy.ndarray A boolean array that indicates whether a subsequence in `T_B` contains a `np.nan`/`np.inf` value (False) @@ -352,6 +349,9 @@ def _maamp_multi_distance_profile( When set to `True`, this reverses the distance profile to favor discords rather than motifs. Note that indices in `include` are still maintained and respected. + excl_zone : int, default None + The half width for the exclusion zone relative to the `query_idx`. 
+ Returns ------- D : numpy.ndarray @@ -384,7 +384,8 @@ def _maamp_multi_distance_profile( D_prime[:] = D_prime + D[i] D[i, :] = D_prime / (i + 1) - core.apply_exclusion_zone(D, query_idx, excl_zone, np.inf) + if excl_zone is not None: + core.apply_exclusion_zone(D, query_idx, excl_zone, np.inf) return D @@ -444,7 +445,7 @@ def maamp_multi_distance_profile(query_idx, T, m, include=None, discords=False, ) # See Definition 3 and Figure 3 D = _maamp_multi_distance_profile( - query_idx, T, T, m, excl_zone, T_subseq_isfinite, p, include, discords + query_idx, T, T, m, T_subseq_isfinite, p, include, discords, excl_zone ) return D @@ -520,7 +521,7 @@ def _get_first_maamp_profile( equal to `start` """ D = _maamp_multi_distance_profile( - start, T_A, T_B, m, excl_zone, T_B_subseq_isfinite, p, include, discords + start, T_A, T_B, m, T_B_subseq_isfinite, p, include, discords, excl_zone ) d = T_A.shape[0] diff --git a/stumpy/mstump.py b/stumpy/mstump.py index 3e91bc07b..6a11e9944 100644 --- a/stumpy/mstump.py +++ b/stumpy/mstump.py @@ -543,7 +543,17 @@ def mdl( def _multi_distance_profile( - query_idx, T_A, T_B, m, excl_zone, M_T, Σ_T, μ_Q, σ_Q, include=None, discords=False + query_idx, + T_A, + T_B, + m, + M_T, + Σ_T, + μ_Q, + σ_Q, + include=None, + discords=False, + excl_zone=None, ): """ Multi-dimensional wrapper to compute the multi-dimensional distance profile for a @@ -565,9 +575,6 @@ def _multi_distance_profile( m : int Window size - excl_zone : int - The half width for the exclusion zone relative to the `query_idx`. - M_T : numpy.ndarray Sliding mean for `T_A` @@ -592,6 +599,9 @@ def _multi_distance_profile( When set to `True`, this reverses the distance profile to favor discords rather than motifs. Note that indices in `include` are still maintained and respected. + excl_zone : int, default None + The half width for the exclusion zone relative to the `query_idx`. 
+ Returns ------- D : numpy.ndarray @@ -625,7 +635,8 @@ def _multi_distance_profile( D_prime[:] = D_prime + D[i] D[i, :] = D_prime / (i + 1) - core.apply_exclusion_zone(D, query_idx, excl_zone, np.inf) + if excl_zone is not None: + core.apply_exclusion_zone(D, query_idx, excl_zone, np.inf) return D @@ -694,7 +705,7 @@ def multi_distance_profile( ) # See Definition 3 and Figure 3 D = _multi_distance_profile( - query_idx, T, T, m, excl_zone, M_T, Σ_T, M_T, Σ_T, include, discords + query_idx, T, T, m, M_T, Σ_T, M_T, Σ_T, include, discords, excl_zone ) return D @@ -766,7 +777,7 @@ def _get_first_mstump_profile( equal to `start` """ D = _multi_distance_profile( - start, T_A, T_B, m, excl_zone, M_T, Σ_T, μ_Q, σ_Q, include, discords + start, T_A, T_B, m, M_T, Σ_T, μ_Q, σ_Q, include, discords, excl_zone ) d = T_A.shape[0] diff --git a/tests/naive.py b/tests/naive.py index 9f17f924b..a85ee2b18 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -3,7 +3,6 @@ import numpy as np from scipy.spatial.distance import cdist from scipy.stats import norm -from numba import njit from stumpy import core, config @@ -1555,12 +1554,10 @@ def _get_mask_slices(mask): return np.array(idx).reshape(len(idx) // 2, 2) -@njit(fastmath=True) def _total_trapezoid_ndists(a, b, h): return (a + b) * h // 2 -@njit(fastmath=True) def _total_diagonal_ndists(tile_lower_diag, tile_upper_diag, tile_height, tile_width): total_ndists = 0 From 60b5047f59638ed6546839bef4ce7a829442b6fb Mon Sep 17 00:00:00 2001 From: Sean Law Date: Wed, 16 Mar 2022 10:35:56 -0400 Subject: [PATCH 2/2] Fixed stimp, stimped, gpu_stimp normalized reroute --- stumpy/aamp_stimp.py | 2 +- stumpy/core.py | 15 +++-- stumpy/gpu_stimp.py | 6 +- stumpy/stimp.py | 14 ++++- test.sh | 4 +- tests/test_non_normalized_decorator.py | 85 ++++++++++++++++++++++++-- 6 files changed, 107 insertions(+), 19 deletions(-) diff --git a/stumpy/aamp_stimp.py b/stumpy/aamp_stimp.py index cdb6527ef..e7d7be669 100644 --- a/stumpy/aamp_stimp.py +++ 
b/stumpy/aamp_stimp.py @@ -182,7 +182,7 @@ def __init__( p : float, default 2.0 The p-norm to apply for computing the Minkowski distance. """ - self._T = T + self._T = T.copy() self._T_min = np.min(self._T[np.isfinite(self._T)]) self._T_max = np.max(self._T[np.isfinite(self._T)]) self._p = p diff --git a/stumpy/core.py b/stumpy/core.py index a2dccdca9..1096f3558 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -81,17 +81,17 @@ def non_normalized(non_norm, exclude=None, replace=None): parameters when necessary. ``` - def non_norm_func(Q, T, A): + def non_norm_func(Q, T, A_non_norm): ... return @non_normalized( non_norm_func, - exclude=["normalize", "A", "B"], - replace={"A": None}, + exclude=["normalize", "p", "A", "B"], + replace={"A_norm": "A_non_norm", "other_norm": None}, ) - def norm_func(Q, T, B=None, normalize=True): + def norm_func(Q, T, A_norm=None, other_norm=None, normalize=True, p=2.0): ... return ``` @@ -104,13 +104,16 @@ def norm_func(Q, T, B=None, normalize=True): exclude : list, default None A list of function (or class) parameter names to exclude when comparing the - function (or class) signatures + function (or class) signatures. When `exclude is None`, this parameter is + automatically set to `exclude = ["normalize", "p"]` by default. replace : dict, default None A dictionary of function (or class) parameter key-value pairs. Each key that is found as a parameter name in the `norm` function (or class) will be replaced by its corresponding or complementary parameter name in the `non_norm` function - (or class). + (or class) (e.g., {"norm_param": "non_norm_param"}). To remove any parameter in + the `norm` function (or class) that does not exist in the `non_norm` function, + simply set the value to `None` (i.e., {"norm_param": None}). 
Returns ------- diff --git a/stumpy/gpu_stimp.py b/stumpy/gpu_stimp.py index ec0003315..c36dbd45e 100644 --- a/stumpy/gpu_stimp.py +++ b/stumpy/gpu_stimp.py @@ -7,7 +7,11 @@ from .stimp import _stimp -@core.non_normalized(gpu_aamp_stimp) +@core.non_normalized( + gpu_aamp_stimp, + exclude=["pre_scrump", "normalize", "p", "pre_scraamp"], + replace={"pre_scrump": "pre_scraamp"}, +) class gpu_stimp(_stimp): """ Compute the Pan Matrix Profile with with one or more GPU devices diff --git a/stumpy/stimp.py b/stumpy/stimp.py index b46c2b999..1c285f116 100644 --- a/stumpy/stimp.py +++ b/stumpy/stimp.py @@ -167,7 +167,7 @@ def __init__( mp_func : object, default stump The matrix profile function to use when `percentage = 1.0` """ - self._T = T + self._T = T.copy() if max_m is None: max_m = max(min_m + 1, core.get_max_window_size(self._T.shape[0])) M = np.arange(min_m, max_m + 1, step).astype(np.int64) @@ -320,7 +320,11 @@ def M_(self): # return self._n_processed -@core.non_normalized(aamp_stimp) +@core.non_normalized( + aamp_stimp, + exclude=["pre_scrump", "normalize", "p", "pre_scraamp"], + replace={"pre_scrump": "pre_scraamp"}, +) class stimp(_stimp): """ Compute the Pan Matrix Profile @@ -464,7 +468,11 @@ def __init__( ) -@core.non_normalized(aamp_stimped) +@core.non_normalized( + aamp_stimped, + exclude=["pre_scrump", "normalize", "p", "pre_scraamp"], + replace={"pre_scrump": "pre_scraamp"}, +) class stimped(_stimp): """ Compute the Pan Matrix Profile with a distributed dask cluster diff --git a/test.sh b/test.sh index d1a0f4906..ecc4f6206 100755 --- a/test.sh +++ b/test.sh @@ -79,7 +79,7 @@ test_custom() # Test one or more user-defined functions repeatedly for VARIABLE in {1..10} do - pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_.py + pytest -x -W ignore::DeprecationWarning tests/test_.py check_errs $? 
done clean_up @@ -138,7 +138,7 @@ test_unit() pytest -rsx -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_gpu_aamp_stimp.py pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_aamp_stimp.py check_errs $? - pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_non_normalized_decorator.py + pytest -x -W ignore::DeprecationWarning tests/test_non_normalized_decorator.py check_errs $? } diff --git a/tests/test_non_normalized_decorator.py b/tests/test_non_normalized_decorator.py index fffd3f975..e2471dcc3 100644 --- a/tests/test_non_normalized_decorator.py +++ b/tests/test_non_normalized_decorator.py @@ -6,6 +6,8 @@ from dask.distributed import Client, LocalCluster from numba import cuda +import naive + try: from numba.errors import NumbaPerformanceWarning except ModuleNotFoundError: @@ -352,18 +354,65 @@ def test_snippets(): @pytest.mark.parametrize("T, m", test_data) def test_stimp(T, m): + if T.ndim > 1: + T = T.copy() + T = T[0] + n = 3 + seed = np.random.randint(100000) + + np.random.seed(seed) ref = stumpy.aamp_stimp(T, m) - comp = stumpy.stimp(T, m, normalize=False) - npt.assert_almost_equal(ref.PAN_, comp.PAN_) + for i in range(n): + ref.update() + + np.random.seed(seed) + cmp = stumpy.stimp(T, m, normalize=False) + for i in range(n): + cmp.update() + + # Compare raw pan + ref_PAN = ref._PAN + cmp_PAN = cmp._PAN + + naive.replace_inf(ref_PAN) + naive.replace_inf(cmp_PAN) + + npt.assert_almost_equal(ref_PAN, cmp_PAN) + + # Compare transformed pan + npt.assert_almost_equal(ref.PAN_, cmp.PAN_) @pytest.mark.filterwarnings("ignore:\\s+Port 8787 is already in use:UserWarning") @pytest.mark.parametrize("T, m", test_data) def test_stimped(T, m, dask_cluster): + if T.ndim > 1: + T = T.copy() + T = T[0] + n = 3 + seed = np.random.randint(100000) with Client(dask_cluster) as dask_client: + np.random.seed(seed) ref = stumpy.aamp_stimped(dask_client, T, m) - comp = stumpy.stimped(dask_client, T, m, 
normalize=False) - npt.assert_almost_equal(ref.PAN_, comp.PAN_) + for i in range(n): + ref.update() + + np.random.seed(seed) + cmp = stumpy.stimped(dask_client, T, m, normalize=False) + for i in range(n): + cmp.update() + + # Compare raw pan + ref_PAN = ref._PAN + cmp_PAN = cmp._PAN + + naive.replace_inf(ref_PAN) + naive.replace_inf(cmp_PAN) + + npt.assert_almost_equal(ref_PAN, cmp_PAN) + + # Compare transformed pan + npt.assert_almost_equal(ref.PAN_, cmp.PAN_) @pytest.mark.filterwarnings("ignore", category=NumbaPerformanceWarning) @@ -372,6 +421,30 @@ def test_gpu_stimp(T, m): if not cuda.is_available(): # pragma: no cover pytest.skip("Skipping Tests No GPUs Available") + if T.ndim > 1: + T = T.copy() + T = T[0] + n = 3 + seed = np.random.randint(100000) + + np.random.seed(seed) ref = stumpy.gpu_aamp_stimp(T, m) - comp = stumpy.gpu_stimp(T, m, normalize=False) - npt.assert_almost_equal(ref.PAN_, comp.PAN_) + for i in range(n): + ref.update() + + np.random.seed(seed) + cmp = stumpy.gpu_stimp(T, m, normalize=False) + for i in range(n): + cmp.update() + + # Compare raw pan + ref_PAN = ref._PAN + cmp_PAN = cmp._PAN + + naive.replace_inf(ref_PAN) + naive.replace_inf(cmp_PAN) + + npt.assert_almost_equal(ref_PAN, cmp_PAN) + + # Compare transformed pan + npt.assert_almost_equal(ref.PAN_, cmp.PAN_)