From efb6f73a06f2cb80271e0d5c7b892a469b24117a Mon Sep 17 00:00:00 2001 From: Sean Law Date: Mon, 7 Mar 2022 21:38:21 -0500 Subject: [PATCH 1/2] Made excl_zone optional in _multi_distance_profile funcs --- stumpy/maamp.py | 15 ++++++++------- stumpy/mstump.py | 25 ++++++++++++++++++------- tests/naive.py | 3 --- 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/stumpy/maamp.py b/stumpy/maamp.py index a00980c3a..c2fc91ae1 100644 --- a/stumpy/maamp.py +++ b/stumpy/maamp.py @@ -303,11 +303,11 @@ def _maamp_multi_distance_profile( T_A, T_B, m, - excl_zone, T_B_subseq_isfinite, p=2.0, include=None, discords=False, + excl_zone=None, ): """ Multi-dimensional wrapper to compute the multi-dimensional non-normalized (i.e., @@ -330,9 +330,6 @@ def _maamp_multi_distance_profile( m : int Window size - excl_zone : int - The half width for the exclusion zone relative to the `query_idx`. - T_B_subseq_isfinite : numpy.ndarray A boolean array that indicates whether a subsequence in `T_B` contains a `np.nan`/`np.inf` value (False) @@ -352,6 +349,9 @@ def _maamp_multi_distance_profile( When set to `True`, this reverses the distance profile to favor discords rather than motifs. Note that indices in `include` are still maintained and respected. + excl_zone : int, default None + The half width for the exclusion zone relative to the `query_idx`. 
+ Returns ------- D : numpy.ndarray @@ -384,7 +384,8 @@ def _maamp_multi_distance_profile( D_prime[:] = D_prime + D[i] D[i, :] = D_prime / (i + 1) - core.apply_exclusion_zone(D, query_idx, excl_zone, np.inf) + if excl_zone is not None: + core.apply_exclusion_zone(D, query_idx, excl_zone, np.inf) return D @@ -444,7 +445,7 @@ def maamp_multi_distance_profile(query_idx, T, m, include=None, discords=False, ) # See Definition 3 and Figure 3 D = _maamp_multi_distance_profile( - query_idx, T, T, m, excl_zone, T_subseq_isfinite, p, include, discords + query_idx, T, T, m, T_subseq_isfinite, p, include, discords, excl_zone ) return D @@ -520,7 +521,7 @@ def _get_first_maamp_profile( equal to `start` """ D = _maamp_multi_distance_profile( - start, T_A, T_B, m, excl_zone, T_B_subseq_isfinite, p, include, discords + start, T_A, T_B, m, T_B_subseq_isfinite, p, include, discords, excl_zone ) d = T_A.shape[0] diff --git a/stumpy/mstump.py b/stumpy/mstump.py index 3e91bc07b..6a11e9944 100644 --- a/stumpy/mstump.py +++ b/stumpy/mstump.py @@ -543,7 +543,17 @@ def mdl( def _multi_distance_profile( - query_idx, T_A, T_B, m, excl_zone, M_T, Σ_T, μ_Q, σ_Q, include=None, discords=False + query_idx, + T_A, + T_B, + m, + M_T, + Σ_T, + μ_Q, + σ_Q, + include=None, + discords=False, + excl_zone=None, ): """ Multi-dimensional wrapper to compute the multi-dimensional distance profile for a @@ -565,9 +575,6 @@ def _multi_distance_profile( m : int Window size - excl_zone : int - The half width for the exclusion zone relative to the `query_idx`. - M_T : numpy.ndarray Sliding mean for `T_A` @@ -592,6 +599,9 @@ def _multi_distance_profile( When set to `True`, this reverses the distance profile to favor discords rather than motifs. Note that indices in `include` are still maintained and respected. + excl_zone : int, default None + The half width for the exclusion zone relative to the `query_idx`. 
+ Returns ------- D : numpy.ndarray @@ -625,7 +635,8 @@ def _multi_distance_profile( D_prime[:] = D_prime + D[i] D[i, :] = D_prime / (i + 1) - core.apply_exclusion_zone(D, query_idx, excl_zone, np.inf) + if excl_zone is not None: + core.apply_exclusion_zone(D, query_idx, excl_zone, np.inf) return D @@ -694,7 +705,7 @@ def multi_distance_profile( ) # See Definition 3 and Figure 3 D = _multi_distance_profile( - query_idx, T, T, m, excl_zone, M_T, Σ_T, M_T, Σ_T, include, discords + query_idx, T, T, m, M_T, Σ_T, M_T, Σ_T, include, discords, excl_zone ) return D @@ -766,7 +777,7 @@ def _get_first_mstump_profile( equal to `start` """ D = _multi_distance_profile( - start, T_A, T_B, m, excl_zone, M_T, Σ_T, μ_Q, σ_Q, include, discords + start, T_A, T_B, m, M_T, Σ_T, μ_Q, σ_Q, include, discords, excl_zone ) d = T_A.shape[0] diff --git a/tests/naive.py b/tests/naive.py index 9f17f924b..a85ee2b18 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -3,7 +3,6 @@ import numpy as np from scipy.spatial.distance import cdist from scipy.stats import norm -from numba import njit from stumpy import core, config @@ -1555,12 +1554,10 @@ def _get_mask_slices(mask): return np.array(idx).reshape(len(idx) // 2, 2) -@njit(fastmath=True) def _total_trapezoid_ndists(a, b, h): return (a + b) * h // 2 -@njit(fastmath=True) def _total_diagonal_ndists(tile_lower_diag, tile_upper_diag, tile_height, tile_width): total_ndists = 0 From bebd15ac2d0350bfd42fdd313e26b0bc958a47f0 Mon Sep 17 00:00:00 2001 From: Sean Law Date: Sat, 19 Mar 2022 16:32:40 -0400 Subject: [PATCH 2/2] Finalized aamp_mmotifs and mmotifs, fixed typos --- stumpy/aamp_mmotifs.py | 30 ++++++++++++++++-------------- stumpy/mmotifs.py | 29 +++++++++++++++-------------- 2 files changed, 31 insertions(+), 28 deletions(-) diff --git a/stumpy/aamp_mmotifs.py b/stumpy/aamp_mmotifs.py index 10b43435a..8e9307811 100644 --- a/stumpy/aamp_mmotifs.py +++ b/stumpy/aamp_mmotifs.py @@ -57,13 +57,12 @@ def aamp_mmotifs( cutoffs: numpy.ndarray or 
float, default None The largest matrix profile value (distance) for each dimension of the multidimensional matrix profile that a multidimenisonal candidate motif is - allowed to have. - If cutoffs is only one value, these value will be applied to every dimension. + allowed to have. If `cutoffs` is a scalar value, then this value will be + applied to every dimension. max_matches: int, default 10 - The maximum amount of similar matches (nearest neighbors) of a motif - representative to be returned. - The first match is always the self-match for each motif. + The maximum number of similar matches (nearest neighbors) to return for each + motif. The first match is always the self/trivial-match for each motif. max_motifs: int, default 1 The maximum number of motifs to return @@ -73,18 +72,14 @@ def aamp_mmotifs( when comparing distances between subsequences. k: int, default None - The number of dimensions (k + 1) in which a motif is present. - This value is available for doing guided search or - together with 'include' - - for constrained search. - The value will be applied to the discovery of all motifs. - If k is None, the value will automatically be computed for each motif using - MDL (unconstrained search). - For more informatioin on search types, see DOI: 10.1109/ICDM.2017.66s + The number of dimensions (`k + 1`) required for discovering all motifs. This + value is available for doing guided search or, together with `include`, for + constrained search. If `k is None`, then this will automatically be computed + for each motif using MDL (unconstrained search). include: numpy.ndarray, default None A list of (zero based) indices corresponding to the dimensions in T that must be - included in the constrained multidimensional motif search. For more information, - see Section IV D in: DOI: 10.1109/ICDM.2017.66 + included in the constrained multidimensional motif search. p: float, default 2.0 The p-norm to apply for computing the Minkowski distance. 
@@ -104,6 +99,13 @@ def aamp_mmotifs( motif_mdls: list A list consisting of arrays that contain the mdl results for finding the dimension of each motif + + Notes + ----- + `DOI: 10.1109/ICDM.2017.66 \ + <https://doi.org/10.1109/ICDM.2017.66>`__ + + For more information on `include` and search types, see Section IV D and IV E """ T = core._preprocess(T) m = T.shape[-1] - P.shape[-1] + 1 diff --git a/stumpy/mmotifs.py b/stumpy/mmotifs.py index 04460ae1c..63cf999c4 100644 --- a/stumpy/mmotifs.py +++ b/stumpy/mmotifs.py @@ -57,13 +57,12 @@ def mmotifs( cutoffs: numpy.ndarray or float, default None The largest matrix profile value (distance) for each dimension of the multidimensional matrix profile that a multidimenisonal candidate motif is - allowed to have. - If cutoffs is only one value, these value will be applied to every dimension. + allowed to have. If `cutoffs` is a scalar value, then this value will be + applied to every dimension. max_matches: int, default 10 - The maximum amount of similar matches (nearest neighbors) of a motif - representative to be returned. - The first match is always the self-match for each motif. + The maximum number of similar matches (nearest neighbors) to return for each + motif. The first match is always the self/trivial-match for each motif. max_motifs: int, default 1 The maximum number of motifs to return @@ -73,18 +72,14 @@ def mmotifs( when comparing distances between subsequences. k: int, default None - The number of dimensions (k + 1) in which a motif is present. - This value is available for doing guided search or - together with 'include' - - for constrained search. - The value will be applied to the discovery of all motifs. - If k is None, the value will automatically be computed for each motif using - MDL (unconstrained search). - For more informatioin on search types, see DOI: 10.1109/ICDM.2017.66s + The number of dimensions (`k + 1`) required for discovering all motifs. 
This + value is available for doing guided search or, together with `include`, for + constrained search. If `k is None`, then this will automatically be computed + for each motif using MDL (unconstrained search). include: numpy.ndarray, default None A list of (zero based) indices corresponding to the dimensions in T that must be - included in the constrained multidimensional motif search. For more information, - see Section IV D in: DOI: 10.1109/ICDM.2017.66 + included in the constrained multidimensional motif search. normalize : bool, default True When set to `True`, this z-normalizes subsequences prior to computing distances. @@ -123,6 +118,12 @@ def mmotifs( stumpy.mdl : Compute the number of bits needed to compress one array with another using the minimum description length (MDL) + Notes + ----- + `DOI: 10.1109/ICDM.2017.66 \ + <https://doi.org/10.1109/ICDM.2017.66>`__ + + For more information on `include` and search types, see Section IV D and IV E """ T = core._preprocess(T) m = T.shape[-1] - P.shape[-1] + 1