Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
ad32d18
add isconstant param to naive mstump and relevant naive functions
NimaSarajpoor May 29, 2023
ae53ba3
rename param and add support if it is callable object
NimaSarajpoor Jun 3, 2023
a57bd4b
Merge branch 'main' into add_isconstant_mstump
NimaSarajpoor Jun 3, 2023
cb3d6b5
add test function for param isconstant, expecting error
NimaSarajpoor Jun 3, 2023
02193c9
revise naive function to support only numpy.ndarray or callable funct…
NimaSarajpoor Jun 4, 2023
a402757
add param to performant multi_mass and fix issues
NimaSarajpoor Jun 4, 2023
6a53d77
add test function for multi_distance_profile, expecting error
NimaSarajpoor Jun 4, 2023
54a5f90
add param isconstant to private func and fix issues
NimaSarajpoor Jun 4, 2023
61452e6
fix decorator
NimaSarajpoor Jun 4, 2023
e282b85
update mstumped
NimaSarajpoor Jun 4, 2023
ee29291
add new test functions, and revise naive function
NimaSarajpoor Jun 4, 2023
0362800
add new test function for mstump with param isconstant, expecting error
NimaSarajpoor Jun 4, 2023
7491893
add param to performant mstump and fix issues
NimaSarajpoor Jun 5, 2023
983096e
preprocessT in the beginning to ensure the input becomes np.ndarray e…
NimaSarajpoor Jun 7, 2023
a872568
minor changes
NimaSarajpoor Jun 8, 2023
c8a9293
add param isconstant to subpace and mdl
NimaSarajpoor Jun 8, 2023
5849ecb
fix minor bug
NimaSarajpoor Jun 8, 2023
16fbc95
fix decorator
NimaSarajpoor Jun 8, 2023
b85cd8c
remove trailing colon
NimaSarajpoor Jun 10, 2023
40b87da
replace reshape with expand_dims to improve readability
NimaSarajpoor Jun 10, 2023
124b002
minor changes
NimaSarajpoor Jun 10, 2023
10b9fce
add test for isconstant support in mstumped, expecting error
NimaSarajpoor Jun 10, 2023
77445e4
add support for isconstant, fixed error
NimaSarajpoor Jun 10, 2023
be62488
add missing param p for p-norm support
NimaSarajpoor Jun 10, 2023
8e4456d
improve docstrings
NimaSarajpoor Jun 19, 2023
451d2af
re-order elements of a list for sake of readability
NimaSarajpoor Jun 19, 2023
4e0bd04
Merge branch 'main' into add_isconstant_mstump
NimaSarajpoor Jul 8, 2023
e85e654
Refactor
NimaSarajpoor Jul 8, 2023
c768c39
fix docstring
NimaSarajpoor Jul 9, 2023
4647931
fix docstring
NimaSarajpoor Jul 9, 2023
767b5e8
use numpy.expand_dims instead of reshape(-1, 1) to improve readability
NimaSarajpoor Jul 11, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion stumpy/floss.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def _iac(
b_mean = np.round(np.mean(params[:, 1]), 2)

IAC = scipy.stats.beta.pdf(np.arange(width), a_mean, b_mean, loc=0, scale=width)
slope, _, _, _ = np.linalg.lstsq(IAC.reshape(-1, 1), target_AC, rcond=None)
slope, _, _, _ = np.linalg.lstsq(np.expand_dims(IAC, axis=1), target_AC, rcond=None)

IAC *= slope

Expand Down
137 changes: 125 additions & 12 deletions stumpy/mstump.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,18 @@
from .maamp import maamp, maamp_mdl, maamp_multi_distance_profile, maamp_subspace


def _multi_mass(Q, T, m, M_T, Σ_T, μ_Q, σ_Q, T_subseq_isconstant):
def _multi_mass(
Q,
T,
m,
M_T,
Σ_T,
μ_Q,
σ_Q,
T_subseq_isconstant,
Q_subseq_isconstant,
query_idx=None,
):
"""
A multi-dimensional wrapper around "Mueen's Algorithm for Similarity Search"
(MASS) to compute multi-dimensional distance profile.
Expand Down Expand Up @@ -43,6 +54,16 @@ def _multi_mass(Q, T, m, M_T, Σ_T, μ_Q, σ_Q, T_subseq_isconstant):
T_subseq_isconstant : numpy.ndarray
A boolean array that indicates whether a subsequence in `T` is constant (True)

Q_subseq_isconstant : numpy.ndarray
Comment thread
NimaSarajpoor marked this conversation as resolved.
A boolean array that indicates whether a subsequence in `Q` is constant (True)

query_idx : int, default None
This is the index position along each of the time series in `T`, where
the query subsequence, `Q`, is located. `query_idx` should be set to None
if `Q` is not a subsequence of `T`. If `Q` is a subsequence of `T`, providing
this argument is optional. If query_idx is provided, the distance between Q
and `T[:, query_idx : query_idx + m]` will automatically be set to zero.

Returns
-------
D : numpy.ndarray
Expand All @@ -58,13 +79,19 @@ def _multi_mass(Q, T, m, M_T, Σ_T, μ_Q, σ_Q, T_subseq_isconstant):
D[i, :] = np.inf
else:
D[i, :] = core.mass(
Q[i], T[i], M_T[i], Σ_T[i], T_subseq_isconstant=T_subseq_isconstant[i]
Q[i],
T[i],
M_T[i],
Σ_T[i],
T_subseq_isconstant=T_subseq_isconstant[i],
Q_subseq_isconstant=Q_subseq_isconstant[i],
query_idx=query_idx,
)

return D


@core.non_normalized(maamp_subspace)
@core.non_normalized(maamp_subspace, exclude=["normalize", "T_subseq_isconstant"])
def subspace(
T,
m,
Expand All @@ -77,6 +104,7 @@ def subspace(
n_bit=8,
normalize=True,
p=2.0,
T_subseq_isconstant=None,
):
"""
Compute the k-dimensional matrix profile subspace for a given subsequence index and
Expand Down Expand Up @@ -141,6 +169,15 @@ def subspace(
and the Euclidean distance, respectively. This parameter is ignored when
`normalize == True`.

T_subseq_isconstant : numpy.ndarray, function, or list, default None
A parameter that is used to show whether a subsequence of a time series in `T`
is constant (True) or not. T_subseq_isconstant can be a 2D boolean numpy.ndarray
or a function that can be applied to each time series in `T`. Alternatively, for
maximum flexibility, a list (with length equal to the total number of time
series) may also be used. In this case, T_subseq_isconstant[i] corresponds to
the i-th time series T[i] and each element in the list can either be a 1D
boolean np.ndarray, a function, or None.

Returns
-------
S : numpy.ndarray
Expand Down Expand Up @@ -177,15 +214,19 @@ def subspace(
"""
T = core._preprocess(T)
core.check_window_size(m, max_size=T.shape[-1])
T_subseq_isconstant = core.process_isconstant(T, m, T_subseq_isconstant)

if discretize_func is None:
bins = _inverse_norm(n_bit)
discretize_func = partial(_discretize, bins=bins)

subseqs, _, _, _ = core.preprocess(T[:, subseq_idx : subseq_idx + m], m)
subseqs = core.z_norm(subseqs, axis=1)
subseqs[T_subseq_isconstant[:, subseq_idx]] = 0.0

neighbors, _, _, _ = core.preprocess(T[:, nn_idx : nn_idx + m], m)
neighbors = core.z_norm(neighbors, axis=1)
neighbors[T_subseq_isconstant[:, nn_idx]] = 0.0

disc_subseqs = discretize_func(subseqs)
disc_neighbors = discretize_func(neighbors)
Expand Down Expand Up @@ -243,7 +284,7 @@ def _discretize(a, bins, right=True): # pragma: no cover
return np.digitize(a, bins, right=right)


@core.non_normalized(maamp_mdl)
@core.non_normalized(maamp_mdl, exclude=["normalize", "T_subseq_isconstant"])
def mdl(
T,
m,
Expand All @@ -255,6 +296,7 @@ def mdl(
n_bit=8,
normalize=True,
p=2.0,
T_subseq_isconstant=None,
):
"""
Compute the multi-dimensional number of bits needed to compress one
Expand Down Expand Up @@ -316,6 +358,15 @@ def mdl(
and the Euclidean distance, respectively. This parameter is ignored when
`normalize == True`.

T_subseq_isconstant : numpy.ndarray, function, or list, default None
A parameter that is used to show whether a subsequence of a time series in `T`
is constant (True) or not. T_subseq_isconstant can be a 2D boolean numpy.ndarray
or a function that can be applied to each time series in `T`. Alternatively, for
maximum flexibility, a list (with length equal to the total number of time
series) may also be used. In this case, T_subseq_isconstant[i] corresponds to
the i-th time series T[i] and each element in the list can either be a 1D
boolean np.ndarray, a function, or None.

Returns
-------
bit_sizes : numpy.ndarray
Expand Down Expand Up @@ -352,6 +403,7 @@ def mdl(
"""
T = core._preprocess(T)
core.check_window_size(m, max_size=T.shape[-1])
T_subseq_isconstant = core.process_isconstant(T, m, T_subseq_isconstant)

if discretize_func is None:
bins = _inverse_norm(n_bit)
Expand All @@ -362,8 +414,11 @@ def mdl(
for k in range(T.shape[0]):
subseqs, _, _, _ = core.preprocess(T[:, subseq_idx[k] : subseq_idx[k] + m], m)
subseqs = core.z_norm(subseqs, axis=1)
subseqs[T_subseq_isconstant[:, subseq_idx[k]]] = 0.0

neighbors, _, _, _ = core.preprocess(T[:, nn_idx[k] : nn_idx[k] + m], m)
neighbors = core.z_norm(neighbors, axis=1)
neighbors[T_subseq_isconstant[:, nn_idx[k]]] = 0.0

disc_subseqs = discretize_func(subseqs)
disc_neighbors = discretize_func(neighbors)
Expand All @@ -387,6 +442,7 @@ def _multi_distance_profile(
μ_Q,
σ_Q,
T_subseq_isconstant,
Q_subseq_isconstant,
include=None,
discords=False,
excl_zone=None,
Expand All @@ -399,7 +455,7 @@ def _multi_distance_profile(
Parameters
----------
query_idx : int
The window index to calculate the multi-dimensional distance profile for
The start index of the (multi-dimensional) query subsequence in `T_B`

T_A : numpy.ndarray
The time series or sequence for which the multi-dimensional distance profile
Expand All @@ -426,6 +482,10 @@ def _multi_distance_profile(
T_subseq_isconstant : numpy.ndarray
A boolean array that indicates whether a subsequence in `T_A` is constant (True)

Q_subseq_isconstant : numpy.ndarray
A boolean array that indicates whether a subsequence in `T_B` is
constant (True)

include : numpy.ndarray, default None
A list of (zero-based) indices corresponding to the dimensions in `T` that
must be included in the constrained multidimensional motif search.
Expand All @@ -450,6 +510,7 @@ def _multi_distance_profile(
d, n = T_A.shape
k = n - m + 1
start_row_idx = 0

D = _multi_mass(
T_B[:, query_idx : query_idx + m],
T_A,
Expand All @@ -459,6 +520,8 @@ def _multi_distance_profile(
μ_Q[:, query_idx],
σ_Q[:, query_idx],
T_subseq_isconstant,
np.expand_dims(Q_subseq_isconstant[:, query_idx], 1),
query_idx=query_idx,
Comment thread
NimaSarajpoor marked this conversation as resolved.
)

if include is not None:
Expand All @@ -481,9 +544,18 @@ def _multi_distance_profile(
return D


@core.non_normalized(maamp_multi_distance_profile)
@core.non_normalized(
maamp_multi_distance_profile, exclude=["normalize", "T_subseq_isconstant"]
)
def multi_distance_profile(
query_idx, T, m, include=None, discords=False, normalize=True, p=2.0
query_idx,
T,
m,
include=None,
discords=False,
normalize=True,
p=2.0,
T_subseq_isconstant=None,
):
"""
Multi-dimensional wrapper to compute the multi-dimensional distance profile for a
Expand Down Expand Up @@ -525,13 +597,24 @@ def multi_distance_profile(
and the Euclidean distance, respectively. This parameter is ignored when
`normalize == True`.

T_subseq_isconstant : numpy.ndarray, function, or list, default None
A parameter that is used to show whether a subsequence of a time series in `T`
is constant (True) or not. T_subseq_isconstant can be a 2D boolean numpy.ndarray
or a function that can be applied to each time series in `T`. Alternatively, for
maximum flexibility, a list (with length equal to the total number of time
series) may also be used. In this case, T_subseq_isconstant[i] corresponds to
the i-th time series T[i] and each element in the list can either be a 1D
boolean np.ndarray, a function, or None.

Returns
-------
D : numpy.ndarray
Multi-dimensional distance profile for the window with index equal to
`query_idx`
"""
T, M_T, Σ_T, T_subseq_isconstant = core.preprocess(T, m)
T, M_T, Σ_T, T_subseq_isconstant = core.preprocess(
T, m, T_subseq_isconstant=T_subseq_isconstant
)

if T.ndim <= 1: # pragma: no cover
err = f"T is {T.ndim}-dimensional and must be at least 1-dimensional"
Expand All @@ -556,6 +639,7 @@ def multi_distance_profile(
M_T,
Σ_T,
T_subseq_isconstant,
T_subseq_isconstant,
include,
discords,
excl_zone,
Expand All @@ -575,6 +659,7 @@ def _get_first_mstump_profile(
μ_Q,
σ_Q,
T_subseq_isconstant,
Q_subseq_isconstant,
include=None,
discords=False,
):
Expand Down Expand Up @@ -621,6 +706,10 @@ def _get_first_mstump_profile(
T_subseq_isconstant : numpy.ndarray
A boolean array that indicates whether a subsequence in `T_A` is constant (True)

Q_subseq_isconstant : numpy.ndarray
A boolean array that indicates whether a (query) subsequence in `T_B` is
constant (True)

include : numpy.ndarray, default None
A list of (zero-based) indices corresponding to the dimensions in `T` that
must be included in the constrained multidimensional motif search.
Expand Down Expand Up @@ -653,6 +742,7 @@ def _get_first_mstump_profile(
μ_Q,
σ_Q,
T_subseq_isconstant,
Q_subseq_isconstant,
include,
discords,
excl_zone,
Expand Down Expand Up @@ -1013,8 +1103,10 @@ def _mstump(
return P, I


@core.non_normalized(maamp)
def mstump(T, m, include=None, discords=False, normalize=True, p=2.0):
@core.non_normalized(maamp, exclude=["normalize", "T_subseq_isconstant"])
def mstump(
T, m, include=None, discords=False, normalize=True, p=2.0, T_subseq_isconstant=None
):
"""
Compute the multi-dimensional z-normalized matrix profile

Expand Down Expand Up @@ -1059,6 +1151,15 @@ def mstump(T, m, include=None, discords=False, normalize=True, p=2.0):
and the Euclidean distance, respectively. This parameter is ignored when
`normalize == True`.

T_subseq_isconstant : numpy.ndarray, function, or list, default None
A parameter that is used to show whether a subsequence of a time series in `T`
is constant (True) or not. T_subseq_isconstant can be a 2D boolean numpy.ndarray
or a function that can be applied to each time series in `T`. Alternatively, for
maximum flexibility, a list (with length equal to the total number of time
series) may also be used. In this case, T_subseq_isconstant[i] corresponds to
the i-th time series T[i] and each element in the list can either be a 1D
boolean np.ndarray, a function, or None.

Returns
-------
P : numpy.ndarray
Expand Down Expand Up @@ -1100,8 +1201,19 @@ def mstump(T, m, include=None, discords=False, normalize=True, p=2.0):
T_A = T
T_B = T_A

T_A, M_T, Σ_T, T_subseq_isconstant = core.preprocess(T_A, m)
T_B, μ_Q, σ_Q, Q_subseq_isconstant = core.preprocess(T_B, m)
T_A = core._preprocess(T_A)
T_B = core._preprocess(T_B)

T_A_subseq_isconstant = T_subseq_isconstant
T_A_subseq_isconstant = core.process_isconstant(T_A, m, T_A_subseq_isconstant)
T_B_subseq_isconstant = T_A_subseq_isconstant

T_A, M_T, Σ_T, T_subseq_isconstant = core.preprocess(
Comment thread
NimaSarajpoor marked this conversation as resolved.
T_A, m, T_subseq_isconstant=T_A_subseq_isconstant
)
T_B, μ_Q, σ_Q, Q_subseq_isconstant = core.preprocess(
T_B, m, T_subseq_isconstant=T_B_subseq_isconstant
)

if T_A.ndim <= 1: # pragma: no cover
err = f"T is {T_A.ndim}-dimensional and must be at least 1-dimensional"
Expand Down Expand Up @@ -1135,6 +1247,7 @@ def mstump(T, m, include=None, discords=False, normalize=True, p=2.0):
μ_Q,
σ_Q,
T_subseq_isconstant,
Q_subseq_isconstant,
include,
discords,
)
Expand Down
Loading