Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions khiva/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,11 @@ def to_list(self):
def to_numpy(self):
    """ Returns the content of this KHIVA array as a numpy array.

    The Array dims show up in the numpy shape in reverse order, as these examples illustrate:
        - An Array with dims [4, 2, 1, 1] gives a numpy shape of (2, 4).
        - An Array with dims [4, 3, 2, 1] gives a numpy shape of (2, 3, 4).
        - An Array with dims [4, 1, 2, 3] gives a numpy shape of (3, 2, 1, 4).

    :return: numpy.array with the data of this KHIVA array.
    """
    # The conversion itself is delegated to the private data accessor.
    numpy_data = self._get_data()
    return numpy_data
Expand Down
57 changes: 57 additions & 0 deletions khiva/matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,63 @@ def find_best_n_motifs(profile, index, m, n, self_join=False):
array_reference=d)


def find_best_n_occurrences(query_time_series, time_series, number_of_occurrences):
    """ Finds the N best matches of each query in each time series.

    Both returned arrays share this layout:
        - 1st dimension: rank of the match (nth best).
        - 2nd dimension: query.
        - 3rd dimension: time series.

    For example, the distance at position (1, 2, 3) is the second best distance of the third query in the fourth
    time series, and the index at position (1, 2, 3) is the index of the subsequence that produces that distance.

    :param query_time_series: Array whose first dimension is the length of the query time series and the second
        dimension is the number of queries.
    :param time_series: Array whose first dimension is the length of the time series and the second dimension is
        the number of time series.
    :param number_of_occurrences: Number of matches to return.
    :return: KHIVA arrays with the distances and indexes.
    """
    # Output references: handed to the native call by pointer and wrapped into Arrays afterwards.
    distances_ref = ctypes.c_void_p(0)
    indexes_ref = ctypes.c_void_p(0)

    native_call = KhivaLibrary().c_khiva_library.find_best_n_occurrences
    queries_ptr = ctypes.pointer(query_time_series.arr_reference)
    series_ptr = ctypes.pointer(time_series.arr_reference)
    # The requested number of matches is passed as a pointer to a C long.
    occurrences_ptr = ctypes.pointer(ctypes.c_long(number_of_occurrences))
    native_call(queries_ptr,
                series_ptr,
                occurrences_ptr,
                ctypes.pointer(distances_ref),
                ctypes.pointer(indexes_ref))

    distances_array = Array(array_reference=distances_ref)
    indexes_array = Array(array_reference=indexes_ref)
    return distances_array, indexes_array


def mass(query_time_series, time_series):
    """ Mueen's Algorithm for Similarity Search (MASS).

    Layout of the returned distances:
        - 1st dimension: index of the subsequence in the time series.
        - 2nd dimension: query.
        - 3rd dimension: time series.

    For example, the distance at position (1, 2, 3) corresponds to the distance of the third query to the fourth
    time series for the second subsequence in that time series.

    :param query_time_series: Array whose first dimension is the length of the query time series and the second
        dimension is the number of queries.
    :param time_series: Array whose first dimension is the length of the time series and the second dimension is
        the number of time series.
    :return: KHIVA array with the distances.
    """
    # Output reference: handed to the native call by pointer and wrapped into an Array afterwards.
    distances_ref = ctypes.c_void_p(0)

    native_call = KhivaLibrary().c_khiva_library.mass
    queries_ptr = ctypes.pointer(query_time_series.arr_reference)
    series_ptr = ctypes.pointer(time_series.arr_reference)
    native_call(queries_ptr, series_ptr, ctypes.pointer(distances_ref))

    distances_array = Array(array_reference=distances_ref)
    return distances_array


def stomp(first_time_series, second_time_series, subsequence_length):
""" Stomp algorithm to calculate the matrix profile between `ta` and `tb` using a subsequence length of `m`.

Expand Down
67 changes: 67 additions & 0 deletions tests/unit_tests/matrix_unit_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,19 @@ def test_stomp(self):
self.assertAlmostEqual(a[i], 0, delta=1e-2)
self.assertAlmostEqual(b[i], expected_index[i])

def test_stomp_different_queries(self):
    # Runs stomp on two-row inputs with a subsequence length of 3 and spot-checks
    # a few positions of the two numpy arrays obtained from the result.
    first_input = Array([[10, 11, 10, 8, 14], [10, 14, 10, 10, 3]])
    second_input = Array([[10, 11, 10, 11, 10, 11, 10, 7], [10, 13, 10, 10, 10, 14, 8, 7]])
    result = stomp(first_input, second_input, 3)

    first_output = result[0].to_numpy()
    second_output = result[1].to_numpy()

    # (position, expected value) pairs checked with a tolerance on the first output.
    approximate_checks = (
        ((1, 0, 2), 1.73205077),
        ((0, 0, 0), 0.00954336),
    )
    for position, expected in approximate_checks:
        self.assertAlmostEqual(first_output[position], expected, delta=1e-3)

    # (position, expected value) pairs checked for exact equality on the second output.
    exact_checks = (
        ((0, 1, 5), 2),
        ((1, 1, 1), 1),
    )
    for position, expected in exact_checks:
        self.assertEqual(second_output[position], expected)

def test_find_best_n_motifs(self):
stomp_result = stomp(Array([10, 10, 10, 10, 10, 10, 9, 10, 10, 10, 10, 10, 11, 10, 9], dtype.f32),
Array([10, 11, 10, 9], dtype.f32),
Expand Down Expand Up @@ -149,6 +162,60 @@ def test_find_best_n_discords_consecutive(self):
else:
self.assertNotEqual(a[1], 11)

def test_mass(self):
    # One query against one time series: the whole distance vector is compared.
    query = Array(np.array([4, 3, 8]), dtype.f32)
    series = Array(np.array([10, 10, 10, 11, 12, 11, 10, 10, 11, 12, 11, 14, 10, 10]), dtype.f32)

    actual = mass(query, series).to_numpy()
    expected = np.array([
        1.732051, 0.328954, 1.210135, 3.150851, 3.245858, 2.822044,
        0.328954, 1.210135, 3.150851, 0.248097, 3.30187, 2.82205,
    ])

    np.testing.assert_array_almost_equal(actual, expected, decimal=self.DECIMAL)

def test_mass_multiple(self):
    # Two queries against two time series: the full block of distances is compared.
    queries = Array(np.array([[10, 10, 11, 11], [10, 11, 10, 10]]), dtype.f32)
    series = Array(np.array([[10, 10, 10, 11, 12, 11, 10], [10, 11, 12, 11, 14, 10, 10]]), dtype.f32)

    actual = mass(queries, series).to_numpy()

    expected = np.array([
        [
            [1.83880341, 0.87391543, 1.5307337, 3.69551826],
            [3.26598597, 3.48967957, 2.82842779, 1.21162188],
        ],
        [
            [1.5307337, 2.17577887, 2.57832384, 3.75498915],
            [2.82842779, 2.82842731, 3.21592307, 0.50202721],
        ],
    ])

    np.testing.assert_array_almost_equal(actual, expected, decimal=self.DECIMAL)

def test_find_best_n_occurrences(self):
    # Single query, two identical time series, only the best match requested.
    series_values = [10, 10, 11, 11, 12, 11, 10, 10, 11, 12, 11, 10, 10, 11]
    query = Array(np.array([10, 11, 12]), dtype.f32)
    series = Array(np.array([series_values, series_values]), dtype.f32)

    result = find_best_n_occurrences(query, series, 1)

    best_distances = result[0].to_numpy()
    best_indexes = result[1].to_numpy()

    self.assertAlmostEqual(best_distances[0], 0.00069053, delta=self.DELTA)
    self.assertEqual(best_indexes[0], 7)

def test_find_best_n_occurrences_multiple_queries(self):
    # Two queries against two time series, asking for the four best matches of each pair.
    queries = Array(np.array([[11, 11, 10, 11], [10, 11, 11, 12]]), dtype.f32)
    series = Array(np.array([[10, 10, 11, 11, 10, 11, 10, 10, 11, 11, 10, 11, 10, 10],
                             [11, 10, 10, 11, 10, 11, 11, 10, 11, 11, 14, 10, 11, 10]]), dtype.f32)

    result = find_best_n_occurrences(queries, series, 4)

    best_distances = result[0].to_numpy()
    best_indexes = result[1].to_numpy()

    # KHIVA dims are (match rank, query, time series); the numpy arrays are indexed
    # in the reverse order, i.e. [time series, query, match rank], all 0-based.
    np.testing.assert_array_equal(result[0].get_dims(), np.array([4, 2, 2, 1]))

    # First time series, second query, rank index 3 (the fourth best match): subsequence index.
    self.assertEqual(best_indexes[0, 1, 3], 2)

    # Second time series, first query, rank index 2 (the third best match): distance.
    self.assertAlmostEqual(best_distances[1, 0, 2], 1.83880329, delta=self.DELTA)


if __name__ == '__main__':
suite = unittest.TestLoader().loadTestsFromTestCase(MatrixTest)
Expand Down