forked from mrahtz/sanger-machine-learning-workshop
-
Notifications
You must be signed in to change notification settings - Fork 0
/
learn_utils.py
69 lines (60 loc) · 2.12 KB
/
learn_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env python
"""
Helper functions for time series data learning process
"""
import numpy as np
import matplotlib.pyplot as plt
def sliding_chunker(data, window_len, slide_len):
    """
    Split a sequence into a series of sub-arrays, each window_len long,
    sliding along by slide_len each time. If the sequence doesn't have enough
    elements for the final sub-array to be window_len long, the remaining data
    will be dropped.

    e.g. sliding_chunker(range(6), window_len=3, slide_len=2)
    gives [ [0, 1, 2], [2, 3, 4] ]

    Parameters:
        data: sequence to be chunked (anything sliceable)
        window_len: length of each chunk
        slide_len: step between successive chunk start positions

    Returns:
        list of numpy arrays, each an independent copy of its slice of data
        (copied so callers can mutate chunks without touching the original)
    """
    chunks = []
    for pos in range(0, len(data), slide_len):
        # copy so that in-place edits to a chunk don't alias the input
        chunk = np.copy(data[pos:pos+window_len])
        if len(chunk) != window_len:
            # slices only get shorter as pos advances, so once one chunk
            # is short, every later one is too — stop instead of scanning on
            break
        chunks.append(chunk)
    return chunks
def plot_waves(waves, step):
    """
    Plot a 3x3 grid of waves from the given set, starting from the first one
    and increasing in index by 'step' for each subsequent graph.
    """
    n_rows, n_cols = 3, 3
    plt.figure()
    for plot_idx in range(n_rows * n_cols):
        axes = plt.subplot(n_rows, n_cols, plot_idx + 1)
        # fixed y-range so all nine panels are directly comparable
        axes.set_ylim([-100, 150])
        plt.plot(waves[plot_idx * step])
    # fix subplot sizes so that everything fits
    plt.tight_layout()
    plt.show()
def reconstruct(data, window, clusterer):
    """
    Reconstruct the given data using the cluster centers from the given
    clusterer.

    Parameters:
        data: 1-D sequence of samples to reconstruct
        window: 1-D array applied (element-wise) to each segment before
            matching, so segments look like the windowed data the clusterer
            was trained on; its length sets the segment length
        clusterer: fitted clusterer exposing predict() and cluster_centers_
            (e.g. sklearn KMeans) — assumed trained on windowed segments of
            the same length

    Returns:
        numpy array the same length as data: the sum of the matched,
        half-overlapping cluster centers
    """
    window_len = len(window)
    # floor division: slide_len is used as a range() step and slice index,
    # so it must be an int (true division would yield a float in Python 3)
    slide_len = window_len // 2
    segments = sliding_chunker(data, window_len, slide_len)
    reconstructed_data = np.zeros(len(data))
    for segment_n, segment in enumerate(segments):
        # window the segment so that we can find it in our clusters which were
        # formed from windowed data (in-place is safe: segments are copies)
        segment *= window
        nearest_match_idx = clusterer.predict(segment)[0]
        nearest_match = np.copy(clusterer.cluster_centers_[nearest_match_idx])
        # overlap-add: each center contributes to its half-overlapping span
        pos = segment_n * slide_len
        reconstructed_data[pos:pos+window_len] += nearest_match
    return reconstructed_data