# anomaly_detection_by_lstm.py
import numpy as np
import time
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from numpy import arange, sin, pi, random
import pandas as pd
from pandas import Series , DataFrame
import matplotlib.pyplot as plt
# Seed NumPy's global RNG so the shuffling and random sample duplication
# performed during data preparation are repeatable between runs.
np.random.seed(1234)
# Global hyper-parameters
# Length of each sliding window cut from the signal; the first
# sequence_length - 1 values are the model input, the last is the target.
sequence_length = 100
# Inclusive upper bound on how many times dropin() repeats one training sample.
random_data_dup = 10
# Number of epochs passed to model.fit().
epochs_var = 1
# Mini-batch size passed to model.fit().
batch_size = 50
def dropin(X, y, max_dup=None):
    """Randomly repeat (or drop) samples to augment the training set.

    Every sample ``X[i]`` and its target ``y[i]`` is emitted between 0 and
    ``max_dup`` times (both inclusive); the count is drawn independently per
    sample from NumPy's global RNG. Samples keep their relative order and
    copies of the same sample are adjacent.

    Args:
        X: NumPy array whose first axis indexes samples.
        y: NumPy array holding one target per sample along its first axis.
        max_dup: Inclusive upper bound on the number of copies per sample.
            Defaults to the module-level ``random_data_dup``.

    Returns:
        Tuple ``(X_hat, y_hat)`` of NumPy arrays with matching first
        dimension. When every sample is dropped the arrays are empty but keep
        the trailing dimensions of ``X`` / ``y``, so downstream reshapes
        still work.
    """
    if max_dup is None:
        max_dup = random_data_dup

    print("X shape:", X.shape)
    print("y shape:", y.shape)

    n_samples = len(X)
    # One draw per sample, in sample order. randint's upper bound is
    # exclusive, hence the +1; this replaces the deprecated
    # np.random.random_integers(0, max_dup).
    copies = np.array(
        [np.random.randint(0, max_dup + 1) for _ in range(n_samples)],
        dtype=np.intp,  # explicit dtype so an empty list is still integer
    )

    X_hat = np.repeat(X, copies, axis=0)
    # Only the first n_samples targets are paired with samples.
    y_hat = np.repeat(y[:n_samples], copies, axis=0)
    return X_hat, y_hat
def z_norm(result):
    """Standardise an array to zero mean and unit standard deviation.

    The mean and standard deviation are computed over all elements of the
    input. The input itself is left untouched; a new float array is returned.

    Args:
        result: Array-like of numbers (integer input is accepted).

    Returns:
        Tuple ``(normalised, mean)`` where ``normalised`` is a float array of
        the same shape as the input and ``mean`` is the mean of the input
        before normalisation. For a constant input (standard deviation 0)
        ``normalised`` is all zeros rather than NaN.
    """
    values = np.asarray(result, dtype=float)
    result_mean = values.mean()
    result_std = values.std()

    centred = values - result_mean
    if result_std == 0:
        # Constant signal: dividing would produce NaN/inf everywhere.
        return centred, result_mean
    return centred / result_std, result_mean
def _make_windows(series, start, end):
    """Stack every ``sequence_length``-long window of ``series`` that starts in ``[start, end - sequence_length)``."""
    windows = [
        series[begin:begin + sequence_length]
        for begin in range(start, end - sequence_length)
    ]
    if not windows:
        raise ValueError(
            "range [%d, %d) is too short for windows of length %d"
            % (start, end, sequence_length)
        )
    if len(windows[-1]) != sequence_length:
        # Slicing past the end of the list yields short windows, which would
        # produce a ragged array.
        raise ValueError(
            "range [%d, %d) runs past the %d available samples"
            % (start, end, len(series))
        )
    return np.array(windows)  # shape (samples, sequence_length)


def get_split_prep_data(train_start, train_end,
                        test_start, test_end,
                        data_path="data.txt"):
    """Load the temperature signal and build train/test windows for the LSTM.

    The space-separated file at ``data_path`` is read without a header, the
    rows of the first ``moteId`` group are selected, and the ``temp`` values
    whose row labels lie in 4000..5999 (inclusive) form the signal. The
    signal is cut into overlapping windows of ``sequence_length`` values; in
    each window the last value is the target and the preceding values are the
    input sequence.

    Train windows are z-normalised, shuffled and passed through ``dropin``.
    Test windows are z-normalised with their own statistics and kept in
    chronological order.

    Args:
        train_start: Index in the signal of the first training window.
        train_end: Exclusive end index of the training range.
        test_start: Index in the signal of the first test window.
        test_end: Exclusive end index of the test range.
        data_path: Path of the sensor data file.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)``. The ``X`` arrays have
        shape ``(samples, sequence_length - 1, 1)``; the ``y`` arrays have
        shape ``(samples,)``.

    Raises:
        ValueError: If the file contains no ``moteId`` group, or a requested
            range yields no complete windows.
    """
    frame = pd.read_csv(data_path, sep=' ', header=None)
    frame.rename(columns={0: 'date', 1: 'time', 2: 'epochId', 3: 'moteId',
                          4: 'temp', 5: 'humidity', 6: 'light', 7: 'voltage'},
                 inplace=True)

    # Only the first mote (in groupby order) is used.
    first_mote = next((group for _, group in frame.groupby('moteId')), None)
    if first_mote is None:
        raise ValueError("no moteId groups found in %r" % (data_path,))

    # Label-based, end-inclusive slice on the original row labels; .loc
    # replaces the .ix indexer that was removed in pandas 1.0.
    data = list(first_mote['temp'].loc[4000:5999])
    print("Length of Data", len(data))

    # train data
    print("Creating train data...")
    train, train_mean = z_norm(_make_windows(data, train_start, train_end))
    print("Mean of train data : ", train_mean)
    print("Train data shape : ", train.shape)
    # The windows already start at train_start, so all of them are used;
    # slicing by train_start again would drop windows whenever it is non-zero.
    np.random.shuffle(train)  # shuffles in-place
    X_train, y_train = dropin(train[:, :-1], train[:, -1])

    # test data
    print("Creating test data...")
    test, test_mean = z_norm(_make_windows(data, test_start, test_end))
    print("Mean of test data : ", test_mean)
    print("Test data shape : ", test.shape)
    X_test = test[:, :-1]
    y_test = test[:, -1]

    print("Shape X_train", np.shape(X_train))
    print("Shape X_test", np.shape(X_test))

    # Add the trailing feature axis expected by the LSTM input layer.
    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    return X_train, y_train, X_test, y_test
def build_model():
    """Build and compile the stacked-LSTM regression model.

    Architecture: three LSTM layers (64, 256 and 100 units), each followed by
    20% dropout, then a single-unit dense layer with linear activation. The
    model takes sequences of ``sequence_length - 1`` steps with one feature
    per step and is compiled with mean-squared-error loss and the RMSprop
    optimizer.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    layers = {'input': 1, 'hidden1': 64, 'hidden2': 256, 'hidden3': 100, 'output': 1}

    model = Sequential()

    # Keras 2 argument names (units / input_shape) replace the Keras 1
    # output_dim / input_dim / input_length spellings, matching the
    # `epochs=` keyword used when fitting.
    model.add(LSTM(
        units=layers['hidden1'],
        input_shape=(sequence_length - 1, layers['input']),
        return_sequences=True))
    model.add(Dropout(0.2))

    model.add(LSTM(
        layers['hidden2'],
        return_sequences=True))
    model.add(Dropout(0.2))

    # Last recurrent layer emits only its final state, feeding the dense head.
    model.add(LSTM(
        layers['hidden3'],
        return_sequences=False))
    model.add(Dropout(0.2))

    model.add(Dense(
        units=layers['output']))
    model.add(Activation("linear"))

    start = time.time()
    model.compile(loss="mse", optimizer="rmsprop")
    print("Compilation Time : ", time.time() - start)
    return model
def run_network(model=None, data=None):
    """Train the model, predict the test signal and plot the squared error.

    Args:
        model: Compiled model to train; built with ``build_model()`` when
            omitted.
        data: Optional ``(X_train, y_train, X_test, y_test)`` tuple; when
            omitted the data is loaded with
            ``get_split_prep_data(0, 700, 500, 1000)``.

    Returns:
        ``(model, y_test, predicted)`` where ``predicted`` is the flattened
        prediction for ``X_test``. If training or prediction is interrupted
        from the keyboard, ``predicted`` is ``0`` instead.
    """
    global_start_time = time.time()

    if data is not None:
        X_train, y_train, X_test, y_test = data
    else:
        print('Loading data... ')
        # Train windows come from samples 0-700, test windows from 500-1000.
        X_train, y_train, X_test, y_test = get_split_prep_data(0, 700, 500, 1000)

    print('\nData Loaded. Compiling...\n')

    if model is None:
        model = build_model()

    try:
        print("Training...")
        model.fit(X_train, y_train,
                  batch_size=batch_size,
                  epochs=epochs_var,
                  validation_split=0.05)
        print("Predicting...")
        raw_output = model.predict(X_test)
        print("Reshaping predicted")
        # Flatten the model output to one value per test window.
        predicted = np.reshape(raw_output, (raw_output.size,))
    except KeyboardInterrupt:
        print("prediction exception")
        print('Training duration (s) : ', time.time() - global_start_time)
        return model, y_test, 0

    # Plotting is best-effort: a failure here is reported but does not
    # prevent the results from being returned.
    try:
        plt.figure(1)

        plt.subplot(311)
        plt.title("Actual Test Signal w/Anomalies")
        plt.plot(y_test[:len(y_test)], 'b')

        plt.subplot(312)
        plt.title("Predicted Signal")
        plt.plot(predicted[:len(y_test)], 'g')

        plt.subplot(313)
        plt.title("Squared Error")
        squared_error = (y_test - predicted) ** 2
        plt.plot(squared_error, 'r')
        print(squared_error)

        plt.show()
    except Exception as err:
        print("plotting exception")
        print(str(err))

    elapsed = time.time() - global_start_time
    print('Training duration (s) : ', elapsed)
    print("-----------------**************FINISHED************-------------------")
    return model, y_test, predicted
# Script entry point: with no arguments run_network() loads the data, builds
# the model, trains it, predicts the test signal and shows the plots.
# NOTE(review): this call also executes when the module is imported; consider
# guarding it with `if __name__ == "__main__":` if the functions above are to
# be reused from other modules.
run_network()