In [44]:
# The wildcard import stays first so that every explicit import below wins
# over any same-named symbol it might drag in.
from keras.layers import *
from keras.layers import LSTM, RepeatVector
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from keras import Sequential
import keras as kr


def read_data(data_path):
    """Load the CSV located at ``data_path`` and return it as a DataFrame.

    Thin wrapper around ``pd.read_csv`` using that function's defaults.
    """
    return pd.read_csv(data_path)


# Nominal track. "Unnamed: 0" is presumably the index column written by an
# earlier DataFrame.to_csv call -- TODO confirm against the data file.
boat_csv = read_data("Data/Boat_data.csv").drop(columns=["Unnamed: 0"])
# Discard the final 39 rows of the recording.
boat_csv = boat_csv.drop(index=boat_csv.index[-39:])
# Standardise every column; `scaler` keeps the statistics of the nominal
# track after this point.
scaler = StandardScaler()
scaler.fit(boat_csv)
normal_data = scaler.transform(boat_csv)
print(normal_data.shape)
# 
# boat_csv = read_data("Data/Anomalous_boat_data.csv")
# boat_csv = boat_csv.drop(columns=["Unnamed: 0", "heading"])
# boat_csv = boat_csv.drop(boat_csv.index[-20:])    
# scaler = StandardScaler()
# anomalous_data = scaler.fit_transform(boat_csv)
# print(anomalous_data.shape)


# Curved (anomalous) track: same column drop and 39-row trim as the nominal
# track loaded earlier.
boat_csv = read_data("Data/Boat_data_curved.csv")
boat_csv = boat_csv.drop(columns=["Unnamed: 0"])
boat_csv = boat_csv.drop(boat_csv.index[-39:])
# Standardise with the scaler that was fitted on the nominal track instead of
# fitting a new one here. The autoencoder is trained on nominally-scaled data,
# so re-fitting on the evaluation track would put the two inputs on different
# scales and can mask exactly the shifts the detector is supposed to expose.
# NOTE(review): this requires both CSV files to share the same columns.
anomalous_data = scaler.transform(boat_csv)
print(anomalous_data.shape)

(5700, 7)
(5700, 7)


In [45]:
# NOTE: despite its name, `batch_size` is the number of rows (time steps) in
# one sliding window; `interval` is the stride between consecutive windows.
batch_size = 300
interval = 15


def prepare_sequences(data, window=None, step=None):
    """Cut a 2-D array into overlapping fixed-length windows.

    Args:
        data: array-like of shape (n_rows, n_features).
        window: rows per window; defaults to the module-level ``batch_size``.
        step: stride between window starts; defaults to the module-level
            ``interval``.

    Returns:
        ndarray of shape (n_windows, window, n_features). The feature count is
        taken from ``data`` rather than being hard-coded.

    Window starts are ``range(0, n_rows - window, step)``; the stop is
    exclusive, so a window starting exactly at ``n_rows - window`` is not
    produced. This is kept as-is so existing sample counts do not change.
    """
    window = batch_size if window is None else window
    step = interval if step is None else step

    data = np.asarray(data)
    n_features = data.shape[1]

    samples = [data[start:start + window]
               for start in range(0, data.shape[0] - window, step)]
    if not samples:
        # Not enough rows for a single window: keep a well-formed 3-D result.
        return np.empty((0, window, n_features), dtype=data.dtype)

    # (samples, time steps, features)
    return np.stack(samples)

# Window both standardised tracks with the default window length and stride,
# then report the resulting (samples, time steps, features) shapes.
trainX_nominal, trainX_anomalous = (
    prepare_sequences(track) for track in (normal_data, anomalous_data)
)
for windowed in (trainX_nominal, trainX_anomalous):
    print(windowed.shape)

(360, 300, 7)
(360, 300, 7)


In [54]:
# TRAINING MODEL
# timesteps: rows per input window (the global `batch_size` holds the window
#            length, not the optimiser batch size)
# input_dim: number of feature columns
# epochs:    passes over the training windows
timesteps, input_dim, epochs = batch_size, 7, 20

def get_model(n_dimensions):
    """Build the (uncompiled) LSTM autoencoder.

    Args:
        n_dimensions: number of features per time step; used as the width of
            the decoder's output layer (previously the argument was ignored
            and the global ``input_dim`` was read instead).

    Returns:
        A ``Sequential`` model made of an "encode" and a "decode" sub-model.
        The window length is read from the module-level ``timesteps``.
    """
    # Encoder: sequence -> 10-value code. The first LSTM uses `timesteps` as
    # its number of units.
    encoder = Sequential(
        [
            LSTM(timesteps, return_sequences=True),
            LSTM(64, return_sequences=False),
            Dense(10),
        ],
        name="encode",
    )

    # Decoder: code -> repeated `timesteps` times -> sequence of n_dimensions.
    # NOTE(review): the output layer is an LSTM left at its default
    # activation; if that is tanh (confirm for the installed Keras version)
    # reconstructions are confined to [-1, 1] while standardised inputs are
    # not.
    decoder = Sequential(
        [
            Dense(10),
            Dense(64),
            RepeatVector(timesteps),
            LSTM(n_dimensions, return_sequences=True),
        ],
        name="decode",
    )

    return Sequential([encoder, decoder])

print("GETTING the model")
autoencoder = get_model(input_dim)
print("COMPILING")
# Mean absolute error is tracked next to the MSE loss. Classification
# accuracy, used previously, carries no meaning for a real-valued
# reconstruction target.
autoencoder.compile(optimizer='adam', loss='mse',
                    metrics=['mae'])
print("FITTING")
# The autoencoder learns to reproduce its own input, so the nominal windows
# serve as both input and target. `batch_size=200` here is the optimiser
# mini-batch size, unrelated to the global `batch_size` (window length).
history = autoencoder.fit(trainX_nominal, trainX_nominal, batch_size=200,
                          epochs=epochs)

autoencoder.save("Models/Nominal_LSTM.model")
#autoencoder.save("Models/Anomalous_LSTM.model")

print("DONE")

GETTING the model
COMPILING
FITTING


Epoch 1/20






Epoch 2/20






Epoch 3/20






Epoch 4/20






Epoch 5/20






Epoch 6/20






Epoch 7/20






Epoch 8/20






Epoch 9/20






Epoch 10/20






Epoch 11/20






Epoch 12/20






Epoch 13/20






Epoch 14/20






Epoch 15/20






Epoch 16/20






Epoch 17/20






Epoch 18/20






Epoch 19/20






Epoch 20/20






DONE


In [46]:
# LOADING MODEL
# Restore the autoencoder that the training cell saved to disk.
nom_model = kr.models.load_model(filepath="Models/Nominal_LSTM.model")
print("Nominal MODEL LOADED")

#anom_model = kr.models.load_model("Models/Anomalous_LSTM.model")

#print("Anomalous MODEL LOADED")

Nominal MODEL LOADED


In [47]:

# Reconstruct both sets of windows with the model trained on nominal data.
autoencoded_nominal = nom_model.predict(trainX_nominal)
print(autoencoded_nominal.shape, "NOMINAL BEHAVIOUR PREDICTED", sep="\n")

autoencoded_anomalous = nom_model.predict(trainX_anomalous)
print(autoencoded_anomalous.shape, "ANOMALOUS BEHAVIOUR PREDICTED", sep="\n")


(360, 300, 7)
NOMINAL BEHAVIOUR PREDICTED


(360, 300, 7)
ANOMALOUS BEHAVIOUR PREDICTED


In [48]:
anomalous_sequences = []
# Rows per window; the stride between windows is the existing `interval`.
window = batch_size

# Absolute reconstruction error of every window. Window k is compared with
# the rows [k * interval, k * interval + window) of the scaled track it was
# cut from.
reconstr_error_nominal_sequences = [
    np.abs(normal_data[k * interval:k * interval + window] - reconstruction)
    for k, reconstruction in enumerate(autoencoded_nominal)
]
reconstr_error_anomalous_sequences = [
    np.abs(anomalous_data[k * interval:k * interval + window] - reconstruction)
    for k, reconstruction in enumerate(autoencoded_anomalous)
]

# Per-window statistics of the nominal errors: the per-feature mean and the
# feature-by-feature covariance matrix.
MUs = [np.average(errors, axis=0)
       for errors in reconstr_error_nominal_sequences]
SIGMAs = [np.cov(errors, rowvar=False)
          for errors in reconstr_error_nominal_sequences]

print(len(reconstr_error_nominal_sequences), len(reconstr_error_nominal_sequences[0]))
print(len(MUs), MUs[0])
print("Done")


360 300
360 [0.43522695 0.17596792 0.78263209 0.57424925 0.60483328 0.5628774
 0.40499782]
Done


In [86]:
def calculate_anomaly_score_sequence(i, element, weights=None, mus=None, sigmas=None):
    """Average weighted quadratic-form score of one window of errors.

    For every row ``e`` of ``element`` the score is ``d @ sigma @ d`` with
    ``d = (e - mu) * weights``; the function returns the mean over all rows.

    Args:
        i: index of the window; selects ``mu`` and ``sigma``.
        element: array-like of shape (n_rows, n_features) holding the
            reconstruction errors of window ``i``.
        weights: per-feature multipliers. Defaults to 1 for the first six
            features and 6 for the seventh.
        mus: sequence of per-window mean vectors; defaults to the
            module-level ``MUs``.
        sigmas: sequence of per-window covariance matrices; defaults to the
            module-level ``SIGMAs``.

    Returns:
        The mean score as a float (NaN when ``element`` has no rows).

    NOTE(review): the quadratic form uses the covariance matrix itself. A
    Mahalanobis-style score would use its inverse -- confirm which one is
    intended before relying on the absolute values.
    """
    if weights is None:
        weights = (1, 1, 1, 1, 1, 1, 6)
    mu = (MUs if mus is None else mus)[i]
    sigma = (SIGMAs if sigmas is None else sigmas)[i]

    element = np.asarray(element)
    if element.size == 0:
        return float("nan")

    # All rows at once: row-wise d . (sigma d), written as sum((d sigma) * d).
    deviations = (element - mu) * np.asarray(weights)
    scores = np.sum(np.dot(deviations, sigma) * deviations, axis=1)
    return np.average(scores)

# One score per window. Both tracks are scored against the statistics of the
# nominal window that has the same index.
anomaly_scores = [
    calculate_anomaly_score_sequence(idx, errors)
    for idx, errors in enumerate(reconstr_error_anomalous_sequences)
]
anomaly_scores_nominal = [
    calculate_anomaly_score_sequence(idx, errors)
    for idx, errors in enumerate(reconstr_error_nominal_sequences)
]

print(len(anomaly_scores), len(anomaly_scores_nominal))


360 360


In [87]:
# Overlay the per-window scores of both tracks. Labels and a legend are
# needed because the two curves are otherwise indistinguishable.
plt.plot(anomaly_scores, label="Anomalous (curved) track")
plt.plot(anomaly_scores_nominal, label="Nominal track")
plt.xlabel("Window index")
plt.ylabel("Anomaly score")
plt.title("LSTM_AE anomaly score per window")
plt.legend()
plt.show()


In [88]:
def detectOutliers(x, outlierConstant):
    """Flag values outside a scaled inter-quartile fence and patch them.

    Args:
        x: sequence of numeric values.
        outlierConstant: multiplier ``c`` applied to the inter-quartile range;
            values inside ``[Q1 - c*IQR, Q3 + c*IQR]`` are accepted.

    Returns:
        ``(resultList, outlierList)``. ``resultList`` has the same length as
        ``x``, with each outlier replaced by the most recent value already
        placed in ``resultList`` (or by the median of ``x`` when the outlier
        comes first). ``outlierList`` holds ``(index, original_value)``
        tuples. Both lists are empty for empty input.
    """
    a = np.asarray(x)
    if a.size == 0:
        return [], []

    lower_quartile, upper_quartile = np.percentile(a, [25, 75])
    margin = (upper_quartile - lower_quartile) * outlierConstant
    lower_bound = lower_quartile - margin
    upper_bound = upper_quartile + margin

    # Replacement for a leading outlier, which has no predecessor. Indexing
    # with -1 here would silently wrap around to the last element.
    fallback = float(np.median(a))

    resultList = []
    outlierList = []
    for position, value in enumerate(a.tolist()):
        if lower_bound <= value <= upper_bound:
            resultList.append(value)
        else:
            outlierList.append((position, value))
            # Use the previous *cleaned* value so that a run of consecutive
            # outliers does not leak an outlier back into the result.
            resultList.append(resultList[-1] if resultList else fallback)
    return resultList, outlierList

# Inter-quartile fence with a multiplier of 1, applied to each score series.
# The second element of each result lists the flagged (window index, score)
# pairs.
nominal_without_outliers, outlier_list = detectOutliers(
    anomaly_scores_nominal, 1)
anomalous_without_outliers, outliers_anomalous_list = detectOutliers(
    anomaly_scores, 1)

print("Done")

Done


In [89]:

#PLOT THE GRAPHS AND OUTLINE THE POINTS OF THE SUSPECTED ANOMALIES

def _plot_flagged_windows(track, flagged, title):
    """Draw the G_Lon/G_Lat path of ``track`` and mark every flagged window.

    ``flagged`` holds ``(window_index, score)`` tuples. Window ``k`` covers
    rows ``[k * interval, k * interval + window)`` of ``track``, using the
    module-level ``interval`` and ``window``.
    """
    plt.plot(track["G_Lon"], track["G_Lat"])
    plt.title(title)
    for window_index, _score in flagged:
        start = window_index * interval
        stop = start + window
        plt.plot(track["G_Lon"].iloc[start:stop],
                 track["G_Lat"].iloc[start:stop], 'bo')
    plt.show()


boat_csv = read_data("Data/Boat_data.csv")
boat_csv = boat_csv.drop(columns=["Unnamed: 0"])
_plot_flagged_windows(boat_csv, outlier_list,
                      "Nominal anomalies points LSTM_AE")

an_csv = read_data("Data/Boat_data_curved.csv")
an_csv = an_csv.drop(columns=["Unnamed: 0"])
# This figure shows the curved (anomalous) track. It was previously titled
# "Nominal" through copy-paste.
_plot_flagged_windows(an_csv, outliers_anomalous_list,
                      "Anomalous anomalies points LSTM_AE")

In [46]:
# 
# nominal = True
# print(autoencoded_nominal.shape)
# 
# autoenc_df = pd.DataFrame(autoencoded_nominal[0], columns= boat_csv.columns)
# print(autoenc_df)
# 
# if nominal:
#     plt.plot(boat_csv["Speed"])
#     plt.plot(autoenc_df['Speed'])
#     plt.show()
# else:
#     plt.plot(boat_csv["speed"])
#     plt.plot(autoenc_df["speed"])
#     plt.show()
# print("End")

(370, 150, 7)
        Speed   Degrees  Accelleration       M0C       M1C     G_Lat     G_Lon
0    0.103706  0.053368      -0.003267 -0.033263  0.015652 -0.199269  0.089458
1    0.147910  0.088868      -0.000346 -0.055080  0.016915 -0.322113  0.136497
2    0.165159  0.112554       0.004585 -0.069299  0.013441 -0.395346  0.161763
3    0.171146  0.128592       0.009500 -0.078585  0.008989 -0.439709  0.175953
4    0.172714  0.139658       0.013663 -0.084588  0.004908 -0.467474  0.184227
..        ...       ...            ...       ...       ...       ...       ...
145  0.169906  0.166844       0.024126 -0.089409 -0.008423 -0.527643  0.195545
146  0.169906  0.166844       0.024126 -0.089409 -0.008423 -0.527643  0.195545
147  0.169906  0.166844       0.024126 -0.089409 -0.008423 -0.527643  0.195545
148  0.169906  0.166844       0.024126 -0.089409 -0.008423 -0.527643  0.195545
149  0.169906  0.166844       0.024126 -0.089409 -0.008423 -0.527643  0.195545

[150 rows x 7 columns]
End
