In [13]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm
from keras.layers import Input, Dense, Lambda, Flatten, Reshape
from keras.layers import Conv1D,UpSampling1D
from keras.models import Model
from keras import backend as K

beta = 1
n_features = 5
boat_csv = pd.read_csv("Data/Boat_nominal_data/Boat_sequences_mix.csv")
boat_csv = boat_csv.drop(columns=["Unnamed: 0", "M0C", "M1C", "Acceleration"])
scaler = StandardScaler()
normal_data = scaler.fit_transform(boat_csv)
print(normal_data.shape)

boat_val = pd.read_csv("Data/Boat_nominal_data/Boat_sequences_mix_val.csv")
boat_val = boat_val.drop(columns=["Unnamed: 0", "M0C", "M1C", "Acceleration"])
scaler = StandardScaler()
val_nom_data = scaler.fit_transform(boat_val)

def prepare_sequences(data, batch_size):
    samples = []
    for i in range(0,data.shape[0]- batch_size, batch_size):
        sample = data[i:i+batch_size]	
        samples.append(sample)
    sequences = np.array(samples)
    trainX = np.reshape(sequences, (len(sequences), batch_size, n_features))
    return trainX


def prepare_data():    
    trainX_nominal = prepare_sequences(normal_data,1024) 
    print(trainX_nominal.shape)
    
    valX_nominal = prepare_sequences(val_nom_data,1024)
    print(valX_nominal.shape)

    return trainX_nominal, valX_nominal

trainX_nominal, valX_nominal = prepare_data()


(513024, 5)
(500, 1024, 5)
(10, 1024, 5)


In [14]:
from keras.layers import MaxPooling1D

input_shape = (1024, n_features)
kernel_size = 3
filters = 64
latent_dim = 10
use_mse = True
load_weights = False


def sampling(args):
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon


inputs = Input(shape=input_shape, name='encoder_input')
x = inputs
for i in range(4):
    x = Conv1D(filters=filters,
               kernel_size=7,
               padding='same')(x)
    x = MaxPooling1D(2)(x)
    filters = int(filters / 2)


shape = K.int_shape(x)

x = Flatten()(x)
z_mean = Dense(latent_dim, name='z_mean')(x)
z_log_var = Dense(latent_dim, name='z_log_var')(x)

z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])

encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
encoder.summary()

latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
x = Dense(shape[1] * shape[2], name='Dense_after_sampling')(latent_inputs)
x = Reshape((shape[1], shape[2]))(x)
filters = filters * 2

for i in range(4):
    x = Conv1D(filters=filters,kernel_size=7, padding='same')(x)
    x = UpSampling1D(size=2)(x)
    filters = filters * 2
    
    
outputs = Conv1D(filters=n_features, kernel_size=7, padding='same')(x)


decoder = Model(latent_inputs, outputs, name='decoder')
decoder.summary()

outputs = decoder(encoder(inputs)[2])
vae = Model(inputs, outputs, name='vae')

from keras.losses import mse

reconstruction_loss = mse(K.flatten(inputs), K.flatten(outputs))
kl_loss = - 0.5 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var))
loss = reconstruction_loss + beta*kl_loss
vae.add_loss(loss)

vae.compile(optimizer='rmsprop', metrics= ['accuracy'])


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_input (InputLayer)      (None, 1024, 5)      0                                            
__________________________________________________________________________________________________
conv1d_19 (Conv1D)              (None, 1024, 64)     2304        encoder_input[0][0]              
__________________________________________________________________________________________________
max_pooling1d_9 (MaxPooling1D)  (None, 512, 64)      0           conv1d_19[0][0]                  
__________________________________________________________________________________________________
conv1d_20 (Conv1D)              (None, 512, 32)      14368       max_pooling1d_9[0][0]            
__________________________________________________________________________________________________
max_poolin

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
z_sampling (InputLayer)      (None, 10)                0         
_________________________________________________________________
Dense_after_sampling (Dense) (None, 512)               5632      
_________________________________________________________________
reshape_3 (Reshape)          (None, 64, 8)             0         
_________________________________________________________________
conv1d_23 (Conv1D)           (None, 64, 8)             456       
_________________________________________________________________
up_sampling1d_9 (UpSampling1 (None, 128, 8)            0         
_________________________________________________________________
conv1d_24 (Conv1D)           (None, 128, 16)           912       
_________________________________________________________________
up_sampling1d_10 (UpSampling (None, 256, 16)           0         
__________

In [15]:
from keras.callbacks import ModelCheckpoint

checkpointer = ModelCheckpoint(filepath="Models/Weights/weights.hdf5", verbose=1,
                               save_best_only=True)

vae.fit(x=trainX_nominal, epochs=120, 
        batch_size=1024,
        validation_data=(valX_nominal,None),
        callbacks=[checkpointer])
vae.load_weights('Models/Weights/weights.hdf5')

# vae.save("Models/Conv1d_VAE_comp.h5")

Train on 500 samples, validate on 10 samples
Epoch 1/120





Epoch 00001: val_loss improved from inf to 4.06110, saving model to Models/Weights/weights.hdf5


Epoch 2/120





Epoch 00002: val_loss improved from 4.06110 to 1.44694, saving model to Models/Weights/weights.hdf5
Epoch 3/120





Epoch 00003: val_loss improved from 1.44694 to 1.11996, saving model to Models/Weights/weights.hdf5
Epoch 4/120





Epoch 00004: val_loss did not improve from 1.11996
Epoch 5/120





Epoch 00005: val_loss did not improve from 1.11996
Epoch 6/120





Epoch 00006: val_loss did not improve from 1.11996
Epoch 7/120





Epoch 00007: val_loss improved from 1.11996 to 1.05841, saving model to Models/Weights/weights.hdf5
Epoch 8/120





Epoch 00008: val_loss improved from 1.05841 to 1.02947, saving model to Models/Weights/weights.hdf5
Epoch 9/120





Epoch 00009: val_loss improved from 1.02947 to 0.95212, saving model to Models/Weights/weights.hdf5
Epoch 10/120





Epoch 00010: val_loss improved from 0.95212 to 0.81452, saving model to Models/Weights/weights.hdf5
Epoch 11/120





Epoch 00011: val_loss improved from 0.81452 to 0.71974, saving model to Models/Weights/weights.hdf5
Epoch 12/120





Epoch 00012: val_loss did not improve from 0.71974
Epoch 13/120





Epoch 00013: val_loss did not improve from 0.71974
Epoch 14/120





Epoch 00014: val_loss did not improve from 0.71974
Epoch 15/120





Epoch 00015: val_loss did not improve from 0.71974
Epoch 16/120





Epoch 00016: val_loss improved from 0.71974 to 0.52368, saving model to Models/Weights/weights.hdf5
Epoch 17/120





Epoch 00017: val_loss did not improve from 0.52368
Epoch 18/120





Epoch 00018: val_loss did not improve from 0.52368
Epoch 19/120





Epoch 00019: val_loss did not improve from 0.52368
Epoch 20/120





Epoch 00020: val_loss did not improve from 0.52368
Epoch 21/120





Epoch 00021: val_loss did not improve from 0.52368
Epoch 22/120





Epoch 00022: val_loss improved from 0.52368 to 0.49732, saving model to Models/Weights/weights.hdf5
Epoch 23/120





Epoch 00023: val_loss did not improve from 0.49732
Epoch 24/120





Epoch 00024: val_loss improved from 0.49732 to 0.43532, saving model to Models/Weights/weights.hdf5
Epoch 25/120





Epoch 00025: val_loss did not improve from 0.43532
Epoch 26/120





Epoch 00026: val_loss did not improve from 0.43532
Epoch 27/120





Epoch 00027: val_loss did not improve from 0.43532
Epoch 28/120





Epoch 00028: val_loss did not improve from 0.43532
Epoch 29/120





Epoch 00029: val_loss did not improve from 0.43532
Epoch 30/120





Epoch 00030: val_loss improved from 0.43532 to 0.42715, saving model to Models/Weights/weights.hdf5
Epoch 31/120





Epoch 00031: val_loss did not improve from 0.42715
Epoch 32/120





Epoch 00032: val_loss did not improve from 0.42715
Epoch 33/120





Epoch 00033: val_loss did not improve from 0.42715
Epoch 34/120





Epoch 00034: val_loss did not improve from 0.42715
Epoch 35/120





Epoch 00035: val_loss improved from 0.42715 to 0.41578, saving model to Models/Weights/weights.hdf5
Epoch 36/120





Epoch 00036: val_loss did not improve from 0.41578
Epoch 37/120





Epoch 00037: val_loss improved from 0.41578 to 0.40205, saving model to Models/Weights/weights.hdf5
Epoch 38/120





Epoch 00038: val_loss did not improve from 0.40205
Epoch 39/120





Epoch 00039: val_loss did not improve from 0.40205
Epoch 40/120





Epoch 00040: val_loss did not improve from 0.40205
Epoch 41/120





Epoch 00041: val_loss did not improve from 0.40205
Epoch 42/120





Epoch 00042: val_loss improved from 0.40205 to 0.36637, saving model to Models/Weights/weights.hdf5
Epoch 43/120





Epoch 00043: val_loss did not improve from 0.36637
Epoch 44/120





Epoch 00044: val_loss did not improve from 0.36637
Epoch 45/120





Epoch 00045: val_loss did not improve from 0.36637
Epoch 46/120





Epoch 00046: val_loss did not improve from 0.36637
Epoch 47/120





Epoch 00047: val_loss improved from 0.36637 to 0.30614, saving model to Models/Weights/weights.hdf5
Epoch 48/120





Epoch 00048: val_loss improved from 0.30614 to 0.29578, saving model to Models/Weights/weights.hdf5
Epoch 49/120





Epoch 00049: val_loss improved from 0.29578 to 0.28637, saving model to Models/Weights/weights.hdf5
Epoch 50/120





Epoch 00050: val_loss did not improve from 0.28637
Epoch 51/120





Epoch 00051: val_loss did not improve from 0.28637
Epoch 52/120





Epoch 00052: val_loss did not improve from 0.28637
Epoch 53/120





Epoch 00053: val_loss improved from 0.28637 to 0.26371, saving model to Models/Weights/weights.hdf5
Epoch 54/120





Epoch 00054: val_loss did not improve from 0.26371
Epoch 55/120





Epoch 00055: val_loss did not improve from 0.26371
Epoch 56/120





Epoch 00056: val_loss did not improve from 0.26371
Epoch 57/120





Epoch 00057: val_loss did not improve from 0.26371
Epoch 58/120





Epoch 00058: val_loss did not improve from 0.26371
Epoch 59/120





Epoch 00059: val_loss did not improve from 0.26371
Epoch 60/120





Epoch 00060: val_loss improved from 0.26371 to 0.26133, saving model to Models/Weights/weights.hdf5
Epoch 61/120





Epoch 00061: val_loss improved from 0.26133 to 0.25846, saving model to Models/Weights/weights.hdf5
Epoch 62/120





Epoch 00062: val_loss did not improve from 0.25846
Epoch 63/120





Epoch 00063: val_loss did not improve from 0.25846
Epoch 64/120





Epoch 00064: val_loss did not improve from 0.25846
Epoch 65/120





Epoch 00065: val_loss improved from 0.25846 to 0.24194, saving model to Models/Weights/weights.hdf5
Epoch 66/120





Epoch 00066: val_loss did not improve from 0.24194
Epoch 67/120





Epoch 00067: val_loss did not improve from 0.24194
Epoch 68/120





Epoch 00068: val_loss did not improve from 0.24194
Epoch 69/120





Epoch 00069: val_loss improved from 0.24194 to 0.22056, saving model to Models/Weights/weights.hdf5
Epoch 70/120





Epoch 00070: val_loss did not improve from 0.22056
Epoch 71/120





Epoch 00071: val_loss did not improve from 0.22056
Epoch 72/120





Epoch 00072: val_loss did not improve from 0.22056
Epoch 73/120





Epoch 00073: val_loss did not improve from 0.22056
Epoch 74/120





Epoch 00074: val_loss did not improve from 0.22056
Epoch 75/120





Epoch 00075: val_loss did not improve from 0.22056
Epoch 76/120





Epoch 00076: val_loss did not improve from 0.22056
Epoch 77/120





Epoch 00077: val_loss did not improve from 0.22056
Epoch 78/120





Epoch 00078: val_loss did not improve from 0.22056
Epoch 79/120





Epoch 00079: val_loss did not improve from 0.22056
Epoch 80/120





Epoch 00080: val_loss did not improve from 0.22056
Epoch 81/120





Epoch 00081: val_loss did not improve from 0.22056
Epoch 82/120





Epoch 00082: val_loss did not improve from 0.22056
Epoch 83/120





Epoch 00083: val_loss did not improve from 0.22056
Epoch 84/120





Epoch 00084: val_loss did not improve from 0.22056
Epoch 85/120





Epoch 00085: val_loss did not improve from 0.22056
Epoch 86/120





Epoch 00086: val_loss did not improve from 0.22056
Epoch 87/120





Epoch 00087: val_loss did not improve from 0.22056
Epoch 88/120





Epoch 00088: val_loss did not improve from 0.22056
Epoch 89/120





Epoch 00089: val_loss improved from 0.22056 to 0.20838, saving model to Models/Weights/weights.hdf5
Epoch 90/120





Epoch 00090: val_loss did not improve from 0.20838
Epoch 91/120





Epoch 00091: val_loss did not improve from 0.20838
Epoch 92/120





Epoch 00092: val_loss did not improve from 0.20838
Epoch 93/120





Epoch 00093: val_loss improved from 0.20838 to 0.20456, saving model to Models/Weights/weights.hdf5
Epoch 94/120





Epoch 00094: val_loss improved from 0.20456 to 0.19689, saving model to Models/Weights/weights.hdf5
Epoch 95/120





Epoch 00095: val_loss did not improve from 0.19689
Epoch 96/120





Epoch 00096: val_loss did not improve from 0.19689
Epoch 97/120





Epoch 00097: val_loss did not improve from 0.19689
Epoch 98/120





Epoch 00098: val_loss did not improve from 0.19689
Epoch 99/120





Epoch 00099: val_loss did not improve from 0.19689
Epoch 100/120





Epoch 00100: val_loss did not improve from 0.19689
Epoch 101/120





Epoch 00101: val_loss did not improve from 0.19689
Epoch 102/120





Epoch 00102: val_loss improved from 0.19689 to 0.19457, saving model to Models/Weights/weights.hdf5
Epoch 103/120





Epoch 00103: val_loss improved from 0.19457 to 0.18912, saving model to Models/Weights/weights.hdf5
Epoch 104/120





Epoch 00104: val_loss improved from 0.18912 to 0.18605, saving model to Models/Weights/weights.hdf5
Epoch 105/120





Epoch 00105: val_loss did not improve from 0.18605
Epoch 106/120





Epoch 00106: val_loss did not improve from 0.18605
Epoch 107/120





Epoch 00107: val_loss did not improve from 0.18605
Epoch 108/120





Epoch 00108: val_loss did not improve from 0.18605
Epoch 109/120





Epoch 00109: val_loss did not improve from 0.18605
Epoch 110/120





Epoch 00110: val_loss did not improve from 0.18605
Epoch 111/120





Epoch 00111: val_loss did not improve from 0.18605
Epoch 112/120





Epoch 00112: val_loss did not improve from 0.18605
Epoch 113/120





Epoch 00113: val_loss did not improve from 0.18605
Epoch 114/120





Epoch 00114: val_loss did not improve from 0.18605
Epoch 115/120





Epoch 00115: val_loss did not improve from 0.18605
Epoch 116/120





Epoch 00116: val_loss did not improve from 0.18605
Epoch 117/120





Epoch 00117: val_loss did not improve from 0.18605
Epoch 118/120





Epoch 00118: val_loss did not improve from 0.18605
Epoch 119/120





Epoch 00119: val_loss did not improve from 0.18605
Epoch 120/120





Epoch 00120: val_loss did not improve from 0.18605


In [119]:
import sys

orig_stdout = sys.stdout
f = open('Logs/out.txt', 'w')
sys.stdout = f
print(encoder.summary())
print(decoder.summary())
sys.stdout = orig_stdout
f.close()


In [6]:

nom_autoenc = vae.predict(trainX_nominal)

autoenc_df = pd.DataFrame(nom_autoenc[0], columns=boat_csv.columns)

plt.plot(boat_csv['Lon'][:1024], boat_csv['Lat'][:1024])
plt.show()
plt.plot(autoenc_df['Lon'], autoenc_df['Lat'])
plt.show()





In [16]:
nom_enc = encoder.predict(trainX_nominal)
print(len(nom_enc), nom_enc[0].shape)

3 (500, 10)


In [38]:
from sklearn.decomposition import PCA

labels = np.array(pd.read_csv("Data/Boat_nominal_data/Boat_mix_labels")['label']) 
nominals = np.squeeze(np.argwhere(labels==1))
anomalous = np.squeeze(np.argwhere(labels==0))
print(type(nominals), type(nominals[0]))

TypeError: only integer scalar arrays can be converted to a scalar index

In [39]:

scaler = StandardScaler()
enc_input = scaler.fit_transform(nom_enc[2])
pca = PCA(n_components=2)
principalComponents = pca.fit_transform(enc_input)
x_val = []
y_val=[]
for i in range(principalComponents.shape[0]):
    x_val.append(principalComponents[i][0])
    y_val.append(principalComponents[i][1])
plt.scatter(x=x_val[nominals],y=y_val[nominals], alpha=0.5)
plt.scatter(x=x_val[anomalous],y=y_val[anomalous], alpha=0.5)
plt.show()


TypeError: only integer scalar arrays can be converted to a scalar index