In [41]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm
from keras.layers import Input, Dense, Lambda, Flatten, Reshape
from keras.layers import Conv1D,UpSampling1D
from keras.models import Model
from keras import backend as K

beta = 10
n_features = 4
boat_csv = pd.read_csv("Data/Boat_nominal_data/Boat_sequences_mix.csv")
boat_csv = boat_csv.drop(columns=["Unnamed: 0", "M0C", "M1C", "Acceleration","Speed"])
scaler = StandardScaler()
normal_data = scaler.fit_transform(boat_csv)
print(normal_data.shape)

boat_val = pd.read_csv("Data/Boat_nominal_data/Boat_sequences_mix_val.csv")
boat_val = boat_val.drop(columns=["Unnamed: 0", "M0C", "M1C", "Acceleration", "Speed"])
scaler = StandardScaler()
val_nom_data = scaler.fit_transform(boat_val)

def prepare_sequences(data, batch_size):
    samples = []
    for i in range(0,data.shape[0]- batch_size, batch_size):
        sample = data[i:i+batch_size]	
        samples.append(sample)
    sequences = np.array(samples)
    trainX = np.reshape(sequences, (len(sequences), batch_size, n_features))
    return trainX


def prepare_data():    
    trainX_nominal = prepare_sequences(normal_data,1024) 
    print(trainX_nominal.shape)
    
    valX_nominal = prepare_sequences(val_nom_data,1024)
    print(valX_nominal.shape)

    return trainX_nominal, valX_nominal

trainX_nominal, valX_nominal = prepare_data()


(513024, 4)
(500, 1024, 4)
(10, 1024, 4)


In [42]:
from keras.layers import MaxPooling1D

input_shape = (1024, n_features)
kernel_size = 3
filters = 64
latent_dim = 10
use_mse = True
load_weights = False


def sampling(args):
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon


inputs = Input(shape=input_shape, name='encoder_input')
x = inputs
for i in range(4):
    x = Conv1D(filters=filters,
               kernel_size=7,
               padding='same')(x)
    x = MaxPooling1D(2)(x)
    filters = int(filters / 2)


shape = K.int_shape(x)

x = Flatten()(x)
z_mean = Dense(latent_dim, name='z_mean')(x)
z_log_var = Dense(latent_dim, name='z_log_var')(x)

z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])

encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
encoder.summary()

latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
x = Dense(shape[1] * shape[2], name='Dense_after_sampling')(latent_inputs)
x = Reshape((shape[1], shape[2]))(x)
filters = filters * 2

for i in range(4):
    x = Conv1D(filters=filters,kernel_size=7, padding='same')(x)
    x = UpSampling1D(size=2)(x)
    filters = filters * 2
    
    
outputs = Conv1D(filters=n_features, kernel_size=7, padding='same')(x)


decoder = Model(latent_inputs, outputs, name='decoder')
decoder.summary()

outputs = decoder(encoder(inputs)[2])
vae = Model(inputs, outputs, name='vae')

from keras.losses import mse

reconstruction_loss = mse(K.flatten(inputs), K.flatten(outputs))
kl_loss = - 0.5 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var))
loss = reconstruction_loss + beta*kl_loss
vae.add_loss(loss)

vae.compile(optimizer='rmsprop', metrics= ['accuracy'])


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_input (InputLayer)      (None, 1024, 4)      0                                            
__________________________________________________________________________________________________
conv1d_46 (Conv1D)              (None, 1024, 64)     1856        encoder_input[0][0]              
__________________________________________________________________________________________________
max_pooling1d_21 (MaxPooling1D) (None, 512, 64)      0           conv1d_46[0][0]                  
__________________________________________________________________________________________________
conv1d_47 (Conv1D)              (None, 512, 32)      14368       max_pooling1d_21[0][0]           
__________________________________________________________________________________________________
max_poolin

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
z_sampling (InputLayer)      (None, 10)                0         
_________________________________________________________________
Dense_after_sampling (Dense) (None, 512)               5632      
_________________________________________________________________
reshape_6 (Reshape)          (None, 64, 8)             0         
_________________________________________________________________
conv1d_50 (Conv1D)           (None, 64, 8)             456       
_________________________________________________________________
up_sampling1d_21 (UpSampling (None, 128, 8)            0         
_________________________________________________________________
conv1d_51 (Conv1D)           (None, 128, 16)           912       
_________________________________________________________________
up_sampling1d_22 (UpSampling (None, 256, 16)           0         
__________

In [43]:
from keras.callbacks import ModelCheckpoint

checkpointer = ModelCheckpoint(filepath="Models/Weights/weights.hdf5", verbose=1,
                               save_best_only=True)

vae.fit(x=trainX_nominal, epochs=120, 
        batch_size=1024,
        validation_data=(valX_nominal,None),
        callbacks=[checkpointer])
vae.load_weights('Models/Weights/weights.hdf5')

# vae.save("Models/Conv1d_VAE_comp.h5")

Train on 500 samples, validate on 10 samples
Epoch 1/120





Epoch 00001: val_loss improved from inf to 9.62690, saving model to Models/Weights/weights.hdf5


Epoch 2/120





Epoch 00002: val_loss improved from 9.62690 to 5.94172, saving model to Models/Weights/weights.hdf5
Epoch 3/120





Epoch 00003: val_loss improved from 5.94172 to 4.76880, saving model to Models/Weights/weights.hdf5
Epoch 4/120





Epoch 00004: val_loss improved from 4.76880 to 2.01750, saving model to Models/Weights/weights.hdf5
Epoch 5/120





Epoch 00005: val_loss did not improve from 2.01750
Epoch 6/120





Epoch 00006: val_loss improved from 2.01750 to 1.88016, saving model to Models/Weights/weights.hdf5
Epoch 7/120





Epoch 00007: val_loss improved from 1.88016 to 1.24080, saving model to Models/Weights/weights.hdf5
Epoch 8/120





Epoch 00008: val_loss improved from 1.24080 to 1.04293, saving model to Models/Weights/weights.hdf5
Epoch 9/120





Epoch 00009: val_loss improved from 1.04293 to 1.01081, saving model to Models/Weights/weights.hdf5
Epoch 10/120





Epoch 00010: val_loss improved from 1.01081 to 1.00472, saving model to Models/Weights/weights.hdf5
Epoch 11/120





Epoch 00011: val_loss did not improve from 1.00472
Epoch 12/120





Epoch 00012: val_loss improved from 1.00472 to 0.97999, saving model to Models/Weights/weights.hdf5
Epoch 13/120





Epoch 00013: val_loss did not improve from 0.97999
Epoch 14/120





Epoch 00014: val_loss improved from 0.97999 to 0.97049, saving model to Models/Weights/weights.hdf5
Epoch 15/120





Epoch 00015: val_loss improved from 0.97049 to 0.95025, saving model to Models/Weights/weights.hdf5
Epoch 16/120





Epoch 00016: val_loss improved from 0.95025 to 0.90931, saving model to Models/Weights/weights.hdf5
Epoch 17/120





Epoch 00017: val_loss improved from 0.90931 to 0.88637, saving model to Models/Weights/weights.hdf5
Epoch 18/120





Epoch 00018: val_loss improved from 0.88637 to 0.87900, saving model to Models/Weights/weights.hdf5
Epoch 19/120





Epoch 00019: val_loss did not improve from 0.87900
Epoch 20/120





Epoch 00020: val_loss did not improve from 0.87900
Epoch 21/120





Epoch 00021: val_loss did not improve from 0.87900
Epoch 22/120





Epoch 00022: val_loss improved from 0.87900 to 0.78463, saving model to Models/Weights/weights.hdf5
Epoch 23/120





Epoch 00023: val_loss improved from 0.78463 to 0.61575, saving model to Models/Weights/weights.hdf5
Epoch 24/120





Epoch 00024: val_loss did not improve from 0.61575
Epoch 25/120





Epoch 00025: val_loss did not improve from 0.61575
Epoch 26/120





Epoch 00026: val_loss did not improve from 0.61575
Epoch 27/120





Epoch 00027: val_loss did not improve from 0.61575
Epoch 28/120





Epoch 00028: val_loss improved from 0.61575 to 0.53815, saving model to Models/Weights/weights.hdf5
Epoch 29/120





Epoch 00029: val_loss improved from 0.53815 to 0.39178, saving model to Models/Weights/weights.hdf5
Epoch 30/120





Epoch 00030: val_loss did not improve from 0.39178
Epoch 31/120





Epoch 00031: val_loss did not improve from 0.39178
Epoch 32/120





Epoch 00032: val_loss did not improve from 0.39178
Epoch 33/120





Epoch 00033: val_loss did not improve from 0.39178
Epoch 34/120





Epoch 00034: val_loss did not improve from 0.39178
Epoch 35/120





Epoch 00035: val_loss did not improve from 0.39178
Epoch 36/120





Epoch 00036: val_loss improved from 0.39178 to 0.36519, saving model to Models/Weights/weights.hdf5
Epoch 37/120





Epoch 00037: val_loss did not improve from 0.36519
Epoch 38/120





Epoch 00038: val_loss did not improve from 0.36519
Epoch 39/120





Epoch 00039: val_loss did not improve from 0.36519
Epoch 40/120





Epoch 00040: val_loss did not improve from 0.36519
Epoch 41/120





Epoch 00041: val_loss did not improve from 0.36519
Epoch 42/120





Epoch 00042: val_loss did not improve from 0.36519
Epoch 43/120





Epoch 00043: val_loss improved from 0.36519 to 0.35688, saving model to Models/Weights/weights.hdf5
Epoch 44/120





Epoch 00044: val_loss improved from 0.35688 to 0.32940, saving model to Models/Weights/weights.hdf5
Epoch 45/120





Epoch 00045: val_loss improved from 0.32940 to 0.32189, saving model to Models/Weights/weights.hdf5
Epoch 46/120





Epoch 00046: val_loss improved from 0.32189 to 0.28929, saving model to Models/Weights/weights.hdf5
Epoch 47/120





Epoch 00047: val_loss did not improve from 0.28929
Epoch 48/120





Epoch 00048: val_loss did not improve from 0.28929
Epoch 49/120





Epoch 00049: val_loss did not improve from 0.28929
Epoch 50/120





Epoch 00050: val_loss did not improve from 0.28929
Epoch 51/120





Epoch 00051: val_loss did not improve from 0.28929
Epoch 52/120





Epoch 00052: val_loss did not improve from 0.28929
Epoch 53/120





Epoch 00053: val_loss did not improve from 0.28929
Epoch 54/120





Epoch 00054: val_loss did not improve from 0.28929
Epoch 55/120





Epoch 00055: val_loss did not improve from 0.28929
Epoch 56/120





Epoch 00056: val_loss did not improve from 0.28929
Epoch 57/120





Epoch 00057: val_loss improved from 0.28929 to 0.28261, saving model to Models/Weights/weights.hdf5
Epoch 58/120





Epoch 00058: val_loss improved from 0.28261 to 0.24978, saving model to Models/Weights/weights.hdf5
Epoch 59/120





Epoch 00059: val_loss did not improve from 0.24978
Epoch 60/120





Epoch 00060: val_loss did not improve from 0.24978
Epoch 61/120





Epoch 00061: val_loss did not improve from 0.24978
Epoch 62/120





Epoch 00062: val_loss did not improve from 0.24978
Epoch 63/120





Epoch 00063: val_loss did not improve from 0.24978
Epoch 64/120





Epoch 00064: val_loss did not improve from 0.24978
Epoch 65/120





Epoch 00065: val_loss improved from 0.24978 to 0.24318, saving model to Models/Weights/weights.hdf5
Epoch 66/120





Epoch 00066: val_loss improved from 0.24318 to 0.22902, saving model to Models/Weights/weights.hdf5
Epoch 67/120





Epoch 00067: val_loss did not improve from 0.22902
Epoch 68/120





Epoch 00068: val_loss did not improve from 0.22902
Epoch 69/120





Epoch 00069: val_loss did not improve from 0.22902
Epoch 70/120





Epoch 00070: val_loss did not improve from 0.22902
Epoch 71/120





Epoch 00071: val_loss did not improve from 0.22902
Epoch 72/120





Epoch 00072: val_loss did not improve from 0.22902
Epoch 73/120





Epoch 00073: val_loss did not improve from 0.22902
Epoch 74/120





Epoch 00074: val_loss did not improve from 0.22902
Epoch 75/120





Epoch 00075: val_loss did not improve from 0.22902
Epoch 76/120





Epoch 00076: val_loss did not improve from 0.22902
Epoch 77/120





Epoch 00077: val_loss did not improve from 0.22902
Epoch 78/120





Epoch 00078: val_loss did not improve from 0.22902
Epoch 79/120





Epoch 00079: val_loss did not improve from 0.22902
Epoch 80/120





Epoch 00080: val_loss did not improve from 0.22902
Epoch 81/120





Epoch 00081: val_loss did not improve from 0.22902
Epoch 82/120





Epoch 00082: val_loss did not improve from 0.22902
Epoch 83/120





Epoch 00083: val_loss did not improve from 0.22902
Epoch 84/120





Epoch 00084: val_loss did not improve from 0.22902
Epoch 85/120





Epoch 00085: val_loss did not improve from 0.22902
Epoch 86/120





Epoch 00086: val_loss did not improve from 0.22902
Epoch 87/120





Epoch 00087: val_loss did not improve from 0.22902
Epoch 88/120





Epoch 00088: val_loss did not improve from 0.22902
Epoch 89/120





Epoch 00089: val_loss did not improve from 0.22902
Epoch 90/120





Epoch 00090: val_loss improved from 0.22902 to 0.22749, saving model to Models/Weights/weights.hdf5
Epoch 91/120





Epoch 00091: val_loss did not improve from 0.22749
Epoch 92/120





Epoch 00092: val_loss did not improve from 0.22749
Epoch 93/120





Epoch 00093: val_loss improved from 0.22749 to 0.22316, saving model to Models/Weights/weights.hdf5
Epoch 94/120





Epoch 00094: val_loss did not improve from 0.22316
Epoch 95/120





Epoch 00095: val_loss did not improve from 0.22316
Epoch 96/120





Epoch 00096: val_loss did not improve from 0.22316
Epoch 97/120





Epoch 00097: val_loss did not improve from 0.22316
Epoch 98/120





Epoch 00098: val_loss did not improve from 0.22316
Epoch 99/120





Epoch 00099: val_loss did not improve from 0.22316
Epoch 100/120





Epoch 00100: val_loss did not improve from 0.22316
Epoch 101/120





Epoch 00101: val_loss improved from 0.22316 to 0.21441, saving model to Models/Weights/weights.hdf5
Epoch 102/120





Epoch 00102: val_loss did not improve from 0.21441
Epoch 103/120





Epoch 00103: val_loss did not improve from 0.21441
Epoch 104/120





Epoch 00104: val_loss did not improve from 0.21441
Epoch 105/120





Epoch 00105: val_loss did not improve from 0.21441
Epoch 106/120





Epoch 00106: val_loss did not improve from 0.21441
Epoch 107/120





Epoch 00107: val_loss did not improve from 0.21441
Epoch 108/120





Epoch 00108: val_loss improved from 0.21441 to 0.19936, saving model to Models/Weights/weights.hdf5
Epoch 109/120





Epoch 00109: val_loss improved from 0.19936 to 0.19460, saving model to Models/Weights/weights.hdf5
Epoch 110/120





Epoch 00110: val_loss improved from 0.19460 to 0.19174, saving model to Models/Weights/weights.hdf5
Epoch 111/120





Epoch 00111: val_loss did not improve from 0.19174
Epoch 112/120





Epoch 00112: val_loss did not improve from 0.19174
Epoch 113/120





Epoch 00113: val_loss did not improve from 0.19174
Epoch 114/120





Epoch 00114: val_loss improved from 0.19174 to 0.18462, saving model to Models/Weights/weights.hdf5
Epoch 115/120





Epoch 00115: val_loss did not improve from 0.18462
Epoch 116/120





Epoch 00116: val_loss did not improve from 0.18462
Epoch 117/120





Epoch 00117: val_loss did not improve from 0.18462
Epoch 118/120





Epoch 00118: val_loss did not improve from 0.18462
Epoch 119/120





Epoch 00119: val_loss did not improve from 0.18462
Epoch 120/120





Epoch 00120: val_loss did not improve from 0.18462


In [119]:
import sys

orig_stdout = sys.stdout
f = open('Logs/out.txt', 'w')
sys.stdout = f
print(encoder.summary())
print(decoder.summary())
sys.stdout = orig_stdout
f.close()


In [44]:

nom_autoenc = vae.predict(trainX_nominal)

autoenc_df = pd.DataFrame(nom_autoenc[0], columns=boat_csv.columns)

plt.plot(boat_csv['Lon'][:1024], boat_csv['Lat'][:1024])
plt.show()
plt.plot(autoenc_df['Lon'], autoenc_df['Lat'])
plt.show()





In [59]:
nom_enc = encoder.predict(trainX_nominal)
print(len(nom_enc), nom_enc[0].shape)

3 (500, 10)


In [60]:
from sklearn.decomposition import PCA

labels = np.array(pd.read_csv("Data/Boat_nominal_data/Boat_mix_labels.csv")['label']) 
nominals = np.squeeze(np.argwhere(labels==1))
anomalous = np.squeeze(np.argwhere(labels==0))
print(type(nominals), type(nominals[0]))



<class 'numpy.ndarray'> <class 'numpy.int64'>


In [64]:


scaler = StandardScaler()
enc_input = scaler.fit_transform(nom_enc[1])
pca = PCA(n_components=2)
principalComponents = pca.fit_transform(enc_input)
x_val = []
y_val=[]
for i in range(principalComponents.shape[0]):
    x_val.append(principalComponents[i][0])
    y_val.append(principalComponents[i][1])
x_val = np.array(x_val)
y_val = np.array(y_val)


plt.scatter(x=x_val[nominals],y=y_val[nominals], alpha=0.5)
plt.scatter(x=x_val[anomalous],y=y_val[anomalous], alpha=0.5)
plt.show()



In [74]:

from sklearn.manifold import TSNE

def tsne(data, title):
    tsne = TSNE(n_components=2, random_state=0)
    
    tsne_obj= tsne.fit_transform(data)
    tsne_df = pd.DataFrame({'X':tsne_obj[:,0],
                            'Y':tsne_obj[:,1],
                            })
    
    plt.scatter(x=tsne_df["X"][nominals],
                y=tsne_df["Y"][nominals], alpha=0.5)
    plt.scatter(x=tsne_df["X"][anomalous],
                y=tsne_df["Y"][anomalous], alpha=0.5)
    plt.title(title)
    plt.show()
    
    return tsne_df

tsne_enc_nom_df = tsne(nom_enc[2], "Decoded Nominal")


