In [8]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm
from keras.layers import Input, Dense, Lambda, Flatten, Reshape
from keras.layers import Conv1D,UpSampling1D
from keras.models import Model
from keras import backend as K

n_features = 4
boat_csv = pd.read_csv("Data/Boat_nominal_data/Boat_sequences_mix.csv")
boat_csv = boat_csv.drop(columns=["Unnamed: 0", "M0C", "M1C", "Acceleration","Speed"])
scaler = StandardScaler()
normal_data = scaler.fit_transform(boat_csv)
print(normal_data.shape)

boat_val = pd.read_csv("Data/Boat_nominal_data/Boat_sequence_mix_val.csv")
boat_val = boat_val.drop(columns=["Unnamed: 0", "M0C", "M1C", "Acceleration","Speed"])
scaler = StandardScaler()
val_nom_data = scaler.fit_transform(boat_val)

def prepare_sequences(data, batch_size):
    samples = []
    for i in range(0,data.shape[0], batch_size):
        sample = data[i:i+batch_size]	
        samples.append(sample)
    sequences = np.array(samples)
    trainX = np.reshape(sequences, (len(sequences), batch_size, n_features))
    return trainX


def prepare_data():    
    trainX_nominal = prepare_sequences(normal_data,1024) 
    print(trainX_nominal.shape)
    
    valX_nominal = prepare_sequences(val_nom_data,1024)
    print(valX_nominal.shape)

    return trainX_nominal, valX_nominal

trainX_nominal, valX_nominal = prepare_data()


(512000, 4)
(500, 1024, 4)
(10, 1024, 4)


In [10]:
from keras.layers import MaxPooling1D

input_shape = (1024, n_features)
kernel_size = 3
latent_dim = 10
use_mse = True   
load_weights = False

def create_vae(beta):
    filters = 64
    def sampling(args):
        z_mean, z_log_var = args
        batch = K.shape(z_mean)[0]
        dim = K.int_shape(z_mean)[1]
        epsilon = K.random_normal(shape=(batch, dim))
        return z_mean + K.exp(0.5 * z_log_var) * epsilon
    
    
    inputs = Input(shape=input_shape, name='encoder_input')
    x = inputs
    for i in range(4):
        x = Conv1D(filters=filters,
                   kernel_size=7,
                   padding='same')(x)
        x = MaxPooling1D(2)(x)
        filters = int(filters / 2)
    
    
    shape = K.int_shape(x)
    
    x = Flatten()(x)
    z_mean = Dense(latent_dim, name='z_mean')(x)
    z_log_var = Dense(latent_dim, name='z_log_var')(x)
    
    z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])
    
    encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
    #encoder.summary()
    
    latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
    x = Dense(shape[1] * shape[2], name='Dense_after_sampling')(latent_inputs)
    x = Reshape((shape[1], shape[2]))(x)
    filters = filters * 2

    for i in range(4):
        x = Conv1D(filters=filters,kernel_size=7, padding='same')(x)
        x = UpSampling1D(size=2)(x)
        filters = filters * 2
        
        
    outputs = Conv1D(filters=n_features, kernel_size=7, padding='same')(x)
    
    
    decoder = Model(latent_inputs, outputs, name='decoder')
    #decoder.summary()
    
    outputs = decoder(encoder(inputs)[2])
    vae = Model(inputs, outputs, name='vae')
    
    from keras.losses import mse
    
    reconstruction_loss = mse(K.flatten(inputs), K.flatten(outputs))
    kl_loss = - 0.5 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var))
    loss = reconstruction_loss + 3*kl_loss
    vae.add_loss(loss)
    
    vae.compile(optimizer='rmsprop', metrics= ['accuracy'])

    return (vae, encoder, decoder)



In [11]:
from keras.callbacks import ModelCheckpoint
betas = np.linspace(1,1,1)
print(betas)

vaes = []
for i, beta in enumerate(betas):
    print("Creating VAE with beta=", betas[i])
    vaes.append(create_vae(beta))
for i in range(len(vaes)):
    checkpointer = ModelCheckpoint(filepath="Models/Weights/Nominal_weights.hdf5",
                                   verbose=1, save_best_only=True)
    print("FITTING Vae with beta =", betas[i])
    vaes[i][0].fit(x=trainX_nominal, epochs=100, 
            batch_size=1024,
            validation_data=(valX_nominal,None),
            callbacks=[checkpointer])
    vaes[i][0].load_weights('Models/Weights/Nominal_weights.hdf5')


[1.]
Creating VAE with beta= 1.0


FITTING Vae with beta = 1.0


Train on 500 samples, validate on 10 samples
Epoch 1/100





Epoch 00001: val_loss improved from inf to 4.55245, saving model to Models/Weights/Nominal_weights.hdf5


Epoch 2/100





Epoch 00002: val_loss improved from 4.55245 to 2.09912, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 3/100





Epoch 00003: val_loss improved from 2.09912 to 1.95416, saving model to Models/Weights/Nominal_weights.hdf5


Epoch 4/100





Epoch 00004: val_loss improved from 1.95416 to 1.68929, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 5/100





Epoch 00005: val_loss improved from 1.68929 to 1.28887, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 6/100





Epoch 00006: val_loss improved from 1.28887 to 1.07753, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 7/100





Epoch 00007: val_loss improved from 1.07753 to 1.05262, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 8/100





Epoch 00008: val_loss did not improve from 1.05262
Epoch 9/100





Epoch 00009: val_loss did not improve from 1.05262
Epoch 10/100





Epoch 00010: val_loss did not improve from 1.05262
Epoch 11/100





Epoch 00011: val_loss improved from 1.05262 to 1.02763, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 12/100





Epoch 00012: val_loss improved from 1.02763 to 0.99175, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 13/100





Epoch 00013: val_loss improved from 0.99175 to 0.98969, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 14/100





Epoch 00014: val_loss improved from 0.98969 to 0.94969, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 15/100





Epoch 00015: val_loss improved from 0.94969 to 0.92921, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 16/100





Epoch 00016: val_loss improved from 0.92921 to 0.88624, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 17/100





Epoch 00017: val_loss improved from 0.88624 to 0.77280, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 18/100





Epoch 00018: val_loss improved from 0.77280 to 0.71862, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 19/100





Epoch 00019: val_loss did not improve from 0.71862
Epoch 20/100





Epoch 00020: val_loss did not improve from 0.71862
Epoch 21/100





Epoch 00021: val_loss improved from 0.71862 to 0.64893, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 22/100





Epoch 00022: val_loss improved from 0.64893 to 0.60423, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 23/100





Epoch 00023: val_loss did not improve from 0.60423
Epoch 24/100





Epoch 00024: val_loss did not improve from 0.60423
Epoch 25/100





Epoch 00025: val_loss improved from 0.60423 to 0.49525, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 26/100





Epoch 00026: val_loss improved from 0.49525 to 0.37223, saving model to Models/Weights/Nominal_weights.hdf5


Epoch 27/100





Epoch 00027: val_loss did not improve from 0.37223
Epoch 28/100





Epoch 00028: val_loss did not improve from 0.37223
Epoch 29/100





Epoch 00029: val_loss did not improve from 0.37223
Epoch 30/100





Epoch 00030: val_loss did not improve from 0.37223
Epoch 31/100





Epoch 00031: val_loss did not improve from 0.37223
Epoch 32/100





Epoch 00032: val_loss improved from 0.37223 to 0.34812, saving model to Models/Weights/Nominal_weights.hdf5


Epoch 33/100





Epoch 00033: val_loss improved from 0.34812 to 0.34033, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 34/100





Epoch 00034: val_loss improved from 0.34033 to 0.33373, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 35/100





Epoch 00035: val_loss did not improve from 0.33373
Epoch 36/100





Epoch 00036: val_loss improved from 0.33373 to 0.31742, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 37/100





Epoch 00037: val_loss improved from 0.31742 to 0.30514, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 38/100





Epoch 00038: val_loss did not improve from 0.30514
Epoch 39/100





Epoch 00039: val_loss did not improve from 0.30514
Epoch 40/100





Epoch 00040: val_loss did not improve from 0.30514
Epoch 41/100





Epoch 00041: val_loss did not improve from 0.30514
Epoch 42/100





Epoch 00042: val_loss did not improve from 0.30514
Epoch 43/100





Epoch 00043: val_loss did not improve from 0.30514
Epoch 44/100





Epoch 00044: val_loss did not improve from 0.30514
Epoch 45/100





Epoch 00045: val_loss improved from 0.30514 to 0.29153, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 46/100





Epoch 00046: val_loss did not improve from 0.29153
Epoch 47/100





Epoch 00047: val_loss did not improve from 0.29153
Epoch 48/100





Epoch 00048: val_loss improved from 0.29153 to 0.27142, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 49/100





Epoch 00049: val_loss improved from 0.27142 to 0.25473, saving model to Models/Weights/Nominal_weights.hdf5


Epoch 50/100





Epoch 00050: val_loss did not improve from 0.25473
Epoch 51/100





Epoch 00051: val_loss did not improve from 0.25473
Epoch 52/100





Epoch 00052: val_loss did not improve from 0.25473
Epoch 53/100





Epoch 00053: val_loss did not improve from 0.25473
Epoch 54/100





Epoch 00054: val_loss did not improve from 0.25473
Epoch 55/100





Epoch 00055: val_loss did not improve from 0.25473
Epoch 56/100





Epoch 00056: val_loss did not improve from 0.25473
Epoch 57/100





Epoch 00057: val_loss improved from 0.25473 to 0.24225, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 58/100





Epoch 00058: val_loss did not improve from 0.24225
Epoch 59/100





Epoch 00059: val_loss did not improve from 0.24225
Epoch 60/100





Epoch 00060: val_loss did not improve from 0.24225
Epoch 61/100





Epoch 00061: val_loss did not improve from 0.24225
Epoch 62/100





Epoch 00062: val_loss did not improve from 0.24225
Epoch 63/100





Epoch 00063: val_loss did not improve from 0.24225
Epoch 64/100





Epoch 00064: val_loss did not improve from 0.24225
Epoch 65/100





Epoch 00065: val_loss did not improve from 0.24225
Epoch 66/100





Epoch 00066: val_loss did not improve from 0.24225
Epoch 67/100





Epoch 00067: val_loss improved from 0.24225 to 0.22705, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 68/100





Epoch 00068: val_loss improved from 0.22705 to 0.22542, saving model to Models/Weights/Nominal_weights.hdf5


Epoch 69/100





Epoch 00069: val_loss did not improve from 0.22542
Epoch 70/100





Epoch 00070: val_loss did not improve from 0.22542
Epoch 71/100





Epoch 00071: val_loss did not improve from 0.22542
Epoch 72/100





Epoch 00072: val_loss did not improve from 0.22542
Epoch 73/100





Epoch 00073: val_loss did not improve from 0.22542
Epoch 74/100





Epoch 00074: val_loss improved from 0.22542 to 0.22362, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 75/100





Epoch 00075: val_loss did not improve from 0.22362
Epoch 76/100





Epoch 00076: val_loss improved from 0.22362 to 0.22118, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 77/100





Epoch 00077: val_loss improved from 0.22118 to 0.21495, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 78/100





Epoch 00078: val_loss did not improve from 0.21495
Epoch 79/100





Epoch 00079: val_loss did not improve from 0.21495
Epoch 80/100





Epoch 00080: val_loss did not improve from 0.21495
Epoch 81/100





Epoch 00081: val_loss improved from 0.21495 to 0.21182, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 82/100





Epoch 00082: val_loss did not improve from 0.21182
Epoch 83/100





Epoch 00083: val_loss did not improve from 0.21182
Epoch 84/100





Epoch 00084: val_loss did not improve from 0.21182
Epoch 85/100





Epoch 00085: val_loss did not improve from 0.21182
Epoch 86/100





Epoch 00086: val_loss improved from 0.21182 to 0.20464, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 87/100





Epoch 00087: val_loss improved from 0.20464 to 0.20231, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 88/100





Epoch 00088: val_loss improved from 0.20231 to 0.19739, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 89/100





Epoch 00089: val_loss did not improve from 0.19739
Epoch 90/100





Epoch 00090: val_loss did not improve from 0.19739
Epoch 91/100





Epoch 00091: val_loss did not improve from 0.19739
Epoch 92/100





Epoch 00092: val_loss did not improve from 0.19739
Epoch 93/100





Epoch 00093: val_loss improved from 0.19739 to 0.19446, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 94/100





Epoch 00094: val_loss improved from 0.19446 to 0.19393, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 95/100





Epoch 00095: val_loss did not improve from 0.19393
Epoch 96/100





Epoch 00096: val_loss did not improve from 0.19393
Epoch 97/100





Epoch 00097: val_loss did not improve from 0.19393
Epoch 98/100





Epoch 00098: val_loss did not improve from 0.19393
Epoch 99/100





Epoch 00099: val_loss improved from 0.19393 to 0.19005, saving model to Models/Weights/Nominal_weights.hdf5
Epoch 100/100





Epoch 00100: val_loss improved from 0.19005 to 0.18973, saving model to Models/Weights/Nominal_weights.hdf5


In [12]:
encodings = []

for i in vaes:
    encodings.append(i[1].predict(trainX_nominal))


labels = pd.read_csv("Data/Boat_nominal_data/Boat_mix_labels.csv")
labels = labels.drop(columns="Unnamed: 0")
labels = np.array(labels)

mat_mask = np.array([labels for i in range(latent_dim)])
print(mat_mask.shape)

def get_neuron_values(encoding):
    neurons_m = []
    neurons_var = []
    for i in range(latent_dim):
        neurons_m.append(encoding[0][:, i])
        neurons_var.append(encoding[1][:, i])
    
    for i in range(latent_dim):
        plt.plot(neurons_m[i])
    plt.show()
    
    return (neurons_m, neurons_var)

neuron_values = []
for i in encodings:
 neuron_values.append(get_neuron_values(i))


(10, 500, 1)


In [68]:

def nominal_parameters(n_m, n_var):
    neuron_avg_nom = np.ma.array(n_m, mask=np.logical_not(mat_mask))
    neuron_avg_nom = np.mean(neuron_avg_nom, axis=1)
    neuron_var_nom = np.ma.array(n_var, mask=np.logical_not(mat_mask))
    neuron_var_nom = np.mean(neuron_var_nom, axis=1)
     
    neuron_avg_anom = np.ma.array(n_m, mask=mat_mask)
    neuron_avg_anom = np.mean(neuron_avg_anom, axis=1)
    neuron_var_anom = np.ma.array(n_var, mask=mat_mask)
    neuron_var_anom = np.mean(neuron_var_anom, axis=1)
    
    return (neuron_avg_nom, neuron_var_nom, neuron_avg_anom, neuron_var_anom)

enc_values = []
for i in neuron_values:
    enc_values.append(nominal_parameters(i[0], i[1]))
print(len(enc_values))

10


In [69]:
def visualize_difference(avg_n, var_n, avg_an, var_an):
    plt.plot(avg_n)
    plt.plot(avg_an)
    plt.title("MEAN")
    plt.show()
    
    # plt.plot(var_n)
    # plt.plot(var_an)
    # plt.title("STD")
    # plt.show()


for i in enc_values:
    visualize_difference(i[0], i[1], i[2], i[3])


In [8]:
boat_anom_csv = pd.read_csv("Data/Boat_anom_sequence.csv")
boat_anom_csv = boat_anom_csv.drop(columns=["Unnamed: 0","Speed", "Acceleration",
                                            "M0C", "M1C"])
anom_data_norm = scaler.fit_transform(boat_anom_csv)
anom_data_norm = np.reshape(anom_data_norm, (1, len(anom_data_norm), n_features))
anom_data_encoding = np.squeeze(encoder.predict(anom_data_norm))

anom_means = anom_data_encoding[0]
anom_var = anom_data_encoding[1]
print(anom_means, anom_var)


[ 0.08811088  0.1255463   0.01701382 -0.08566583  0.1815099  -0.00088965] [ 0.16391774  0.00667964 -0.08026419 -0.01931977 -0.09087465  0.04685581]


In [130]:
def visualize_latent_sampled(sample_nom, sample_anom):
    plt.plot(sample_nom)    
    plt.plot(sample_anom)
    plt.title("SAMPLED DIFFERENCES")
    plt.show()


def reconstruct_from_latent_vectors(latents, model_index):
    latents = np.reshape(latents, (1,len(latents)))
    run = vaes[model_index][2].predict(latents)
    run_df = pd.DataFrame(run[0], columns=boat_csv.columns)
    
    plt.plot(run_df["Lon"])#, run_df["Lat"])
    plt.plot(run_df["Lat"])
    plt.show()
    

epsilon = np.random.normal(0,1)
sample_nominal = enc_values[0][0] + np.exp(0.5*enc_values[0][1])*epsilon
sample_anomalous = enc_values[0][2] + np.exp(0.5*enc_values[0][3])*epsilon





In [131]:
changes = np.linspace(0,6,50)
neur = 0
for i in changes:
    sample_nominal[neur] = sample_anomalous[neur]*i
    #visualize_latent_sampled(sample_nominal, sample_anomalous)
    reconstruct_from_latent_vectors(sample_nominal,0)
# reconstruct_from_latent_vectors(sample_anomalous)


In [13]:

from sklearn.decomposition import PCA

labels = np.array(pd.read_csv("Data/Boat_nominal_data/Boat_mix_labels.csv")['label']) 
nominals = np.squeeze(np.argwhere(labels==1))
anomalous = np.squeeze(np.argwhere(labels==0))
print(type(nominals), type(nominals[0]))


titles = ["Mean", "Std", "Sampled"]

for i in encodings[0]:
    scaler = StandardScaler()
    enc_input = scaler.fit_transform(i)
    pca = PCA(n_components=2)
    principalComponents = pca.fit_transform(enc_input)
    x_val = []
    y_val=[]
    for i in range(principalComponents.shape[0]):
        x_val.append(principalComponents[i][0])
        y_val.append(principalComponents[i][1])
    x_val = np.array(x_val)
    y_val = np.array(y_val)
    
    
    plt.scatter(x=x_val[nominals],y=y_val[nominals], alpha=0.5)
    plt.scatter(x=x_val[anomalous],y=y_val[anomalous], alpha=0.5)
    plt.show()


from sklearn.manifold import TSNE

def tsne(data, title):
    tsne = TSNE(n_components=2, random_state=0)
    
    tsne_obj= tsne.fit_transform(data)
    tsne_df = pd.DataFrame({'X':tsne_obj[:,0],
                            'Y':tsne_obj[:,1],
                            })
    
    plt.scatter(x=tsne_df["X"][nominals],
                y=tsne_df["Y"][nominals], alpha=0.5)
    plt.scatter(x=tsne_df["X"][anomalous],
                y=tsne_df["Y"][anomalous], alpha=0.5)
    plt.title(title)
    plt.show()
    
    return tsne_df

for i, encode in enumerate(encodings[0]):
    tsne_enc_nom_df = tsne(encode, titles[i])


<class 'numpy.ndarray'> <class 'numpy.int64'>


In [46]:
import scipy.cluster.hierarchy as shc
from sklearn.cluster import AgglomerativeClustering

plt.figure(figsize=(10, 7))
plt.title("Customer Dendograms")
dend_nom = shc.dendrogram(shc.linkage(tsne_enc_nom_df, method='ward'))


In [83]:
n_clusters = 2
cluster = AgglomerativeClustering(n_clusters=n_clusters, 
                                  affinity='euclidean',
                                  linkage='ward')
cl_nom = cluster.fit_predict(tsne_enc_nom_df)
print(cl_nom)

print(labels)


In [87]:
x_val_nom = []
y_val_nom = []

x_val_anom = []
y_val_anom = []
for i, xCoord in enumerate(tsne_enc_nom_df['X']):
    if cl_nom[i] == 0:
        x_val_nom.append(xCoord)
        y_val_nom.append(tsne_enc_nom_df['Y'][i])
    else:
        x_val_anom.append(xCoord)
        y_val_anom.append(tsne_enc_nom_df['Y'][i])

plt.scatter(x=x_val_nom,
            y=y_val_nom, alpha=0.5)
plt.scatter(x=x_val_anom,
            y=y_val_anom, alpha=0.5)
plt.show()
        
    



