In [21]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm
from keras.layers import Input, Dense, Lambda, Flatten, Reshape
from keras.layers import Conv1D,UpSampling1D
from keras.models import Model
from keras import backend as K

n_features = 4
boat_csv = pd.read_csv("Data/Boat_nominal_data/Boat_sequences_mix.csv")
boat_csv = boat_csv.drop(columns=["Unnamed: 0", "M0C", "M1C", "Acceleration","Speed"])
scaler = StandardScaler()
normal_data = scaler.fit_transform(boat_csv)
print(normal_data.shape)

boat_val = pd.read_csv("Data/Boat_nominal_data/Boat_sequence_mix_val.csv")
boat_val = boat_val.drop(columns=["Unnamed: 0", "M0C", "M1C", "Acceleration","Speed"])
scaler = StandardScaler()
val_nom_data = scaler.fit_transform(boat_val)

def prepare_sequences(data, batch_size):
    samples = []
    for i in range(0,data.shape[0], batch_size):
        sample = data[i:i+batch_size]	
        samples.append(sample)
    sequences = np.array(samples)
    trainX = np.reshape(sequences, (len(sequences), batch_size, n_features))
    return trainX


def prepare_data():    
    trainX_nominal = prepare_sequences(normal_data,1024) 
    print(trainX_nominal.shape)
    
    valX_nominal = prepare_sequences(val_nom_data,1024)
    print(valX_nominal.shape)

    return trainX_nominal, valX_nominal

trainX_nominal, valX_nominal = prepare_data()


(204800, 4)
(200, 1024, 4)
(10, 1024, 4)


In [11]:
from keras.layers import MaxPooling1D

input_shape = (1024, n_features)
kernel_size = 3
filters = 64
latent_dim = 6
use_mse = True
load_weights = False


def sampling(args):
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon


inputs = Input(shape=input_shape, name='encoder_input')
x = inputs
for i in range(4):
    x = Conv1D(filters=filters,
               kernel_size=7,
               padding='same')(x)
    x = MaxPooling1D(2)(x)
    filters = int(filters / 2)


shape = K.int_shape(x)

x = Flatten()(x)
z_mean = Dense(latent_dim, name='z_mean')(x)
z_log_var = Dense(latent_dim, name='z_log_var')(x)

z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])

encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
encoder.summary()

latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
x = Dense(shape[1] * shape[2], name='Dense_after_sampling')(latent_inputs)
x = Reshape((shape[1], shape[2]))(x)
filters = filters * 2

for i in range(4):
    x = Conv1D(filters=filters,kernel_size=7, padding='same')(x)
    x = UpSampling1D(size=2)(x)
    filters = filters * 2
    
    
outputs = Conv1D(filters=n_features, kernel_size=7, padding='same')(x)


decoder = Model(latent_inputs, outputs, name='decoder')
decoder.summary()

outputs = decoder(encoder(inputs)[2])
vae = Model(inputs, outputs, name='vae')

from keras.losses import mse

reconstruction_loss = mse(K.flatten(inputs), K.flatten(outputs))
kl_loss = - 0.5 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var))
loss = reconstruction_loss + 1*kl_loss
vae.add_loss(loss)

vae.compile(optimizer='rmsprop', metrics= ['accuracy'])


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_input (InputLayer)      (None, 1024, 4)      0                                            
__________________________________________________________________________________________________
conv1d_19 (Conv1D)              (None, 1024, 64)     1856        encoder_input[0][0]              
__________________________________________________________________________________________________
max_pooling1d_9 (MaxPooling1D)  (None, 512, 64)      0           conv1d_19[0][0]                  
__________________________________________________________________________________________________
conv1d_20 (Conv1D)              (None, 512, 32)      14368       max_pooling1d_9[0][0]            
__________________________________________________________________________________________________
max_poolin

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
z_sampling (InputLayer)      (None, 6)                 0         
_________________________________________________________________
Dense_after_sampling (Dense) (None, 512)               3584      
_________________________________________________________________
reshape_3 (Reshape)          (None, 64, 8)             0         
_________________________________________________________________
conv1d_23 (Conv1D)           (None, 64, 8)             456       
_________________________________________________________________
up_sampling1d_9 (UpSampling1 (None, 128, 8)            0         
_________________________________________________________________
conv1d_24 (Conv1D)           (None, 128, 16)           912       
_________________________________________________________________
up_sampling1d_10 (UpSampling (None, 256, 16)           0         
__________

In [25]:
from keras.callbacks import ModelCheckpoint

# checkpointer = ModelCheckpoint(filepath="Models/Weights/Nominal_weights.hdf5", verbose=1,
#                                save_best_only=True)
# vae.fit(x=trainX_nominal, epochs=100, 
#         batch_size=1024,
#         validation_data=(valX_nominal,None),
#         callbacks=[checkpointer])
vae.load_weights('Models/Weights/Nominal_weights.hdf5')



In [None]:

nom_autoenc = vae.predict(trainX_nominal)

autoenc_df = pd.DataFrame(nom_autoenc[0], columns=boat_csv.columns)

plt.plot(boat_csv['Lon'][:1024], boat_csv['Lat'][:1024])
plt.show()
plt.plot(autoenc_df['Lon'], autoenc_df['Lat'])
plt.show()


[[-0.23604478 -0.0450823   0.03794606 -0.01927547 -0.21131809  0.07257051]] [[-0.02129864 -0.00735012  0.02170739 -0.00085766 -0.05373051  0.09675059]]


In [42]:
nom_enc = encoder.predict(trainX_nominal)
print(len(nom_enc), nom_enc[0].shape)

labels = pd.read_csv("Data/Boat_nominal_data/Boat_mix_labels.csv")
labels = labels.drop(columns="Unnamed: 0")
labels = np.array(labels)

mat_mask = np.array([labels for i in range(latent_dim)])
print(mat_mask.shape)

def get_neuron_values():
    neurons_m = []
    neurons_var = []
    for i in range(latent_dim):
        neurons_m.append(nom_enc[0][:, i])
        neurons_var.append(nom_enc[1][:, i])
    
    for i in range(latent_dim):
        plt.plot(neurons_m[i])
    plt.show()
    
    return neurons_m, neurons_var


neurons_m, neurons_var = get_neuron_values()


In [51]:

def nominal_parameters():
    neuron_avg_nom = np.ma.array(neurons_m, mask=np.logical_not(mat_mask))
    neuron_avg_nom = np.mean(neuron_avg_nom, axis=1)
    neuron_var_nom = np.ma.array(neurons_var, mask=np.logical_not(mat_mask))
    neuron_var_nom = np.mean(neuron_var_nom, axis=1)
     
    neuron_avg_anom = np.ma.array(neurons_m, mask=mat_mask)
    neuron_avg_anom = np.mean(neuron_avg_anom, axis=1)
    neuron_var_anom = np.ma.array(neurons_var, mask=mat_mask)
    neuron_var_anom = np.mean(neuron_var_anom, axis=1)
    
    return neuron_avg_nom, neuron_var_nom, neuron_avg_anom, neuron_var_anom


n_avg_nom, n_var_nom, n_avg_anom, n_var_anom = nominal_parameters()
print(len(n_avg_nom), len(n_var_nom), len(n_avg_nom), len(n_var_anom))


In [52]:
def visualize_difference(avg_n, var_n, avg_an, var_an):
    plt.plot(avg_n)
    plt.plot(avg_an)
    plt.title("MEAN")
    plt.show()
    
    plt.plot(var_n)
    plt.plot(var_an)
    plt.title("STD")
    plt.show()

visualize_difference(n_avg_nom, n_var_nom, n_avg_anom, n_var_anom)

In [8]:
boat_anom_csv = pd.read_csv("Data/Boat_anom_sequence.csv")
boat_anom_csv = boat_anom_csv.drop(columns=["Unnamed: 0","Speed", "Acceleration",
                                            "M0C", "M1C"])
anom_data_norm = scaler.fit_transform(boat_anom_csv)
anom_data_norm = np.reshape(anom_data_norm, (1, len(anom_data_norm), n_features))
anom_data_encoding = np.squeeze(encoder.predict(anom_data_norm))

anom_means = anom_data_encoding[0]
anom_var = anom_data_encoding[1]
print(anom_means, anom_var)


[ 0.08811088  0.1255463   0.01701382 -0.08566583  0.1815099  -0.00088965] [ 0.16391774  0.00667964 -0.08026419 -0.01931977 -0.09087465  0.04685581]


In [85]:
def visualize_latent_sampled(sample_nom, sample_anom):
    plt.plot(sample_nom)    
    plt.plot(sample_anom)
    plt.title("SAMPLED DIFFERENCES")
    plt.show()


def reconstruct_from_latent_vectors(latents):
    latents = np.reshape(latents, (1,len(latents)))
    run = decoder.predict(latents)
    run_df = pd.DataFrame(run[0], columns=boat_csv.columns)
    
    plt.plot(run_df["Sin"])
    plt.plot(run_df["Lat"])
    plt.show()
    

epsilon = np.random.normal(0,1)
sample_nominal = neuron_avg + np.exp(0.5*neuron_var)*epsilon
sample_anomalous = anom_means + np.exp(0.5*anom_var)*epsilon


In [86]:
changes = np.linspace(0,10,50)
neur = 5
for i in changes:
    sample_nominal[neur] = sample_anomalous[neur]*i
    #visualize_latent_sampled(sample_nominal, sample_anomalous)
    reconstruct_from_latent_vectors(sample_nominal)
# reconstruct_from_latent_vectors(sample_anomalous)


In [9]:

from sklearn.decomposition import PCA

labels = np.array(pd.read_csv("Data/Boat_nominal_data/Boat_mix_labels.csv")['label']) 
nominals = np.squeeze(np.argwhere(labels==1))
anomalous = np.squeeze(np.argwhere(labels==0))
print(type(nominals), type(nominals[0]))


titles = ["Mean", "Std", "Sampled"]

for i in nom_enc:
    scaler = StandardScaler()
    enc_input = scaler.fit_transform(i)
    pca = PCA(n_components=2)
    principalComponents = pca.fit_transform(enc_input)
    x_val = []
    y_val=[]
    for i in range(principalComponents.shape[0]):
        x_val.append(principalComponents[i][0])
        y_val.append(principalComponents[i][1])
    x_val = np.array(x_val)
    y_val = np.array(y_val)
    
    
    plt.scatter(x=x_val[nominals],y=y_val[nominals], alpha=0.5)
    plt.scatter(x=x_val[anomalous],y=y_val[anomalous], alpha=0.5)
    plt.show()


from sklearn.manifold import TSNE

def tsne(data, title):
    tsne = TSNE(n_components=2, random_state=0)
    
    tsne_obj= tsne.fit_transform(data)
    tsne_df = pd.DataFrame({'X':tsne_obj[:,0],
                            'Y':tsne_obj[:,1],
                            })
    
    plt.scatter(x=tsne_df["X"][nominals],
                y=tsne_df["Y"][nominals], alpha=0.5)
    plt.scatter(x=tsne_df["X"][anomalous],
                y=tsne_df["Y"][anomalous], alpha=0.5)
    plt.title(title)
    plt.show()
    
    return tsne_df

for i in range(len(nom_enc)):
    tsne_enc_nom_df = tsne(nom_enc[i], titles[i] )


<class 'numpy.ndarray'> <class 'numpy.int64'>


In [46]:
import scipy.cluster.hierarchy as shc
from sklearn.cluster import AgglomerativeClustering

plt.figure(figsize=(10, 7))
plt.title("Customer Dendograms")
dend_nom = shc.dendrogram(shc.linkage(tsne_enc_nom_df, method='ward'))


In [83]:
n_clusters = 2
cluster = AgglomerativeClustering(n_clusters=n_clusters, 
                                  affinity='euclidean',
                                  linkage='ward')
cl_nom = cluster.fit_predict(tsne_enc_nom_df)
print(cl_nom)

print(labels)


In [87]:
x_val_nom = []
y_val_nom = []

x_val_anom = []
y_val_anom = []
for i, xCoord in enumerate(tsne_enc_nom_df['X']):
    if cl_nom[i] == 0:
        x_val_nom.append(xCoord)
        y_val_nom.append(tsne_enc_nom_df['Y'][i])
    else:
        x_val_anom.append(xCoord)
        y_val_anom.append(tsne_enc_nom_df['Y'][i])

plt.scatter(x=x_val_nom,
            y=y_val_nom, alpha=0.5)
plt.scatter(x=x_val_anom,
            y=y_val_anom, alpha=0.5)
plt.show()
        
    



