In [51]:
from keras.layers import *
from keras import Model
import pandas as pd
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from keras import Sequential
import keras as kr
import keras.losses as losses

# Setup the network parameters:
original_dim = 7
input_shape = (original_dim,)
intermediate_dim = 300
batch_size = 128
latent_dim = 30
beta = 5
epochs = 5


boat_csv = pd.read_csv("Data/Boat_nominal_data/Boat_nominal_long.csv")
boat_csv = boat_csv.drop(columns=["Unnamed: 0"])
scaler = StandardScaler()
normal_data = scaler.fit_transform(boat_csv)
print(normal_data.shape)

boat_curved = pd.read_csv("Data/Anomalous_boat_data.csv")
boat_curved= boat_curved.drop(columns=["Unnamed: 0", "heading"])    
scaler = StandardScaler()
anomalous_data = scaler.fit_transform(boat_curved)
print(anomalous_data.shape)



def create_vaes(n, beta):
    vae_list = []
    for i in range(n): 
        # Map inputs to the latent distribution parameters:
        # VAE model = encoder + decoder
        # build encoder model
        inputs = Input(shape=input_shape, name='encoder_input')
        x = Dense(intermediate_dim, activation='relu')(inputs)
        z_mean = Dense(latent_dim, name='z_mean')(x)
        z_log_var = Dense(latent_dim, name='z_log_var')(x)
        
        # Use those parameters to sample new points from the latent space:
        # reparameterization trick
        # instead of sampling from Q(z|X), sample epsilon = N(0,I)
        # z = z_mean + sqrt(var) * epsilon
        def sampling(args):
            """Reparameterization trick by sampling from an isotropic unit Gaussian.
            # Arguments
                args (tensor): mean and log of variance of Q(z|X)
            # Returns
                z (tensor): sampled latent vector
            """
        
            z_mean, z_log_var = args
            batch = K.shape(z_mean)[0]
            dim = K.int_shape(z_mean)[1]
            # by default, random_normal has mean = 0 and std = 1.0
            epsilon = K.random_normal(shape=(batch, dim))
            return z_mean + K.exp(0.5 * z_log_var) * epsilon
          
        # use reparameterization trick to push the sampling out as input
        # note that "output_shape" isn't necessary with the TensorFlow backend
        z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])
        
        # Instantiate the encoder model:
        encoder = Model(inputs, z_mean) #z_mean
        
        # Build the decoder model:
        latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
        x = Dense(intermediate_dim, activation='relu')(latent_inputs)
        outputs = Dense(original_dim)(x)
        
        # Instantiate the decoder model:
        decoder = Model(latent_inputs, outputs, name='decoder')
        
        # Instantiate the VAE model:
        outputs = decoder(encoder(inputs))
        vae = Model(inputs, outputs, name='vae_mlp')
        
        # As in the Keras tutorial, we define a custom loss function:
        
        def vae_loss(x, x_decoded_mean):
            xent_loss = losses.mean_squared_error(x, x_decoded_mean)
            kl_loss = - 0.5 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), 
                                     axis=-1)
            return xent_loss + beta*kl_loss
        
        # We compile the model:
        vae.compile(optimizer='adam', loss=vae_loss, metrics=['accuracy'])
        
        # Finally, we train the model:
        results = vae.fit(normal_data, normal_data,
                shuffle=True,
                epochs=epochs,
                batch_size=batch_size)
        vae_list.append((vae, encoder, decoder))
        print(i)
        
    return vae_list

print("Done")

vae_list = create_vaes(n=1, beta=beta)

print("done")

def check_disentanglement(weight_modifiers):
    nom_enc = []
    for i, elem in enumerate(vae_list):
         print("ENCODING:", i+1)
         nom_enc.append(elem[1].predict(normal_data))
         
    # print(len(nom_enc))
    
    # print(len(nom_enc), nom_enc[0].shape)
    nom_enc_modified = nom_enc
    ##Modifiing values of the 1 neuron
    for i, elem in enumerate(nom_enc_modified):
        nom_enc_modified[i] = elem * weight_modifiers
    # print(len(nom_enc_modified), nom_enc_modified[0].shape)
    
    
    
    nom_dec = []
    for i, elem in enumerate(vae_list):
         print("DECODING:", i+1)
         nom_dec.append(elem[2].predict(nom_enc_modified[i]))
         
    # print(len(nom_dec),nom_dec[0].shape)
    
    df_nominal = pd.DataFrame(normal_data, columns=boat_csv.columns)
    
    for i, elem in enumerate(nom_dec):
        df = pd.DataFrame(elem, columns=boat_csv.columns)
    
        plt.subplot(421)
        plt.plot(df["Lon"][:5739])
        plt.plot(df_nominal["Lon"][:5739], 'r')
        plt.title("Longitude")
        
        plt.subplot(422)
        plt.plot(df["Lat"][:5739])
        plt.plot(df_nominal["Lat"][:5739], 'r')
        plt.title("latitude")
        
        plt.subplot(423)
        plt.plot(df["Speed"][:5739])
        plt.plot(df_nominal["Speed"][:5739], 'r')
        plt.title("Speed")        

        plt.subplot(424)
        plt.plot(df["Acceleration"][:5739])
        plt.plot(df_nominal["Acceleration"][:5739], 'r')
        plt.title("Acceleration")         
        
        plt.subplot(425)
        plt.plot(df["Degrees"][:5739])
        plt.plot(df_nominal["Degrees"][:5739], 'r')
        plt.title("Degrees")           
        
        plt.subplot(426)
        plt.plot(df["M0C"][:5739])
        plt.plot(df_nominal["M0C"][:5739], 'r')
        plt.title("M0C")
                
        plt.subplot(427)
        plt.plot(df["M1C"][:5739])
        plt.plot(df_nominal["M1C"][:5739], 'r')
        plt.title("M1C")
         
        plt.show()
        
        plt.subplot(421)
        plt.plot(df["Lon"][:5739] - df_nominal["Lon"][:5739])
        plt.title("Longitude_difference")
        
        plt.subplot(422)
        plt.plot(df["Lat"][:5739] - df_nominal["Lat"][:5739])
        plt.title("latitude_difference")
        
        plt.subplot(423)
        plt.plot(df["Speed"][:5739] - df_nominal["Speed"][:5739])
        plt.title("Speed_difference")        

        plt.subplot(424)
        plt.plot(df["Acceleration"][:5739] - df_nominal["Acceleration"][:5739])
        plt.title("Acceleration_difference")         
        
        plt.subplot(425)
        plt.plot(df["Degrees"][:5739] - df_nominal["Degrees"][:5739])
        plt.title("Degrees_difference")           
        
        plt.subplot(426)
        plt.plot(df["M0C"][:5739] - df_nominal["M0C"][:5739])
        plt.title("M0C_difference")
                
        plt.subplot(427)
        plt.plot(df["M1C"][:5739] - df_nominal["M1C"][:5739])
        plt.title("M1C_difference")
        
        plt.show()

(63129, 7)
(6620, 7)
Done


Epoch 1/5


  128/63129 [..............................] - ETA: 16:41 - loss: 1.0969 - acc: 0.1328

 1280/63129 [..............................] - ETA: 1:40 - loss: 0.9980 - acc: 0.3172 

 2432/63129 [>.............................] - ETA: 53s - loss: 0.8715 - acc: 0.3890 

 3584/63129 [>.............................] - ETA: 36s - loss: 0.7577 - acc: 0.4601

 4736/63129 [=>............................] - ETA: 27s - loss: 0.6607 - acc: 0.5224

 5888/63129 [=>............................] - ETA: 22s - loss: 0.5802 - acc: 0.5667

 7040/63129 [==>...........................] - ETA: 18s - loss: 0.5154 - acc: 0.6043

 8192/63129 [==>...........................] - ETA: 16s - loss: 0.4631 - acc: 0.6392

 9344/63129 [===>..........................] - ETA: 14s - loss: 0.4200 - acc: 0.6699

10496/63129 [===>..........................] - ETA: 12s - loss: 0.3843 - acc: 0.6964

11392/63129 [====>.........................] - ETA: 11s - loss: 0.3607 - acc: 0.7144

12544/63129 [====>.........................] - ETA: 10s - loss: 0.3345 - acc: 0.7333

13696/63129 [=====>........................] - ETA: 9s - loss: 0.3118 - acc: 0.7504 

























































































Epoch 2/5
  128/63129 [..............................] - ETA: 8s - loss: 0.0127 - acc: 0.9766

 1024/63129 [..............................] - ETA: 4s - loss: 0.0121 - acc: 0.9639

 2048/63129 [..............................] - ETA: 3s - loss: 0.0120 - acc: 0.9585

 3200/63129 [>.............................] - ETA: 3s - loss: 0.0119 - acc: 0.9559

 4352/63129 [=>............................] - ETA: 3s - loss: 0.0118 - acc: 0.9577

 5632/63129 [=>............................] - ETA: 2s - loss: 0.0116 - acc: 0.9597

 6784/63129 [==>...........................] - ETA: 2s - loss: 0.0115 - acc: 0.9602

 7936/63129 [==>...........................] - ETA: 2s - loss: 0.0114 - acc: 0.9596

 9088/63129 [===>..........................] - ETA: 2s - loss: 0.0113 - acc: 0.9602

10240/63129 [===>..........................] - ETA: 2s - loss: 0.0112 - acc: 0.9610

11392/63129 [====>.........................] - ETA: 2s - loss: 0.0112 - acc: 0.9620

12544/63129 [====>.........................] - ETA: 2s - loss: 0.0110 - acc: 0.9618

13568/63129 [=====>........................] - ETA: 2s - loss: 0.0110 - acc: 0.9623

14720/63129 [=====>........................] - ETA: 2s - loss: 0.0109 - acc: 0.9622



























































































Epoch 3/5
  128/63129 [..............................] - ETA: 7s - loss: 0.0066 - acc: 0.9766

 1152/63129 [..............................] - ETA: 3s - loss: 0.0065 - acc: 0.9635

 2304/63129 [>.............................] - ETA: 3s - loss: 0.0065 - acc: 0.9661

 3328/63129 [>.............................] - ETA: 3s - loss: 0.0064 - acc: 0.9691

 4480/63129 [=>............................] - ETA: 2s - loss: 0.0064 - acc: 0.9708

 5632/63129 [=>............................] - ETA: 2s - loss: 0.0062 - acc: 0.9700

 6784/63129 [==>...........................] - ETA: 2s - loss: 0.0061 - acc: 0.9708

 7936/63129 [==>...........................] - ETA: 2s - loss: 0.0061 - acc: 0.9704

 9088/63129 [===>..........................] - ETA: 2s - loss: 0.0060 - acc: 0.9711

10240/63129 [===>..........................] - ETA: 2s - loss: 0.0060 - acc: 0.9719

11392/63129 [====>.........................] - ETA: 2s - loss: 0.0059 - acc: 0.9721

12544/63129 [====>.........................] - ETA: 2s - loss: 0.0059 - acc: 0.9721

13696/63129 [=====>........................] - ETA: 2s - loss: 0.0059 - acc: 0.9723

14720/63129 [=====>........................] - ETA: 2s - loss: 0.0058 - acc: 0.9728























































































Epoch 4/5
  128/63129 [..............................] - ETA: 7s - loss: 0.0042 - acc: 0.9844

  768/63129 [..............................] - ETA: 5s - loss: 0.0039 - acc: 0.9766

 1664/63129 [..............................] - ETA: 4s - loss: 0.0040 - acc: 0.9754

 2176/63129 [>.............................] - ETA: 5s - loss: 0.0040 - acc: 0.9756

 2688/63129 [>.............................] - ETA: 5s - loss: 0.0040 - acc: 0.9773

 3328/63129 [>.............................] - ETA: 5s - loss: 0.0040 - acc: 0.9787

 4224/63129 [=>............................] - ETA: 4s - loss: 0.0041 - acc: 0.9796

 5376/63129 [=>............................] - ETA: 4s - loss: 0.0041 - acc: 0.9779

 6272/63129 [=>............................] - ETA: 4s - loss: 0.0041 - acc: 0.9775

 7168/63129 [==>...........................] - ETA: 3s - loss: 0.0040 - acc: 0.9767

 8192/63129 [==>...........................] - ETA: 3s - loss: 0.0041 - acc: 0.9764

 9216/63129 [===>..........................] - ETA: 3s - loss: 0.0041 - acc: 0.9769

10368/63129 [===>..........................] - ETA: 3s - loss: 0.0041 - acc: 0.9776

11136/63129 [====>.........................] - ETA: 3s - loss: 0.0040 - acc: 0.9775

11648/63129 [====>.........................] - ETA: 3s - loss: 0.0040 - acc: 0.9778

12288/63129 [====>.........................] - ETA: 3s - loss: 0.0040 - acc: 0.9775

13312/63129 [=====>........................] - ETA: 3s - loss: 0.0040 - acc: 0.9779

14464/63129 [=====>........................] - ETA: 3s - loss: 0.0039 - acc: 0.9782





























































































































Epoch 5/5
  128/63129 [..............................] - ETA: 12s - loss: 0.0043 - acc: 0.9922

  640/63129 [..............................] - ETA: 7s - loss: 0.0037 - acc: 0.9812 

 1408/63129 [..............................] - ETA: 6s - loss: 0.0035 - acc: 0.9830

 2560/63129 [>.............................] - ETA: 4s - loss: 0.0035 - acc: 0.9805

 3328/63129 [>.............................] - ETA: 4s - loss: 0.0034 - acc: 0.9805

 4096/63129 [>.............................] - ETA: 4s - loss: 0.0033 - acc: 0.9814

 4736/63129 [=>............................] - ETA: 4s - loss: 0.0033 - acc: 0.9816

 5376/63129 [=>............................] - ETA: 4s - loss: 0.0032 - acc: 0.9814

 5760/63129 [=>............................] - ETA: 4s - loss: 0.0032 - acc: 0.9821

 6272/63129 [=>............................] - ETA: 4s - loss: 0.0032 - acc: 0.9825

 6912/63129 [==>...........................] - ETA: 4s - loss: 0.0031 - acc: 0.9831

 7808/63129 [==>...........................] - ETA: 4s - loss: 0.0031 - acc: 0.9819

 8832/63129 [===>..........................] - ETA: 4s - loss: 0.0030 - acc: 0.9820

 9600/63129 [===>..........................] - ETA: 4s - loss: 0.0030 - acc: 0.9821

10240/63129 [===>..........................] - ETA: 4s - loss: 0.0030 - acc: 0.9819

11008/63129 [====>.........................] - ETA: 3s - loss: 0.0030 - acc: 0.9818

12160/63129 [====>.........................] - ETA: 3s - loss: 0.0029 - acc: 0.9818

12928/63129 [=====>........................] - ETA: 3s - loss: 0.0029 - acc: 0.9821

13696/63129 [=====>........................] - ETA: 3s - loss: 0.0029 - acc: 0.9822

14336/63129 [=====>........................] - ETA: 3s - loss: 0.0029 - acc: 0.9821































































































































0
done


In [53]:

weight_modifiers = np.ones(shape=(latent_dim,))
weight_modifiers[-1] = 1
check_disentanglement(weight_modifiers)


ENCODING: 1


DECODING: 1


In [None]:

columns = ["C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9", "C10"]
for i, elem in enumerate(nom_enc):
    column = "C1"
    df_nominal = pd.DataFrame(elem, columns=columns)
    for j in columns:
        plt.plot(df_nominal[j])
    plt.title("Column = "+str(column) + " Beta = "+ str(beta_values[i]))
    plt.show()


In [114]:
autoencoded_nominal = vae.predict(normal_data)
print(autoencoded_nominal.shape)
print("NOMINAL BEHAVIOUR PREDICTED")
autoencoded_anomalous = vae.predict(anomalous_data)
print(autoencoded_anomalous.shape)
print("ANOMALOUS BEHAVIOUR PREDICTED")


In [115]:
def calculate_nominal_reconstr_error(n_nominals):
    r_err_nom_seq = []
    SIGMAs = []
    for i in range(0,len(normal_data), 5739):
        nominal_data = normal_data[i:i+5739]
        enc_nominal = autoencoded_nominal[i:i+5739]
        diff = abs(nominal_data - enc_nominal)
        r_err_nom_seq += [diff]
        SIGMAs.append(np.cov(r_err_nom_seq[-1], rowvar=False))        
        
    r_err_nom_seq = np.array(r_err_nom_seq)
    r_err_nom_seq = np.mean(r_err_nom_seq, axis=0)
    mu = np.mean(r_err_nom_seq, axis=0)
    SIGMAs = np.array(SIGMAs)
    SIGMA = np.mean(SIGMAs, axis=0)
    
    print(mu.shape)
    print(SIGMA.shape)
    
    return r_err_nom_seq,mu, SIGMA

def calculate_anom_reconstr_error():
    return abs(anomalous_data - autoencoded_anomalous)
    
       
nom_errors, MU, SIGMA = calculate_nominal_reconstr_error(10)
anom_error = calculate_anom_reconstr_error()
print(anom_error.shape)


In [124]:
def calculate_anomaly_score_sequence(i, element):
    weights = [1, 1, 1, 1, 1, 100, 100]
    score = np.dot(np.transpose(element-MU)*weights, 
                   np.dot(SIGMA, 
                  (element-MU)*weights))          
    return score


def get_anom_scores_nominal():
    anom_scores = []
    for i, element in enumerate(nom_errors):
        anom_scores.append(calculate_anomaly_score_sequence(i, element))
    return anom_scores


def get_anom_scores():
    anom_scores = []
    for i, element in enumerate(anom_error):
        anom_scores.append(calculate_anomaly_score_sequence(i, element))
    return anom_scores


scores_list = np.array(get_anom_scores())
nom_scores_list = np.array(get_anom_scores_nominal())
#plt.plot(scores_list)
plt.plot(nom_scores_list)
plt.show()

#score_difference = abs(scores_list - nom_scores_list)
# plt.plot(score_difference)
# plt.show()
print("done")


In [125]:
def detectOutliers(x, outlierConstant):
    a = np.array(x)
    upper_quartile = np.percentile(a, 75)
    lower_quartile = np.percentile(a, 25)
    IQR = (upper_quartile - lower_quartile) * outlierConstant
    quartileSet = (lower_quartile - IQR, upper_quartile + IQR)
    resultList = []
    outlierList = []
    
    list = a.tolist()
    for y in range(len(list)):
        if list[y] >= quartileSet[0] and list[y] <= quartileSet[1]:
            resultList.append(list[y])
        else:
            outlierList.append((y,list[y]))
            resultList.append(list[y-1])
    return  outlierList


outliers_anomalous_list = detectOutliers(scores_list,
                                         outlierConstant=10)

print(len(outliers_anomalous_list))
print("Done")


In [126]:


an_csv = pd.read_csv("Data/Anomalous_boat_data.csv")
an_csv = an_csv.drop(columns=["Unnamed: 0"])

plt.plot(an_csv["longitude"], an_csv["latitude"])
plt.title("Nominal anoamalies points Autoenc_Big")
for i in outliers_anomalous_list:
    anomaly_position = i[0]
    plt.plot(an_csv["longitude"][anomaly_position],
             an_csv["latitude"][anomaly_position], 'bo')

plt.show()

In [23]:
df_nominal = pd.DataFrame(autoencoded_nominal, columns=boat_csv.columns)
df_anomalous = pd.DataFrame(autoencoded_anomalous, columns=boat_csv.columns)

plt.plot(df_nominal["Lon"][:5739], df_nominal["Lat"][:5739])
plt.show()

plt.plot(df_anomalous["Lon"], df_anomalous["Lat"])
plt.show()

