In [5]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm
from keras.layers import Input, Dense, Lambda, Flatten, Reshape
from keras.layers import Conv1D,UpSampling1D
from keras.models import Model
from keras import backend as K

n_features = 4
sequence_length = 1024
boat_csv = pd.read_csv("Data/Boat_nominal_data/Boat_sequences_mix.csv")
boat_csv = boat_csv.drop(columns=["Unnamed: 0", "M0C", "M1C", 
                                  "Acceleration","Speed"])
scaler = StandardScaler()
normal_data = scaler.fit_transform(boat_csv)
print(normal_data.shape)

boat_val = pd.read_csv("Data/Boat_nominal_data/Boat_sequence_mix_val.csv")
boat_val = boat_val.drop(columns=["Unnamed: 0", "M0C", "M1C", 
                                  "Acceleration","Speed"])
scaler = MinMaxScaler(feature_range=(0,1))
val_nom_data = scaler.fit_transform(boat_val)

def prepare_sequences(data, batch_size):
    samples = []
    for i in range(0,data.shape[0], batch_size):
        sample = data[i:i+batch_size]	
        samples.append(sample)
    sequences = np.array(samples)
    trainX = np.reshape(sequences, (len(sequences), batch_size, n_features))
    return trainX


def prepare_data():    
    trainX_nominal = prepare_sequences(normal_data,sequence_length) 
    print(trainX_nominal.shape)
    
    valX_nominal = prepare_sequences(val_nom_data,sequence_length)
    print(valX_nominal.shape)

    return trainX_nominal, valX_nominal

trainX_nominal, valX_nominal = prepare_data()
n_sequences = len(trainX_nominal)


(307200, 4)
(300, 1024, 4)
(30, 1024, 4)


In [17]:
from keras import Sequential
from keras.layers import MaxPooling1D, RepeatVector, LSTM
from keras_preprocessing.sequence import TimeseriesGenerator

input_shape = (1024, n_features)
latent_dim = 20
use_mse = True   
load_weights = False

def sampling(args):
    # Reparameterization trick by sampling from an isotropic unit Gaussian.
    # # Arguments
    #     args (tensor): mean and log of variance of Q(z|X)
    # # Returns
    #     z (tensor): sampled latent vector
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    epsilon = K.random_normal(shape=(batch, dim, latent_dim))
    print(epsilon.shape)
    return z_mean + K.exp(0.5 * z_log_var) * epsilon

units = len(trainX_nominal)
inputs = Input(shape=(1024,n_features), name='Enc_input')
x = inputs
for i in range(2):
    x = Dense(units=units,activation='relu')(x)

embeddings = Dense(30, name="embeddings")(x)
z_mean = Dense(latent_dim, name='z_mean')(x)
z_log_var = Dense(latent_dim, name='z_log_var')(x)


z = Lambda(sampling, name='z')([z_mean, z_log_var])

# instantiate encoder model
encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
encoder.summary()

# build decoder model
latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
x = Dense(30, activation='relu')(latent_inputs)

for i in range(2):
    x = Dense(units=units,activation='relu')(x)
    
outputs = Dense(4)(x)

# instantiate decoder model
decoder = Model(latent_inputs, outputs, name='decoder')
decoder.summary()

# instantiate VAE model
outputs = decoder(encoder(inputs)[2])
vae = Model(inputs, outputs, name='vae_mlp')

vae.compile(optimizer='rmsprop', loss='mse', metrics= ['accuracy'])


(?, 1024, 20)
(?, 1024, 20)
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Enc_input (InputLayer)          (None, 1024, 4)      0                                            
__________________________________________________________________________________________________
dense_27 (Dense)                (None, 1024, 300)    1500        Enc_input[0][0]                  
__________________________________________________________________________________________________
dense_28 (Dense)                (None, 1024, 300)    90300       dense_27[0][0]                   
__________________________________________________________________________________________________
z_mean (Dense)                  (None, 1024, 20)     6020        dense_28[0][0]                   
_________________________________________________________________________________

In [18]:
vae.fit(trainX_nominal,
        trainX_nominal,
        epochs=50,
        batch_size=128)

vae.save_weights('Vae_time_series.h5')


Epoch 1/50








Epoch 2/50








Epoch 3/50








Epoch 4/50








Epoch 5/50








Epoch 6/50








Epoch 7/50








Epoch 8/50








Epoch 9/50








Epoch 10/50








Epoch 11/50








Epoch 12/50








Epoch 13/50








Epoch 14/50








Epoch 15/50








Epoch 16/50








Epoch 17/50








Epoch 18/50








Epoch 19/50








Epoch 20/50








Epoch 21/50








Epoch 22/50








Epoch 23/50








Epoch 24/50








Epoch 25/50








Epoch 26/50








Epoch 27/50








Epoch 28/50








Epoch 29/50








Epoch 30/50








Epoch 31/50








Epoch 32/50








Epoch 33/50








Epoch 34/50








Epoch 35/50








Epoch 36/50








Epoch 37/50








Epoch 38/50








Epoch 39/50








Epoch 40/50








Epoch 41/50








Epoch 42/50








Epoch 43/50








Epoch 44/50








Epoch 45/50








Epoch 46/50








Epoch 47/50








Epoch 48/50








Epoch 49/50








Epoch 50/50








In [19]:

def return_mask(num, labels):
    return np.squeeze(np.argwhere(labels == num))

labels = pd.read_csv("Data/Boat_nominal_data/Boat_mix_labels.csv")
labels = labels.drop(columns="Unnamed: 0")
labels = np.array(labels)

masks = [return_mask(num,labels)[:,0] for num in range(0,9)]


encodings = encoder.predict(trainX_nominal)

def check_z_sampling(encoded_values):
    m = encoded_values[0]
    var = np.exp(0.5*encoded_values[1])
    eps = np.random.normal(0,1,latent_dim)
    
    sampled = []
    index = 0
    for means in m:
        sample = means+var[index]*eps
        sampled.append(sample)
        index += 1
    
    sampled = np.array(sampled)
    return sampled

titles=['z_mean','z_log_var','z']
for i in range(3):
    latent_values = check_z_sampling(encodings)
    print(encodings[i].shape)
    for mask in masks:
        plt.scatter(x=encodings[i][:, 0][mask], 
                    y=encodings[i][:, 1][mask], alpha=0.5)
    plt.title(titles[i])
    plt.legend(labels=np.arange(0,9))
    plt.show()
    # 

(300, 1024, 20)


(300, 1024, 20)


(300, 1024, 20)


In [20]:
runs = []
for mask in masks:
    run_for_class = trainX_nominal[mask]
    print(run_for_class.shape)
    runs.append(run_for_class)

for j in range(9):    
    for i in runs[j]:
        run = np.reshape(i, (1, 1024,4))
        rec = vae.predict(run)
        rec = np.reshape(rec, (len(trainX_nominal[0]), n_features))
        reconstruction_df = pd.DataFrame(rec, columns=boat_csv.columns)
        
        plt.plot(reconstruction_df["Lon"], reconstruction_df["Lat"])
        #plt.savefig("Imgs/Latent_reconstruction/"+str(title)+".png")
        plt.show()
        break
    
    

(200, 1024, 4)
(9, 1024, 4)
(9, 1024, 4)
(14, 1024, 4)
(14, 1024, 4)
(13, 1024, 4)
(13, 1024, 4)
(15, 1024, 4)
(13, 1024, 4)
