In [20]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm
from keras.layers import Input, Dense, Lambda, Flatten, Reshape
from keras.layers import Conv1D,UpSampling1D
from keras.models import Model
from keras import backend as K

# Dataset layout used throughout the notebook: each sequence is
# `sequence_length` consecutive rows with `n_features` columns.
n_features = 4
sequence_length = 1024

# Training data: drop the columns that are not fed to the model, then scale.
boat_csv = pd.read_csv("Data/Boat_nominal_data/Boat_sequences_mix.csv")
boat_csv = boat_csv.drop(columns=["Unnamed: 0", "M0C", "M1C", 
                                  "Acceleration","Speed"])
# The scaler is fitted on the training data only.
scaler = StandardScaler()
normal_data = scaler.fit_transform(boat_csv)
print(normal_data.shape)

# Validation data: same column selection as the training data.
boat_val = pd.read_csv("Data/Boat_nominal_data/Boat_sequence_mix_val.csv")
boat_val = boat_val.drop(columns=["Unnamed: 0", "M0C", "M1C", 
                                  "Acceleration","Speed"])
# Reuse the scaler fitted on the training data instead of fitting a different
# scaler on the validation set: both sets must go through the same transform,
# otherwise the model sees validation values on a scale it was never trained on.
val_nom_data = scaler.transform(boat_val)

def prepare_sequences(data, batch_size):
    """Cut a 2-D array of rows into consecutive, non-overlapping sequences.

    Parameters
    ----------
    data : array-like of shape (n_rows, n_columns)
        Rows in the order they should appear inside the sequences.
    batch_size : int
        Number of consecutive rows per sequence (this is a window length,
        despite the parameter name).

    Returns
    -------
    numpy.ndarray of shape (n_rows // batch_size, batch_size, n_columns)
        A new array; trailing rows that do not fill a complete sequence are
        dropped, because a shorter last window cannot be stacked with the
        others.

    Raises
    ------
    ValueError
        If ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    # Copy so the returned sequences never alias the caller's array.
    data = np.array(data)
    n_complete = data.shape[0] // batch_size
    # Keep only the rows that form complete windows.
    usable = data[:n_complete * batch_size]
    # The column count comes from the data itself rather than a global.
    return usable.reshape(n_complete, batch_size, data.shape[1])


def prepare_data():
    """Window the scaled training and validation arrays into sequences.

    Reads the module-level ``normal_data``, ``val_nom_data`` and
    ``sequence_length``. Prints the shape of each windowed array (training
    first, then validation) and returns them as a ``(train, val)`` pair.
    """
    windowed = []
    for scaled in (normal_data, val_nom_data):
        sequences = prepare_sequences(scaled, sequence_length)
        print(sequences.shape)
        windowed.append(sequences)

    return windowed[0], windowed[1]

# Build the windowed arrays, then append a trailing axis of size 1 to the
# training set so it has the 4-D layout (sequences, steps, features, 1).
trainX_nominal, valX_nominal = prepare_data()
n_sequences = trainX_nominal.shape[0]
trainX_nominal = trainX_nominal.reshape(
    (n_sequences, sequence_length, n_features, 1))
print(trainX_nominal.shape)

(307200, 4)
(300, 1024, 4)
(30, 1024, 4)
(300, 1024, 4, 1)


In [21]:
from keras.layers import Conv2D, Conv2DTranspose

def sampling(args):
    """Reparameterization trick: return ``mean + std * noise``.

    ``args`` is the pair ``(z_mean, z_log_var)``. The noise tensor has one
    row per row of ``z_mean`` and as many columns as its static second
    dimension; the standard deviation is ``exp(0.5 * z_log_var)``.
    """
    mean, log_var = args
    n_rows = K.shape(mean)[0]
    n_dims = K.int_shape(mean)[1]
    # K.random_normal defaults to mean=0 and std=1.0
    noise = K.random_normal(shape=(n_rows, n_dims))
    std = K.exp(0.5 * log_var)
    return mean + std * noise



# network parameters
from keras.losses import mse

input_shape = (sequence_length,n_features,1)
batch_size = 128
kernel_size = 2
filters = 16
latent_dim = 20
epochs = 30

# VAE model = encoder + decoder
# build encoder model
inputs = Input(shape=input_shape, name='encoder_input')
x = inputs
print(K.int_shape(x))
# Two stride-2 convolutions; the filter count doubles before each layer.
for i in range(2):
    filters *= 2
    x = Conv2D(filters=filters,
               kernel_size=kernel_size,
               strides=2,
               padding='same',
               activation='relu')(x)

# shape info needed to build decoder model
shape = K.int_shape(x)

# generate latent vector Q(z|X)
x = Flatten()(x)
x = Dense(16, activation='relu')(x)
z_mean = Dense(latent_dim, name='z_mean')(x)
z_log_var = Dense(latent_dim, name='z_log_var')(x)

# use reparameterization trick to push the sampling out as input
# note that "output_shape" isn't necessary with the TensorFlow backend
z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])

# instantiate encoder model
# Output order matters below: index 0 = z_mean, 1 = z_log_var, 2 = sampled z.
encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
encoder.summary()

# build decoder model
latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
# Project the latent vector back to the encoder's last feature-map shape.
x = Dense(shape[1] * shape[2]*shape[3], activation='relu')(latent_inputs)
x = Reshape((shape[1], shape[2], shape[3]))(x)

# Two stride-2 transposed convolutions undo the encoder's two downsamplings.
# `filters` still holds the value left by the encoder loop.
for i in range(2):
    x = Conv2DTranspose(filters=filters,
                        kernel_size=kernel_size,
                        strides=2,
                        padding='same',
                        activation='relu')(x)


# Final layer maps back to a single channel, without an activation.
outputs = Conv2DTranspose(filters=1,
                          kernel_size=kernel_size,
                          padding='same')(x)


# instantiate decoder model
decoder = Model(latent_inputs, outputs, name='decoder')
decoder.summary()

# instantiate VAE model
# The decoder must be fed the sampled latent vector z (encoder output 2).
# Feeding output 1 (z_log_var) would bypass the sampling layer entirely, so
# the KL term would constrain a distribution the decoder never reads from.
outputs = decoder(encoder(inputs)[2])
vae = Model(inputs, outputs, name='vae')

models = (encoder, decoder)
# data = (x_test, y_test)

# VAE loss = mse_loss or xent_loss + kl_loss
reconstruction_loss = mse(K.flatten(inputs), K.flatten(outputs))


# Rescale the mean squared error by the number of values per sequence,
# derived from the configured dimensions instead of hard-coded literals.
reconstruction_loss *= sequence_length * n_features
# KL divergence between N(z_mean, exp(z_log_var)) and the unit Gaussian,
# summed over the latent dimensions.
kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
kl_loss = K.sum(kl_loss, axis=-1)
kl_loss *= -0.5
vae_loss = K.mean(reconstruction_loss + kl_loss)
# The loss is attached to the model, so compile() takes no loss argument.
vae.add_loss(vae_loss)
vae.compile(optimizer='rmsprop')
vae.summary()


(None, 1024, 4, 1)
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_input (InputLayer)      (None, 1024, 4, 1)   0                                            
__________________________________________________________________________________________________
conv2d_7 (Conv2D)               (None, 512, 2, 32)   160         encoder_input[0][0]              
__________________________________________________________________________________________________
conv2d_8 (Conv2D)               (None, 256, 1, 64)   8256        conv2d_7[0][0]                   
__________________________________________________________________________________________________
flatten_4 (Flatten)             (None, 16384)        0           conv2d_8[0][0]                   
__________________________________________________________________________________________

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
z_sampling (InputLayer)      (None, 20)                0         
_________________________________________________________________
dense_8 (Dense)              (None, 16384)             344064    
_________________________________________________________________
reshape_4 (Reshape)          (None, 256, 1, 64)        0         
_________________________________________________________________
conv2d_transpose_10 (Conv2DT (None, 512, 2, 64)        16448     
_________________________________________________________________
conv2d_transpose_11 (Conv2DT (None, 1024, 4, 64)       16448     
_________________________________________________________________
conv2d_transpose_12 (Conv2DT (None, 1024, 4, 1)        257       
Total params: 377,217
Trainable params: 377,217
Non-trainable params: 0
_________________________________________________________________


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder_input (InputLayer)   (None, 1024, 4, 1)        0         
_________________________________________________________________
encoder (Model)              [(None, 20), (None, 20),  271256    
_________________________________________________________________
decoder (Model)              (None, 1024, 4, 1)        377217    
Total params: 648,473
Trainable params: 648,473
Non-trainable params: 0
_________________________________________________________________


In [22]:


# Fit on the training sequences only; no target array is passed to fit().
vae.fit(x=trainX_nominal, batch_size=batch_size, epochs=10)
# Persist the trained weights.
vae.save_weights('Conv2d_vae.h5')


Epoch 1/10








Epoch 2/10








Epoch 3/10








Epoch 4/10








Epoch 5/10








Epoch 6/10








Epoch 7/10








Epoch 8/10








Epoch 9/10








Epoch 10/10








In [23]:
from sklearn.decomposition import PCA

def return_mask(num, labels):
    """Return the indices at which ``labels`` equals ``num``.

    Parameters
    ----------
    num : scalar
        Label value to look for.
    labels : numpy.ndarray
        Label array, 1-D or N-D.

    Returns
    -------
    numpy.ndarray
        For 1-D ``labels``: a 1-D array of matching positions.
        For N-D ``labels``: an array of shape (n_matches, labels.ndim), one
        index tuple per row, as produced by ``np.argwhere``.

    The row axis is always kept, even when exactly one element matches.
    A blanket ``np.squeeze`` would collapse that axis for a single match and
    break callers that index the result with ``[:, 0]``.
    """
    indices = np.argwhere(labels == num)
    if indices.shape[1] == 1:
        # 1-D labels: drop only the redundant trailing axis.
        return indices[:, 0]
    return indices

# Load the label table, drop its "Unnamed: 0" column and convert it to a
# NumPy array.
labels = np.array(
    pd.read_csv("Data/Boat_nominal_data/Boat_mix_labels.csv")
    .drop(columns="Unnamed: 0")
)

# One index array per label value 0..8: the first column of the matching
# index tuples returned by return_mask.
masks = [
    return_mask(num, labels)[:, 0]
    for num in range(0, 9)
]


# The encoder model is built with outputs [z_mean, z_log_var, z].
encodings = encoder.predict(trainX_nominal)

def check_z_sampling(encoded_values):
    """Re-sample latent vectors in NumPy from encoder outputs.

    Parameters
    ----------
    encoded_values : sequence
        ``encoded_values[0]`` holds the means and ``encoded_values[1]`` the
        log-variances, both array-like with the same shape (one row per
        sample). Any further entries are ignored.

    Returns
    -------
    numpy.ndarray
        ``mean + exp(0.5 * log_var) * eps`` with the same shape as the means.

    Independent standard-normal noise is drawn for every row and every
    latent dimension. Reusing a single noise vector for all rows would
    correlate every sample and would not match a sampler that draws noise
    with shape (batch, dim).
    """
    z_mean = np.asarray(encoded_values[0])
    # exp(0.5 * log_var) is the standard deviation, not the variance.
    z_std = np.exp(0.5 * np.asarray(encoded_values[1]))
    # The noise shape follows the data, so no global latent size is needed.
    eps = np.random.normal(0, 1, size=z_mean.shape)
    return z_mean + z_std * eps

# def plot_pca(title, type): 
#     x_val = []
#     y_val= []
#     for i in range(principalComponents.shape[0]):
#         x_val.append(principalComponents[i][0])
#         y_val.append(principalComponents[i][1])
#     x_val = np.array(x_val)
#     y_val = np.array(y_val)
#     
#     for mask in masks:
#         plt.scatter(x=x_val[mask], y=y_val[mask], alpha=0.5)
# 
#     plt.legend(labels=np.arange(0,9))
#     plt.title(str(title)+""+type)
#     plt.show()
#     
titles=['z_mean','z_log_var','z']
# Draw the NumPy re-sampling once: it does not depend on the loop variable
# and is not used by the plots below, so recomputing it per plot is wasted work.
latent_values = check_z_sampling(encodings)
# One legend entry per mask, in the order the scatter calls are issued.
class_ids = np.arange(len(masks))
for title, encoding in zip(titles, encodings):
    print(encoding.shape)
    # Scatter the first two columns of this encoder output, one colour per mask.
    for mask in masks:
        plt.scatter(x=encoding[:, 0][mask], 
                    y=encoding[:, 1][mask], alpha=0.5)
    plt.title(title)
    plt.xlabel("latent dimension 0")
    plt.ylabel("latent dimension 1")
    plt.legend(labels=class_ids)
    plt.show()
    # 
    # scaler = StandardScaler()
    # enc_input = scaler.fit_transform(encodings[i]) 
    # pca = PCA(2)
    # principalComponents = pca.fit_transform(enc_input)
    # plt.scatter(enc_input[0][:, 0], enc_input[0][:, 1])
    # plt.show()
    # principalComponents
    # #print(pca.explained_variance_ratio_)
    # plot_pca(' ', titles[i])





(300, 20)
(300, 20)


(300, 20)


In [7]:
# Collect the training sequences selected by each mask, printing each shape.
runs = []
for mask in masks:
    run_for_class = trainX_nominal[mask]
    print(run_for_class.shape)
    runs.append(run_for_class)

# For each of the nine groups, reconstruct and plot only its first sequence
# (the [:1] slice yields at most one item, and none for an empty group).
for j in range(9):
    for first_run in runs[j][:1]:
        run = np.reshape(first_run, (1, 1024,4, 1))
        rec = vae.predict(run)
        # Back to a 2-D (steps, features) table with the training column names.
        n_steps = len(trainX_nominal[0])
        rec = np.reshape(rec, (n_steps, n_features))
        reconstruction_df = pd.DataFrame(rec, columns=boat_csv.columns)

        lon = reconstruction_df["Lon"]
        lat = reconstruction_df["Lat"]
        plt.plot(lon, lat)
        #plt.savefig("Imgs/Latent_reconstruction/"+str(title)+".png")
        plt.show()
    
    

(619, 1024, 4, 1)
(42, 1024, 4, 1)
(42, 1024, 4, 1)
(51, 1024, 4, 1)
(49, 1024, 4, 1)
(59, 1024, 4, 1)
(42, 1024, 4, 1)
(48, 1024, 4, 1)
(48, 1024, 4, 1)
