In [2]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm
from keras.layers import Input, Dense, Lambda, Flatten, Reshape
from keras.layers import Conv1D
from keras.models import Model
from keras import backend as K
import tensorflow as tf

n_features = 4
sequence_length = 180
boat_csv = pd.read_csv("Data/Boat_nominal_data/Boat_sequences_mix.csv")
boat_csv = boat_csv.drop(columns=["Unnamed: 0"])
scaler = MinMaxScaler(feature_range=(0,1))
normal_data = scaler.fit_transform(boat_csv)
print(normal_data.shape)
boat_val = pd.read_csv("Data/Boat_nominal_data/Boat_sequence_mix_val.csv")
boat_val = boat_val.drop(columns=["Unnamed: 0"])
scaler = MinMaxScaler(feature_range=(0,1))
val_nom_data = scaler.fit_transform(boat_val)
print(normal_data.shape, val_nom_data.shape)


def prepare_sequences(data, batch_size):
    samples = []
    for i in range(0,data.shape[0], batch_size):
        sample = data[i:i+batch_size]	
        samples.append(sample)
    sequences = np.array(samples)
    trainX = np.reshape(sequences, (len(sequences), batch_size, n_features))
    return trainX


def prepare_data():    
    trainX_nominal = prepare_sequences(normal_data, sequence_length) 
    print(trainX_nominal.shape)
    
    valX_nominal = prepare_sequences(val_nom_data, sequence_length)
    print(valX_nominal.shape)

    return trainX_nominal, valX_nominal


trainX_nominal, valX_nominal = prepare_data()
n_sequences = len(trainX_nominal)


Using TensorFlow backend.


(54000, 4)
(54000, 4) (5400, 4)
(300, 180, 4)
(30, 180, 4)


In [3]:
from keras.layers import Conv2DTranspose, ZeroPadding1D
from keras.losses import mse

intermediate_dimensions = 50
latent_dim = 10
filters = 50


def Conv1DTranspose(input_tensor, filters, kernel_size,last, strides=2, padding='same'):
        if last:
            activation = 'linear'
        else:
            activation = 'relu'
        x = Lambda(lambda x: K.expand_dims(x, axis=2))(input_tensor)
        x = Conv2DTranspose(filters=filters, kernel_size=(kernel_size, 1), 
                            strides=(strides, 1), padding=padding,
                            activation=activation)(x)
        x = Lambda(lambda x: K.squeeze(x, axis=2))(x)
        return x


def sampling(args):
        z_mean, z_log_var = args
        batch = K.shape(z_mean)[0]
        dim = K.int_shape(z_mean)[1]
        epsilon = K.random_normal(shape=(batch, dim))
        return z_mean + K.exp(0.5 * z_log_var) * epsilon

def create_vae(verbose):
    inputs = Input(shape=(sequence_length, n_features))
    #x = ZeroPadding1D()
    x = inputs
    
    for i in range(2):
        x = Conv1D(filters=filters,
                   kernel_size=2,
                   strides=2,
                   padding='same')(x)
    
    shape = K.int_shape(x)
    x = Flatten()(x)
    # x = Conv1D(filters=50,
    #            kernel_size=2,
    #            strides=2,
    #            padding='same')(x)
   
    #x = Reshape((shape[1]*shape[2],))(x)
    embeddings = Dense(latent_dim)(x)
    
    z_mean = Dense(latent_dim, name='z_mean',)(embeddings)
    z_log_var = Dense(latent_dim, name='z_log_var')(embeddings)
    z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])

    encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
    if verbose:
        encoder.summary()
    
    latent_inputs = Input(shape=(latent_dim,), name='latent_inputs')
    x = Dense(shape[1]*shape[2])(latent_inputs)
    x = Reshape((shape[1], shape[2]))(x)
    
    for i in range(2):
        x = Conv1DTranspose(input_tensor=x,
                            filters=filters,
                            kernel_size=2,
                            last=False,
                            padding='same')
    
    
    outputs = Conv1DTranspose(input_tensor=x,
                              filters=n_features,
                              kernel_size=2,
                              strides=1,
                              last=True,
                              padding='same')
    
    decoder = Model(latent_inputs, outputs)
    if verbose:
        decoder.summary()
    
    outputs = decoder(encoder.outputs[2])
    reconstruction_loss = mse(K.flatten(inputs), K.flatten(outputs))
    #reconstruction_loss *= sequence_length
    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5
    loss = K.mean(reconstruction_loss+0.0001*kl_loss)
    vae = Model(inputs, outputs, name='vae')
    vae.add_loss(loss)
    
    vae.compile(optimizer='adam')
    return (vae, encoder,decoder)


In [4]:
from keras.callbacks import ModelCheckpoint

checkpointer = ModelCheckpoint(filepath="Models/Weights/Vae_conv1d_emb.hdf5", 
                               verbose=0, save_best_only=True)

vaes = []
epochs = [1]
for i, epoch in enumerate(epochs):
    print('Training Vae with ' + str(epoch))
    vaes.append(create_vae(verbose=False))
    vaes[i][0].fit(x=trainX_nominal,
            epochs=epoch,
            batch_size=sequence_length,
            validation_data=(valX_nominal, None),
            callbacks=[checkpointer],
            verbose=1)
    vaes[i][0].load_weights("Models/Weights/Vae_conv1d_emb.hdf5")


Training Vae with 1
























Train on 300 samples, validate on 30 samples
Epoch 1/1



















In [7]:
def return_mask(num, labels):
    print(labels[0])
    return np.squeeze(np.argwhere(labels == num))
    
    
labels = pd.read_csv("Data/Boat_nominal_data/Boat_mix_labels.csv")
labels = labels.drop(columns="Unnamed: 0")
labels = np.array(labels)
masks = [return_mask(num, labels)[:, 0] for num in range(0, 9)]
encodings = []
for t in vaes:
    encodings.append(t[1].predict(trainX_nominal))

titles = ['Z_mean','Z_log_var','Z_Sampled']
def plot_encodings():
    for enc in encodings:    
        for i in range(3):
            for mask in masks:
                plt.scatter(x=enc[i][:, 0][mask], 
                            y=enc[i][:, 1][mask], alpha=0.5)
            plt.title(titles[i])
            plt.legend(labels=np.arange(0, 9))
            plt.show()


plot_encodings()
# for enc in encodings:
#     for mask in masks:
#         plt.scatter(x=enc[:, 0][mask], 
#                     y=enc[:, 1][mask], alpha=0.5)
#         plt.title(titles[i])
#         plt.legend(labels=np.arange(0, 9))
#     plt.show()



[2]
[2]
[2]
[2]
[2]
[2]
[2]
[2]
[2]


In [108]:

runs = []
for mask in masks:
    run_for_class = trainX_nominal[mask]
    print(run_for_class.shape)
    runs.append(run_for_class)

tit = ['350']
for k,mod in enumerate(vaes):
    for j in range(9):    
        for i in runs[j]:
            run = np.reshape(i, (1,sequence_length, n_features))
            rec = mod[0].predict(run)
            rec = np.reshape(rec, (len(trainX_nominal[0]), n_features))
            reconstruction_df = pd.DataFrame(rec, columns=boat_csv.columns)
            plt.plot(reconstruction_df["Lon"], reconstruction_df["Lat"])
            plt.title(tit[k])
            plt.show()
            break


(15, 180, 4)
(15, 180, 4)
(15, 180, 4)
(15, 180, 4)
(15, 180, 4)
(15, 180, 4)
(15, 180, 4)
(15, 180, 4)
(15, 180, 4)


In [107]:
def return_mask(num, labels):
        return np.squeeze(np.argwhere(labels == num))
    

boat_strange = pd.read_csv("Data/Boat_nominal_data/Boat_unseen.csv")
boat_strange = boat_strange.drop(columns=["Unnamed: 0"])
scaler = MinMaxScaler()
boat_unseen_data = scaler.fit_transform(boat_strange)

unseen_sequences = prepare_sequences(boat_unseen_data, 180)
print(unseen_sequences.shape)
  
labels = pd.read_csv("Data/Boat_nominal_data/Boat_unseen_labels.csv")
labels = labels.drop(columns="Unnamed: 0")
labels = np.array(labels)
masks = [return_mask(num, labels)[:, 0] for num in range(0, 9)]
encodings = []
for t in vaes:
    encodings.append(t[1].predict(unseen_sequences))
    
titles = ['Z_mean', 'Z_log_var', 'Z_Sampled']
for enc in encodings:    
    for i in range(3):
        for mask in masks:
            plt.scatter(x=enc[i][:, 0][mask], 
                        y=enc[i][:, 1][mask], alpha=0.5)
        plt.title(titles[i])
        plt.legend(labels=np.arange(0, 9))
        plt.show()


(135, 180, 4)


In [46]:
runs = []
for mask in masks:
    run_for_class = unseen_sequences[mask]
    print(run_for_class.shape)
    runs.append(run_for_class)

tit = ['300']
for k, mod in enumerate(vaes):
    for j in range(9):    
        for i in runs[j]:
            run = np.reshape(i, (1, sequence_length, n_features))
            rec = mod[0].predict(run)
            rec = np.reshape(rec, (len(trainX_nominal[0]), n_features))
            reconstruction_df = pd.DataFrame(rec, columns=boat_csv.columns)
            plt.plot(reconstruction_df["Lon"], reconstruction_df["Lat"])
            plt.title(tit[k])
            plt.show()
            break


(15, 180, 4)
(15, 180, 4)
(15, 180, 4)
(15, 180, 4)
(15, 180, 4)
(15, 180, 4)
(15, 180, 4)
(15, 180, 4)
(15, 180, 4)
