In [2]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm
from keras.layers import Input, Dense, Lambda, Flatten, Reshape
from keras.layers import Conv1D
from keras.models import Model
from keras import backend as K
from sklearn.utils import resample

# Dimensions shared by the data-preparation and model-building cells below.
n_features, latent_dim, n_runs = 4, 10, 200

# Labels are read from CSV, the pandas index column is dropped and the result
# is kept as a plain ndarray (presumably one label row per run - TODO confirm).
labels = np.array(
    pd.read_csv("Data/Boat_nominal_data/Boat_mix_len_labels.csv")
    .drop(columns="Unnamed: 0")
)

# Module-level placeholder; prepare_training uses its own local max_len.
max_len = 0



def prepare_training(path, n_runs, target_len=620):
    """Load, scale and pad a set of variable-length runs into one 3-D array.

    Reads ``n_runs`` CSV files named ``path + str(index)`` for ``index`` in
    ``range(n_runs)``, drops their ``'Unnamed: 0'`` column, min-max scales
    every run independently to ``[0, 1]`` and pads each run to a common
    number of timesteps by repeating its last row.

    Parameters
    ----------
    path : str
        Prefix of the CSV file names; the run index is appended to it.
    n_runs : int
        Number of runs to read.
    target_len : int or None, optional
        Number of timesteps every run is padded to. The default, 620, is the
        value that used to be hard-coded here. Pass ``None`` to derive it
        from the data as the longest run rounded up to a multiple of 4.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_runs, padded_len, n_features)``. ``n_features``
        is read from the notebook-level global of that name.

    Raises
    ------
    ValueError
        If a run has more timesteps than the padded length.
    """

    def round_up_to_multiple(n, m):
        """Return the smallest multiple of ``m`` that is >= ``n``."""
        return -(-n // m) * m

    def extend_line(run, length):
        """Pad ``run`` to ``length`` rows by repeating its last row."""
        missing = length - len(run)
        if missing < 0:
            # Padding cannot shorten a run; fail here with a clear message
            # instead of a broadcast error when the row is stored.
            raise ValueError(
                "run has %d timesteps but the padded length is %d"
                % (len(run), length)
            )
        return np.pad(run, ((0, missing), (0, 0)), mode="edge")

    def construct_matrix(sequence_list):
        """Stack the scaled runs into one zero-initialised, padded array."""
        longest = max((len(seq) for seq in sequence_list), default=0)
        # Kept from the original: the longest run length is echoed to the cell.
        print(longest)
        if target_len is None:
            padded_len = round_up_to_multiple(longest, 4)
        else:
            padded_len = target_len
        train_matrix = np.zeros(shape=(n_runs, padded_len, n_features))
        for index, run in enumerate(sequence_list):
            train_matrix[index] = extend_line(run, padded_len)
        return train_matrix

    def standard_sequences(seqs):
        """Min-max scale every run on its own; returns a new list of arrays."""
        # A fresh scaler per run: each run is scaled by its own min and max.
        return [
            MinMaxScaler(feature_range=(0, 1)).fit_transform(seq)
            for seq in seqs
        ]

    def read_sequences():
        """Read all runs from disk and return the padded training array."""
        runs = [
            pd.read_csv(path + str(index)).drop(columns=['Unnamed: 0'])
            for index in range(n_runs)
        ]
        return construct_matrix(standard_sequences(runs))

    return read_sequences()
    

# Build the padded training tensor from the files "run^0" ... "run^{n_runs-1}"
# and echo its shape as a quick sanity check.
train_matrix = prepare_training(
    path="Mix_sequences_var_length/run^",
    n_runs=n_runs,
)
print(train_matrix.shape)

606
(200, 620, 4)


In [30]:
from keras.losses import mse
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Conv1D, Conv2DTranspose, Lambda
import keras.backend as K
# Number of output channels passed to every Conv1D layer and every
# Conv1DTranspose call made further down in this cell.
filters = 10


def sampling(args):
    """Draw a latent sample with the reparameterisation trick.

    ``args`` is a pair ``(z_mean, z_log_var)`` of tensors. The result is
    ``z_mean + exp(0.5 * z_log_var) * eps`` where ``eps`` is standard-normal
    noise with one row per batch element and one column per latent dimension.
    """
    mean, log_var = args
    n_rows = K.shape(mean)[0]        # dynamic batch size
    n_cols = K.int_shape(mean)[1]    # static latent width
    # K.random_normal defaults to mean=0 and std=1.0.
    noise = K.random_normal(shape=(n_rows, n_cols))
    std_dev = K.exp(0.5 * log_var)
    return mean + std_dev * noise


def Conv1DTranspose(input_tensor, filters, kernel_size, strides=1, padding='same'):
    """Apply a 1-D transposed convolution built from ``Conv2DTranspose``.

    A singleton axis is inserted at position 2, a ReLU-activated
    ``Conv2DTranspose`` with kernel ``(kernel_size, 1)`` and strides
    ``(strides, 1)`` is applied, and the singleton axis is removed again.

    Returns the resulting tensor.
    """
    def _insert_width_axis(tensor):
        return K.expand_dims(tensor, axis=2)

    def _remove_width_axis(tensor):
        return K.squeeze(tensor, axis=2)

    widened = Lambda(_insert_width_axis)(input_tensor)
    deconvolved = Conv2DTranspose(
        filters=filters,
        kernel_size=(kernel_size, 1),
        strides=(strides, 1),
        padding=padding,
        activation='relu',
    )(widened)
    return Lambda(_remove_width_axis)(deconvolved)


# Define an input sequence and process it.
# The encoder input has one timestep per padded row of train_matrix.
encoder_inputs = Input(shape=(train_matrix.shape[1], n_features), name='Encoder_input')

# NOTE: `encoder` is first bound to the LSTM *layer* here and is rebound to
# the encoder *Model* further down; later cells see only the Model.
encoder = LSTM(latent_dim, return_state=True)
x = encoder_inputs
# Three stride-2 convolutions shorten the sequence before it reaches the LSTM.
for i in range(3):
     x = Conv1D(filters=filters,
                kernel_size=20,
                strides=2,
                activation='relu',
                padding='same')(x)
     # Remember the tensor shape after the second convolution (i == 1).
     if i == 1:
        shape = K.int_shape(x)

encoder_outputs, state_h, state_c = encoder(x)
# We discard `encoder_outputs` and only keep the states.
encoder_states = [state_h, state_c]


# z_mean = Dense(latent_dim, name='z_mean',)(state_h)
# z_log_var = Dense(latent_dim, name='z_log_var')(state_h)
# z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])

# Stand-alone encoder: padded sequence -> [state_h, state_c].
encoder = Model(encoder_inputs, encoder_states, name='encoder')
encoder.summary()


# Set up the decoder, using `encoder_states` as initial state.
#decoder_inputs = Input(shape=(None,n_features))

# NOTE(review): `latent_inputs` and the Dense/Reshape tensor `x` built from
# `shape` are not referenced again in this cell, so they are not part of
# `model`. They look like leftovers from the commented-out VAE variant -
# confirm before removing.
latent_inputs = Input(shape=(latent_dim,), name='latent_inputs')
x = Dense(shape[1]*shape[2])(latent_inputs)
x = Reshape((shape[1], shape[2]))(x)

# The decoder LSTM reads a second input sequence and starts from the encoder
# states.
# NOTE(review): the training cell passes train_matrix as both this decoder
# input and the target, so the decoder sees the sequence it must output -
# confirm this is intended.
decoder_inputs = Input(shape=(None,n_features))
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)

# We set up our decoder to return full output sequences,
# and to return internal states as well. We don't use the 
# return states in the training model, but we will use them in inference.

x_dec = decoder_outputs

# Two stride-1 transposed convolutions on top of the decoder LSTM output.
for i in range(2):
        x_dec = Conv1DTranspose(input_tensor=x_dec, filters=filters,
                            kernel_size=20, padding='same')




# Final per-timestep projection back to n_features channels.
decoder_dense = Dense(n_features, name="Decoder_output")
decoder_outputs = decoder_dense(x_dec)



# reconstruction_loss = mse(K.flatten(encoder_inputs), K.flatten(decoder_outputs))
# kl_loss = - 0.5 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), 
#                          axis=-1)
# loss = K.mean(reconstruction_loss + kl_loss)
# model = Model([encoder_inputs, decoder_inputs], decoder_outputs, name='vae')
# model.add_loss(loss)
# model.compile(optimizer='rmsprop')


#Define the model that will turn
# `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='adam', loss='mse')

model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Encoder_input (InputLayer)   (None, 620, 4)            0         
_________________________________________________________________
conv1d_21 (Conv1D)           (None, 310, 10)           810       
_________________________________________________________________
conv1d_22 (Conv1D)           (None, 155, 10)           2010      
_________________________________________________________________
conv1d_23 (Conv1D)           (None, 78, 10)            2010      
_________________________________________________________________
lstm_11 (LSTM)               [(None, 10), (None, 10),  840       
Total params: 5,670
Trainable params: 5,670
Non-trainable params: 0
_________________________________________________________________


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Encoder_input (InputLayer)      (None, 620, 4)       0                                            
__________________________________________________________________________________________________
conv1d_21 (Conv1D)              (None, 310, 10)      810         Encoder_input[0][0]              
__________________________________________________________________________________________________
conv1d_22 (Conv1D)              (None, 155, 10)      2010        conv1d_21[0][0]                  
__________________________________________________________________________________________________
conv1d_23 (Conv1D)              (None, 78, 10)       2010        conv1d_22[0][0]                  
__________________________________________________________________________________________________
input_6 (I


__________________________________________________________________________________________________
lambda_22 (Lambda)              (None, None, 10)     0           conv2d_transpose_11[0][0]        
__________________________________________________________________________________________________
lambda_23 (Lambda)              (None, None, 1, 10)  0           lambda_22[0][0]                  
__________________________________________________________________________________________________
conv2d_transpose_12 (Conv2DTran (None, None, 1, 10)  2010        lambda_23[0][0]                  
__________________________________________________________________________________________________
lambda_24 (Lambda)              (None, None, 10)     0           conv2d_transpose_12[0][0]        
__________________________________________________________________________________________________
Decoder_output (Dense)          (None, None, 4)      44          lambda_24[0][0]                  
Total par


__________________________________________________________________________________________________


In [31]:
# Train the sequence autoencoder: train_matrix is the encoder input, the
# decoder input and the regression target. Weights are written to disk after
# the final epoch.
model.fit(
    x=[train_matrix, train_matrix],
    y=train_matrix,
    epochs=100,
)
model.save_weights("Models/Weights/LSTM_seq2seq_300_500.hdf5")


Epoch 1/100


 32/200 [===>..........................] - ETA: 3:13 - loss: 0.3660













Epoch 2/100


 32/200 [===>..........................] - ETA: 11s - loss: 0.3123













Epoch 3/100


 32/200 [===>..........................] - ETA: 11s - loss: 0.2265













Epoch 4/100


 32/200 [===>..........................] - ETA: 20s - loss: 0.1485













Epoch 5/100


 32/200 [===>..........................] - ETA: 13s - loss: 0.1206













Epoch 6/100


 32/200 [===>..........................] - ETA: 15s - loss: 0.1023













Epoch 7/100


 32/200 [===>..........................] - ETA: 13s - loss: 0.0928













Epoch 8/100


 32/200 [===>..........................] - ETA: 15s - loss: 0.0880













Epoch 9/100


 32/200 [===>..........................] - ETA: 12s - loss: 0.0785













Epoch 10/100


 32/200 [===>..........................] - ETA: 12s - loss: 0.0739













Epoch 11/100


 32/200 [===>..........................] - ETA: 12s - loss: 0.0684













Epoch 12/100


 32/200 [===>..........................] - ETA: 13s - loss: 0.0635













Epoch 13/100


 32/200 [===>..........................] - ETA: 12s - loss: 0.0587













Epoch 14/100


 32/200 [===>..........................] - ETA: 16s - loss: 0.0545













Epoch 15/100


 32/200 [===>..........................] - ETA: 12s - loss: 0.0499













Epoch 16/100


 32/200 [===>..........................] - ETA: 13s - loss: 0.0462













Epoch 17/100


 32/200 [===>..........................] - ETA: 13s - loss: 0.0425













Epoch 18/100


 32/200 [===>..........................] - ETA: 12s - loss: 0.0391













Epoch 19/100


 32/200 [===>..........................] - ETA: 12s - loss: 0.0363













Epoch 20/100


 32/200 [===>..........................] - ETA: 13s - loss: 0.0340













Epoch 21/100


 32/200 [===>..........................] - ETA: 13s - loss: 0.0313













Epoch 22/100


 32/200 [===>..........................] - ETA: 13s - loss: 0.0290













Epoch 23/100


 32/200 [===>..........................] - ETA: 13s - loss: 0.0268













Epoch 24/100


 32/200 [===>..........................] - ETA: 13s - loss: 0.0241













Epoch 25/100


 32/200 [===>..........................] - ETA: 13s - loss: 0.0227













Epoch 26/100


 32/200 [===>..........................] - ETA: 13s - loss: 0.0220













Epoch 27/100


 32/200 [===>..........................] - ETA: 13s - loss: 0.0195













Epoch 28/100


 32/200 [===>..........................] - ETA: 13s - loss: 0.0181













Epoch 29/100


 32/200 [===>..........................] - ETA: 14s - loss: 0.0165













Epoch 30/100


 32/200 [===>..........................] - ETA: 21s - loss: 0.0157













Epoch 31/100


 32/200 [===>..........................] - ETA: 14s - loss: 0.0148













Epoch 32/100


 32/200 [===>..........................] - ETA: 15s - loss: 0.0142













Epoch 33/100


 32/200 [===>..........................] - ETA: 18s - loss: 0.0130













Epoch 34/100


 32/200 [===>..........................] - ETA: 15s - loss: 0.0120













Epoch 35/100


 32/200 [===>..........................] - ETA: 14s - loss: 0.0113













Epoch 36/100


 32/200 [===>..........................] - ETA: 17s - loss: 0.0106













Epoch 37/100


 32/200 [===>..........................] - ETA: 13s - loss: 0.0100













Epoch 38/100


 32/200 [===>..........................] - ETA: 17s - loss: 0.0097













Epoch 39/100


 32/200 [===>..........................] - ETA: 13s - loss: 0.0090













Epoch 40/100


 32/200 [===>..........................] - ETA: 13s - loss: 0.0086













Epoch 41/100


 32/200 [===>..........................] - ETA: 13s - loss: 0.0082













Epoch 42/100


 32/200 [===>..........................] - ETA: 16s - loss: 0.0079













Epoch 43/100


 32/200 [===>..........................] - ETA: 14s - loss: 0.0075













Epoch 44/100


 32/200 [===>..........................] - ETA: 13s - loss: 0.0073













Epoch 45/100


 32/200 [===>..........................] - ETA: 12s - loss: 0.0070













Epoch 46/100


 32/200 [===>..........................] - ETA: 12s - loss: 0.0069













Epoch 47/100


 32/200 [===>..........................] - ETA: 13s - loss: 0.0066













Epoch 48/100


 32/200 [===>..........................] - ETA: 14s - loss: 0.0065













Epoch 49/100


 32/200 [===>..........................] - ETA: 13s - loss: 0.0062













Epoch 50/100


 32/200 [===>..........................] - ETA: 13s - loss: 0.0061













Epoch 51/100


 32/200 [===>..........................] - ETA: 15s - loss: 0.0059













Epoch 52/100


 32/200 [===>..........................] - ETA: 14s - loss: 0.0057













Epoch 53/100


 32/200 [===>..........................] - ETA: 14s - loss: 0.0056













Epoch 54/100


 32/200 [===>..........................] - ETA: 13s - loss: 0.0054













Epoch 55/100


 32/200 [===>..........................] - ETA: 13s - loss: 0.0053













Epoch 56/100


 32/200 [===>..........................] - ETA: 13s - loss: 0.0052













Epoch 57/100


 32/200 [===>..........................] - ETA: 13s - loss: 0.0051













Epoch 58/100


 32/200 [===>..........................] - ETA: 15s - loss: 0.0048













Epoch 59/100


 32/200 [===>..........................] - ETA: 14s - loss: 0.0047













Epoch 60/100


 32/200 [===>..........................] - ETA: 16s - loss: 0.0047













Epoch 61/100


 32/200 [===>..........................] - ETA: 13s - loss: 0.0045













Epoch 62/100


 32/200 [===>..........................] - ETA: 13s - loss: 0.0045













Epoch 63/100


 32/200 [===>..........................] - ETA: 13s - loss: 0.0043













Epoch 64/100


 32/200 [===>..........................] - ETA: 14s - loss: 0.0043













Epoch 65/100


 32/200 [===>..........................] - ETA: 13s - loss: 0.0042













Epoch 66/100


 32/200 [===>..........................] - ETA: 18s - loss: 0.0041













Epoch 67/100


 32/200 [===>..........................] - ETA: 15s - loss: 0.0040













Epoch 68/100


 32/200 [===>..........................] - ETA: 18s - loss: 0.0039













Epoch 69/100


 32/200 [===>..........................] - ETA: 16s - loss: 0.0038













Epoch 70/100


 32/200 [===>..........................] - ETA: 17s - loss: 0.0038













Epoch 71/100


 32/200 [===>..........................] - ETA: 15s - loss: 0.0037













Epoch 72/100


 32/200 [===>..........................] - ETA: 15s - loss: 0.0036













Epoch 73/100


 32/200 [===>..........................] - ETA: 15s - loss: 0.0035













Epoch 74/100


 32/200 [===>..........................] - ETA: 17s - loss: 0.0035













Epoch 75/100


 32/200 [===>..........................] - ETA: 16s - loss: 0.0034













Epoch 76/100


 32/200 [===>..........................] - ETA: 21s - loss: 0.0034













Epoch 77/100


 32/200 [===>..........................] - ETA: 18s - loss: 0.0033













Epoch 78/100


 32/200 [===>..........................] - ETA: 15s - loss: 0.0032













Epoch 79/100


 32/200 [===>..........................] - ETA: 17s - loss: 0.0032













Epoch 80/100


 32/200 [===>..........................] - ETA: 17s - loss: 0.0032













Epoch 81/100


 32/200 [===>..........................] - ETA: 22s - loss: 0.0030













Epoch 82/100


 32/200 [===>..........................] - ETA: 16s - loss: 0.0031













Epoch 83/100


 32/200 [===>..........................] - ETA: 15s - loss: 0.0030













Epoch 84/100


 32/200 [===>..........................] - ETA: 18s - loss: 0.0030













Epoch 85/100


 32/200 [===>..........................] - ETA: 24s - loss: 0.0029













Epoch 86/100


 32/200 [===>..........................] - ETA: 16s - loss: 0.0028













Epoch 87/100


 32/200 [===>..........................] - ETA: 16s - loss: 0.0028













Epoch 88/100


 32/200 [===>..........................] - ETA: 16s - loss: 0.0028













Epoch 89/100


 32/200 [===>..........................] - ETA: 15s - loss: 0.0027













Epoch 90/100


 32/200 [===>..........................] - ETA: 16s - loss: 0.0027













Epoch 91/100


 32/200 [===>..........................] - ETA: 16s - loss: 0.0026













Epoch 92/100


 32/200 [===>..........................] - ETA: 16s - loss: 0.0026













Epoch 93/100


 32/200 [===>..........................] - ETA: 17s - loss: 0.0026













Epoch 94/100


 32/200 [===>..........................] - ETA: 14s - loss: 0.0026













Epoch 95/100


 32/200 [===>..........................] - ETA: 16s - loss: 0.0025













Epoch 96/100


 32/200 [===>..........................] - ETA: 18s - loss: 0.0025













Epoch 97/100


 32/200 [===>..........................] - ETA: 14s - loss: 0.0025













Epoch 98/100


 32/200 [===>..........................] - ETA: 14s - loss: 0.0024













Epoch 99/100


 32/200 [===>..........................] - ETA: 14s - loss: 0.0025













Epoch 100/100


 32/200 [===>..........................] - ETA: 15s - loss: 0.0024













In [None]:

# Inference-time encoder; same inputs and outputs as the `encoder` Model
# built in the model-definition cell.
encoder_model = Model(encoder_inputs, encoder_states)

# The inference decoder takes its initial LSTM states as explicit inputs.
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
# NOTE: this rebinds the globals decoder_outputs, state_h and state_c that
# the model-definition cell created.
decoder_outputs, state_h, state_c = decoder_lstm(
    decoder_inputs, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]
# NOTE(review): the training graph sends the LSTM output through two
# Conv1DTranspose calls before decoder_dense; here decoder_dense is applied
# to the LSTM output directly, so this decoder skips those layers. It only
# fits shape-wise because latent_dim and filters are both 10 - confirm intent.
decoder_outputs = decoder_dense(decoder_outputs)
decoder_model = Model(
    [decoder_inputs] + decoder_states_inputs,
    [decoder_outputs] + decoder_states)

decoder_model.summary()

# Uses `encoder`, not the `encoder_model` defined above; both map
# encoder_inputs to encoder_states.
encoded = encoder.predict(train_matrix)


(200, 420, 4)


In [74]:

# Decode the training set: the decoder input is train_matrix itself and the
# initial LSTM states are the two arrays returned by the encoder.
run = decoder_model.predict([train_matrix, *encoded])


(200, 420, 4)


In [30]:
# Restore the weights written by the training cell so the cells below can be
# run without retraining.
model.load_weights("Models/Weights/LSTM_seq2seq_300_500.hdf5")


In [32]:
from sklearn.decomposition import PCA

# The encoder Model returns [state_h, state_c] of the encoder LSTM.
# NOTE(review): the names below are kept from the commented-out VAE variant;
# enc_mean / z_enc both hold state_h and enc_var holds state_c.
encodings = encoder.predict(train_matrix)
enc_mean = encodings[0]
enc_var = encodings[1]
z_enc = encodings[0]
print(enc_mean.shape, enc_var.shape, z_enc.shape)


def return_mask(num, labs):
    """Return the indices at which ``labs`` equals ``num``.

    Parameters
    ----------
    num : scalar
        Label value to look for.
    labs : array-like
        Label array of any dimensionality.

    Returns
    -------
    numpy.ndarray
        For 1-D ``labs`` a 1-D array of matching positions. For N-D ``labs``
        (N >= 2) the ``(k, N)`` array produced by ``np.argwhere``, one row of
        coordinates per match, so callers can always take ``[:, 0]`` to get
        the matching row numbers.
    """
    arg = np.argwhere(np.asarray(labs) == num)
    # Only drop the coordinate axis of 1-D input. A blanket np.squeeze also
    # collapsed the row axis when exactly one element matched, which made the
    # callers' `[:, 0]` indexing fail.
    if arg.shape[1] == 1:
        arg = arg[:, 0]
    return arg


# One index array per class id 0..8: the row numbers of `labels` that carry
# that class.
masks = [
    return_mask(class_id, np.array(labels))[:, 0]
    for class_id in range(9)
]

from mpl_toolkits.mplot3d import Axes3D  
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

import numpy as np
from scipy.stats import norm


def plot_pca(title, i):
    """Show a 3-D and then a 2-D scatter of the current PCA projection.

    Reads the notebook globals ``principalComponents`` (projected points) and
    ``masks`` (one index array per class). Each class is drawn as its own
    scatter series and both figures get a 0..8 legend and ``str(title)`` as
    title.

    ``i`` is accepted for call compatibility but is not used.
    """
    markers = ['o', 'o', 'o', 'o', '^', '^', '^', '^', '^', '^']

    def _finish_figure():
        # Legend entries are the class ids 0..8.
        plt.legend(labels=np.arange(0, 9))
        plt.title(str(title))
        plt.show()

    # --- 3-D view of the first three components ---
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    for class_id, mask in enumerate(masks):
        first = principalComponents[:, 0]
        second = principalComponents[:, 1]
        third = principalComponents[:, 2]
        ax.scatter(first[mask], second[mask], third[mask],
                   marker=markers[class_id])
    _finish_figure()

    # --- 2-D view of the first two components ---
    for mask in masks:
        first = principalComponents[:, 0]
        second = principalComponents[:, 1]
        plt.scatter(x=first[mask], y=second[mask])
    _finish_figure()


# Project the encoder outputs to three principal components and plot them.
# NOTE(review): the titles come from the VAE variant; the arrays are the
# encoder LSTM states (see where enc_mean / enc_var are assigned) - confirm
# the labelling.
enc_list = [enc_mean, enc_var, z_enc]
titles = ["MEAN","LOG_VAR","SAMPLED"]
# Only the first two entries are processed; z_enc is skipped.
for i,enc in enumerate(enc_list[:2]):
    # Standardise each encoding dimension before PCA.
    scaler = StandardScaler()
    enc_input = scaler.fit_transform(enc) 
    pca = PCA(3)
    # plot_pca reads this global, so it must be assigned before the call.
    principalComponents = pca.fit_transform(enc_input)
    print(principalComponents.shape)
    print(pca.explained_variance_ratio_) 
    # The second argument is not used by plot_pca.
    plot_pca('Sequences'+titles[i], 0)


(200, 10) (200, 10) (200, 10)
(200, 3)
[0.39505213 0.3514468  0.20381144]


(200, 3)
[0.48722023 0.21576585 0.15873528]


In [27]:

# Labels of the "unseen" runs, loaded the same way as the training labels,
# and one index array per class id 0..8.
unseen_labs = np.array(
    pd.read_csv("Data/Boat_nominal_data/Boat_unseen_labels_mix.csv")
    .drop(columns="Unnamed: 0")
)
unseen_mask = [
    return_mask(class_id, np.array(unseen_labs))[:, 0]
    for class_id in range(9)
]

# Load and pad the unseen runs exactly like the training runs.
unseen_sequences_matrix = prepare_training(
    path="Mix_sequences_var_length/run_unseen^",
    n_runs=200,
)
print(unseen_sequences_matrix.shape)

# Encode the unseen runs; the encoder returns a list of two state arrays.
unseen_encoding = encoder.predict(unseen_sequences_matrix)
print(len(unseen_encoding), unseen_encoding[0].shape)


607
(200, 620, 4)


2 (200, 10)


In [28]:
# Reconstruct both datasets; each matrix is fed to the model as encoder input
# and as decoder input, as during training.
reconstruction = model.predict(x=[train_matrix, train_matrix])
reconstruction_unseen = model.predict(
    x=[unseen_sequences_matrix, unseen_sequences_matrix]
)

#RECONSTRUCTION ERROR
def get_reconstructed_matrix(input_matrix, reconstrut):
    """Return the reconstruction with all-zero input timesteps blanked out.

    Parameters
    ----------
    input_matrix : array-like, shape (n_runs, n_steps, n_features)
        Original sequences. A timestep counts as "present" when the mean of
        its features is non-zero.
    reconstrut : array-like, same shape as ``input_matrix``
        Model reconstruction of ``input_matrix``.

    Returns
    -------
    numpy.ndarray
        New float array shaped like ``input_matrix`` holding the values of
        ``reconstrut`` at present timesteps and zeros elsewhere. Neither
        argument is modified.

    NOTE(review): prepare_training pads runs by repeating their last row, not
    with zeros, so the mask may keep every timestep - confirm that the
    zero-mean test is the intended padding detector.
    """
    input_matrix = np.asarray(input_matrix)
    reconstrut = np.asarray(reconstrut)
    # Boolean (n_runs, n_steps) mask of timesteps whose feature mean is != 0.
    present = np.mean(input_matrix, axis=2) != 0
    return_matrix = np.zeros(shape=input_matrix.shape)
    return_matrix[present] = reconstrut[present]
    # Return the masked copy; the unmasked `reconstrut` used to be returned,
    # which silently discarded the masking.
    return return_matrix


# Element-wise absolute reconstruction error for both datasets. The unseen
# reconstruction is masked with the unseen inputs (the training inputs were
# used here before, a copy-paste slip).
train_error = abs(
    train_matrix - get_reconstructed_matrix(train_matrix, reconstruction)
)
unseen_runs_error = abs(
    unseen_sequences_matrix
    - get_reconstructed_matrix(unseen_sequences_matrix, reconstruction_unseen)
)

# Average over features (axis 2), then over timesteps (axis 1): one mean
# absolute error per run.
train_error_avg = np.mean(train_error, axis=2)
unseen_error_avg = np.mean(unseen_runs_error, axis=2)
train_error_avg = np.mean(train_error_avg, axis=1)
unseen_error_avg = np.mean(unseen_error_avg, axis=1)
print(train_error_avg.shape, unseen_error_avg.shape)

# 1-based run numbers for the x axis, sized from the data instead of a
# hard-coded 300.
train_run_numbers = np.arange(1, len(train_error_avg) + 1)
unseen_run_numbers = np.arange(1, len(unseen_error_avg) + 1)

# One scatter series per class.
for mask in masks:
    plt.scatter(train_run_numbers[mask], train_error_avg[mask])
plt.title('ERROR ON TRAIN')
plt.xlabel('run number')
plt.ylabel('mean absolute reconstruction error')
plt.show()

for mask in unseen_mask:
    plt.scatter(unseen_run_numbers[mask], unseen_error_avg[mask])
plt.title('ERROR ON Unseen')
plt.xlabel('run number')
plt.ylabel('mean absolute reconstruction error')
plt.show()

(200,) (200,)


In [29]:
# def reconstruct_sequence(seq_index):
#     run = train_matrix[seq_index]
#     #mask_seq = np.squeeze(np.argwhere(np.mean(run, axis=1) != 0))
# 
#     input = np.reshape(run, (1, run.shape[0], run.shape[1]))
#     reconstr_run = model.predict([input,input])
#     reconstr_run = np.reshape(reconstr_run, (run.shape[0], run.shape[1]))
#     reconstr_run = reconstr_run
#     df = pd.DataFrame(reconstr_run, columns=["Timestep","Sin", "Cosin", "Lat", "Lon"])
#     df_orig = 
#     plt.plot(df['Lon'], df['Lat'])
#     plt.show()
# 
# for i in range(len(train_matrix)):
#     reconstruct_sequence(i)
#     if i > 10:
#         break

#RECONSTRUCTION
def reconstruct_sequence(seq_index):
    """Overlay the original and reconstructed Lon/Lat track of one run.

    Reads run ``seq_index`` from the notebook globals ``train_matrix`` and
    ``reconstruction``, drops the last 15 timesteps of each, and plots the
    original track first and the reconstructed track second on one figure.
    """
    feature_names = ["Sin", "Cosin", "Lat", "Lon"]
    original_rows = train_matrix[seq_index]
    rebuilt_rows = reconstruction[seq_index]

    rebuilt_frame = pd.DataFrame(rebuilt_rows[:-15], columns=feature_names)
    original_frame = pd.DataFrame(original_rows[:-15], columns=feature_names)

    for frame in (original_frame, rebuilt_frame):
        plt.plot(frame['Lon'], frame['Lat'])
    plt.show()

# Plot the first 21 runs (indices 0..20), or all of them if there are fewer.
for i in range(min(len(train_matrix), 21)):
    reconstruct_sequence(i)