In [2]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm
from keras.layers import Input, Dense, Lambda, Flatten, Reshape
from keras.layers import Conv1D,UpSampling1D
from keras.models import Model
from keras import backend as K

# Shape constants shared by the windowing helpers and the model definition.
n_features = 4
sequence_length = 180

# Nominal training data: drop the CSV's saved index column, then scale every
# remaining column into [0, 1].
boat_csv = pd.read_csv("Data/Boat_nominal_data/Boat_sequences_mix.csv")
boat_csv = boat_csv.drop(columns=["Unnamed: 0"])
scaler = MinMaxScaler(feature_range=(0,1))
normal_data = scaler.fit_transform(boat_csv)
print(normal_data.shape)

# Nominal validation data. It is scaled with the scaler fitted on the training
# set (transform only, no re-fit) so that both sets share the same mapping and
# `scaler` keeps describing the training distribution. Validation values
# outside the training min/max can therefore land slightly outside [0, 1].
boat_val = pd.read_csv("Data/Boat_nominal_data/Boat_sequence_mix_val.csv")
boat_val = boat_val.drop(columns=["Unnamed: 0"])
val_nom_data = scaler.transform(boat_val)

print(normal_data.shape, val_nom_data.shape)
def prepare_sequences(data, batch_size):
    """Cut a 2-D array into consecutive, non-overlapping windows.

    Args:
        data: Array-like of shape ``(n_rows, n_columns)``.
        batch_size: Number of consecutive rows per window (the sequence
            length, despite the parameter name).

    Returns:
        A new ``numpy.ndarray`` of shape
        ``(n_rows // batch_size, batch_size, n_columns)``. Rows left over
        after the last complete window are dropped, because a shorter final
        window cannot be stacked with the others.

    Raises:
        ValueError: If ``batch_size`` is not positive or ``data`` is not 2-D.
    """
    data = np.asarray(data)
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")
    if data.ndim != 2:
        raise ValueError("data must be a 2-D array of shape (n_rows, n_columns)")

    n_windows = data.shape[0] // batch_size
    usable = data[:n_windows * batch_size]
    # The column count comes from the data itself rather than a module-level
    # constant; copy() keeps the result independent of the caller's array.
    trainX = usable.reshape(n_windows, batch_size, data.shape[1]).copy()
    return trainX


def prepare_data():
    """Window the scaled training and validation arrays.

    Reads the module-level ``normal_data`` and ``val_nom_data`` arrays, cuts
    each into windows of ``sequence_length`` rows with ``prepare_sequences``
    and prints the shape of each result (training first, then validation).

    Returns:
        tuple: ``(training_windows, validation_windows)``.
    """
    windowed = []
    for scaled in (normal_data, val_nom_data):
        windows = prepare_sequences(scaled, sequence_length)
        print(windows.shape)
        windowed.append(windows)

    train_windows, val_windows = windowed
    return train_windows, val_windows

# Build the windowed training / validation tensors once; later cells reuse them.
trainX_nominal, valX_nominal = prepare_data()
# Number of training windows (size of the leading axis).
n_sequences = trainX_nominal.shape[0]


(54000, 4)
(54000, 4) (5400, 4)
(300, 180, 4)
(30, 180, 4)


In [29]:
from keras.layers import LSTM, RepeatVector, TimeDistributed
from tensorflow_core.python.keras.models import Sequential

# Size of the fixed-length embedding produced by the encoder.
latent_dim = 20

# Encoder: a single LSTM reads the whole (sequence_length, n_features) window
# and returns only its final output, which serves as the embedding.
inputs = Input(shape=(sequence_length,n_features))
x = LSTM(latent_dim, activation='relu')(inputs)

embeddings = x

encoder = Model(inputs,embeddings)
encoder.summary()

# Decoder: repeat the embedding once per time step, run it through an LSTM
# that emits a vector at every step, and project each step back to the input
# feature dimension.
latent_inputs = Input(shape=(latent_dim,))
x = RepeatVector(sequence_length)(latent_inputs)
x = LSTM(100, activation='relu', return_sequences=True)(x)
# Use n_features rather than a literal 4 so the reconstruction width always
# matches the encoder input width.
output = TimeDistributed(Dense(n_features))(x)

decoder = Model(latent_inputs, output)
decoder.summary()
output = decoder(encoder.output)


# End-to-end autoencoder: input window -> embedding -> reconstructed window,
# trained on mean squared reconstruction error.
model = Model(inputs, output)
model.summary()
model.compile(optimizer='adam', loss='mse')


# Disabled alternative (kept for reference, never executed): a deeper stacked-LSTM encoder/decoder.
# inputs = Input(shape=(sequence_length,n_features))
# x = inputs
# 
# for i in range(2):
#     x = LSTM(sequence_length, return_sequences=True)(x)
# 
# lstm_state_last, state_h, state_c = LSTM(sequence_length, return_state=True)(x)
# shape = K.int_shape(lstm_state_last)
# embeddings = Dense(latent_dim)(lstm_state_last)
# 
# encoder = Model(inputs, embeddings)
# encoder.summary()
# 
# latent_inputs = Input(shape=(latent_dim,), name='latent_inputs')
# x = Dense(sequence_length)(latent_inputs)
# x = RepeatVector(sequence_length)(x)
# 
# for i in range(1):
#     x = LSTM(sequence_length, return_sequences=True)(x)
# 
# outputs = LSTM(n_features, return_sequences=True)(x)
# 
# decoder = Model(latent_inputs, outputs)
# decoder.summary()
# 
# outputs = decoder(encoder.outputs)
# vae = Model(inputs, outputs)
# 
# vae.compile(optimizer='rmsprop', loss='mse')

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_14 (InputLayer)        (None, 180, 4)            0         
_________________________________________________________________
lstm_27 (LSTM)               (None, 20)                2000      
Total params: 2,000
Trainable params: 2,000
Non-trainable params: 0
_________________________________________________________________


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_15 (InputLayer)        (None, 20)                0         
_________________________________________________________________
repeat_vector_10 (RepeatVect (None, 180, 20)           0         
_________________________________________________________________
lstm_28 (LSTM)               (None, 180, 100)          48400     
_________________________________________________________________
time_distributed_8 (TimeDist (None, 180, 4)            404       
Total params: 48,804
Trainable params: 48,804
Non-trainable params: 0
_________________________________________________________________
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_14 (InputLayer)        (None, 180, 4)            0         
_________________________________________________________________
lstm_2

In [30]:

# Train the autoencoder to reproduce its own input. The held-out nominal
# windows are passed as validation data so each epoch also reports val_loss;
# they are not used for weight updates.
# NOTE(review): batch_size is tied to sequence_length (180 windows per
# gradient step), which looks coincidental rather than intended - confirm.
model.fit(x=trainX_nominal,
        y=trainX_nominal,
        validation_data=(valX_nominal, valX_nominal),
        epochs=20,
        batch_size=sequence_length)

Epoch 1/20






Epoch 2/20






Epoch 3/20






Epoch 4/20






Epoch 5/20






Epoch 6/20






Epoch 7/20






Epoch 8/20






Epoch 9/20






Epoch 10/20






Epoch 11/20






Epoch 12/20






Epoch 13/20






Epoch 14/20






Epoch 15/20






Epoch 16/20






Epoch 17/20






Epoch 18/20






Epoch 19/20






Epoch 20/20






<keras.callbacks.History at 0x1dce3173780>

In [31]:
def return_mask(num, labels):
    """Return the positions in ``labels`` whose value equals ``num``.

    Args:
        num: Value to look for.
        labels: Array-like of labels, 1-D or N-D.

    Returns:
        For 1-D ``labels``: a 1-D integer array of matching indices.
        For N-D ``labels``: a ``(n_matches, labels.ndim)`` integer array with
        one index tuple per row, exactly as ``np.argwhere`` produces it.

        The row axis is always kept, including when there is exactly one
        match. The previous ``np.squeeze`` dropped it in that case, which
        broke callers that slice the result with ``[:, 0]``.
    """
    hits = np.argwhere(np.asarray(labels) == num)
    # Drop only the redundant trailing axis of the 1-D case; never the row axis.
    if hits.ndim == 2 and hits.shape[1] == 1:
        return hits[:, 0]
    return hits


# Load the label file, discard the saved index column and keep the values as
# a plain array.
# NOTE(review): presumably one class id (0-8) per training sequence - confirm.
labels = np.array(
    pd.read_csv("Data/Boat_nominal_data/Boat_mix_labels.csv").drop(columns="Unnamed: 0")
)

# For every class id, collect the row indices of the sequences carrying it.
masks = []
for num in range(0, 9):
    masks.append(return_mask(num, labels)[:, 0])

# Embed every training window with the encoder half of the autoencoder.
encodings = encoder.predict(trainX_nominal)
print(encodings.shape)

# Scatter the first two embedding dimensions, one colour per class.
for mask in masks:
    plt.scatter(encodings[mask, 0], encodings[mask, 1], alpha=0.5)
plt.title('Encodings')
plt.legend(labels=np.arange(0, 9))
plt.show()


(300, 20)


In [32]:

# Group the training windows by class using the index masks built earlier.
runs = []
for mask in masks:
    run_for_class = trainX_nominal[mask]
    print(run_for_class.shape)
    runs.append(run_for_class)

# Reconstruct and plot the first window of every class. Iterating over `runs`
# (instead of a hard-coded class count) keeps this in step with `masks`, and
# classes without any window are skipped instead of being silently ignored by
# a loop-and-break.
for class_id, class_runs in enumerate(runs):
    if len(class_runs) == 0:
        continue
    # Slicing with [:1] keeps the leading batch axis that predict() expects.
    run = class_runs[:1]
    rec = model.predict(run)
    rec = np.reshape(rec, (sequence_length, n_features))
    reconstruction_df = pd.DataFrame(rec, columns=boat_csv.columns)
    plt.plot(reconstruction_df["Lon"], reconstruction_df["Lat"])
    plt.title("Reconstruction of first sequence, class {}".format(class_id))
    # The model is trained on min-max scaled data, so its output is in that scale.
    plt.xlabel("Lon (scaled)")
    plt.ylabel("Lat (scaled)")
    plt.show()


(189, 180, 4)
(13, 180, 4)
(16, 180, 4)
(11, 180, 4)
(15, 180, 4)
(14, 180, 4)
(12, 180, 4)
(15, 180, 4)
(15, 180, 4)
