In [11]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm
from keras.layers import Input, Dense, Lambda, Flatten, Reshape
from keras.layers import Conv1D,UpSampling1D
from keras.models import Model
from keras import backend as K
from keras import metrics

# Setup the network parameters:
intermediate_dim = 300
latent_dim = 10
beta = 5
n_sequences = 200
n_features = 8

boat_csv = pd.read_csv("Data/Boat_nominal_data/Boat_nominal_long.csv")
boat_csv = boat_csv.drop(columns=["Unnamed: 0"])
scaler = StandardScaler()
normal_data = scaler.fit_transform(boat_csv)
print(normal_data.shape)

boat_val = pd.read_csv("Data/Boat_nominal_data/Boat_nom_validation.csv")
boat_val = boat_val.drop(columns=["Unnamed: 0"])
scaler = StandardScaler()
val_nom_data = scaler.fit_transform(boat_val)


boat_anom_csv = pd.read_csv("Data/Boat_anomalous_big.csv")
boat_anom_csv= boat_anom_csv.drop(columns=["Unnamed: 0"])    
scaler = StandardScaler()
anomalous_data = scaler.fit_transform(boat_anom_csv)
print(anomalous_data.shape)

boat_anom_val_csv = pd.read_csv("Data/Boat_anomalous_validation.csv")
boat_anom_val_csv= boat_anom_val_csv.drop(columns=["Unnamed: 0"])    
scaler = StandardScaler()
anomalous_val_data = scaler.fit_transform(boat_anom_val_csv)
print(anomalous_val_data.shape)

def prepare_sequences(data, batch_size, interval):
    samples = []
    for i in range(0,data.shape[0]- batch_size, interval):
        sample = data[i:i+batch_size]	
        samples.append(sample)

    sequences = np.array(samples)

    # Batch size (Number of samples time steps and number of features
    trainX = np.reshape(sequences, (len(sequences), batch_size, n_features))

    return trainX


def prepare_data():    
    trainX_nominal = prepare_sequences(normal_data,656,656) 
    print(trainX_nominal.shape)
    
    valX_nominal = prepare_sequences(val_nom_data, 656,656)
    print(valX_nominal.shape)
    
    trainX_anomalous = prepare_sequences(anomalous_data,6600,6600)
    print(trainX_anomalous.shape)  
    
    valX_anom = prepare_sequences(anomalous_val_data,6600,6600)
    print(valX_anom.shape)

    return trainX_nominal, valX_nominal, trainX_anomalous, valX_anom



trainX_nominal, valX_nominal, trainX_anomalous, valX_anom = prepare_data()



(430992, 8)


(72600, 8)
(13200, 8)
(656, 656, 8)
(10, 656, 8)
(10, 6600, 8)
(1, 6600, 8)


In [21]:


num_conv = 7
latent_dim = 10
intermediate_dim = 200
epsilon_std = 1.0
epochs = 5

x = Input(batch_shape=(None ,656, n_features))
conv_1 = Conv1D(filters=200,
                kernel_size=num_conv,
                padding='same')(x)
conv_2 = Conv1D(filters=100,
                kernel_size=num_conv,
                padding='same')(conv_1)
flat = Flatten()(conv_2) # Since we are passing flat data anyway, we probably don't need this.
hidden = Dense(intermediate_dim,name="Dense_Flatten", activation='relu')(flat)
z_mean = Dense(latent_dim, name='z_mean')(hidden)
z_log_var = Dense(latent_dim, name='z_var')(hidden)

def sampling(args):
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=(656,latent_dim),
                              mean=0., stddev=epsilon_std)
    return z_mean + K.exp(z_log_var- K.square(z_mean) - K.exp(z_log_var)) * epsilon 
    # the original VAE divides z_log_var with two -- why?


z = Lambda(sampling,input_shape=(656,latent_dim), 
           output_shape=(latent_dim,))([z_mean, z_log_var])

decoder_h = Dense(intermediate_dim, name='dense_after_sampling')
decoder_mean = Dense(n_features,name="decoder_output")

h_decoded = decoder_h(z)
x_decoded_mean = decoder_mean(h_decoded)
x_decoded_mean = Reshape([1,n_features])(x_decoded_mean)
x_decoded_mean = UpSampling1D(size=100)(x_decoded_mean)

def vae_loss(x, x_decoded_mean):
    xent_loss = n_features * metrics.mean_squared_error(x, x_decoded_mean)
    kl_loss = - 0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    return xent_loss + 10*kl_loss

vae = Model(x, x_decoded_mean)
vae.compile(optimizer='adam', loss=vae_loss) # 'rmsprop'
vae.summary()


vae.fit(x=trainX_nominal, y=trainX_nominal, batch_size=656, epochs=5)



__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_16 (InputLayer)           (None, 656, 8)       0                                            
__________________________________________________________________________________________________
conv1d_31 (Conv1D)              (None, 656, 200)     11400       input_16[0][0]                   
__________________________________________________________________________________________________
conv1d_32 (Conv1D)              (None, 656, 100)     140100      conv1d_31[0][0]                  
__________________________________________________________________________________________________
flatten_16 (Flatten)            (None, 65600)        0           conv1d_32[0][0]                  
__________________________________________________________________________________________________
Dense_Flat

Epoch 1/5


InvalidArgumentError: Incompatible shapes: [656,100,8] vs. [656,656,8]
	 [[{{node training_15/Adam/gradients/loss_15/up_sampling1d_16_loss/sub_grad/BroadcastGradientArgs}}]]