In [55]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm
from keras.layers import Input, Dense, Lambda, Flatten, Reshape
from keras.layers import Conv1D
from keras.models import Model
from keras import backend as K


n_features = 7
run_list = []

def read_csvs():
    scaler = StandardScaler()
    for i in range(8):
        title = "Data/Spain_csv/"+ str(i+1) + "^_RUN.csv"
        data = pd.read_csv(title)
        data = data.drop(columns=['Unnamed: 0'])
        v = int(len(data)/100) * 100
        data = data[:v]
        data = scaler.fit_transform(data)
        run_list.append(data)
       
        
read_csvs()

def prepare_sequences(data, batch_size, interval):
    samples = []
    for i in range(0,data.shape[0] - batch_size, interval):
        sample = data[i:i+batch_size]	
        samples.append(sample)

    sequences = np.array(samples)

    trainX = np.reshape(sequences, (len(sequences), batch_size, n_features))
    print(trainX.shape)
    return trainX


for i, elem in enumerate(run_list):
    run_list[i] = prepare_sequences(elem, 500, 500)
    
    
data_list = np.array(run_list)

val_data = data_list[-1]

data_list = np.ravel(data_list[:7])
print(data_list[0].shape)
print(val_data.shape)


(35, 500, 7)
(34, 500, 7)
(31, 500, 7)
(35, 500, 7)
(37, 500, 7)
(34, 500, 7)
(30, 500, 7)
(35, 500, 7)
(35, 500, 7)
(35, 500, 7)


In [54]:
columns = ['seconds','x','y','z','speed','acceleration','degrees']


for i in run_list[1]:
    df = pd.DataFrame(i, columns=columns)
    plt.plot(df["x"], df['y'])
plt.show()

In [58]:
from keras import objectives
from keras.layers import RepeatVector, MaxPooling1D, LSTM, UpSampling1D

input_shape = (500, n_features,)
kernel_size = 7
filters = 200
latent_dim = 10
beta = 1
use_mse = True
load_weights = False


def sampling(args):
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon


inputs = Input(shape=input_shape, name='encoder_input')
x = inputs

for i in range(1):
    x = LSTM(100, return_sequences=True)(x)

shape = K.int_shape(x)

x = LSTM(latent_dim, return_sequences=False)(x)
# for i in range(2):
#     x = Conv1D(filters=filters,
#                kernel_size=7,
#                padding='same')(x)
#     x = MaxPooling1D(2)(x)
#     filters = int(filters / 2)

# 
# x = Flatten()(x)
z_mean = Dense(latent_dim, name='z_mean')(x)
z_log_var = Dense(latent_dim, name='z_log_var')(x)

z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])

encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
encoder.summary()

filters = filters * 2

latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
x = Dense(500 * latent_dim, name='Dense_after_sampling')(latent_inputs)
x = Reshape((500, latent_dim))(x)

# for i in range(2):
#     x = Conv1D(filters=filters,kernel_size=7, padding='same')(x)
#     x = UpSampling1D(size=2)(x)
#     filters = filters * 2
    
print(K.int_shape(x))   
# x = RepeatVector(100)(x)    
# outputs = LSTM(100)(x)
outputs = Dense(7)(x)
print(K.int_shape(outputs))


decoder = Model(latent_inputs, outputs, name='decoder')
decoder.summary()

outputs = decoder(encoder(inputs)[2])
vae = Model(inputs, outputs, name='vae')

from keras.losses import mse

reconstruction_loss = mse(K.flatten(inputs), K.flatten(outputs))
kl_loss = - 0.5 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var))
loss = reconstruction_loss + beta*kl_loss
vae.add_loss(loss)

vae.compile(optimizer='rmsprop', metrics= ['accuracy'])


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_input (InputLayer)      (None, 500, 7)       0                                            
__________________________________________________________________________________________________
lstm_15 (LSTM)                  (None, 500, 100)     43200       encoder_input[0][0]              
__________________________________________________________________________________________________
lstm_16 (LSTM)                  (None, 10)           4440        lstm_15[0][0]                    
__________________________________________________________________________________________________
z_mean (Dense)                  (None, 10)           110         lstm_16[0][0]                    
__________________________________________________________________________________________________
z_log_var 

In [60]:
from keras.callbacks import ModelCheckpoint

epochs = 1

checkpointer = ModelCheckpoint(filepath="Models/Weights/LSTM_weigths_spain.hdf5", 
                               verbose=1, save_best_only=True)

for i, run in enumerate(run_list):
    vae.fit(x=run, epochs=epochs,
            batch_size=500,
            callbacks=[checkpointer],
            validation_data=(val_data, None))
    vae.load_weights('Models/Weights/LSTM_weigths_spain.hdf5')



Train on 35 samples, validate on 35 samples
Epoch 1/1





Epoch 00001: val_loss improved from inf to 1.00264, saving model to Models/Weights/LSTM_weigths_spain.hdf5
Train on 34 samples, validate on 35 samples
Epoch 1/1





Epoch 00001: val_loss improved from 1.00264 to 1.00133, saving model to Models/Weights/LSTM_weigths_spain.hdf5
Train on 31 samples, validate on 35 samples
Epoch 1/1





Epoch 00001: val_loss improved from 1.00133 to 1.00099, saving model to Models/Weights/LSTM_weigths_spain.hdf5
Train on 35 samples, validate on 35 samples
Epoch 1/1





Epoch 00001: val_loss improved from 1.00099 to 1.00006, saving model to Models/Weights/LSTM_weigths_spain.hdf5
Train on 37 samples, validate on 35 samples
Epoch 1/1





Epoch 00001: val_loss improved from 1.00006 to 0.99905, saving model to Models/Weights/LSTM_weigths_spain.hdf5
Train on 34 samples, validate on 35 samples
Epoch 1/1





Epoch 00001: val_loss did not improve from 0.99905
Train on 30 samples, validate on 35 samples
Epoch 1/1





Epoch 00001: val_loss improved from 0.99905 to 0.99881, saving model to Models/Weights/LSTM_weigths_spain.hdf5
Train on 35 samples, validate on 35 samples
Epoch 1/1





Epoch 00001: val_loss did not improve from 0.99881


In [28]:
pred = vae.predict(run_list[0])

print(pred.shape)


(584, 100, 7)


In [41]:

data = []
for i in range(pred.shape[0]):
    data.append(pred[i][:30])     

data = np.array(data)
data = data.reshape(17500, 7)
print(data.shape)


ValueError: cannot reshape array of size 122640 into shape (17500,7)

In [75]:
columns = ['seconds','x','y','z','speed','acceleration','degrees']


data_df = pd.DataFrame(data, columns=columns )
print(data_df)



        seconds         x         y         z     speed  acceleration  \
0      0.378276 -0.653099  0.866958 -0.302582  0.032526     -0.002846   
1      0.371704 -0.637209  0.893095 -0.286900  0.004704      0.037235   
2      0.348831 -0.683477  0.856158 -0.279485  0.053193      0.017735   
3      0.388676 -0.654716  0.845928 -0.290576  0.045179     -0.073679   
4      0.383645 -0.659170  0.859524 -0.298999 -0.077745      0.075377   
...         ...       ...       ...       ...       ...           ...   
17495  0.962260 -1.121237  1.357784 -0.147709 -0.082481      0.038414   
17496  0.963994 -1.125476  1.339069 -0.151385 -0.041723      0.029066   
17497  0.966044 -1.108822  1.355389 -0.139897 -0.043795      0.010309   
17498  0.951388 -1.129818  1.338037 -0.150493 -0.005272      0.022997   
17499  0.970920 -1.099926  1.356977 -0.140783 -0.038879     -0.018608   

        degrees  
0     -0.395030  
1     -0.391574  
2     -0.393896  
3     -0.405676  
4     -0.393763  
...         ...

In [84]:
plt.plot(data_df['degrees'])
plt.show()