In [20]:
%matplotlib inline

import pickle
import pandas as pd
import numpy as np
import os
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt

from pandas import concat
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.preprocessing import MinMaxScaler

from keras.models import Sequential
from keras.layers import LSTM, Dense, Input, Embedding, Masking

dataPath = '../PedestrianData/IntentionData'
finalPath = '../PedestrianData/final.pickle'


In [2]:
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """
    Frame a time series as a supervised learning dataset.
    Arguments:
        data: Sequence of observations as a list or NumPy array.
        n_in: Number of lag observations as input (X).
        n_out: Number of observations as output (y).
        dropnan: Boolean whether or not to drop rows with NaN values.
    Returns:
        Pandas DataFrame of series framed for supervised learning.
    """
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    # put it all together
    agg = concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

In [44]:
files = os.listdir(dataPath)
final = []
for i in range(len(files)):
    file = dataPath + "/" + files[i]
    
    # Reading trial csv to headers
    df = pd.read_csv(file, sep=",", header=None, 
                 names=["Time", 
                        "Pos_x", "Pos_y", "Pos_z", 
                        "Gaz_x", "Gaz_y", "Gaz_z", 
                        "Vel_x", "Vel_z",
                        "Acc_x", "Acc_z",
                        "Vel_Mag", "Acc_Mag",
                        "Vel_Ang", "Acc_Ang", "Gaz_Ang",
                        "Vel_Bin", "Acc_Bin", "Gaz_Bin", "Chg_Bin"          
                       ])
    
    # Transforming data to shifted data (for t+1, etc)
    data = series_to_supervised(df, 1, 2, False)
    
    # Sequence includes all of the features we want
    sequence = data[[
        'var2(t+1)', 'var4(t+1)', # Coordinate at t+1
        'var2(t)', 'var4(t)', # Position X and Z at t
        'var5(t)', 'var6(t)', 'var7(t)', # Gaze X Y Z
        'var8(t)', 'var9(t)', # Velocity X and Z
        'var10(t)', 'var11(t)', # Acceleration X and Z
        'var12(t)', 'var13(t)', # Velocity and Acceleration Magnitudes
        'var17(t)', 'var18(t)', 'var19(t)' # Vel, Acc, Gaz bins
    ]]
    
    # Replace NaNs with 0
    sequence.fillna(0, inplace=True)
    
    final.append(sequence)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  downcast=downcast, **kwargs)


KeyboardInterrupt: 

In [3]:
final_data = pickle.load(open(finalPath,"rb"))
final_data[1].head()


Unnamed: 0,var2(t+1),var4(t+1),var2(t),var4(t),var5(t),var6(t),var7(t),var8(t),var9(t),var10(t),var11(t),var12(t),var13(t),var17(t),var18(t),var19(t)
0,289.9,653.2,290.0,653.1,1.0,-0.2,-0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3
1,289.9,653.2,289.9,653.2,1.0,-0.2,-0.2,-0.905756,0.905756,-8.203941,8.203941,1.280933,11.602124,8.0,8.0,3
2,289.8,653.2,289.9,653.2,1.0,-0.2,-0.2,0.0,0.0,8.074304,-8.074304,0.0,11.418791,0.0,4.0,3
3,289.8,653.3,289.8,653.2,0.9,-0.2,-0.3,-0.90867,0.0,-8.25682,0.0,0.90867,8.25682,7.0,7.0,3
4,289.8,653.3,289.8,653.3,0.9,-0.2,-0.3,0.0,0.99746,9.063628,9.949274,0.99746,13.45873,1.0,1.0,3


In [25]:
final = []
for df in final_data:
    final.append(np.array(df))
final = np.array(final)

# Final is an array of crossing trials (sequences)
print("First trial:")
print(final[0])
print("Final shape: ", final.shape)

# Each trial is an array of timesteps
print("\nFirst row of trial:")
print(final[0][0])
print("Trial1 shape: ", final[0].shape)
print("Trial2 shape: ", final[1].shape)
print("Trial100 shape: ", final[101].shape)

ValueError: setting an array element with a sequence.

In [5]:
X = []
y = []
for arr in final:
    X.append(arr[:, 3:])
    y.append(arr[:, 1:3])
X = np.array(X)
y = np.array(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

print("Total sequences: ", X.shape)
print("Training sequences: ", X_train.shape)
print("Testing sequences: ", X_test.shape)

print("\nSequences are of different lengths:")
print("X_train-Trial1 shape: ", X_train[0].shape)
print("X_train-Trial2 shape: ", X_train[1].shape)


train_x = np.reshape(X_train[0],
                        (X_train[0].shape[0], 1, X_train[0].shape[1])) 
test_x = np.reshape(X_test[0], 
                       (X_test[0].shape[0], 1, X_test[0].shape[1]))

train_y = np.reshape(y_train[0],
                        (y_train[0].shape[0], 1, y_train[0].shape[1])) 
test_y = np.reshape(y_test[0], 
                       (y_test[0].shape[0], 1, y_test[0].shape[1]))


print(train_x.shape)
print(train_y.shape)
print(test_x.shape)
print(test_y.shape)

Total sequences:  (2333,)
Training sequences:  (1866,)
Testing sequences:  (467,)

Sequences are of different lengths:
X_train-Trial1 shape:  (143, 13)
X_train-Trial2 shape:  (158, 13)
(143, 1, 13)
(143, 1, 2)
(173, 1, 13)
(173, 1, 2)


In [43]:
from keras.preprocessing.sequence import pad_sequences

# pad sequences
padded = pad_sequences(final, dtype='float64', padding='post')
print("Before padding: ", final.shape, final.dtype)
print("After padding: ", padded.shape, padded.dtype)

X = []
y = []
for seq in padded:
    X.append(seq[:, 3:])
    y.append(seq[:, 1:3])
X = np.array(X)
y = np.array(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

print("\nX shape: ", X.shape)
print("y shape: ", y.shape)
print("X[0] shape: ", X[0].shape)
print("y[0] shape: ", y[0].shape)
print("\nX_train shape: ", X_train.shape)
print("y_train shape: ", y_train.shape)
print("X_train[0] shape: ", X_train[0].shape)
print("y_train[0] shape: ", y_train[0].shape)

X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1)) 
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1)) 
y_train = np.reshape(y_train, (y_train.shape[0], y_train.shape[1], 1)) 
y_test = np.reshape(y_test, (y_test.shape[0], y_test.shape[1], 1)) 

n_timesteps, n_features, n_outputs = 1, 13, 2
print(n_timesteps, n_features, n_outputs)

Before padding:  (2333,) object
After padding:  (2333, 1031, 16) float64

X shape:  (2333, 1031, 13)
y shape:  (2333, 1031, 2)
X[0] shape:  (1031, 13)
y[0] shape:  (1031, 2)

X_train shape:  (1866, 1031, 13)
y_train shape:  (1866, 1031, 2)
X_train[0] shape:  (1031, 13)
y_train[0] shape:  (1031, 2)


ValueError: cannot reshape array of size 25009998 into shape (1866,1031,1)

In [42]:
model = Sequential()
model.add(LSTM(32, activation='relu', return_sequences=True, input_shape=(n_timesteps, n_features)))
model.add(Dense(8, activation='relu'))
model.add(Dense(n_outputs))
model.compile(loss='mse', optimizer='adam')

# fit network
model.fit(X_train, y_train, epochs=50, batch_size=1, verbose=2)

ValueError: Error when checking input: expected lstm_16_input to have 3 dimensions, but got array with shape (824, 13)

In [160]:
model = Sequential()
model.add(LSTM(32, return_sequences=True, input_shape=(None, 13)))
model.add(LSTM(8, return_sequences=True))
model.add(TimeDistributed(Dense(2, activation='sigmoid')))


# For a mean squared error regression problem
model.compile(optimizer='rmsprop', loss='mse')

model.fit(X_train, y_train, epochs=100, batch_size=None, verbose=2)

__________________________________________________________________________________________
Layer (type)                            Output Shape                        Param #       
lstm_38 (LSTM)                          (None, None, 32)                    5888          
__________________________________________________________________________________________
lstm_39 (LSTM)                          (None, None, 8)                     1312          
__________________________________________________________________________________________
time_distributed_18 (TimeDistributed)   (None, None, 2)                     18            
Total params: 7,218
Trainable params: 7,218
Non-trainable params: 0
__________________________________________________________________________________________
None


ValueError: Error when checking input: expected lstm_38_input to have 3 dimensions, but got array with shape (1866, 1)

In [None]:
def seq_generator():
    while True:
        for arr in final:
            X_train = np.array(arr[:, 3:])
            y_train = np.array(arr[:, 1:3]) 
            X_train = X_train.reshape((X_train.shape[0]), 1, X_train.shape[1])
            y_train = y_train.reshape((y_train.shape[0]), 1, y_train.shape[1])
            yield X_train, y_train

model.fit_generator(seq_generator(), steps_per_epoch=30, epochs=50, verbose=1)