In [53]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import tensorflow as tf  # Version 1.0.0 (some previous versions are used in past commits)
from sklearn import metrics
import pandas as pd
# For one-hot encoding
from keras.utils import to_categorical 
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import os

In [54]:
# Import all labeled files
path = os.path.expanduser("~/multiPartyHRI/data/labeledData/")
fileInList = list(os.path.join(path+f) for f in os.listdir(path))
data = pd.concat((pd.read_csv(f) for f in fileInList))

# Import individual labeled files
# data = pd.read_csv('./data/processedData/10-07-20-08.csv')
# data2 = pd.read_csv('./data/processedData/12-07-18-08.csv')
# data = pd.concat([data1, data2])

In [3]:
# Concise approach for standardizing data with pandas
# standardizedData = (data - data.mean()) / data.std()

## Do some basic processing to labeled data

In [55]:
# Convert timestamp from string format to float format
def timeStampToFloat(stringTime):
    splitTime = stringTime.split(':')
    hour = float(splitTime[0]) * 3600
    minutes = float(splitTime[1]) * 60
    return hour + minutes + float(splitTime[2])

data['timeStamp'] = data['timeStamp'].apply(timeStampToFloat) # 

In [56]:
# Create a list of features and remove labels i.e. "Activity" from it
featureNames = list(data.columns)
featureNames.remove("Activity")
# featureNames.remove("timeStamp")
# Remove features such as face_engaged, face_glasses, etc..
len(featureNames)

# Normalize features
def feature_normalize(dataset):
    mu = np.mean(dataset,axis = 0)
    sigma = np.std(dataset,axis = 0)
    return (dataset - mu)/sigma

# Drop any 'na' from data
data.dropna(axis=0, how='any', inplace= True)

for i in range(1,len(featureNames)):
    if featureNames[i] == 'timeStamp': # Timestamp is not normalized
        pass
    else:
        data[featureNames[i]] = feature_normalize(data[featureNames[i]]) 


In [57]:
# Useful Constants

# Those are separate normalised input features for the neural network
INPUT_SIGNAL_TYPES = list(data.columns)
INPUT_SIGNAL_TYPES = INPUT_SIGNAL_TYPES.remove('Activity')

# Output classes to learn how to classify
LABELS = [
    "Approaching",
    "Interacting",
    "Leaving",
    "Uninterested"
] 

In [58]:
train_size = int(len(data) * 0.63)
train, test = data[0:train_size], data[train_size:len(data)]
print('Observations: %d' % (len(data)))
print('Training Observations: %d' % (len(train)))
print('Testing Observations: %d' % (len(test)))

Observations: 19577
Training Observations: 12333
Testing Observations: 7244


In [59]:
X_train = train.drop('Activity', axis = 1)
y_train = train.Activity
# y_train = pd.get_dummies(train.Activity)
X_test = test.drop('Activity', axis = 1)
y_test = test.Activity
# y_test = pd.get_dummies(test.Activity)

TypeError: 'numpy.ndarray' object is not callable

In [60]:
def one_hot_encode(y_):
    values = np.array(y_test)
    label_encoder = LabelEncoder()
    integer_encoded = label_encoder.fit_transform(values)
#     print(integer_encoded)
    # binary encode
    onehot_encoder = OneHotEncoder(sparse=False)
    integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)
    return onehot_encoder.fit_transform(integer_encoded)
#     print(onehot_encoded)

In [63]:
encodedYTest = one_hot_encode(y_test)

In [64]:
encodedYTest.shape

(7244, 4)

In [30]:
windowSize = 100
numOfRowsToDrop = X_train.shape[0] % windowSize
X_train = X_train[:-numOfRowsToDrop]
y_train = y_train[:-numOfRowsToDrop]
X_test = X_test[:-numOfRowsToDrop]
y_test = y_test[:-numOfRowsToDrop]

# Get data values only
X_train = X_train.values()
X_test = X_test.values()

# Reshape input 
nCut = int(Xtrain.shape[0] / windowSize)
X_train = np.array(np.split(X_trainMatrix, pCut, axis=0))
print(X_train.shape)
X_test = np.array(np.split(X_trainMatrix, pCut, axis=0))
print(X_test.shape)


In [32]:
X_trainMatrix = X_train.as_matrix()

In [33]:
pCut = int(X_trainMatrix.shape[0] / 100)  

In [34]:
X_trainMatrix = np.array(np.split(X_trainMatrix, pCut, axis=0))

In [40]:
len(X_trainMatrix)

76

## Chevalier LSTM Model

In [41]:
# Input Data 

training_data_count = len(X_train)  # 7352 training series (with 50% overlap between each serie)
test_data_count = len(X_test)  # 2947 testing series
n_steps = len(X_train[0])  # 128 timesteps per series
n_input = len(X_train[0][0])  # 9 input parameters per timestep


# LSTM Neural Network's internal structure

n_hidden = 32 # Hidden layer num of features
n_classes = 6 # Total classes (should go up, or should go down)


# Training 

learning_rate = 0.0025
lambda_loss_amount = 0.0015
training_iters = training_data_count * 300  # Loop 300 times on the dataset
batch_size = 1500
display_iter = 30000  # To show test set accuracy during training


# Some debugging info

print("Some useful info to get an insight on dataset's shape and normalisation:")
print("(X shape, y shape, every X's mean, every X's standard deviation)")
print(X_test.shape, y_test.shape, np.mean(X_test), np.std(X_test))
print("The dataset is therefore properly normalised, as expected, but not yet one-hot encoded.")

KeyError: 0