In [1]:
import os
import pandas as pd
import math

def magnitude(X, Y, Z):
    return math.sqrt(float(X**2) + float(Y**2) + float(Z**2))


path = 'gestures-dataset'

dataset = None

for subject in os.listdir(path):
    if os.path.isfile(os.path.join(path, subject)):
        continue
    if subject in ('U01', 'U02', 'U03', 'U04', 'U05', 'U06', 'U07', 'U08'):
        for gesture in os.listdir(os.path.join(path, subject)):
            if os.path.isfile(os.path.join(path, subject, gesture)):
                continue
            gesture = str(gesture)
            for samplefile in os.listdir(os.path.join(path, subject, gesture)):
                if os.path.isfile(os.path.join(path, subject, gesture, samplefile)):
                    df = pd.read_csv(os.path.join(path, subject, gesture, samplefile), \
                        sep = ' ', \
                        names = ['System.currentTimeMillis()', \
                        'System.nanoTime()', \
                        'sample.timestamp', \
                        'X', \
                        'Y', \
                        'Z' \
                        ])
                    df = df[["sample.timestamp", "X", "Y", "Z"]]
                                        
                    start = df["sample.timestamp"][0]
                    df["sample.timestamp"] -= start
                    df["sample.timestamp"] /= 10000000
                    df["subject"] = subject
                    df["gesture"] = gesture
                    df["sample"] = str(samplefile[:-4])
                    #print(df)
                    if dataset is None:
                        dataset = df.copy()
                    else:
                        dataset = pd.concat([dataset, df])

dataset = dataset.sort_values(by=['gesture','subject','sample','sample.timestamp'])
data = dataset
#print(dataset)
print(dataset.head(10))
print(dataset.tail(10))

                    
            
            

   sample.timestamp         X         Y          Z subject gesture sample
0               0.0  1.532289 -0.919373  10.113108     U01      01     01
1              11.0  0.919373 -0.919373   9.959879     U01      01     01
2              22.0  1.838747 -0.153229  10.726024     U01      01     01
3              33.0  5.822699  3.371036  10.879252     U01      01     01
4              44.0  6.435614  1.532289   9.193734     U01      01     01
5              55.0  2.758120 -7.967903   9.193734     U01      01     01
6              66.0  0.612916 -6.588843   8.887277     U01      01     01
7              77.0  0.000000 -2.145205   9.653421     U01      01     01
8              88.0 -0.153229 -1.685518   9.653421     U01      01     01
9              99.0  0.306458 -0.306458   9.959879     U01      01     01
    sample.timestamp         X         Y         Z subject gesture sample
10             110.0 -1.991976 -8.887277  7.201759     U08      20     20
11             121.0 -0.459687 -2.9113

In [2]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
dataset_scaled = None

for i, gesture in enumerate(['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20']):
    for j, subject in enumerate(['U01', 'U02', 'U03', 'U04', 'U05', 'U06', 'U07', 'U08']):
        for k, sample in enumerate(['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20']):
                    
            df = dataset[dataset['gesture']==gesture]
            df = df[df['subject']==subject]
            df = df[df['sample']==sample]
            df.sort_values(by=['sample.timestamp'])

            sc = scaler
            sc = sc.fit_transform(df[["X", "Y", "Z"]])
            sc = pd.DataFrame(data=sc, columns=["X", "Y", "Z"])
            df["X"] = sc["X"]
            df["Y"] = sc["Y"]
            df["Z"] = sc["Z"]
            #df["magnitude"] = sc.apply(lambda row: magnitude(row['X'], row['Y'], row['Z']), axis=1)
            if dataset_scaled is None:
                dataset_scaled = df.copy()
            else:
                dataset_scaled = pd.concat([dataset_scaled, df])
                
#print(dataset_scaled)
data = dataset_scaled
print(dataset_scaled.head(10))
print(dataset_scaled.tail(10))



   sample.timestamp         X         Y         Z subject gesture sample
0               0.0  0.046662  0.178172  0.383203     U01      01     01
1              11.0 -0.243681  0.178172  0.139347     U01      01     01
2              22.0  0.191834  0.455329  1.358630     U01      01     01
3              33.0  2.079060  1.730249  1.602485     U01      01     01
4              44.0  2.369403  1.065073 -1.079937     U01      01     01
5              55.0  0.627347 -2.371668 -1.079937     U01      01     01
6              66.0 -0.388851 -1.872786 -1.567648     U01      01     01
7              77.0 -0.679194 -0.265279 -0.348367     U01      01     01
8              88.0 -0.751780 -0.098985 -0.348367     U01      01     01
9              99.0 -0.534023  0.399897  0.139347     U01      01     01
    sample.timestamp         X         Y         Z subject gesture sample
10             110.0 -0.487195 -1.474219 -1.639716     U08      20     20
11             121.0 -0.109525 -0.296052  0.43587

In [3]:
import numpy as np

dataset_cleaned = None

for i, gesture in enumerate(dataset_scaled['gesture'].unique()):
    df_gesture = dataset_scaled[dataset_scaled['gesture']==gesture]
    for j, subject in enumerate(df_gesture['subject'].unique()):
        df_subject = df_gesture[df_gesture['subject']==subject]
        
        time_mean = df_subject.groupby(["gesture","subject", "sample"]).count().groupby(["gesture","subject"]).agg({'sample.timestamp': ['mean']})
        time_std = df_subject.groupby(["gesture","subject", "sample"]).count().groupby(["gesture","subject"]).agg({'sample.timestamp': ['std']})
        time_max = time_mean['sample.timestamp'].iloc[0]['mean'] + 1.0 * time_std['sample.timestamp'].iloc[0]['std']
        #print(time_max)
        time_min = time_mean['sample.timestamp'].iloc[0]['mean'] - 1.0 * time_std['sample.timestamp'].iloc[0]['std']
        #print(time_min)
        for i, sample in enumerate(df_subject['sample'].unique()):
            df_sample_count = df_subject[df_subject['sample']==sample].count()['sample.timestamp']
            #print(df_sample_count)
            if df_sample_count < time_min or df_sample_count > time_max:
                df_subject = df_subject[df_subject['sample'] != sample]
                
        if dataset_cleaned is None:
            dataset_cleaned = df_subject.copy()
        else:
            dataset_cleaned = pd.concat([dataset_cleaned, df_subject])

data = dataset_cleaned
print(dataset_cleaned.head(10))
print(dataset_cleaned.tail(10))


   sample.timestamp         X         Y         Z subject gesture sample
0               0.0  0.046662  0.178172  0.383203     U01      01     01
1              11.0 -0.243681  0.178172  0.139347     U01      01     01
2              22.0  0.191834  0.455329  1.358630     U01      01     01
3              33.0  2.079060  1.730249  1.602485     U01      01     01
4              44.0  2.369403  1.065073 -1.079937     U01      01     01
5              55.0  0.627347 -2.371668 -1.079937     U01      01     01
6              66.0 -0.388851 -1.872786 -1.567648     U01      01     01
7              77.0 -0.679194 -0.265279 -0.348367     U01      01     01
8              88.0 -0.751780 -0.098985 -0.348367     U01      01     01
9              99.0 -0.534023  0.399897  0.139347     U01      01     01
    sample.timestamp         X         Y         Z subject gesture sample
10             110.0 -0.487195 -1.474219 -1.639716     U08      20     20
11             121.0 -0.109525 -0.296052  0.43587

In [4]:
import numpy as np

dataset_timecut = None

for i, gesture in enumerate(dataset_cleaned['gesture'].unique()):
    df_gesture = dataset_cleaned[dataset_cleaned['gesture']==gesture]
    for j, subject in enumerate(df_gesture['subject'].unique()):
        df_subject = df_gesture[df_gesture['subject']==subject] 
        time_max = 19 # 18 * 11 = 198
        for i, sample in enumerate(df_subject['sample'].unique()):
            df_sample = df_subject[df_subject['sample']==sample]
            df_sample_count = df_sample.count()['sample.timestamp']
            #print(df_sample_count)
            if df_sample_count >= time_max:
                df_sample = df_sample[df_sample['sample.timestamp'] <= (11 * (time_max-1))]
                df_sample_count = df_sample.count()['sample.timestamp']
                #print(df_sample_count)
            elif df_sample_count < time_max:
                for tmp in range(df_sample_count * 11, (time_max) * 11, 11):
                    df = pd.DataFrame([[tmp, 0.0, 0.0, 0.0, gesture, subject, sample]], columns=['sample.timestamp', 'X', 'Y', 'Z', 'gesture', 'subject', 'sample'])
                    df_sample = df_sample.append(df, ignore_index=True)            
            #print(df_sample)
            df_sample_count = df_sample.count()['sample.timestamp']
            #print(df_sample_count)
            if df_sample_count != time_max:
                continue
            if dataset_timecut is None:
                dataset_timecut = df_sample.copy()
            else:
                dataset_timecut = pd.concat([dataset_timecut, df_sample])

data = dataset_timecut
print(dataset_timecut.head(10))
print(dataset_timecut.tail(10))

   sample.timestamp         X         Y         Z subject gesture sample
0               0.0  0.046662  0.178172  0.383203     U01      01     01
1              11.0 -0.243681  0.178172  0.139347     U01      01     01
2              22.0  0.191834  0.455329  1.358630     U01      01     01
3              33.0  2.079060  1.730249  1.602485     U01      01     01
4              44.0  2.369403  1.065073 -1.079937     U01      01     01
5              55.0  0.627347 -2.371668 -1.079937     U01      01     01
6              66.0 -0.388851 -1.872786 -1.567648     U01      01     01
7              77.0 -0.679194 -0.265279 -0.348367     U01      01     01
8              88.0 -0.751780 -0.098985 -0.348367     U01      01     01
9              99.0 -0.534023  0.399897  0.139347     U01      01     01
    sample.timestamp         X         Y         Z subject gesture sample
9               99.0 -0.487195 -1.927360  0.435874     U08      20     20
10             110.0 -0.487195 -1.474219 -1.63971

In [5]:
import numpy as np
import math

features_sample = None

for i, gesture in enumerate(dataset_timecut['gesture'].unique()):
    df_gesture = dataset_timecut[dataset_timecut['gesture']==gesture]
    for j, subject in enumerate(df_gesture['subject'].unique()):
        df_subject = df_gesture[df_gesture['subject']==subject]
        for k, sample in enumerate(df_subject['sample'].unique()):
            df_sample = df_subject[df_subject['sample']==sample]
            df_sample.sort_values(by=['sample.timestamp'])

            df_feature = pd.DataFrame(columns = ["gesture","subject", "sample"])
            df_feature = df_feature.append({'gesture' :gesture, 'subject' : subject, 'sample' : sample, \
                                        'meanx': df_sample["X"].mean(), 'meany': df_sample["Y"].mean(), 'meanz': df_sample["Z"].mean(), \
                                        'stdx': df_sample["X"].std(), 'stdy': df_sample["Y"].std(), 'stdz': df_sample["Z"].std(), \
                                        'madx': df_sample["X"].mad(), 'mady': df_sample["Y"].mad(), 'madz': df_sample["Z"].mad(), \
                                        'semx': df_sample["X"].sem(), 'semy': df_sample["Y"].sem(), 'semz': df_sample["Z"].sem(), \
                                        'kurtx': df_sample["X"].kurt(), 'kurty': df_sample["Y"].kurt(), 'kurtz': df_sample["Z"].kurt(), \
                                        'skewx': df_sample["X"].skew(), 'skewy': df_sample["Y"].skew(), 'skewz': df_sample["Z"].skew(), \
                                        'corr9x': df_sample["X"].autocorr(9), 'corr9y': df_sample["Y"].autocorr(9), 'corr9z': df_sample["Z"].autocorr(9), \
                                        'corr6x': df_sample["X"].autocorr(6), 'corr9y': df_sample["Y"].autocorr(6), 'corr9z': df_sample["Z"].autocorr(6), \
                                        'corr3x': df_sample["X"].autocorr(3), 'corr9y': df_sample["Y"].autocorr(3), 'corr9z': df_sample["Z"].autocorr(3), \
                                        'corr2x': df_sample["X"].autocorr(2), 'corr9y': df_sample["Y"].autocorr(2), 'corr9z': df_sample["Z"].autocorr(2), \
                                       }, \
                                       ignore_index=True)
            if features_sample is None:
                features_sample = df_feature.copy()
            else:
                features_sample = pd.concat([features_sample, df_feature], ignore_index=True)

print(features_sample.head(10))
print(features_sample.tail(10))


  gesture subject sample    corr2x    corr3x    corr6x    corr9x    corr9y  \
0      01     U01     01  0.160966 -0.079454 -0.480912 -0.643806 -0.324033   
1      01     U01     02  0.315309  0.012690 -0.132506 -0.596610 -0.084808   
2      01     U01     03  0.218859 -0.130593 -0.504833 -0.462442 -0.010936   
3      01     U01     04  0.388717  0.070343 -0.527360 -0.797602  0.028793   
4      01     U01     05  0.394696  0.135435 -0.191448 -0.709376  0.139838   
5      01     U01     06  0.288933 -0.111583 -0.543804 -0.531821 -0.044989   
6      01     U01     08  0.294426  0.037376 -0.207872 -0.592945 -0.025232   
7      01     U01     09  0.329980  0.010261 -0.577985 -0.561376 -0.199393   
8      01     U01     10  0.445986  0.197228 -0.382248 -0.533073  0.143473   
9      01     U01     11  0.287218 -0.083508 -0.701710 -0.408478 -0.283030   

     corr9z     kurtx  ...         meanz      semx      semy      semz  \
0  0.068059  3.153300  ...  1.402387e-16  0.202326  0.202326  0.202

In [6]:
from keras.models import Sequential
from keras.layers import Bidirectional
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import Dropout
from keras.optimizers import adam_v2
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import StratifiedGroupKFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from keras.utils import np_utils
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
import numpy
 
# fix random seed for reproducibility
seed = 1000
numpy.random.seed(seed)
# create the dataset
def get_dataset():
	X_train = []
	Y_train = []
	groups = []
	for i, gesture in enumerate(dataset_timecut['gesture'].unique()):
		df_gesture = dataset_timecut[dataset_timecut['gesture']==gesture]
		for j, subject in enumerate(df_gesture['subject'].unique()):
			df_subject = df_gesture[df_gesture['subject']==subject]
			for k, sample in enumerate(df_subject['sample'].unique()):
				df_sample = df_subject[df_subject['sample']==sample]
				accel_vector = []
				for index, row in df_sample.sort_values(by='sample.timestamp').iterrows():
					accel_vector.append([row['X'],row['Y'],row['Z']])
				accel_vector = np.asarray(accel_vector)
				X_train.append(accel_vector)
				Y_train.append(gesture)
				groups.append(subject)
	X_train = np.asarray(X_train)
	Y_train = LabelEncoder().fit_transform(Y_train)
	print(Y_train)
	return X_train, Y_train, groups

# Function to create model, required for KerasClassifier
def create_model(epochs=32, dropout_rate=0.2, units=32, activation='relu', optimizer=adam_v2.Adam(learning_rate=0.001)):
	model = Sequential()
	model.add(
    	Bidirectional(
      		LSTM(
        		units=units, 
        		input_shape=[19, 3]
			)
		)
    )
	model.add(Dropout(rate=dropout_rate))
	model.add(Dense(units=units, activation=activation))
	model.add(Dense(20, activation='softmax'))
	model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])	

	return model

model = KerasClassifier(build_fn=create_model, verbose=0)
cv = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=1000)

# get the dataset
X, y, g = get_dataset()
cv = cv.split(X, y, g)

print(X.shape)
print(y.shape)

# define the grid search parameters
batch_size = [16, 32, 64, 128, 256]
epochs = [32, 64, 128, 256]
optimizer = ['RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
activation = ['softmax', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear']
dropout_rate = [0.2, 0.5, 0.8]
units = [32, 64, 128, 256]
param_grid = dict(epochs=epochs, batch_size=batch_size, units=units, dropout_rate=dropout_rate, optimizer=optimizer, activation=activation)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1, cv=cv, verbose=3)
grid_result = grid.fit(X, y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))








[ 0  0  0 ... 19 19 19]
(2400, 19, 3)
(2400,)
Fitting 5 folds for each of 11520 candidates, totalling 57600 fits


exception calling callback for <Future at 0x231ebdd6490 state=finished raised TerminatedWorkerError>
Traceback (most recent call last):
  File "c:\dev\repos\CSCE5380\P1\env\lib\site-packages\joblib\externals\loky\_base.py", line 625, in _invoke_callbacks
    callback(self)
  File "c:\dev\repos\CSCE5380\P1\env\lib\site-packages\joblib\parallel.py", line 359, in __call__
    self.parallel.dispatch_next()
  File "c:\dev\repos\CSCE5380\P1\env\lib\site-packages\joblib\parallel.py", line 792, in dispatch_next
    if not self.dispatch_one_batch(self._original_iterator):
  File "c:\dev\repos\CSCE5380\P1\env\lib\site-packages\joblib\parallel.py", line 859, in dispatch_one_batch
    self._dispatch(tasks)
  File "c:\dev\repos\CSCE5380\P1\env\lib\site-packages\joblib\parallel.py", line 777, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "c:\dev\repos\CSCE5380\P1\env\lib\site-packages\joblib\_parallel_backends.py", line 531, in apply_async
    future = self._workers.sub

TerminatedWorkerError: A worker process managed by the executor was unexpectedly terminated. This could be caused by a segmentation fault while calling the function or by an excessive memory usage causing the Operating System to kill the worker.
