In [1]:
import os
import pandas as pd
import math

def magnitude(X, Y, Z):
    """Return the Euclidean length of the vector (X, Y, Z) as a float.

    Each component is squared first and the square is then converted to
    float before the three terms are summed and square-rooted.
    """
    x_sq = float(X**2)
    y_sq = float(Y**2)
    z_sq = float(Z**2)
    return math.sqrt(x_sq + y_sq + z_sq)


path = 'gestures-dataset'

# Only these subject directories are loaded; anything else under `path` is skipped.
SUBJECTS = ('U01', 'U02', 'U03', 'U04', 'U05', 'U06', 'U07', 'U08')
# Column names given to the headerless, space-separated sample files.
RAW_COLUMNS = [
    'System.currentTimeMillis()',
    'System.nanoTime()',
    'sample.timestamp',
    'X',
    'Y',
    'Z',
]

dataset = None

# Walk path/<subject>/<gesture>/<samplefile>, load every sample file into a
# frame and collect the frames in a list so they are concatenated only once
# (growing a DataFrame with pd.concat inside the loop is quadratic).
frames = []
for subject in os.listdir(path):
    subject_dir = os.path.join(path, subject)
    if os.path.isfile(subject_dir) or subject not in SUBJECTS:
        continue
    for gesture in os.listdir(subject_dir):
        gesture_dir = os.path.join(subject_dir, gesture)
        if os.path.isfile(gesture_dir):
            continue
        gesture = str(gesture)
        for samplefile in os.listdir(gesture_dir):
            sample_path = os.path.join(gesture_dir, samplefile)
            if not os.path.isfile(sample_path):
                continue
            df = pd.read_csv(sample_path, sep=' ', names=RAW_COLUMNS)
            # Explicit copy: the frame is modified below, so it must not be a
            # view/slice of the frame returned by read_csv.
            df = df[["sample.timestamp", "X", "Y", "Z"]].copy()
            if df.empty:
                # Nothing to re-base the timestamps on; skip empty files.
                continue

            # Make timestamps relative to the first row of the file, then rescale.
            start = df["sample.timestamp"].iloc[0]
            df["sample.timestamp"] -= start
            df["sample.timestamp"] /= 10000000
            df["subject"] = subject
            df["gesture"] = gesture
            # Sample id is the file name without its extension.
            df["sample"] = os.path.splitext(samplefile)[0]
            frames.append(df)

if not frames:
    raise ValueError("no gesture sample files found under {!r}".format(path))

dataset = pd.concat(frames)
dataset = dataset.sort_values(by=['gesture', 'subject', 'sample', 'sample.timestamp'])
data = dataset
print(dataset.head(10))
print(dataset.tail(10))

                    
            
            

   sample.timestamp         X         Y          Z subject gesture sample
0               0.0  1.532289 -0.919373  10.113108     U01      01     01
1              11.0  0.919373 -0.919373   9.959879     U01      01     01
2              22.0  1.838747 -0.153229  10.726024     U01      01     01
3              33.0  5.822699  3.371036  10.879252     U01      01     01
4              44.0  6.435614  1.532289   9.193734     U01      01     01
5              55.0  2.758120 -7.967903   9.193734     U01      01     01
6              66.0  0.612916 -6.588843   8.887277     U01      01     01
7              77.0  0.000000 -2.145205   9.653421     U01      01     01
8              88.0 -0.153229 -1.685518   9.653421     U01      01     01
9              99.0  0.306458 -0.306458   9.959879     U01      01     01
    sample.timestamp         X         Y         Z subject gesture sample
10             110.0 -1.991976 -8.887277  7.201759     U08      20     20
11             121.0 -0.459687 -2.9113

In [2]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
dataset_scaled = None

# Label values processed by this cell: 20 gestures x 8 subjects x 20 samples.
GESTURE_IDS = ['%02d' % n for n in range(1, 21)]
SUBJECT_IDS = ['U%02d' % n for n in range(1, 9)]
SAMPLE_IDS = ['%02d' % n for n in range(1, 21)]

# Restrict to the expected labels, then visit only the combinations that are
# actually present (an absent combination would hand the scaler an empty frame).
selected = dataset[
    dataset['gesture'].isin(GESTURE_IDS)
    & dataset['subject'].isin(SUBJECT_IDS)
    & dataset['sample'].isin(SAMPLE_IDS)
]

scaled_frames = []
# sort=True yields groups ordered by gesture, then subject, then sample.
for _, df in selected.groupby(['gesture', 'subject', 'sample'], sort=True):
    # Keep the sorted result (stable sort, so ties keep their current order).
    df = df.sort_values(by=['sample.timestamp'], kind='mergesort').copy()
    # Standardise X/Y/Z within this single repetition. The scaler returns a
    # plain array, so the values are written back by position and no index
    # alignment against df's row labels is involved.
    df[["X", "Y", "Z"]] = scaler.fit_transform(df[["X", "Y", "Z"]])
    scaled_frames.append(df)

# Single concatenation instead of growing the frame inside the loop.
dataset_scaled = pd.concat(scaled_frames)

data = dataset_scaled
print(dataset_scaled.head(10))
print(dataset_scaled.tail(10))



   sample.timestamp         X         Y         Z subject gesture sample
0               0.0  0.046662  0.178172  0.383203     U01      01     01
1              11.0 -0.243681  0.178172  0.139347     U01      01     01
2              22.0  0.191834  0.455329  1.358630     U01      01     01
3              33.0  2.079060  1.730249  1.602485     U01      01     01
4              44.0  2.369403  1.065073 -1.079937     U01      01     01
5              55.0  0.627347 -2.371668 -1.079937     U01      01     01
6              66.0 -0.388851 -1.872786 -1.567648     U01      01     01
7              77.0 -0.679194 -0.265279 -0.348367     U01      01     01
8              88.0 -0.751780 -0.098985 -0.348367     U01      01     01
9              99.0 -0.534023  0.399897  0.139347     U01      01     01
    sample.timestamp         X         Y         Z subject gesture sample
10             110.0 -0.487195 -1.474219 -1.639716     U08      20     20
11             121.0 -0.109525 -0.296052  0.43587

In [3]:
import numpy as np

dataset_cleaned = None
dataset_outliers = None

# Split `data` into repetitions whose row count lies within one standard
# deviation of the mean row count for the same (gesture, subject) pair
# (-> dataset_cleaned) and those outside that band (-> dataset_outliers).
cleaned_frames = []
outlier_frames = []

for gesture in data['gesture'].unique():
    df_gesture = data[data['gesture'] == gesture]
    for subject in df_gesture['subject'].unique():
        df_subject = df_gesture[df_gesture['subject'] == subject]

        # Number of rows per sample, kept in order of first appearance.
        rows_per_sample = df_subject.groupby('sample', sort=False)['sample.timestamp'].count()
        time_mean = rows_per_sample.mean()
        time_std = rows_per_sample.std()
        time_max = time_mean + 1.0 * time_std
        time_min = time_mean - 1.0 * time_std

        # With a single sample the std is NaN, both comparisons are False and
        # nothing is flagged.
        outlier_samples = [
            sample
            for sample, row_count in rows_per_sample.items()
            if row_count < time_min or row_count > time_max
        ]
        for sample in outlier_samples:
            outlier_frames.append(df_subject[df_subject['sample'] == sample])

        # Everything that was not flagged stays in the training set.
        cleaned_frames.append(df_subject[~df_subject['sample'].isin(outlier_samples)])

# Concatenate once; the results stay None when there was nothing to collect.
if cleaned_frames:
    dataset_cleaned = pd.concat(cleaned_frames)
if outlier_frames:
    dataset_outliers = pd.concat(outlier_frames)

data = dataset_cleaned
print(dataset_cleaned.head(10))
print(dataset_cleaned.tail(10))


   sample.timestamp         X         Y         Z subject gesture sample
0               0.0  0.046662  0.178172  0.383203     U01      01     01
1              11.0 -0.243681  0.178172  0.139347     U01      01     01
2              22.0  0.191834  0.455329  1.358630     U01      01     01
3              33.0  2.079060  1.730249  1.602485     U01      01     01
4              44.0  2.369403  1.065073 -1.079937     U01      01     01
5              55.0  0.627347 -2.371668 -1.079937     U01      01     01
6              66.0 -0.388851 -1.872786 -1.567648     U01      01     01
7              77.0 -0.679194 -0.265279 -0.348367     U01      01     01
8              88.0 -0.751780 -0.098985 -0.348367     U01      01     01
9              99.0 -0.534023  0.399897  0.139347     U01      01     01
    sample.timestamp         X         Y         Z subject gesture sample
10             110.0 -0.487195 -1.474219 -1.639716     U08      20     20
11             121.0 -0.109525 -0.296052  0.43587

In [4]:
import numpy as np

dataset_timecut = None

time_max = 19  # rows kept per sample; 18 * 11 = 198 is the last timestamp kept
time_step = 11  # timestamp spacing used for the cut-off and for padding rows

# Bring every sample to exactly `time_max` rows: longer ones are cut at the
# timestamp limit, shorter ones are padded with all-zero rows. Samples that
# still do not have exactly `time_max` rows afterwards are dropped.
timecut_frames = []

for gesture in data['gesture'].unique():
    df_gesture = data[data['gesture'] == gesture]
    for subject in df_gesture['subject'].unique():
        df_subject = df_gesture[df_gesture['subject'] == subject]
        for sample in df_subject['sample'].unique():
            df_sample = df_subject[df_subject['sample'] == sample]
            df_sample_count = int(df_sample['sample.timestamp'].count())
            if df_sample_count >= time_max:
                df_sample = df_sample[df_sample['sample.timestamp'] <= (time_step * (time_max - 1))]
            else:
                # Build all padding rows at once and concatenate them;
                # DataFrame.append is no longer available in pandas 2.x.
                padding = pd.DataFrame({
                    'sample.timestamp': list(range(df_sample_count * time_step, time_max * time_step, time_step)),
                    'X': 0.0,
                    'Y': 0.0,
                    'Z': 0.0,
                    'gesture': gesture,
                    'subject': subject,
                    'sample': sample,
                })
                df_sample = pd.concat([df_sample, padding], ignore_index=True)

            if df_sample['sample.timestamp'].count() != time_max:
                continue
            timecut_frames.append(df_sample)

# Concatenate once; stays None when no sample qualified.
if timecut_frames:
    dataset_timecut = pd.concat(timecut_frames)

data = dataset_timecut
print(dataset_timecut.head(10))
print(dataset_timecut.tail(10))

   sample.timestamp         X         Y         Z subject gesture sample
0               0.0  0.046662  0.178172  0.383203     U01      01     01
1              11.0 -0.243681  0.178172  0.139347     U01      01     01
2              22.0  0.191834  0.455329  1.358630     U01      01     01
3              33.0  2.079060  1.730249  1.602485     U01      01     01
4              44.0  2.369403  1.065073 -1.079937     U01      01     01
5              55.0  0.627347 -2.371668 -1.079937     U01      01     01
6              66.0 -0.388851 -1.872786 -1.567648     U01      01     01
7              77.0 -0.679194 -0.265279 -0.348367     U01      01     01
8              88.0 -0.751780 -0.098985 -0.348367     U01      01     01
9              99.0 -0.534023  0.399897  0.139347     U01      01     01
    sample.timestamp         X         Y         Z subject gesture sample
9               99.0 -0.487195 -1.927360  0.435874     U08      20     20
10             110.0 -0.487195 -1.474219 -1.63971

In [5]:
from keras.models import Sequential
from keras.layers import Bidirectional
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import Dropout
from keras.optimizers import adam_v2
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import StratifiedGroupKFold
from sklearn.model_selection import cross_validate
from sklearn.model_selection import GridSearchCV
from keras.utils import np_utils
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
import numpy


# fix random seed for reproducibility
# NOTE(review): this seeds NumPy's global RNG only; whether the Keras backend's
# weight initialisation and dropout also follow it is not shown here - confirm.
seed = 1000
numpy.random.seed(seed)
# create the dataset
def get_dataset():
    """Turn the module-level ``data`` frame into model inputs.

    Visits every gesture, then every subject of that gesture, then every
    sample of that subject (each in order of first appearance in ``data``)
    and collects the sample's X/Y/Z rows ordered by ``sample.timestamp``.

    Returns:
        X_train: ``np.asarray`` over the per-sample ``(rows, 3)`` arrays.
        Y_train: integer labels produced by ``LabelEncoder().fit_transform``
            on the gesture id of each sample.
        groups: list holding the subject id of each sample.
    """
    X_train = []
    Y_train = []
    groups = []
    for gesture in data['gesture'].unique():
        df_gesture = data[data['gesture'] == gesture]
        for subject in df_gesture['subject'].unique():
            df_subject = df_gesture[df_gesture['subject'] == subject]
            for sample in df_subject['sample'].unique():
                df_sample = df_subject[df_subject['sample'] == sample]
                ordered = df_sample.sort_values(by='sample.timestamp')
                # Vectorised extraction of the three axes instead of a
                # row-by-row iterrows() loop.
                X_train.append(ordered[['X', 'Y', 'Z']].to_numpy())
                Y_train.append(gesture)
                groups.append(subject)
    X_train = np.asarray(X_train)
    Y_train = LabelEncoder().fit_transform(Y_train)
    return X_train, Y_train, groups

# Function to create model, required for KerasClassifier
def create_model(epochs=128, dropout_rate=0.8, units=128):
    """Build and compile the bidirectional-LSTM classifier.

    Args:
        epochs: accepted for the wrapper's benefit; not used inside this function.
        dropout_rate: rate passed to the Dropout layer after the recurrent layer.
        units: width of both the LSTM and the hidden Dense layer.

    Returns:
        A compiled ``Sequential`` model ending in a 20-way softmax, using
        sparse categorical cross-entropy and Adam with learning rate 0.001.
    """
    recurrent = Bidirectional(LSTM(units=units, input_shape=[19, 3]))
    stack = [
        recurrent,
        Dropout(rate=dropout_rate),
        Dense(units=units, activation='relu'),
        Dense(20, activation='softmax'),
    ]
    model = Sequential(stack)
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=adam_v2.Adam(learning_rate=0.001),
        metrics=['accuracy'],
    )
    return model

model = KerasClassifier(build_fn=create_model, epochs=128, batch_size=19)
# Subject-grouped, class-stratified 5-fold splitter with a fixed shuffle seed.
cv = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=1000)

# get the dataset
X, y, g = get_dataset()

# Hand the splitter itself to cross_validate together with `groups`, rather
# than a pre-built one-shot generator from cv.split(...); `cv` stays reusable.
# `scoring` is a single string, so the result keys are 'test_score'/'train_score'.
results = cross_validate(
    model,
    X=X,
    y=y,
    groups=g,
    scoring='accuracy',
    cv=cv,
    verbose=1,
    return_train_score=True,
    return_estimator=True,
)


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Epoch 1/128
Epoch 2/128
Epoch 3/128
Epoch 4/128
Epoch 5/128
Epoch 6/128
Epoch 7/128
Epoch 8/128
Epoch 9/128
Epoch 10/128
Epoch 11/128
Epoch 12/128
Epoch 13/128
Epoch 14/128
Epoch 15/128
Epoch 16/128
Epoch 17/128
Epoch 18/128
Epoch 19/128
Epoch 20/128
Epoch 21/128
Epoch 22/128
Epoch 23/128
Epoch 24/128
Epoch 25/128
Epoch 26/128
Epoch 27/128
Epoch 28/128
Epoch 29/128
Epoch 30/128
Epoch 31/128
Epoch 32/128
Epoch 33/128
Epoch 34/128
Epoch 35/128
Epoch 36/128
Epoch 37/128
Epoch 38/128
Epoch 39/128
Epoch 40/128
Epoch 41/128
Epoch 42/128
Epoch 43/128
Epoch 44/128
Epoch 45/128
Epoch 46/128
Epoch 47/128
Epoch 48/128
Epoch 49/128
Epoch 50/128
Epoch 51/128
Epoch 52/128
Epoch 53/128
Epoch 54/128
Epoch 55/128
Epoch 56/128
Epoch 57/128
Epoch 58/128
Epoch 59/128
Epoch 60/128
Epoch 61/128
Epoch 62/128
Epoch 63/128
Epoch 64/128
Epoch 65/128
Epoch 66/128
Epoch 67/128
Epoch 68/128
Epoch 69/128
Epoch 70/128
Epoch 71/128
Epoch 72/128
Epoch 73/128
Epoch 74/128
Epoch 75/128
Epoch 76/128
Epoch 77/128
Epoch 78

[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  8.8min finished


In [6]:
# Show the raw cross_validate output: fit/score times, the fitted estimators and the per-fold train/test scores.
print(results)

{'fit_time': array([100.9526999 , 112.05299973, 119.66807127, 101.80736375,
        92.74166822]), 'score_time': array([0.45850015, 0.44400072, 0.58249784, 0.43350029, 0.6674974 ]), 'estimator': [<keras.wrappers.scikit_learn.KerasClassifier object at 0x000001E4054F5040>, <keras.wrappers.scikit_learn.KerasClassifier object at 0x000001E4054F50A0>, <keras.wrappers.scikit_learn.KerasClassifier object at 0x000001E4054F5130>, <keras.wrappers.scikit_learn.KerasClassifier object at 0x000001E4054F51F0>, <keras.wrappers.scikit_learn.KerasClassifier object at 0x000001E4054BFE80>], 'test_score': array([0.95909091, 0.85144928, 0.97841727, 0.92047377, 0.93445378]), 'train_score': array([0.99770115, 0.99952919, 0.99952875, 0.99944721, 1.        ])}


In [7]:
# Point the working frame at the samples that were set aside as outliers, so the
# following cells operate on that set. dataset_outliers is None if nothing was flagged.
data = dataset_outliers

In [8]:
import numpy as np

dataset_timecut = None

time_max = 19  # rows kept per sample; 18 * 11 = 198 is the last timestamp kept
time_step = 11  # timestamp spacing used for the cut-off and for padding rows

# Bring every sample in `data` to exactly `time_max` rows: longer ones are cut
# at the timestamp limit, shorter ones are padded with all-zero rows. Samples
# that still do not have exactly `time_max` rows afterwards are dropped.
timecut_frames = []

for gesture in data['gesture'].unique():
    df_gesture = data[data['gesture'] == gesture]
    for subject in df_gesture['subject'].unique():
        df_subject = df_gesture[df_gesture['subject'] == subject]
        for sample in df_subject['sample'].unique():
            df_sample = df_subject[df_subject['sample'] == sample]
            df_sample_count = int(df_sample['sample.timestamp'].count())
            if df_sample_count >= time_max:
                df_sample = df_sample[df_sample['sample.timestamp'] <= (time_step * (time_max - 1))]
            else:
                # Build all padding rows at once and concatenate them;
                # DataFrame.append is no longer available in pandas 2.x.
                padding = pd.DataFrame({
                    'sample.timestamp': list(range(df_sample_count * time_step, time_max * time_step, time_step)),
                    'X': 0.0,
                    'Y': 0.0,
                    'Z': 0.0,
                    'gesture': gesture,
                    'subject': subject,
                    'sample': sample,
                })
                df_sample = pd.concat([df_sample, padding], ignore_index=True)

            if df_sample['sample.timestamp'].count() != time_max:
                continue
            timecut_frames.append(df_sample)

# Concatenate once; stays None when no sample qualified.
if timecut_frames:
    dataset_timecut = pd.concat(timecut_frames)

data = dataset_timecut
print(dataset_timecut.head(10))
print(dataset_timecut.tail(10))

   sample.timestamp         X         Y         Z subject gesture sample
0               0.0  0.476846  0.311937  0.992948     U01      01     07
1              11.0  0.315910  0.607456  0.046727     U01      01     07
2              22.0  1.844796  1.198494  0.677541     U01      01     07
3              33.0  2.569006  2.380571  0.362134     U01      01     07
4              44.0  2.488538  1.050734  0.992948     U01      01     07
5              55.0  2.005732 -2.717134 -0.899493     U01      01     07
6              66.0 -0.247364 -2.421615 -3.422747     U01      01     07
7              77.0 -1.052040 -0.352981 -0.899493     U01      01     07
8              88.0 -0.649702  0.016418  0.677541     U01      01     07
9              99.0 -0.649702  0.311937  0.362134     U01      01     07
    sample.timestamp         X         Y         Z subject gesture sample
9               99.0  0.101073  0.354159  0.170351     U08      20     18
10             110.0 -0.020215  1.151017 -1.82274

In [9]:
# fix random seed for reproducibility
# This cell does not import numpy itself; the name must already be bound by an earlier cell.
# NOTE(review): only NumPy's global RNG is seeded here - confirm that is sufficient.
seed = 1000
numpy.random.seed(seed)
# create the dataset
def get_dataset():
    """Turn the module-level ``data`` frame into model inputs.

    Visits every gesture, then every subject of that gesture, then every
    sample of that subject (each in order of first appearance in ``data``)
    and collects the sample's X/Y/Z rows ordered by ``sample.timestamp``.

    Returns:
        X_train: ``np.asarray`` over the per-sample ``(rows, 3)`` arrays.
        Y_train: integer labels produced by ``LabelEncoder().fit_transform``
            on the gesture id of each sample.
        groups: list holding the subject id of each sample.
    """
    X_train = []
    Y_train = []
    groups = []
    for gesture in data['gesture'].unique():
        df_gesture = data[data['gesture'] == gesture]
        for subject in df_gesture['subject'].unique():
            df_subject = df_gesture[df_gesture['subject'] == subject]
            for sample in df_subject['sample'].unique():
                df_sample = df_subject[df_subject['sample'] == sample]
                ordered = df_sample.sort_values(by='sample.timestamp')
                # Vectorised extraction of the three axes instead of a
                # row-by-row iterrows() loop.
                X_train.append(ordered[['X', 'Y', 'Z']].to_numpy())
                Y_train.append(gesture)
                groups.append(subject)
    X_train = np.asarray(X_train)
    # NOTE(review): a new LabelEncoder is fitted on the gestures present in the
    # current `data`; if that set differs from the one the models were trained
    # on, the integer codes will not correspond - verify before comparing.
    Y_train = LabelEncoder().fit_transform(Y_train)
    return X_train, Y_train, groups

X_test, y_test, g = get_dataset()
# One prediction array per fitted cross-validation estimator, in fold order.
# Iterating over the returned estimators avoids hard-coding the number of folds.
y_predicted = [estimator.predict(X_test) for estimator in results['estimator']]



In [10]:
from sklearn.metrics import classification_report, confusion_matrix
# Print one classification report per entry of y_predicted (one entry per
# fold estimator), instead of repeating the call for hard-coded indices.
for fold_predictions in y_predicted:
    print('Classification Report')
    print(classification_report(y_test, fold_predictions))



Classification Report
              precision    recall  f1-score   support

           0       0.95      0.97      0.96        39
           1       0.97      0.94      0.96        34
           2       0.97      0.93      0.95        41
           3       1.00      0.95      0.97        38
           4       0.96      1.00      0.98        43
           5       0.96      1.00      0.98        48
           6       1.00      0.88      0.94        41
           7       1.00      0.95      0.98        42
           8       0.94      0.97      0.95        32
           9       0.93      1.00      0.96        38
          10       0.95      0.98      0.96        42
          11       1.00      1.00      1.00        39
          12       0.93      1.00      0.96        26
          13       1.00      0.93      0.96        41
          14       0.95      0.95      0.95        44
          15       0.96      0.98      0.97        44
          16       1.00      1.00      1.00        44
     