In [93]:
import pandas as pd
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import math
import shutil
from scipy import signal
from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.optimizers import Adam
from keras.models import load_model
from keras.callbacks import ModelCheckpoint

# Window length in frames: 50 frames * 5 ms per frame = 250 ms.
WINDOW_SIZE = 50
# NOTE(review): presumably the number of frames kept before the detected peak
# when slicing; it is not used in this part of the notebook - confirm against
# the slicing code.
FRAME_BEFORE_PEAK = 35

# '__file__' is a plain string literal here (notebooks do not define __file__),
# so realpath() resolves it against the current working directory and its
# parent is the directory the notebook is being run from.
pwd = Path(os.path.realpath('__file__')).parent
data_dir = pwd.joinpath('sliced_data')

data_dir

PosixPath('/Users/senyuuri/workspace/gesture-drumkit/Model/sliced_data')

## Load Dataset

In [100]:
# Build the raw dataset: one (n_frames, 4) array per file, labelled by the
# integer id of the folder that contains it.
#   dataset       - list of np.arrays with columns sensor_type, x, y, z
#   target        - list of integer labels, parallel to `dataset`
#   label_mapping - integer label -> folder name
dataset = list()
target = list()
label_mapping = {}

label_count = 0
# os.listdir() returns entries in arbitrary order; sorting keeps the
# folder -> integer label assignment (and the sample order) identical across
# runs and machines.
for dir_name in sorted(os.listdir(data_dir)):
    # hidden folders such as .ipynb_checkpoints are not gesture classes
    if dir_name.startswith('.'):
        continue
    if os.path.isdir(data_dir/dir_name):
        print('Loading folder %s...' % dir_name)
        # use folder name to classify
        label = dir_name

        for f_name in sorted(os.listdir(data_dir/dir_name)):
            f_path = data_dir/dir_name/f_name
            # skip hidden files (e.g. .DS_Store) and nested directories:
            # neither can be parsed as a recording
            if f_name.startswith('.') or not os.path.isfile(f_path):
                continue

            df = pd.read_csv(f_path)
            # drop the index and timestamp columns (positions 0, 5 and 6)
            cols = [0, 5, 6]
            df = df.drop(df.columns[cols], axis=1)

            # map sensor type to numerical values (maybe embeddings' better?)
            # any sensor type missing from the dict is mapped to NaN
            df.columns = ['sensor_type', 'x', 'y', 'z']
            df['sensor_type'] = df['sensor_type'].map({'ACCELEROMETER': 0, 'GYROSCOPE': 1})
            dataset.append(df.values)
            target.append(label_count)

        label_mapping[label_count] = label
        label_count += 1

# dataset is a python list of (100+, 4)-shape np.arrays
dataset[:1], target[:1], len(dataset), len(target), dataset[:1][0].shape

Loading folder gesture-down...
Loading folder gesture-up...


([array([[   0.        ,    2.376073  ,   -4.046263  ,    5.0129633 ],
         [   0.        ,    2.376073  ,   -4.046263  ,    5.0129633 ],
         [   0.        ,    3.235096  ,   -4.429115  ,    5.7044888 ],
         [   0.        ,    3.235096  ,   -4.429115  ,    5.7044888 ],
         [   0.        ,    4.2137604 ,   -4.4937205 ,    6.312266  ],
         [   0.        ,    4.2137604 ,   -4.4937205 ,    6.312266  ],
         [   0.        ,    5.3072805 ,   -4.3190446 ,    6.802795  ],
         [   0.        ,    5.3072805 ,   -4.3190446 ,    6.802795  ],
         [   0.        ,    6.396015  ,   -4.3357944 ,    7.267002  ],
         [   0.        ,    6.396015  ,   -4.3357944 ,    7.267002  ],
         [   0.        ,    7.3770723 ,   -4.5631127 ,    7.977671  ],
         [   0.        ,    7.3770723 ,   -4.5631127 ,    7.977671  ],
         [   0.        ,    8.226524  ,   -4.934     ,    9.159724  ],
         [   0.        ,    8.226524  ,   -4.934     ,    9.159724  ],
      

## Normalisation

In [35]:
# Count the frames in every sequence so that all sequences can be normalised
# to the minimum common length.
# The length is roughly 2 * WINDOW_SIZE (accelerometer + gyroscope rows), but
# it varies with the slicing method used.
len_sequences = list(map(len, dataset))

pd.Series(len_sequences).describe()

count    104.0
mean     101.0
std        0.0
min      101.0
25%      101.0
50%      101.0
75%      101.0
max      101.0
dtype: float64

In [37]:
# (continued) drop the extra frames at the end so that every sequence has
# exactly min_len rows
min_len = pd.Series(len_sequences).min()
print(min_len, dataset[0].shape)

# Truncate by slicing: `ndarray.shape` is an attribute (a tuple), not a method,
# and a reshape could not shorten a sequence anyway. The result is assigned
# back by index because rebinding the loop variable would leave the list
# untouched.
for i, one_seq in enumerate(dataset):
    if len(one_seq) > min_len:
        dataset[i] = one_seq[:min_len]

# check that it worked: all sequences now share the same length
assert all(len(one_seq) == min_len for one_seq in dataset)

101 (101, 4)


In [101]:
# calculate min, max in each column over all frames of all sequences
# NOTE(review): these statistics are computed on the full dataset, i.e. before
# the train/test split further down, so the test samples influence the scaling.
tmp_data = np.array(dataset)
concat_data = tmp_data.reshape(-1, tmp_data.shape[-1])
col_min = concat_data.min(axis=0)
col_max = concat_data.max(axis=0)

# A column that is constant over the whole dataset has zero range; dividing by
# it would fill that column with NaN. Use a range of 1 there instead, which
# maps the constant column to -1.
col_range = col_max - col_min
col_range[col_range == 0] = 1

# apply min-max normalisation, scaling every column to [-1, 1]
for i, row in enumerate(dataset):
    dataset[i] = 2*(row - col_min)/col_range - 1

dataset[0]


array([[-1.        , -0.12847594,  0.17804034,  0.43735942],
       [-1.        , -0.12847594,  0.17804034,  0.43735942],
       [-1.        , -0.12352666,  0.17726948,  0.44267639],
       [-1.        , -0.12352666,  0.17726948,  0.44267639],
       [-1.        , -0.11788805,  0.17713939,  0.44734944],
       [-1.        , -0.11788805,  0.17713939,  0.44734944],
       [-1.        , -0.1115877 ,  0.1774911 ,  0.451121  ],
       [-1.        , -0.1115877 ,  0.1774911 ,  0.451121  ],
       [-1.        , -0.10531493,  0.17745738,  0.45469018],
       [-1.        , -0.10531493,  0.17745738,  0.45469018],
       [-1.        , -0.09966253,  0.17699967,  0.46015434],
       [-1.        , -0.09966253,  0.17699967,  0.46015434],
       [-1.        , -0.09476839,  0.17625289,  0.46924286],
       [-1.        , -0.09476839,  0.17625289,  0.46924286],
       [-1.        , -0.09162511,  0.17527485,  0.48440269],
       [-1.        , -0.09162511,  0.17527485,  0.48440269],
       [-1.        , -0.

In [102]:
# Hold out 20% of the sequences for testing. A fixed random_state makes the
# split - and therefore every metric reported afterwards - reproducible across
# kernel restarts.
SPLIT_SEED = 42
train, test, train_target, test_target = train_test_split(
    dataset, target, test_size=0.2, random_state=SPLIT_SEED)

# train_test_split returns python lists here; convert them to arrays
train = np.array(train)
train_target = np.array(train_target)
test = np.array(test)
test_target = np.array(test_target)

train.shape, train_target.shape, test.shape, test_target.shape

((83, 101, 4), (83,), (21, 101, 4), (21,))

In [103]:
# LSTM layer with 256 units over (min_len, 4) input sequences, followed by a
# single sigmoid unit that outputs the binary class probability.
model = Sequential([
    LSTM(256, input_shape=(min_len, 4)),
    Dense(1, activation='sigmoid'),
])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_3 (LSTM)                (None, 256)               267264    
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 257       
Total params: 267,521
Trainable params: 267,521
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Optimiser and loss for the binary classifier.
adam = Adam(lr=0.0001)
model.compile(
    optimizer=adam,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Save the model to best_model.pkl whenever the validation accuracy improves.
chk = ModelCheckpoint(
    'best_model.pkl',
    monitor='val_acc',
    mode='max',
    save_best_only=True,
    verbose=1,
)

# 10% of the training samples are held back for validation during fitting.
model.fit(
    train,
    train_target,
    validation_split=0.1,
    batch_size=8,
    epochs=10,
    callbacks=[chk],
)

Train on 74 samples, validate on 9 samples
Epoch 1/10

Epoch 00001: val_acc improved from -inf to 0.77778, saving model to best_model.pkl
Epoch 2/10
16/74 [=====>........................] - ETA: 0s - loss: 0.5073 - acc: 0.9375    