# Imports and initialization

In [1]:
import pandas as pd
import numpy as np
import os
import math
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization, Bidirectional
from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler, StandardScaler
physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], enable=True)

Choose one of the two below cells depending on the environment.

In [2]:
# dir_path = './' #uncomment if in local environment

In [3]:
#uncomment in google colab environment
from google.colab import drive
drive.mount('/content/drive') #path to root directory in Drive
dir_path = './drive/MyDrive/BAKA/' #colab
df = pd.read_csv(dir_path + 'DataCollection' + '/' + '1' + '/' + '0.txt', header=None, sep=' ')
if len(df.columns) > 31:
  df = df.drop(columns=[31])

Mounted at /content/drive


In [4]:
files = os.listdir(dir_path + 'DataCollection')
files

['4', '7', '8', '0', '5', '2', '3', '6', '1']

# Dataset Split
Split samples into Test sets and Training sets <br>
Use 20% of available samples for Testing and the remaining for Training

In [5]:
x_train = [];
y_train = [];

x_test = [];
y_test = [];
for i in files:
    samples = os.listdir(dir_path + 'DataCollection' + '/' + i)
    # num_tests = int(len(samples)/5);
    num_tests = len(samples);
    shuffle(samples, random_state = 0)
    # for k in range(0, num_tests):
    #     df = pd.read_csv(dir_path + 'DataCollection' + '/' + i + '/' + samples[k], header=None, sep=' ')
    #     df = df.drop(df.index[60:])
    #     if len(df.columns) > 31:
    #       df = df.drop(columns=[31])
    #     if df.isnull().values.any():
    #       print(i, " ", samples[k])
    #     x_test.append(df.to_numpy())
    #     y_test.append(int(i));
    
    for k in range(0, num_tests):
        df = pd.read_csv(dir_path + 'DataCollection' + '/' + i + '/' + samples[k], header=None, sep=' ')
        df = df.drop(df.index[60:])
        if len(df.columns) > 31:
          df = df.drop(columns=[31])
        if df.isnull().values.any():
          print(i, " ", samples[k])
        x_train.append(df.to_numpy())
        y_train.append(int(i));
    
    print(len(samples), ' ', num_tests, ' ', len(samples)- num_tests)
x_train = np.array(x_train)
y_train = np.array(y_train);

# x_test = np.array(x_test)
# y_test = np.array(y_test)
print("x_train.shape: ", x_train.shape)
print("y_train.shiape: ", y_train.shape)
# print("x_test.shape: ", x_test.shape)
# print("y_test.shape: ", y_test.shape)


204   204   0
264   264   0
233   233   0
253   253   0
254   254   0
320   320   0
285   285   0
266   266   0
319   319   0
x_train.shape:  (2398, 60, 31)
y_train.shiape:  (2398,)


In [6]:
print("x_train.shape: ", x_train.shape)
print("y_train.shiape: ", y_train.shape)
# print("x_test.shape: ", x_test.shape)
# print("y_test.shape: ", y_test.shape)



x_train.shape:  (2398, 60, 31)
y_train.shiape:  (2398,)


### Scaling and feature labels
Apply min-max scaling technique and relable features corresponding to the description in bachelor thesis's text

In [7]:
def scale_data(data, min_max_scaler):
    for i in range(len(data)):
        data[i] = min_max_scaler.transform(data[i])
    return data

Form preprocessed datasets to corresponding shapes as well as shuffle samples

In [8]:
min_max_scaler = MinMaxScaler(feature_range=(0,1))

num_instances, num_time_steps, num_features = x_train.shape
x_train = np.reshape(x_train, newshape=(-1, num_features))
x_train = min_max_scaler.fit_transform(x_train)
x_train = np.reshape(x_train, newshape=(num_instances, num_time_steps, num_features))

x_train, y_train = shuffle(x_train, y_train, random_state=0)

# num_instances, num_time_steps, num_features = x_test.shape
# x_test = np.reshape(x_test, newshape=(-1, num_features))
# x_test = min_max_scaler.transform(x_test)
# x_test = np.reshape(x_test, newshape=(num_instances, num_time_steps, num_features))

# x_test, y_test = shuffle(x_test, y_test, random_state=0)


In [9]:
print("x_train.shape: ", x_train.shape)
print("y_train.shiape: ", y_train.shape)
# print("x_test.shape: ", x_test.shape)
# print("y_test.shape: ", y_test.shape)

x_train.shape:  (2398, 60, 31)
y_train.shiape:  (2398,)


# Model definition

Define Two-layered bidirectional LSTM<br>
Each layer consists of `Bidirectional(LSTM)` cell with addition of `Dropout()` and `BatchNormalization()` to minimize overfitting and decrease learning time

In [10]:
model = Sequential()
model.add(Bidirectional(LSTM(units=60, return_sequences=True ,dtype='float64'),input_shape=x_train.shape[1:],dtype='float64'))
model.add(BatchNormalization())
model.add(Dropout(0.6))

model.add(Bidirectional(LSTM(units=60 ,dtype='float64') ,dtype='float64'))
model.add(BatchNormalization())
model.add(Dropout(0.6))

model.add(Dense(len(files), activation='softmax',dtype='float64'))
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional (Bidirectional (None, 60, 120)           44160     
_________________________________________________________________
batch_normalization (BatchNo (None, 60, 120)           480       
_________________________________________________________________
dropout (Dropout)            (None, 60, 120)           0         
_________________________________________________________________
bidirectional_1 (Bidirection (None, 120)               86880     
_________________________________________________________________
batch_normalization_1 (Batch (None, 120)               480       
_________________________________________________________________
dropout_1 (Dropout)          (None, 120)               0         
_________________________________________________________________
dense (Dense)                (None, 9)                 1

# Training

Model training using 200 epochs <br>
Marking checkpoints(models with best `val_accuracy`) to `./Checkpoints` directory

In [11]:
opt = tf.keras.optimizers.Adam(lr=0.0001, decay=1e-5)

checkpoint_filepath = dir_path + 'Checkpoints/'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True)

model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)


gestures = model.fit(x = x_train,
            y = y_train,
            epochs=200,
            # validation_split=0.1, #split 10% of the trainning set for the validation set,
            batch_size=24,
            # callbacks=[model_checkpoint_callback],
            shuffle=True
         )

Epoch 1/200


  "The `lr` argument is deprecated, use `learning_rate` instead.")


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 7

In [16]:
# print("Evaluate on test data")
# results = model.evaluate(x_test, y_test, batch_size=24)
# print("test loss, test acc:", results)
model.save(dir_path + 'Models/gestures_bidir', save_format='tf')



INFO:tensorflow:Assets written to: ./drive/MyDrive/BAKA/Models/gestures_bidir/assets


INFO:tensorflow:Assets written to: ./drive/MyDrive/BAKA/Models/gestures_bidir/assets


In [13]:
min_max_scaler.data_min_

array([   0.    ,    0.    ,    0.    ,    0.    ,    0.    ,    0.    ,
          0.    ,    0.    ,    0.    ,    0.    , -591.101 ,  -12.7777,
       -190.142 , -626.233 ,    0.    , -238.535 , -648.629 ,    0.    ,
       -251.495 , -643.156 ,    0.    , -243.005 , -675.481 ,    0.    ,
       -220.466 , -607.075 ,    0.    , -158.459 ,    0.    ,    0.    ,
          0.    ])

In [14]:
min_max_scaler.data_max_

array([179.999, 180.   , 180.   , 179.999, 179.997, 180.   , 179.968,
       180.   , 179.985, 179.96 , 539.053, 468.363, 720.07 , 581.654,
       522.811, 709.384, 585.01 , 508.522, 705.543, 567.779, 478.013,
       702.861, 544.45 , 478.722, 693.173, 499.893, 466.308, 707.228,
       107.126, 161.082, 127.977])

In [15]:
model.input

<KerasTensor: shape=(None, 60, 31) dtype=float64 (created by layer 'bidirectional_input')>