In [None]:
import os
import numpy as np
import pandas as pd
import json
import datetime

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers, optimizers
from keras.models import Sequential
from keras.layers import Activation, Dense, Flatten, Dropout, LSTM, Softmax, Bidirectional, LayerNormalization, BatchNormalization, Conv1D, MaxPooling1D, Input
from keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

import matplotlib.pyplot as plt
import seaborn as sns
import tqdm

os.sys.path.append('/home/ubuntu/FYP-ROS/src/utils/utils')
from data_loading import load_data

In [None]:
print(f"{tf.__version__=}")
print(f"{np.__version__=}")
print("nvidia-smi")
!nvidia-smi
print("nvcc version")
!nvcc --version
print("nvinfer version")
!dpkg -l | grep nvinfer
print("TensorRT version")
!dpkg -l | grep TensorRT

# Load & Prepare Data

In [None]:
def _axis_names(prefix, axes):
    """Return ``<prefix>_<axis>`` for every axis letter in ``axes``, in order."""
    return [f"{prefix}_{axis}" for axis in axes]


# Every gesture label known to the dataset; NUM_CLASSES is the size of this
# full list, not of the subset that is kept.
GESTURES = ["STATIC", "SLIDE_UP", "SLIDE_DOWN", "SLIDE_LEFT", "SLIDE_RIGHT", "RELEASE", "GRASP", "HIGHLIGHT", "ON_YES", "OFF_NO", "NONE", "POINTING"]
GESTURES_KEPT = ["STATIC", "SLIDE_UP", "SLIDE_DOWN", "RELEASE", "GRASP", "POINTING"]
GESTURES_DISCARDED = [gesture for gesture in GESTURES if gesture not in GESTURES_KEPT]
NUM_CLASSES = len(GESTURES)

_IMU_IDS = (0, 1, 2)

# Column order: timestamp, then per IMU (linear acceleration, angular
# velocity, orientation), then the imu0->imu1 / imu0->imu2 rotations, then
# the imu0->imu1 / imu0->imu2 translations.
FEATURES = ["timestamp"]
for _imu in _IMU_IDS:
    FEATURES += _axis_names(f"Imu{_imu}_linear_acceleration", "xyz")
    FEATURES += _axis_names(f"Imu{_imu}_angular_velocity", "xyz")
    FEATURES += _axis_names(f"Imu{_imu}_orientation", "xyzw")
for _target in (1, 2):
    FEATURES += _axis_names(f"imu0_to_imu{_target}_rotation", "xyzw")
for _target in (1, 2):
    FEATURES += _axis_names(f"imu0_to_imu{_target}_translation", "xyz")

# Only the linear-acceleration channels of the three IMUs are kept.
FEATURES_KEPT = [name
                 for imu in _IMU_IDS
                 for name in _axis_names(f"Imu{imu}_linear_acceleration", "xyz")]
FEATURES_DROPPED = [name for name in FEATURES if name not in FEATURES_KEPT]
FEATURES_DIM = len(FEATURES_KEPT)

print(f"NUM_CLASSES={NUM_CLASSES!r}")
print(f"FEATURES_DIM={FEATURES_DIM!r}")

In [None]:
DATA_BUF_LEN = 50
def get_data_by_id(data_id, augment):
    """Load one recording and its label as fixed-size model inputs.

    Args:
        data_id: Identifier interpolated into the CSV file names.
        augment: File-name suffix selecting the variant to load ("" for the
            plain file, e.g. "_jitter" for an augmented one).

    Returns:
        Tuple ``(x_raw, y_label)``. ``x_raw`` is a NumPy array with exactly
        ``DATA_BUF_LEN`` rows (longer recordings are truncated, shorter ones
        padded by repeating their last row) holding the columns that remain
        after dropping ``FEATURES_DROPPED``. ``y_label`` is the one-hot
        vector (length ``NUM_CLASSES``) of the first entry in the label
        file's ``label`` column.
    """
    data_root = "/home/ubuntu/FYP-ROS/rosbag/data"
    sample_csv = f"{data_root}/data_clean_augment/{data_id}_data{augment}.csv"
    label_csv = f"{data_root}/label_clean_augment/{data_id}_label{augment}.csv"

    frame = pd.read_csv(sample_csv).drop(FEATURES_DROPPED, axis=1)
    if 'timestamp' in frame.columns:
        # Express timestamps relative to the first sample of the recording.
        frame['timestamp'] = frame['timestamp'] - frame['timestamp'].iloc[0]

    # Truncate first, then pad whatever is still missing with the last row.
    x_raw = frame.to_numpy()[:DATA_BUF_LEN]
    missing_rows = DATA_BUF_LEN - x_raw.shape[0]
    if missing_rows > 0:
        padding = np.repeat(x_raw[-1][np.newaxis, :], repeats=missing_rows, axis=0)
        x_raw = np.vstack([x_raw, padding])

    labels = pd.read_csv(label_csv)['label']
    y_label = tf.keras.utils.to_categorical(labels, num_classes=NUM_CLASSES)[0]
    return x_raw, y_label

# File-name suffixes of the variants loaded for every recording: the plain
# file first, then each augmented copy.
AUGMENT_SUFFIXES = ["", "_jitter", "_time_warp", "_time_warp2"]

metadata, cnt = load_data(ignore_classes=GESTURES_DISCARDED, ignore_dates=["Mar 23"])

# Collect plain records and build the frame once at the end: growing a
# DataFrame with pd.concat inside the loop re-copies every existing row on
# each iteration.
records = []
for gesture_type in tqdm.tqdm(metadata):
    for data_id in metadata[gesture_type]:
        for augment in AUGMENT_SUFFIXES:
            x_raw, y_label = get_data_by_id(data_id, augment=augment)
            records.append({"data_id": data_id, "data": x_raw, "label": y_label})

# One row per (recording, variant); "data" and "label" cells hold NumPy arrays.
# Explicit columns keep the frame well-formed even when nothing was loaded.
df = pd.DataFrame(records, columns=["data_id", "data", "label"])

In [None]:
# Split by recording rather than by row. Every data_id contributes several
# rows (the plain sample plus its augmented copies); a row-level split would
# put near-duplicates of one recording on both sides and inflate the
# validation metrics.
SPLIT_SEED = 42  # fixed so the split, and everything measured on it, is reproducible

unique_ids = list(dict.fromkeys(df["data_id"]))  # de-duplicated, first-seen order
train_ids, valid_ids = train_test_split(unique_ids, test_size=0.1, random_state=SPLIT_SEED)
valid_id_set = set(valid_ids)
is_valid_row = np.array([data_id in valid_id_set for data_id in df["data_id"]], dtype=bool)

samples = np.array(df["data"].to_list())
targets = np.array(df["label"].to_list())
# Training rows stay in load order here; Keras `fit` shuffles them each epoch
# by default.
X_train, y_train = samples[~is_valid_row], targets[~is_valid_row]
X_valid, y_valid = samples[is_valid_row], targets[is_valid_row]

print(f"{X_train.shape=}")
print(f"{X_valid.shape=}")
print(f"{y_train.shape=}")
print(f"{y_valid.shape=}")

In [None]:
# Samples per kept gesture in the train and validation splits.
# The one-hot columns are selected by each kept gesture's position in
# GESTURES instead of by "count != 0": a kept gesture that happens to be
# absent from a split would otherwise shorten the count vector and no longer
# line up with the GESTURES_KEPT bar labels.
kept_class_ids = [GESTURES.index(gesture) for gesture in GESTURES_KEPT]
y_train_sum = np.sum(y_train, axis=0)[kept_class_ids]
y_valid_sum = np.sum(y_valid, axis=0)[kept_class_ids]

plt.figure(figsize=(13, 5))
# Validation bars are drawn second, on top of the train bars at the same x.
plt.bar(GESTURES_KEPT, y_train_sum, label="train")
plt.bar(GESTURES_KEPT, y_valid_sum, label="valid")
for counts in (y_train_sum, y_valid_sum):
    for i, v in enumerate(counts):
        # Counts are sums of float one-hot entries; show them as integers.
        plt.text(i, v+1, str(int(v)), ha='center', fontsize=12)

plt.title("Number of samples per gesture", fontdict={'fontsize': 14})
plt.xlabel("Gesture", fontdict={'fontsize': 12})
plt.xticks(fontsize=12)
plt.ylabel("Number of samples", fontdict={'fontsize': 12})
plt.legend(prop={'size': 12})
plt.show()

# Build Model

In [None]:
# Sequence classifier: one LSTM over the (DATA_BUF_LEN, FEATURES_DIM) window,
# flattened into a small dense head with a softmax over NUM_CLASSES.
model = Sequential()

# L2 penalty strengths shared by every regularized kernel / bias below.
kernal_reg = 1e-2
bias_reg = 1e-2

model.add(Input(shape=(DATA_BUF_LEN, FEATURES_DIM)))
# return_sequences=True keeps the LSTM output at every time step; Flatten
# below turns that whole sequence into one feature vector.
model.add(LSTM(units=64, kernel_regularizer=regularizers.L2(kernal_reg), bias_regularizer=regularizers.L2(bias_reg), return_sequences=True, name='LSTM1'))
model.add(Dropout(0.5, name='Dropout1'))
model.add(LayerNormalization())
model.add(Flatten())
model.add(Dense(64, kernel_regularizer=regularizers.L2(kernal_reg), bias_regularizer=regularizers.L2(bias_reg), activation='relu', name='Dense1'))
model.add(Dropout(0.5, name='Dropout2'))
model.add(LayerNormalization())
model.add(Dense(NUM_CLASSES, activation='softmax', kernel_regularizer=regularizers.L2(kernal_reg), bias_regularizer=regularizers.L2(bias_reg), name='Dense2'))

model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.Adam(learning_rate=0.0005),
              metrics=['accuracy'])
# summary() prints the table itself and returns None, so it is not wrapped in
# print() (that would add a stray "None" line to the output).
model.summary()

In [None]:
# model.load_weights("/home/ubuntu/FYP-ROS/weights/model_lstm_weights-2023_2_24-14_51-acc0.96.h5")
# model = keras.models.load_model("/home/ubuntu/FYP-ROS/weights/model_lstm-2023_3_31-15_34-acc0.99.h5")

# Training

In [None]:
ACCURACY_THRESHOLD = 0.99
class accuryThreasholdCallback(tf.keras.callbacks.Callback):
    """Stop training once train and validation accuracy both exceed ACCURACY_THRESHOLD."""

    def on_epoch_end(self, epoch, logs=None):
        """Check the epoch's metrics and request a stop when both pass the threshold.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metric dict for the epoch; may be None or lack a metric.
        """
        # `logs=None` instead of a shared mutable `{}` default.
        logs = logs or {}
        accuracy = logs.get('accuracy')
        val_accuracy = logs.get('val_accuracy')
        # A missing metric (e.g. fit without validation data) cannot pass the
        # threshold; comparing None with a float would raise TypeError.
        if accuracy is None or val_accuracy is None:
            return
        if accuracy > ACCURACY_THRESHOLD and val_accuracy > ACCURACY_THRESHOLD:
            print("\nReached %2.2f%% accuracy, so stopping training!!" %(ACCURACY_THRESHOLD*100))
            self.model.stop_training = True

accuracy_threashold_monitor = accuryThreasholdCallback()
# NOTE(review): this EarlyStopping monitor is created but not passed to
# `fit` below - confirm whether it was left out on purpose.
early_stopping_monitor = EarlyStopping(patience=3)

history = model.fit(X_train, y_train, validation_data=(X_valid, y_valid), epochs=30, batch_size=32, callbacks=[accuracy_threashold_monitor])

In [None]:
print(history.history.keys())

# One panel per metric, each showing the training curve and then the
# validation curve (stored under the "val_"-prefixed key).
fig, ax = plt.subplots(1, 2, figsize=(12, 4))
for panel, metric in zip(ax, ('accuracy', 'loss')):
    panel.plot(history.history[metric])
    panel.plot(history.history['val_' + metric])
    panel.set_ylabel(metric)
    panel.set_xlabel('epoch')
fig.legend(['train', 'val'], loc='upper right')
fig.suptitle('Model training history')
plt.show()

# Evaluate

In [None]:
# Loss followed by the compiled metrics, measured on the validation split.
scores = model.evaluate(x=X_valid, y=y_valid, verbose=0)
print(f"scores={scores!r}")

In [None]:
y_pred = model.predict(X_valid)

# Class index chosen by the model vs. the labelled class, per validation sample.
predicted_ids = np.argmax(y_pred, axis=1)
truth_ids = np.argmax(y_valid, axis=1)

# Positions (within the validation split) of every misclassified sample.
error_data_index = [i for i, (yp, yt) in enumerate(zip(predicted_ids, truth_ids)) if yp != yt]
for i in error_data_index:
    print(f"index: {i}, truth: {GESTURES[truth_ids[i]]}, predicted: {GESTURES[predicted_ids[i]]}")

In [None]:
y_valid_not_onehot = np.argmax(y_valid, axis=1)
y_pred_not_onehot = np.argmax(y_pred, axis=1)

# Classes shown: every kept gesture, plus any other class index that actually
# occurs in the labels or predictions, so the matrix shape always matches the
# tick labels and no prediction is silently left out.
kept_class_ids = [GESTURES.index(gesture) for gesture in GESTURES_KEPT]
class_ids = sorted(set(kept_class_ids) | set(y_valid_not_onehot.tolist()) | set(y_pred_not_onehot.tolist()))
class_names = [GESTURES[class_id] for class_id in class_ids]

# confusion_matrix takes (y_true, y_pred): rows are ground truth and columns
# are predictions, which is what the axis labels below state.
matrix_confusion = confusion_matrix(y_valid_not_onehot, y_pred_not_onehot, labels=class_ids)

plt.figure(figsize=(10, 10))
sns.heatmap(matrix_confusion, square=True, annot=False, cmap='Blues', fmt='d')

# Annotate each cell as "count / number of samples of that true class".
row_totals = matrix_confusion.sum(axis=1)
for i in range(matrix_confusion.shape[0]):
    for j in range(matrix_confusion.shape[1]):
        plt.text(j+0.5, i+0.5, f'{matrix_confusion[i, j]}/{row_totals[i]}',
                 horizontalalignment='center', verticalalignment='center', fontsize=12)

plt.xlabel('predictions')
plt.ylabel('ground truth')
# Heatmap cells are one unit wide, so +0.5 centres each tick on its cell.
tick_positions = np.arange(len(class_ids)) + 0.5
plt.xticks(tick_positions, class_names, rotation=45, fontsize=12)
plt.yticks(tick_positions, class_names, rotation=45, fontsize=12)
plt.show()

In [None]:
# Plot the acceleration channels of one randomly chosen misclassified
# validation sample.
print(f"total error data size: {len(error_data_index)}")
if len(error_data_index) > 0:
    index = np.random.choice(error_data_index)

    true_label = np.argmax(y_valid[index])
    predicted_class = np.argmax(y_pred[index])

    print(f"{GESTURES[true_label]=}")
    print(f"{GESTURES[predicted_class]=}")

    fig, axs = plt.subplots(3, 3, figsize=(15, 10))

    raw_data = X_valid[index]
    acc_axes = axs[:, :3].ravel()
    acc_data = raw_data
    # Titles follow the 3 IMUs x (x, y, z) order; zip pairs them with the
    # sample's feature columns one subplot at a time.
    acc_titles = [f'Imu{i}_acc_{xyz}' for i in range(3) for xyz in ['x', 'y', 'z']]

    for ax, data, title in zip(acc_axes, acc_data.T, acc_titles):
        ax.plot(data)
        ax.set_title(title)
        ax.set_ylim([-1.5 * 9.8, 1.5 * 9.8])

    fig.suptitle(f'labeled: {GESTURES[true_label]}, predicted: {GESTURES[predicted_class]}')
    # show() renders the figure; an argument-less plt.plot() draws nothing
    # and only leaves an empty-list repr as the cell output.
    plt.show()

# Explanation

## Feature importance

In [None]:
def _validation_loss(inputs):
    """Loss (first entry of `model.evaluate`) of the model on `inputs` against y_valid."""
    return model.evaluate(inputs, y_valid, verbose=0)[0]


# Slots 0..FEATURES_DIM-1: loss with that feature channel zeroed out.
# Last slot: loss on the untouched validation data (the baseline).
feature_importance = np.zeros(FEATURES_DIM+1)
for feature in range(FEATURES_DIM):
    X_valid_f = X_valid.copy()
    X_valid_f[:, :, feature] = 0
    feature_importance[feature] = _validation_loss(X_valid_f)
feature_importance[-1] = _validation_loss(X_valid)

# Normalize by the baseline so every entry is a loss ratio (baseline == 1).
feature_importance = feature_importance / feature_importance[-1]
print(f"feature_importance={feature_importance!r}")

In [None]:
# Bar per zeroed-out feature plus the 'Original' (baseline) bar, each
# annotated with its ratio.
titles = FEATURES_KEPT + ['Original']
fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(titles, feature_importance)
plt.xticks(rotation=90)
ax.set_title('Feature Importance of different discarded features')
ax.set_ylabel('Normalized Importance Ratio')
ax.set_xlabel('Discarded Feature')
for position in range(FEATURES_DIM+1):
    ratio = feature_importance[position]
    ax.text(position, ratio, f'{ratio:.2f}', horizontalalignment='center', verticalalignment='bottom', rotation=90, fontsize=12)
plt.show()

## Timing Importance

In [None]:
# Occlusion test on one random sample: for every time step, overwrite a
# block_len-wide window around it and record how much the cross-entropy loss
# rises. The window with the highest loss is highlighted in the plots.
block_len = 10
half_block = block_len // 2

index = np.random.choice(len(df))
data_id = df['data_id'].iloc[index]
data = df['data'].iloc[index]
label = df['label'].iloc[index]

# Build all DATA_BUF_LEN occluded copies up front so the model is called once
# on a batch instead of once per time step.
masked_batch = np.repeat(data[np.newaxis], DATA_BUF_LEN, axis=0)
for i in range(DATA_BUF_LEN):
    start_index = max(0, i - half_block)
    end_index = min(DATA_BUF_LEN, i + half_block)
    # Fill the window with the row just before it (row 0 when the window
    # starts at the beginning of the sample).
    masked_batch[i, start_index:end_index, :] = data[max(0, start_index - 1), :]

preds = model.predict(masked_batch, verbose=0)
# Clip before the log: a predicted probability of exactly 0 would otherwise
# give -inf, and 0 * -inf = NaN for the non-target classes.
crossentropy_losses = -np.sum(label * np.log(np.clip(preds, 1e-7, 1.0)), axis=1)

# label the highest loss
max_loss_index = int(np.argmax(crossentropy_losses))
start_index = max(0, max_loss_index - half_block)
end_index = min(DATA_BUF_LEN, max_loss_index + half_block)

# plot the data
fig, axs = plt.subplots(3, 3, figsize=(15, 10), sharex=True, sharey=True,)
acc_axes = axs[:, :3].ravel()
acc_titles = [f'Imu{i}_acc_{xyz}' for i in range(3) for xyz in ['x', 'y', 'z']]

acc_line = loss_line = None
for ax, a_data, title in zip(acc_axes, data.T, acc_titles):
    (acc_line,) = ax.plot(a_data, label='acceleration')
    ax.set_title(title)
    ax.set_ylim([-1.5 * 9.8, 1.5 * 9.8])
    ax.axvspan(start_index, end_index, alpha=0.5, color='red')

    # The loss curve gets its own y-axis on the right of each subplot.
    ax2 = ax.twinx()
    (loss_line,) = ax2.plot(crossentropy_losses, color='green', linewidth=0.7, linestyle='--', label='crossentropy loss')

# One legend entry per kind of curve, taken from the drawn lines themselves
# so colour, width and style always match the plot.
if acc_line is not None:
    fig.legend([acc_line, loss_line], [acc_line.get_label(), loss_line.get_label()])
fig.suptitle(f'data_id: {data_id}\nlabeled: {GESTURES[np.argmax(label)]}')
plt.show()

# Save model

In [None]:
# File-name parts for a saved model: an unpadded "year_month_day-hour_minute"
# time stamp and the validation accuracy (scores[1]) with two decimals.
t = datetime.datetime.now()
t_str = "{}_{}_{}-{}_{}".format(t.year, t.month, t.day, t.hour, t.minute)
acc_str = format(scores[1], ".2f")
# Saving is disabled; uncomment one of the lines below to write the weights
# or the full model.
# model.save_weights(f'/home/ubuntu/FYP-ROS/weights/model_lstm_weights-{t_str}-acc{acc_str}.h5')
# model.save(f'/home/ubuntu/FYP-ROS/weights/model_lstm-{t_str}-acc{acc_str}.h5')