In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import json
import cv2
import sklearn
from tensorflow.keras import regularizers
from tensorflow.keras import layers
import os

In [2]:
# Maps the raw gesture id stored in the annotation files to a contiguous
# class index (0..N-1) suitable for sparse categorical cross-entropy.
LABEL_DICT = {k:i for i,k in enumerate([21, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33])}

# Number of gesture classes. Derived from LABEL_DICT so the classifier's
# output width can never drift from the label mapping.
GESTURE_TYPES = len(LABEL_DICT)

# Pairs of keypoint indices (start, end) that are joined by a line when the
# keypoints are drawn and that define the vectors used for the angle features.
# NOTE(review): the grouping looks like a 21-landmark hand skeleton
# (index 0 = wrist, then four-point fingers) - confirm against the data source.
CONNECTION_LABELS = [
    (0, 1), (1, 2), (2, 3), (3, 4),
    (5, 6), (6, 7), (7, 8),
    (9, 10), (10, 11), (11, 12),
    (13, 14), (14, 15), (15, 16),
    (17, 18), (18, 19), (19, 20),
    (0, 5), (5, 9), (9, 13), (13, 17), (0, 17)
]

In [3]:
def load_keypoints(data_path='gesture_recognition/Fall 2020/data_30', label_dict=None):
    """Collect every non-empty keypoint frame and its class index from disk.

    The directory layout walked is ``data_path/<Subject*|subject*>/<Scene*|scene*>/<file>``.
    Each file is parsed as JSON and is expected to hold a list of gestures,
    where a gesture is a mapping with a ``'keypoints'`` list (one entry per
    frame) and a ``'label'`` (raw gesture id).

    Args:
        data_path: Root directory that contains the subject folders.
        label_dict: Mapping from raw gesture id to class index. Defaults to
            the module-level ``LABEL_DICT``.

    Returns:
        ``(keypoints, labels)`` as NumPy arrays with one row per kept frame.

    Raises:
        KeyError: If a gesture with at least one non-empty frame carries a
            label that is missing from ``label_dict``.
    """
    if label_dict is None:
        label_dict = LABEL_DICT

    keypoints = []
    labels = []
    # Directory listings are sorted so the sample order (and therefore any
    # order-dependent split done later) is reproducible across machines.
    for subject_name in sorted(os.listdir(data_path)):
        if not subject_name.startswith(("Subject", "subject")):
            continue
        subject_dir = os.path.join(data_path, subject_name)
        if not os.path.isdir(subject_dir):
            continue
        for scene_name in sorted(os.listdir(subject_dir)):
            # The prefix test must look at the scene name; testing the subject
            # name here would silently drop every lowercase "scene*" folder.
            if not scene_name.startswith(("Scene", "scene")):
                continue
            scene_dir = os.path.join(subject_dir, scene_name)
            if not os.path.isdir(scene_dir):
                continue
            # Close the scandir iterator promptly instead of leaking it.
            with os.scandir(scene_dir) as entries:
                group_files = sorted(
                    (entry.path for entry in entries if entry.is_file())
                )
            for group_path in group_files:
                with open(group_path, 'r') as f:
                    group_data = json.load(f)
                for gesture in group_data:
                    # Frames without any keypoints are stored as empty
                    # (falsy) entries and are skipped.
                    frames = [frame for frame in gesture['keypoints'] if frame]
                    if not frames:
                        continue
                    keypoints.extend(frames)
                    labels.extend([label_dict[gesture['label']]] * len(frames))
    return np.array(keypoints), np.array(labels)
# Load every non-empty frame from the default data directory and show the
# shapes of the resulting keypoint and label arrays.
keypoints, labels = load_keypoints()
print(keypoints.shape, labels.shape)

(98231, 21, 3) (98231,)


In [4]:
def generate_connection_angles(keypoints, keypoints_num=21, keypoints_dimensions=3, connection_labels=None):
    """Compute the angle between every unordered pair of skeleton connections.

    Each connection ``(start, end)`` is turned into the vector
    ``keypoints[..., end, :] - keypoints[..., start, :]``; the angle between
    two connections is the arccosine of their normalised dot product.

    Args:
        keypoints: Array of shape ``(..., K, D)`` holding K keypoints with D
            coordinates each. Any number of leading batch axes is accepted.
        keypoints_num: Kept for backward compatibility; not used. The number
            of pairs depends on the number of connections, not keypoints.
        keypoints_dimensions: Kept for backward compatibility; not used.
        connection_labels: Sequence of ``(start, end)`` keypoint index pairs.
            Defaults to the module-level ``CONNECTION_LABELS``.

    Returns:
        Array of shape ``(..., C * (C - 1) // 2)`` with angles in radians in
        ``[0, pi]``, where C is the number of connections. Pairs are ordered
        like ``np.triu_indices(C, k=1)``. Entries involving a zero-length
        connection are NaN.
    """
    if connection_labels is None:
        connection_labels = CONNECTION_LABELS

    keypoints = np.asarray(keypoints, dtype=float)
    starts = [start for start, _ in connection_labels]
    ends = [end for _, end in connection_labels]

    # (..., C, D): one vector per connection.
    vectors = keypoints[..., ends, :] - keypoints[..., starts, :]
    lengths = np.linalg.norm(vectors, axis=-1)

    # (..., C, C) Gram matrix of dot products; avoids materialising two
    # (..., C, C, D) temporaries.
    dots = vectors @ np.swapaxes(vectors, -1, -2)

    # Zero-length connections give 0/0 -> NaN on purpose; silence the warning.
    with np.errstate(divide='ignore', invalid='ignore'):
        cosines = dots / lengths[..., :, np.newaxis] / lengths[..., np.newaxis, :]

    rows, cols = np.triu_indices(len(connection_labels), k=1)
    # Rounding can push a cosine marginally outside [-1, 1], which would make
    # arccos return NaN for (anti-)parallel connections; clip first.
    cosines = np.clip(cosines[..., rows, cols], -1.0, 1.0)
    return np.arccos(cosines)
# Derive the pairwise connection-angle features for all loaded frames and
# show the shape of the result.
angles = generate_connection_angles(keypoints)
print(angles.shape)

(98231, 210)


In [5]:
def visualize_keypoints(keypoints):
    """Play back keypoint frames in an OpenCV window, one frame per iteration.

    Every frame is drawn on a black 480x640 canvas: each keypoint as a circle
    and each pair in ``CONNECTION_LABELS`` as a line. Points whose x
    coordinate is 0 are not drawn (presumably the placeholder for an
    undetected point - confirm), and a connection is drawn only when both
    of its endpoints are drawn.

    Playback stops early when ESC (key code 27) is pressed. The window is
    always closed when the function exits.

    Args:
        keypoints: Iterable of frames; each frame is an indexable sequence of
            ``(x, y, z)`` points. Only x and y are used for drawing.
    """
    window_open = False
    try:
        for points in keypoints:
            # 8-bit canvas: the colour tuples below are given on a 0-255 scale.
            img = np.zeros((480, 640, 3), dtype=np.uint8)
            for x, y, _ in points:
                if not x:
                    continue
                cv2.circle(img, (int(x), int(y)), 4, (255, 0, 0), 2)
            for start, end in CONNECTION_LABELS:
                x0, y0, _ = points[start]
                x1, y1, _ = points[end]
                # Skip the line if either endpoint was skipped above, otherwise
                # a stray segment would be drawn towards x == 0.
                if not x0 or not x1:
                    continue
                cv2.line(img, (int(x0), int(y0)), (int(x1), int(y1)), (0, 255, 0), 2)
            cv2.imshow("Key Points", img)
            window_open = True
            if cv2.waitKey(1) == 27:
                return
    finally:
        if window_open:
            cv2.destroyAllWindows()
            cv2.waitKey(1) # cannot close window on macOS without this line
# visualize_keypoints(keypoints)

In [6]:
def process_features(keypoints, angles):
    """Flatten, join and standardise the keypoint and angle features.

    Both inputs are reshaped to one row per sample (first axis) and
    concatenated column-wise. Every column is then z-scored with the column
    mean and pandas' default (sample) standard deviation. Any NaN left after
    that step - NaN inputs or constant columns, where the division is 0/0 -
    is replaced by 0.

    Args:
        keypoints: Array whose first axis indexes samples.
        angles: Array with the same number of samples as ``keypoints``.

    Returns:
        2-D NumPy array of shape ``(samples, keypoint_features + angle_features)``.
    """
    sample_count = keypoints.shape[0]
    flat_parts = [block.reshape(sample_count, -1) for block in (keypoints, angles)]
    table = pd.DataFrame(np.concatenate(flat_parts, axis=-1))
    standardized = table.sub(table.mean()).div(table.std()).fillna(0)
    return standardized.to_numpy().reshape(sample_count, -1)
# Build the standardised feature matrix that the models below are fitted on.
X = process_features(keypoints, angles)
# NOTE(review): the three commented-out lines below are disabled experiments
# (a Keras Normalization layer and an explicit train/validation split). The
# last one refers to `processed_keypoints`, which is not defined in the cells
# shown here - confirm before re-enabling.
#normalizer = tf.keras.layers.experimental.preprocessing.Normalization()
#normalizer.adapt(X)
# X_train, X_val, y_train, y_val = train_test_split(processed_keypoints, labels, test_size=0.2, random_state=0)
print(X.shape)

(98231, 273)


In [7]:
from sklearn.model_selection import train_test_split
# Baseline classifier: one linear layer with GESTURE_TYPES outputs followed by
# a softmax, i.e. multinomial logistic regression on the feature matrix X.
model_mlp = tf.keras.Sequential()
model_mlp.add(layers.Dense(GESTURE_TYPES))
model_mlp.add(layers.Activation('softmax'))

# Labels are integer class indices, hence the sparse cross-entropy loss.
model_mlp.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

# NOTE(review): per the Keras docs, validation_split holds out the trailing
# 20% of the rows before shuffling - confirm that is the intended split.
history = model_mlp.fit(X, labels, validation_split=0.2, epochs=50)

Train on 78584 samples, validate on 19647 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [8]:
from sklearn.model_selection import train_test_split
# Two-layer perceptron: a 256-unit hidden layer followed by the
# GESTURE_TYPES-way output layer and a softmax.
# The hidden layer must come first and the output layer must have one unit
# per class; with the two Dense layers swapped the network would emit 256
# "classes" through an 11-unit bottleneck. The hidden layer uses ReLU because
# two stacked linear layers collapse into a single linear map.
model_mlp = tf.keras.Sequential([
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(GESTURE_TYPES, activation=None),
    tf.keras.layers.Activation('softmax')])
# Labels are integer class indices, hence the sparse cross-entropy loss.
model_mlp.compile(loss=tf.losses.SparseCategoricalCrossentropy(), optimizer=tf.optimizers.Adam(), metrics=['accuracy'])
history = model_mlp.fit(X, labels, epochs=30, validation_split=0.2)

Train on 78584 samples, validate on 19647 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
