In [2]:
import tensorflow as tf
import numpy as np
import pickle


# Import squat example input

In [118]:
filename = "/home/garbade/libs/aisc/action-recognition/demos/skeletons_as_numpy.npy"
filename_neg = "/home/garbade/libs/aisc/action-recognition/demos/skeletons_as_numpy_negative.npy"

In [121]:
data = np.load(filename)
data_neg = np.load(filename_neg)

In [122]:
print(data.shape)
print(data_neg.shape)

(1030, 17, 3)
(33874, 17, 3)


## Add neck joint

In [126]:
native_keypoint_names: [
            "Nose",
            "RightShoulder",
            "RightElbow",
            "RightWrist",
            "LeftShoulder",
            "LeftElbow",
            "LeftWrist",
            "RightHip",
            "RightKnee",
            "RightAnkle",
            "LeftHip",
            "LeftKnee",
            "LeftAnkle",
            "RightEye",
            "LeftEye",
            "RightEar",
            "LeftEar"]

neck keypoint is missing -> should come second after "Nose"

In [135]:
def add_neck_to_keypoints(skeletons):
    new_skeletons = []
    for skeleton_frame in skeletons:
        nose = skeleton_frame[:1,:]
        neck = (skeleton_frame[1,:] + skeleton_frame[4,:]) / 2.0
        rest = skeleton_frame[1:,:]
        pose_concat = np.concatenate((nose, np.expand_dims(neck, 0), rest), axis=0)
        new_skeletons.append(pose_concat)
    return np.array(new_skeletons)


In [35]:
nose.shape

(1, 2)

In [36]:
neck.shape

(2,)

In [37]:
rest.shape

(16, 2)

In [136]:
skeletons_corr = add_neck_to_keypoints(data)
skeletons_corr_neg = add_neck_to_keypoints(data_neg)

In [137]:
print(skeletons_corr.shape)
print(skeletons_corr_neg.shape)

(1030, 18, 3)
(33874, 18, 3)


# Create single NN input

In [141]:
single_nn_input = skeletons_corr[:79,:,:2]

In [142]:
print(single_nn_input.shape)

(79, 18, 2)


# Load tflite AR model

In [26]:
model_file = "/media/data_ssd/libs/mediapipe_v0.8.9/mediapipe/models/model_ar_v18s_01_mediapipe_tflite.tflite"
interpreter = tf.lite.Interpreter(model_path=model_file)
interpreter.allocate_tensors()

# Get input and output tensors.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

In [28]:
print(input_details)
print(output_details)

[{'name': 'input_2', 'index': 0, 'shape': array([ 1, 79, 18,  2], dtype=int32), 'shape_signature': array([ 1, 79, 18,  2], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]
[{'name': 'Identity', 'index': 192, 'shape': array([ 1, 23], dtype=int32), 'shape_signature': array([ 1, 23], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]


# Run inference

In [55]:
np.save("skeletons_with_neck_squat_79x18x2.npy", skeletons_corr)

In [43]:
input_data = single_nn_input

input_tensor = np.expand_dims(input_data.astype(np.float32), axis=0)
interpreter.set_tensor(input_details[0]['index'], input_tensor)
interpreter.invoke()
output_data = interpreter.get_tensor(output_details[0]['index'])

output_data = np.squeeze(output_data)




In [44]:
output_data

array([-1.3521575e+01, -2.4610739e+01, -2.1907761e+01, -1.8666994e+01,
       -1.3113013e-06, -2.0498335e+01, -2.2552057e+01, -2.0804609e+01,
       -2.5740786e+01, -2.5391951e+01, -2.0525833e+01, -2.2698870e+01,
       -2.4558897e+01, -2.0615149e+01, -2.3449255e+01, -1.8619581e+01,
       -1.9773645e+01, -2.2957602e+01, -1.8723085e+01, -1.7083897e+01,
       -2.3289736e+01, -2.1011726e+01, -2.7642921e+01], dtype=float32)

In [46]:
output_data.shape

(23,)

In [47]:
output_data.argmax(axis=0)

4

# Get action class corresponding to prediction

In [49]:
action_classes = [
        "background",
        "box",
        "clap",
        "jump",
        "squat",
        "wave",
        "rotate",
        "climb",
        "run_on_spot",
        "rope_jump",
        "fly-like-bird",
        "jumping-jack",
        "holahoop",
        "look-out",
        "eat-something",
        "jump-like-frog",
        "bang-with-hammer",
        "boxes-from-left-to-right",
        "cook",
        "horse-petting",
        "throw-stone",
        "pull-rope",
        "lift-bucket-from-floor"
    ]

In [51]:
action_id = output_data.argmax(axis=0)

print("predicted class: " + action_classes[action_id])

predicted class: squat


In [52]:
print("score: " + str(output_data[action_id]))

score: -1.3113013e-06


In [53]:
def softmax(x):
    """Compute softmax values for each sets of scores in x."""
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0) # only difference

In [54]:
softmax(output_data)

array([1.34169704e-06, 2.04970936e-11, 3.05900888e-10, 7.81677745e-09,
       9.99998689e-01, 1.25223698e-09, 1.60607652e-10, 9.21877519e-10,
       6.62093167e-12, 9.38461028e-12, 1.21827193e-09, 1.38677625e-10,
       2.15877212e-11, 1.11417908e-09, 6.54814397e-11, 8.19631918e-09,
       2.58472999e-09, 1.07063296e-10, 7.39039363e-09, 3.80678387e-08,
       7.68062350e-11, 7.49416751e-10, 9.88170808e-13], dtype=float32)

In [58]:
print("softmax score: " + str(softmax(output_data)[action_id]))

softmax score: 0.9999987


# Write example input to C++ readable file

In [60]:
import cv2 as cv

In [73]:
input_data_2D = input_data.reshape((79,-1))

In [69]:
input_data[0]

array([[103.84735 , 151.06212 ],
       [ 97.68504 , 168.07446 ],
       [ 81.665085, 168.16046 ],
       [ 68.523674, 188.66692 ],
       [ 74.398506, 201.2924  ],
       [113.704994, 167.98846 ],
       [123.47378 , 186.26178 ],
       [133.21793 , 200.10634 ],
       [ 84.70395 , 220.2473  ],
       [ 86.980034, 255.46948 ],
       [ 85.59968 , 294.20245 ],
       [105.12618 , 221.32732 ],
       [106.3571  , 257.34686 ],
       [104.31472 , 292.23346 ],
       [100.299774, 147.1401  ],
       [105.78834 , 147.37265 ],
       [ 94.929276, 148.57346 ],
       [105.97988 , 148.91405 ]], dtype=float32)

In [75]:
input_data_2D[0]

array([103.84735 , 151.06212 ,  97.68504 , 168.07446 ,  81.665085,
       168.16046 ,  68.523674, 188.66692 ,  74.398506, 201.2924  ,
       113.704994, 167.98846 , 123.47378 , 186.26178 , 133.21793 ,
       200.10634 ,  84.70395 , 220.2473  ,  86.980034, 255.46948 ,
        85.59968 , 294.20245 , 105.12618 , 221.32732 , 106.3571  ,
       257.34686 , 104.31472 , 292.23346 , 100.299774, 147.1401  ,
       105.78834 , 147.37265 ,  94.929276, 148.57346 , 105.97988 ,
       148.91405 ], dtype=float32)

In [97]:
np.savetxt("skeletons_with_neck_squat_79x36.mat", input_data_2D, delimiter=" ")

In [88]:
nrows, ncols, nchannels = input_data.shape

In [91]:
for row in range(nrows):
    for channel in range(nchannels):
        for col in range(ncols):
            input_data[row, col, channel]


In [102]:
def write_matrix3D_to_ascii(filename, matrix3D):
    
    nrows, ncols, nchannels = input_data.shape
    
    with open(filename, "w") as file:
        
        # write header [rows x cols x channels]
        nrows, ncols, nchannels = matrix3D.shape
        file.write(f"{nrows} {ncols} {nchannels}")
        file.write("\n")
        
        # write values 
        for row in range(nrows):
            for channel in range(nchannels):
                for col in range(ncols):
                    value = matrix3D[row, col, channel]
                    
                    file.write(str(value))
                    file.write(" ")
            file.write("\n")


In [110]:
def write_matrix2D_to_ascii(filename, matrix2D):
    
    nrows, ncols = matrix2D.shape
    
    with open(filename, "w") as file:
        
        # write header [rows x cols]
        nrows, ncols = matrix2D.shape
        file.write(f"{nrows} {ncols}")
        file.write("\n")
        
        # write values 
        for row in range(nrows):
            for col in range(ncols):
                value = matrix2D[row, col]

                file.write(str(value))
                file.write(" ")
            file.write("\n")


In [94]:
for channel in range(nchannels):
    for col in range(ncols):
        print(input_data[0, col, channel])

103.84735
97.68504
81.665085
68.523674
74.398506
113.704994
123.47378
133.21793
84.70395
86.980034
85.59968
105.12618
106.3571
104.31472
100.299774
105.78834
94.929276
105.97988
151.06212
168.07446
168.16046
188.66692
201.2924
167.98846
186.26178
200.10634
220.2473
255.46948
294.20245
221.32732
257.34686
292.23346
147.1401
147.37265
148.57346
148.91405


In [90]:
print(row)
print(channel)
print(col)

78
1
17


In [79]:
row.shape

(18, 2)

In [81]:
kpt.shape

(2,)

In [103]:
write_matrix3D_to_ascii("skeletons_with_neck_squat_79x18x2.mat", input_data)

In [111]:
write_matrix2D_to_ascii("skeletons_with_neck_squat_79x36.mat", input_data_2D)

In [112]:
!pwd

/media/data_ssd/libs/mediapipe_v0.8.9/notebooks


# Compute class for C++ output prediction

In [113]:
predictions_cpp = np.array([
-0.414527, 
-3.72665, 
-6.21815, 
-6.29307, 
-4.09375, 
-4.19713, 
-6.88958, 
-6.72822, 
-6.16086, 
-7.08454, 
-8.50217, 
-8.53692, 
-9.018, 
-4.46563, 
-6.14306, 
-2.60321, 
-3.2298, 
-2.87857, 
-7.73492, 
-7.26222, 
-6.36653, 
-5.81333, 
-2.46831, 
])

In [114]:
softmax(predictions_cpp)

array([6.60651899e-01, 2.40733176e-02, 1.99292632e-03, 1.84907234e-03,
       1.66765589e-02, 1.50386578e-02, 1.01834023e-03, 1.19665968e-03,
       2.11043497e-03, 8.37959150e-04, 2.03027076e-04, 1.96093062e-04,
       1.21208154e-04, 1.14974362e-02, 2.14833704e-03, 7.40354530e-02,
       3.95653634e-02, 5.62150252e-02, 4.37286835e-04, 7.01548021e-04,
       1.71810867e-03, 2.98746163e-03, 8.47278261e-02])

In [115]:
action_id = softmax(predictions_cpp).argmax(axis=0)

In [116]:
print("predicted class: " + action_classes[action_id])

predicted class: background


# Create train dataset

Params:

In [337]:
#frames_per_sample = 79
frames_per_sample = 10

In [338]:
print(skeletons_corr.shape)
print(skeletons_corr_neg.shape)

(1030, 18, 3)
(33874, 18, 3)


remove third dimension (score)

In [339]:
train_pos = skeletons_corr[:,:,:2]
train_neg = skeletons_corr_neg[:,:,:2]

combine last two dimensions

In [340]:
train_pos = train_pos.reshape(train_pos.shape[0], -1)
train_neg = train_neg.reshape(train_neg.shape[0], -1)

In [341]:
print(train_pos.shape)
print(train_neg.shape)

(1030, 36)
(33874, 36)


In [342]:
def create_multiple_samples(skeletons, frames_per_sample):
    pos_samples = []
    for i in range(skeletons.shape[0] - frames_per_sample):
        pos_samples.append(skeletons[i: i + frames_per_sample, :])

    return np.array(pos_samples)

In [343]:
data_train_pos = create_multiple_samples(train_pos, frames_per_sample=frames_per_sample)
data_train_neg = create_multiple_samples(train_neg, frames_per_sample=frames_per_sample)

In [344]:
print(data_train_pos.shape)
print(data_train_neg.shape)

(1020, 10, 36)
(33864, 10, 36)


In [345]:
x_train = np.concatenate((data_train_pos, data_train_neg), axis=0)

In [346]:
x_train.shape

(34884, 10, 36)

In [347]:
y_pos = np.zeros_like(np.squeeze(data_train_pos[:,0,0]))
y_neg = np.ones_like(np.squeeze(data_train_neg[:,0,0]))
y_train = np.concatenate((y_pos, y_neg), axis=0)

In [348]:
np.unique(y_train)

array([0., 1.], dtype=float32)

In [349]:
print(y_train.shape)

(34884,)


# Train simple AR model

simple dense model

In [350]:
# model = tf.keras.models.Sequential([
#     tf.keras.layers.Flatten(input_shape=(frames_per_sample, 36)),
#     tf.keras.layers.Dense(128, activation="relu"),
#     tf.keras.layers.Dropout(0.2),
#     tf.keras.layers.Dense(2)
# ])

In [351]:
# loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

conv 1d model

In [373]:
# model = tf.keras.models.Sequential([
#     tf.keras.layers.Conv1D(filters=32, kernel_size=3, padding="valid"),
#     tf.keras.layers.Dropout(0.2),
#     tf.keras.layers.Dense(2)
# ])

In [374]:
# model.compile(optimizer='adam',
#              loss=loss_fn,
#              metrics=['accuracy'])

conv 1d model - second try

In [375]:
from tensorflow.keras.layers import *
from tensorflow.keras.models import Sequential

In [376]:
n_timesteps, n_features = (frames_per_sample, 36)
n_outputs = 2

In [378]:
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_timesteps,n_features)))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(Dropout(0.5))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dense(n_outputs, activation='sigmoid'))
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [379]:
np.expand_dims(y_train, axis=1).shape

(34884, 1)

In [380]:
model.summary()

Model: "sequential_41"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_41 (Conv1D)           (None, 8, 64)             6976      
_________________________________________________________________
conv1d_42 (Conv1D)           (None, 6, 64)             12352     
_________________________________________________________________
dropout_26 (Dropout)         (None, 6, 64)             0         
_________________________________________________________________
max_pooling1d_14 (MaxPooling (None, 3, 64)             0         
_________________________________________________________________
flatten_22 (Flatten)         (None, 192)               0         
_________________________________________________________________
dense_47 (Dense)             (None, 100)               19300     
_________________________________________________________________
dense_48 (Dense)             (None, 2)               

In [395]:
class_weight = {0: 300.,
                1: 1.}

In [396]:
model.fit(x_train, y_train, batch_size = 30, epochs=15, class_weight=class_weight)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x7faadbc0a8b0>

# Test trained model

In [401]:
result = model(x_train[100:5])
result

<tf.Tensor: shape=(0, 2), dtype=float32, numpy=array([], shape=(0, 2), dtype=float32)>

In [398]:
result = model(x_train[-5:])
result

<tf.Tensor: shape=(5, 2), dtype=float32, numpy=
array([[6.1063015e-17, 1.0000000e+00],
       [6.1063015e-17, 1.0000000e+00],
       [6.1063015e-17, 1.0000000e+00],
       [6.1063015e-17, 1.0000000e+00],
       [6.1063022e-17, 1.0000000e+00]], dtype=float32)>

add softmax to model

In [383]:
probability_model = tf.keras.Sequential([
    model,
    tf.keras.layers.Softmax()
])

In [384]:
result = probability_model(x_train[-5:])
result

<tf.Tensor: shape=(5, 2), dtype=float32, numpy=
array([[0.30158493, 0.6984151 ],
       [0.30158493, 0.6984151 ],
       [0.30158493, 0.6984151 ],
       [0.30158493, 0.6984151 ],
       [0.30158493, 0.6984151 ]], dtype=float32)>

In [385]:
y_train[-5:]

array([1., 1., 1., 1., 1.], dtype=float32)

In [386]:
result = probability_model(x_train[:5])
result

<tf.Tensor: shape=(5, 2), dtype=float32, numpy=
array([[0.49616787, 0.5038321 ],
       [0.49616787, 0.5038321 ],
       [0.49616787, 0.5038321 ],
       [0.49616787, 0.5038321 ],
       [0.4961679 , 0.5038321 ]], dtype=float32)>

In [372]:
y_train[:5]

array([0., 0., 0., 0., 0.], dtype=float32)