In [2]:
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow.keras as keras

import cv2
import numpy as np

In [3]:
# Notebook-wide settings, read by the label-loading and frame-extraction cells.

# After 'OTH' rows are dropped, only every `frame_skip`-th labelled frame is kept.
frame_skip = 8

# Numeric ids used to build data/labels/video<N>.txt and data/videos/video<N>.mp4.
video_ids = [2] # range(1, 6)

In [7]:
# Load the per-frame annotations for every selected video.
#
# Each non-empty line of data/labels/video<N>.txt is split on whitespace.
# Column 0 is used later as the frame index and column 1 as the class label.
# The result is `labels`: a list holding one 2-D string array per video, in the
# same order as `video_ids`.
labels = []
for video_id in video_ids:
    with open(f'data/labels/video{video_id}.txt') as f:
        # Skip blank lines so every row has the same number of columns.
        lines = np.array([line.split() for line in f if line.strip()])

    # Remove OTH label
    lines = lines[lines[:, 1] != 'OTH']

    # Prune frames
    lines = lines[::frame_skip]

    labels.append(lines)

# `labels` deliberately stays a plain list: videos generally have a different
# number of labelled frames, and np.array() on such a ragged list raises on
# recent NumPy versions. Iterating the list yields the same per-video arrays.

# Show frequency of labels
# unique, counts = np.unique(np.concatenate(labels)[:, 1], return_counts=True)
# import matplotlib.pyplot as plt
# plt.bar(unique, counts)
# plt.show()

In [8]:
# Fetch the MoveNet single-pose "lightning" (v4) module from TensorFlow Hub.
# Note: the name `model` is rebound to the Keras classifier further down;
# only `movenet` is used for pose inference.
model = hub.load(
    "https://tfhub.dev/google/movenet/singlepose/lightning/4"
)

# The 'serving_default' signature is the callable invoked on each frame.
movenet = model.signatures['serving_default']

In [9]:
# Run MoveNet on every labelled frame and collect one feature vector per frame.
#
# Inputs : `video_ids`, `labels` (one 2-D array of label rows per video), `movenet`.
# Outputs: `data`   - array with one flattened keypoint vector per processed frame
#          `labels` - REPLACED by a single 2-D array of the label rows that were
#                     actually processed, row-aligned with `data`.
# Because `labels` is replaced, re-run the label-loading cell before re-running
# this one.
data = []
kept_labels = []
for video_id, video_labels in zip(video_ids, labels):
    video_path = f"data/videos/video{video_id}.mp4"
    video = cv2.VideoCapture(video_path)
    if not video.isOpened():
        raise IOError(f"Could not open {video_path}")

    try:
        frame_numbers = video_labels[:, 0].astype(int)
        for i, (frame_num, label_row) in enumerate(zip(frame_numbers, video_labels)):
            print(f'Loading video {video_id}... ({i}/{video_labels.shape[0]})', end='\r')
            video.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
            ret, frame = video.read()
            if not ret:
                # Frame could not be decoded: drop it together with its label
                # so that `data` and `labels` stay aligned.
                continue

            # OpenCV decodes frames as BGR, whereas MoveNet expects RGB input.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Add a batch axis, letterbox to 192x192 and cast to int32 for the model.
            input_img = tf.expand_dims(frame, axis=0)
            input_img = tf.image.resize_with_pad(input_img, 192, 192)
            input_img = tf.cast(input_img, dtype=tf.int32)

            # Detection section
            # The model output is flattened to a 1-D vector (presumably the
            # 17*3 values that the classifier's input shape assumes).
            keypoints_with_scores = movenet(input_img)['output_0'].numpy().flatten()

            data.append(keypoints_with_scores)
            kept_labels.append(label_row)
    finally:
        # Always free the capture handle, even if inference fails mid-video.
        video.release()

data = np.array(data)
# One label row per feature vector, across all videos (not only the last one).
labels = np.array(kept_labels)

Loading video 2... (0/2491)

2023-12-14 01:33:17.011775: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


Loading video 2... (2490/2491)

In [10]:
# One-hot encode labels
# Column 1 of `labels` holds the class names. They are mapped to integer ids
# by the LabelEncoder and then expanded to one-hot rows.
# `labels` is overwritten, so this cell must not be re-run on its own output.
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
labels = keras.utils.to_categorical(
    label_encoder.fit_transform(labels[:, 1])
)

In [11]:
# Use the Keras bundled with TensorFlow, consistent with the `tf.keras` calls
# elsewhere in this notebook, rather than the standalone `keras` package.
from tensorflow.keras.layers import LSTM, Dense, LayerNormalization

# Classifier over one frame's keypoint vector.
# The LSTM input shape (17*3, 1) means the 51 values are consumed as a
# sequence of 51 steps with a single feature each.
# NOTE(review): this runs the LSTM across keypoint values, not across time -
# confirm that this is the intended design.
model = tf.keras.models.Sequential([
    LSTM(128, input_shape=(17*3, 1)),
    LayerNormalization(),
    # Softmax (not ReLU) so the outputs form a probability distribution, which
    # is what the 'categorical_crossentropy' loss used for training expects.
    # The 10 units must equal the number of one-hot label columns.
    Dense(10, activation='softmax'),
])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 128)               66560     
                                                                 
 layer_normalization (Layer  (None, 128)               256       
 Normalization)                                                  
                                                                 
 dense (Dense)               (None, 10)                1290      
                                                                 
Total params: 68106 (266.04 KB)
Trainable params: 68106 (266.04 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [13]:
import datetime
log_dir = "logs/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

model.compile(optimizer='adam',
                loss='categorical_crossentropy',
                metrics=['accuracy'])

model.fit(data, labels, epochs=10, batch_size=32, validation_split=0.2, callbacks=[tensorboard_callback])

!tensorboard --logdir logs

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Serving TensorBoard on localhost; to expose to the network, use a proxy or pass --bind_all
TensorBoard 2.15.1 at http://localhost:6006/ (Press CTRL+C to quit)
^C
