In [None]:
%%capture
!!pip install -q git+https://github.com/keras-team/keras-nlp.git --upgrade

In [None]:
import pandas as pd
import numpy as np
import os
import pickle

import keras_nlp
import tensorflow as tf
from tensorflow.keras import regularizers
from keras.regularizers import l2, l1

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold

In [None]:
# Mount Google Drive into the Colab runtime: the dataset is read from, and the
# figures, weights and model are written to, paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Exercise handled by this run; it keys both lookup tables below.
EXERCISE = 'Es5'

# Number of frames every sequence of a given exercise is padded to.
max_length_mapping = dict(
    Es1=1515,
    Es2=1668,
    Es3=1518,
    Es4=1988,
    Es5=1022,
)

# Temporal split per exercise, stored as [num_windows, window_size].
# For every entry num_windows * window_size equals the padded length above.
temporal_windows = dict(
    Es1=[5, 303],
    Es2=[3, 556],
    Es3=[11, 138],
    Es4=[4, 497],
    Es5=[7, 146],
)

# Load and Prepare Data

In [None]:
# Index table of the dataset. Later cells read its 'exercise', '#frames',
# 'joint_positions' and 'clinical_score' columns; the stray 'Unnamed: 0'
# column is discarded right away.
df = (
    pd.read_csv(
        "/content/drive/MyDrive/rehab-ai-data/KiMoRe_final/KiMoRe_data_movenet.csv",
        index_col=False,
    )
    .drop(columns='Unnamed: 0')
)

In [None]:
# Keep only the rows of the selected exercise, then report the largest frame
# count among them.
df = df.loc[df['exercise'] == EXERCISE]
longest_recording = df["#frames"].unique().max()
print(f'Max frames: {longest_recording}')

In [None]:
def get_dataframe_cols():
  """Build the column names of a per-frame keypoint table.

  Each of the 17 keypoints contributes three consecutive names, in the order
  ``<keypoint>_y``, ``<keypoint>_x``, ``<keypoint>_confidence``.

  Returns:
    list[str]: 51 column names, grouped by keypoint in ascending keypoint index.
  """
  # Keypoint names in ascending index order (nose = 0 ... right_ankle = 16).
  keypoint_names = (
    'nose',
    'left_eye', 'right_eye',
    'left_ear', 'right_ear',
    'left_shoulder', 'right_shoulder',
    'left_elbow', 'right_elbow',
    'left_wrist', 'right_wrist',
    'left_hip', 'right_hip',
    'left_knee', 'right_knee',
    'left_ankle', 'right_ankle',
  )
  suffixes = ('y', 'x', 'confidence')
  return [f"{name}_{suffix}" for name in keypoint_names for suffix in suffixes]

In [None]:
all_cols = get_dataframe_cols()
# The first 15 names are the y/x/confidence triplets of the first five
# keypoints (nose, both eyes, both ears).
face_cols = all_cols[:15]
# Columns removed from every recording before it is used as model input.
cols_drop = face_cols
print(f"Dropping {len(cols_drop)} columns.")

In [None]:
# Show the padded sequence length configured for the selected exercise.
print(f"Maximum video length: {max_length_mapping[EXERCISE]}")

def prepare_data(df, exercise_video_max_len, data_type, drop_cols=None):
  """Load every recording listed in ``df`` and zero-pad it to a common length.

  Args:
    df: DataFrame with one row per recording. The columns read are
      ``joint_positions`` (path to a per-frame CSV), ``clinical_score`` and
      ``#frames``. Rows whose path is missing are skipped.
    exercise_video_max_len: Number of frames each sequence is padded to.
    data_type: Label used only in the printed shape summary.
    drop_cols: Columns removed from each per-frame CSV. When omitted, the
      notebook-level ``cols_drop`` list is used.

  Returns:
    ``((data, padding_masks), labels)``. ``data`` stacks the zero-padded
    recordings (assuming each CSV holds ``#frames`` rows, its shape is
    ``(n_recordings, exercise_video_max_len, n_features)``). ``padding_masks``
    has shape ``(n_recordings, exercise_video_max_len)`` with 1.0 on padded
    frames and 0.0 on real frames. ``labels`` holds the clinical scores.

  Raises:
    ValueError: If a recording has more frames than ``exercise_video_max_len``.
  """
  if drop_cols is None:
    drop_cols = cols_drop

  data, labels, padding_masks = [], [], []

  for _, row in df.iterrows():
    joint_positions_path = row['joint_positions']
    # pd.isna recognises any missing-value marker. An identity test against
    # np.NAN only matches that exact object, and the alias is gone in NumPy 2.
    if pd.isna(joint_positions_path):
      continue
    clinical_score = row['clinical_score']
    video_length = int(row['#frames'])

    padding_length = exercise_video_max_len - video_length
    if padding_length < 0:
      raise ValueError(
          f"Recording {joint_positions_path} has {video_length} frames, "
          f"more than the maximum of {exercise_video_max_len}.")

    joint_positions_data = pd.read_csv(joint_positions_path)
    joint_positions_data = joint_positions_data.drop(drop_cols, axis=1).to_numpy()

    # Index from the front: a [-padding_length:] slice selects the whole array
    # when padding_length is 0 and would flag a full-length recording as
    # nothing but padding.
    # NOTE(review): 1 marks padding here, whereas keras_nlp's TransformerEncoder
    # appears to treat 1 in `padding_mask` as a real token - confirm that the
    # consumer of this mask accounts for the convention.
    padding_mask = np.zeros(exercise_video_max_len)
    padding_mask[video_length:] = 1

    joint_positions_data_padded = np.pad(
        joint_positions_data, ((0, padding_length), (0, 0)),
        mode='constant', constant_values=0)

    data.append(joint_positions_data_padded)
    labels.append(clinical_score)
    padding_masks.append(padding_mask)

  data = np.array(data)
  labels = np.array(labels)
  padding_masks = np.array(padding_masks)

  # NaNs become 0 in both features and labels.
  # NOTE(review): a missing clinical score therefore turns into a target of 0 -
  # confirm that this is intended rather than dropping such recordings.
  data = np.nan_to_num(data)
  labels = np.nan_to_num(labels)

  print(f"{data_type} Data Shape:", data.shape)
  print(f"{data_type} Labels Shape:", labels.shape)
  print(f"{data_type} Padding Masks Shape:", padding_masks.shape)

  return (data, padding_masks), labels

In [None]:
# Load every recording of the selected exercise, padded to its configured length.
(all_data, all_padding), all_labels = prepare_data(df, max_length_mapping[EXERCISE], "All Data")

# Build Model

In [None]:
# Size of the last axis of a loaded recording, i.e. the number of feature
# columns kept per frame (not referenced again in the visible notebook).
NUM_JOINTS = all_data[0].shape[1]
# Temporal split of each padded sequence: NUM_WINDOWS chunks of WINDOW_SIZE frames.
NUM_WINDOWS = temporal_windows[EXERCISE][0]
WINDOW_SIZE = temporal_windows[EXERCISE][1]
# Attention heads of the transformer encoder.
NUM_HEADS = 4
# Width of the positional embedding and of the encoder's feed-forward layer.
D_MODEL = 10
# Not referenced in the visible notebook.
DENSE_UNITS = 64
# Learning rate passed to every Adam optimizer created below.
LEARNING_RATE = 0.001

In [None]:
# Symbolic inputs: the padded keypoint sequences and their per-frame padding
# masks (prepare_data writes 1 on padded frames and 0 on real ones).
inputs = tf.keras.Input(shape=(all_data[0].shape[0], all_data[0].shape[1]), name='orignal_data')

# `shape` has to be a tuple, hence the trailing comma for this 1-D mask input.
masks = tf.keras.Input(shape=(all_padding.shape[1],), name='padding_masks')

# Cut the time axis into NUM_WINDOWS equally sized chunks.
windows = tf.split(inputs, NUM_WINDOWS, axis=1)
print("Windows:")
for window in windows:
  print(window.shape)

windows_masks = tf.split(masks, NUM_WINDOWS, axis=1)
print("Windows Masks:")
for mask in windows_masks:
  print(mask.shape)

# Frame-wise projection shared by all windows: features -> 18 -> D_MODEL.
# The last width must equal D_MODEL so the positional embeddings can be added.
embedding_layer = tf.keras.layers.Dense(18, activation='relu')
embedding_layer3 = tf.keras.layers.Dense(D_MODEL, activation='relu')

embeddings = []
for window in windows:
    embedding = embedding_layer(window)
    embedding = embedding_layer3(embedding)
    embeddings.append(embedding)

print("Embeddings:")
for embd in embeddings:
  print(embd.shape)

# One position vector per frame index inside a window, reused for every window.
# NOTE(review): the table is looked up on a concrete tf.range tensor outside
# the functional graph, so its weights may not be tracked or trained by `model`
# - confirm; keras_nlp.layers.PositionEmbedding is a possible replacement.
positional_embedding_layer = tf.keras.layers.Embedding(input_dim=WINDOW_SIZE, output_dim=D_MODEL)
positional_embeddings = []
for i in range(NUM_WINDOWS):
    positional_embedding = positional_embedding_layer(tf.range(WINDOW_SIZE))
    positional_embeddings.append(positional_embedding)

print("Positional Embeddings:")
for pos_embd in positional_embeddings:
  print(pos_embd.shape)

embeddings_all = [embedding + positional_embedding for embedding, positional_embedding in zip(embeddings, positional_embeddings)]
print("All Embeddings:")
for embd in embeddings_all:
  print(embd.shape)

# A single encoder layer is shared by all windows.
# Fix 1: feed it `embeddings_all`, so the positional information computed above
#        is actually used (previously the plain `embeddings` were passed).
# Fix 2: TransformerEncoder's `padding_mask` is nonzero for real tokens and 0
#        for padding, the opposite of the masks built here, so invert it first.
transformer_encoder_layer = keras_nlp.layers.TransformerEncoder(intermediate_dim=D_MODEL, num_heads=NUM_HEADS)
encoded = [
    transformer_encoder_layer(window_embd, padding_mask=1.0 - window_mask)
    for window_embd, window_mask in zip(embeddings_all, windows_masks)
]
print("Encodings:")
for enc in encoded:
  print(enc.shape)

# Stitch the windows back together along the time axis and flatten per sample.
concat_output = tf.concat(encoded, axis=1)
print(f"Concat: {concat_output.shape}")

flatten_output = tf.keras.layers.Flatten()(concat_output)
print(f"Flatten: {flatten_output.shape}")

# L2-regularised regression head that narrows down to a single score.
dense_output = tf.keras.layers.Dense(4970, activation='relu', kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01))(flatten_output)
dense_output = tf.keras.layers.Dense(621, activation='relu', kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01))(dense_output)
dense_output = tf.keras.layers.Dense(77, activation='relu', kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01))(dense_output)
print(f"Final Dense: {dense_output.shape}")
output = tf.keras.layers.Dense(1)(dense_output)

model = tf.keras.Model(inputs=[inputs, masks],
                       outputs=output,
                       name='transformer_model')

In [None]:
# Render the layer graph annotated with tensor shapes, dtypes and activations.
tf.keras.utils.plot_model(model, show_shapes=True, show_dtype=True, show_layer_activations=True)

# Plot Functions

In [None]:
def plot_predictions(train_preds, train_labels, test_preds, test_labels, fold):
  """Plot predictions against clinical scores for one fold and save the figure.

  Two stacked panels are drawn, the training set on top and the testing set
  below. The figure is written to the Drive image folder as
  ``<EXERCISE>_fold<fold>_pred_plot.png`` and then shown.

  Args:
    train_preds: Predictions for the training samples.
    train_labels: Clinical scores of the training samples.
    test_preds: Predictions for the held-out samples.
    test_labels: Clinical scores of the held-out samples.
    fold: Fold identifier used in the figure title and the file name.
  """
  def draw_panel(row, preds, scores, heading):
    # One panel: red squares for predictions, green circles for true scores.
    plt.subplot(2, 1, row)
    plt.plot(preds, 's', color='red', label='Prediction', linestyle='None', alpha=0.5, markersize=6)
    plt.plot(scores, 'o', color='green', label='Clinical Score', alpha=0.4, markersize=6)
    plt.title(heading, fontsize=18)
    plt.xlabel('Sequence Number', fontsize=16)
    plt.ylabel('Clinical Score Scale', fontsize=16)
    plt.legend(loc=3, prop={'size': 14})

  plt.figure(figsize=(8, 8))
  plt.suptitle(f'{EXERCISE} - Fold {fold}', fontsize=20)
  draw_panel(1, train_preds, train_labels, 'Training Set')
  draw_panel(2, test_preds, test_labels, 'Testing Set')

  plt.tight_layout()
  plt.savefig(
      f'/content/drive/MyDrive/rehab-ai-data/saved_models_images/{EXERCISE}_fold{fold}_pred_plot.png',
      dpi=300)
  plt.show()

In [None]:
def plot_history(history, ptype, fold=None):
  """Plot one metric of a Keras training history and save the figure.

  Args:
    history: Object whose ``history`` dict holds the per-epoch values.
    ptype: Key of the metric to draw, e.g. ``'loss'`` or ``'mae'``.
    fold: When truthy, the ``val_<ptype>`` curve is drawn as well and the fold
      appears in the title and file name; otherwise only the training curve
      is plotted.
  """
  metric_label = ptype.capitalize()
  train_curve = history.history[ptype]
  x_axis = range(len(train_curve))
  plt.plot(x_axis, train_curve, label=f'Training {metric_label}')

  fig_title = f'{EXERCISE}_{ptype}_plot'
  if fold:
    val_curve = history.history[f'val_{ptype}']
    plt.plot(x_axis, val_curve, label=f'Validation {metric_label}')
    plt.title(f'Training and Validation {metric_label}')
    plt.suptitle(f'{EXERCISE} - Fold {fold}')
    fig_title += f'_fold{fold}'
  else:
    plt.title(f'{EXERCISE} {metric_label}')

  plt.xlabel('Epoch')
  plt.ylabel(metric_label)
  plt.legend()

  plt.savefig(f'/content/drive/MyDrive/rehab-ai-data/saved_models_images/{fig_title}.png', dpi=300)
  plt.show()

# Cross Validation

In [None]:
class PrintEpochs(tf.keras.callbacks.Callback):
    """Print the logged metrics at a handful of milestone epochs.

    Args:
        epochs_to_print: 1-based epoch numbers to report. The default reports
            the first epoch and every 25th one up to 100.
    """

    def __init__(self, epochs_to_print=(1, 25, 50, 75, 100)):
        super().__init__()
        self.epochs_to_print = frozenset(epochs_to_print)

    def on_epoch_end(self, epoch, logs=None):
        # Keras passes a zero-based index, so the last of 100 epochs arrives as
        # 99. Compare and report the 1-based number, otherwise epoch 100 is
        # never printed and every label is off by one.
        epoch_number = epoch + 1
        if epoch_number not in self.epochs_to_print:
            return
        # `logs` may be None; treat that as "nothing to report".
        values = ", ".join(f"{key}: {value:.4f}" for key, value in (logs or {}).items())
        print(f"Epoch {epoch_number}: {values}")

def cross_validate(model, data, labels, padding_masks, k=5, epochs=100):
  """Run shuffled k-fold cross-validation and report the out-of-fold MAE.

  Every fold starts from the weights ``model`` has when this function is
  called. Without that reset the same model keeps training from fold to fold,
  so later folds would be validated on samples the model was already fitted
  on. The starting weights are restored again before returning.

  For each fold the function prints the fold MAE, plots the loss and MAE
  histories and plots predictions against labels (each plot is also saved by
  the plotting helpers).

  Args:
    model: Keras model taking ``[data, padding_masks]`` as input.
    data: Array of input sequences, indexed by sample on the first axis.
    labels: Array of target scores, one per sample.
    padding_masks: Array of padding masks, one per sample.
    k: Number of folds.
    epochs: Training epochs per fold.
  """
  y_true, y_pred = [], []
  # Snapshot taken once so each fold can start from the same state.
  initial_weights = model.get_weights()
  kfold = KFold(n_splits=k, random_state=0, shuffle=True)
  print(f"Cross Validating Model Using {k} Folds...")
  for fold, (train_idx, val_idx) in enumerate(kfold.split(data), start=1):
    print(f"---------------- Fold {fold} ----------------")
    X_train, X_val = data[train_idx], data[val_idx]
    padding_train, padding_val = padding_masks[train_idx], padding_masks[val_idx]
    y_train, y_val = labels[train_idx], labels[val_idx]

    # Discard what previous folds learned; compiling again also gives the fold
    # a fresh optimizer.
    model.set_weights(initial_weights)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE), loss='mse', metrics=['mae'])

    history = model.fit([X_train, padding_train], y_train, epochs=epochs, validation_data=([X_val, padding_val], y_val), verbose=0, callbacks=[PrintEpochs()])

    train_pred = model.predict([X_train, padding_train])
    val_pred = model.predict([X_val, padding_val])

    y_true.extend(y_val)
    # Flatten so the pooled predictions are scalars like the pooled labels.
    y_pred.extend(np.ravel(val_pred))

    fold_mae = mean_absolute_error(y_val, val_pred)
    print(f'- MAE of fold {fold} = {fold_mae}')

    plot_history(history, 'loss', fold)
    plot_history(history, 'mae', fold)
    plot_predictions(train_pred, y_train, val_pred, y_val, fold)

  # Leave the model as it was handed in, so later training does not silently
  # continue from the last fold.
  model.set_weights(initial_weights)

  mae = mean_absolute_error(y_true, y_pred)
  print(f'OOF MAE = {mae}')

In [None]:
# Cross-validate on all recordings of the selected exercise, using the
# function's default number of folds.
cross_validate(model, all_data, all_labels, all_padding)

# Final Model Training

In [None]:
# Prefix every weight's internal handle name with its position in
# model.weights, which makes all names distinct.
# NOTE(review): presumably a workaround for duplicate weight names when the
# weights are written to HDF5 below - confirm; `_handle_name` is a private
# attribute and may not exist in other TensorFlow versions.
for i in range(len(model.weights)):
    model.weights[i]._handle_name = str(i) + '__' + model.weights[i].name

In [None]:
# Compile with a new Adam optimizer, MSE loss and the MAE metric for the
# training run on the full dataset.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE), loss='mse', metrics=['mae'])

In [None]:
# Checkpoint callback that writes only the weights to a per-exercise HDF5 file
# on Drive.
# NOTE(review): with save_best_only=False the `monitor` value should not
# influence what is saved, and the fit call that uses this callback passes no
# validation data, so 'val_mae' would not be logged anyway - confirm the
# intended checkpointing policy.
checkpoint_filepath = f'/content/drive/MyDrive/rehab-ai-data/saved_models_weights/ml_model_{EXERCISE}_Weights.hdf5'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_mae',
    save_best_only=False)

In [None]:
%%time
# Final fit on every recording of the exercise; no validation data is passed,
# so the history only contains training metrics.
history = model.fit([all_data, all_padding], all_labels, epochs=100,
                     callbacks=[model_checkpoint_callback])

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

# Model History

In [None]:
# No fold is given, so only the training loss curve is drawn.
plot_history(history, 'loss')

In [None]:
# No fold is given, so only the training MAE curve is drawn.
plot_history(history, 'mae')

# Saving the model

In [None]:
# Reload the weights from the same per-exercise file the checkpoint callback
# writes to.
model.load_weights(f'/content/drive/MyDrive/rehab-ai-data/saved_models_weights/ml_model_{EXERCISE}_Weights.hdf5')

In [None]:
# NOTE(review): this creates a local `saved_model` directory, but the model
# below is written to Drive rather than into it - confirm it is still needed.
!mkdir -p saved_model
# Save the complete model as a per-exercise .h5 file in the Drive weights folder.
model.save(f'/content/drive/MyDrive/rehab-ai-data/saved_models_weights/ml_model_{EXERCISE}.h5')