In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
!pip install -q opencv-python
!pip install -q git+https://github.com/tensorflow/docs

  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for tensorflow-docs (setup.py) ... [?25l[?25hdone


In [3]:
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow_docs.vis import embed
import numpy as np
import cv2

import pandas as pd
import os
import subprocess

## Load Model from TF hub

In [4]:
model_name = "movenet_thunder" #@param ["movenet_lightning", "movenet_thunder", "movenet_lightning_f16.tflite", "movenet_thunder_f16.tflite", "movenet_lightning_int8.tflite", "movenet_thunder_int8.tflite"]

if "tflite" in model_name:
  if "movenet_lightning_f16" in model_name:
    !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/lightning/tflite/float16/4?lite-format=tflite
    input_size = 192
  elif "movenet_thunder_f16" in model_name:
    !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/thunder/tflite/float16/4?lite-format=tflite
    input_size = 256
  elif "movenet_lightning_int8" in model_name:
    !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/lightning/tflite/int8/4?lite-format=tflite
    input_size = 192
  elif "movenet_thunder_int8" in model_name:
    !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/thunder/tflite/int8/4?lite-format=tflite
    input_size = 256
  else:
    raise ValueError("Unsupported model name: %s" % model_name)

  # Initialize the TFLite interpreter
  interpreter = tf.lite.Interpreter(model_path="model.tflite")
  interpreter.allocate_tensors()

  def movenet(input_image):
    """Runs detection on an input image.

    Args:
      input_image: A [1, height, width, 3] tensor represents the input image
        pixels. Note that the height/width should already be resized and match the
        expected input resolution of the model before passing into this function.

    Returns:
      A [1, 1, 17, 3] float numpy array representing the predicted keypoint
      coordinates and scores.
    """
    # TF Lite format expects tensor type of uint8.
    input_image = tf.cast(input_image, dtype=tf.uint8)
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    interpreter.set_tensor(input_details[0]['index'], input_image.numpy())
    # Invoke inference.
    interpreter.invoke()
    # Get the model prediction.
    keypoints_with_scores = interpreter.get_tensor(output_details[0]['index'])
    return keypoints_with_scores

else:
  if "movenet_lightning" in model_name:
    module = hub.load("https://tfhub.dev/google/movenet/singlepose/lightning/4")
    input_size = 192
  elif "movenet_thunder" in model_name:
    module = hub.load("https://tfhub.dev/google/movenet/singlepose/thunder/4")
    input_size = 256
  else:
    raise ValueError("Unsupported model name: %s" % model_name)

  def movenet(input_image):
    """Runs detection on an input image.

    Args:
      input_image: A [1, height, width, 3] tensor represents the input image
        pixels. Note that the height/width should already be resized and match the
        expected input resolution of the model before passing into this function.

    Returns:
      A [1, 1, 17, 3] float numpy array representing the predicted keypoint
      coordinates and scores.
    """
    model = module.signatures['serving_default']

    # SavedModel format expects tensor type of int32.
    input_image = tf.cast(input_image, dtype=tf.int32)
    # Run model inference.
    outputs = model(input_image)
    # Output is a [1, 1, 17, 3] tensor.
    keypoints_with_scores = outputs['output_0'].numpy()
    return keypoints_with_scores

In [5]:
#@title Cropping Algorithm

# Confidence score to determine whether a keypoint prediction is reliable.
MIN_CROP_KEYPOINT_SCORE = 0.2

def init_crop_region(image_height, image_width):
  """Defines the default crop region.

  The function provides the initial crop region (pads the full image from both
  sides to make it a square image) when the algorithm cannot reliably determine
  the crop region from the previous frame.
  """
  if image_width > image_height:
    box_height = image_width / image_height
    box_width = 1.0
    y_min = (image_height / 2 - image_width / 2) / image_height
    x_min = 0.0
  else:
    box_height = 1.0
    box_width = image_height / image_width
    y_min = 0.0
    x_min = (image_width / 2 - image_height / 2) / image_width

  return {
    'y_min': y_min,
    'x_min': x_min,
    'y_max': y_min + box_height,
    'x_max': x_min + box_width,
    'height': box_height,
    'width': box_width
  }

def torso_visible(keypoints):
  """Checks whether there are enough torso keypoints.

  This function checks whether the model is confident at predicting one of the
  shoulders/hips which is required to determine a good crop region.
  """
  return ((keypoints[0, 0, KEYPOINT_DICT['left_hip'], 2] >
           MIN_CROP_KEYPOINT_SCORE or
          keypoints[0, 0, KEYPOINT_DICT['right_hip'], 2] >
           MIN_CROP_KEYPOINT_SCORE) and
          (keypoints[0, 0, KEYPOINT_DICT['left_shoulder'], 2] >
           MIN_CROP_KEYPOINT_SCORE or
          keypoints[0, 0, KEYPOINT_DICT['right_shoulder'], 2] >
           MIN_CROP_KEYPOINT_SCORE))

def determine_torso_and_body_range(
    keypoints, target_keypoints, center_y, center_x):
  """Calculates the maximum distance from each keypoints to the center location.

  The function returns the maximum distances from the two sets of keypoints:
  full 17 keypoints and 4 torso keypoints. The returned information will be
  used to determine the crop size. See determineCropRegion for more detail.
  """
  torso_joints = ['left_shoulder', 'right_shoulder', 'left_hip', 'right_hip']
  max_torso_yrange = 0.0
  max_torso_xrange = 0.0
  for joint in torso_joints:
    dist_y = abs(center_y - target_keypoints[joint][0])
    dist_x = abs(center_x - target_keypoints[joint][1])
    if dist_y > max_torso_yrange:
      max_torso_yrange = dist_y
    if dist_x > max_torso_xrange:
      max_torso_xrange = dist_x

  max_body_yrange = 0.0
  max_body_xrange = 0.0
  for joint in KEYPOINT_DICT.keys():
    if keypoints[0, 0, KEYPOINT_DICT[joint], 2] < MIN_CROP_KEYPOINT_SCORE:
      continue
    dist_y = abs(center_y - target_keypoints[joint][0]);
    dist_x = abs(center_x - target_keypoints[joint][1]);
    if dist_y > max_body_yrange:
      max_body_yrange = dist_y

    if dist_x > max_body_xrange:
      max_body_xrange = dist_x

  return [max_torso_yrange, max_torso_xrange, max_body_yrange, max_body_xrange]

def determine_crop_region(
      keypoints, image_height,
      image_width):
  """Determines the region to crop the image for the model to run inference on.

  The algorithm uses the detected joints from the previous frame to estimate
  the square region that encloses the full body of the target person and
  centers at the midpoint of two hip joints. The crop size is determined by
  the distances between each joints and the center point.
  When the model is not confident with the four torso joint predictions, the
  function returns a default crop which is the full image padded to square.
  """
  target_keypoints = {}
  for joint in KEYPOINT_DICT.keys():
    target_keypoints[joint] = [
      keypoints[0, 0, KEYPOINT_DICT[joint], 0] * image_height,
      keypoints[0, 0, KEYPOINT_DICT[joint], 1] * image_width
    ]

  if torso_visible(keypoints):
    center_y = (target_keypoints['left_hip'][0] +
                target_keypoints['right_hip'][0]) / 2;
    center_x = (target_keypoints['left_hip'][1] +
                target_keypoints['right_hip'][1]) / 2;

    (max_torso_yrange, max_torso_xrange,
      max_body_yrange, max_body_xrange) = determine_torso_and_body_range(
          keypoints, target_keypoints, center_y, center_x)

    crop_length_half = np.amax(
        [max_torso_xrange * 1.9, max_torso_yrange * 1.9,
          max_body_yrange * 1.2, max_body_xrange * 1.2])

    tmp = np.array(
        [center_x, image_width - center_x, center_y, image_height - center_y])
    crop_length_half = np.amin(
        [crop_length_half, np.amax(tmp)]);

    crop_corner = [center_y - crop_length_half, center_x - crop_length_half];

    if crop_length_half > max(image_width, image_height) / 2:
      return init_crop_region(image_height, image_width)
    else:
      crop_length = crop_length_half * 2;
      return {
        'y_min': crop_corner[0] / image_height,
        'x_min': crop_corner[1] / image_width,
        'y_max': (crop_corner[0] + crop_length) / image_height,
        'x_max': (crop_corner[1] + crop_length) / image_width,
        'height': (crop_corner[0] + crop_length) / image_height -
            crop_corner[0] / image_height,
        'width': (crop_corner[1] + crop_length) / image_width -
            crop_corner[1] / image_width
      }
  else:
    return init_crop_region(image_height, image_width)

def crop_and_resize(image, crop_region, crop_size):
  """Crops and resize the image to prepare for the model input."""
  boxes=[[crop_region['y_min'], crop_region['x_min'],
          crop_region['y_max'], crop_region['x_max']]]
  output_image = tf.image.crop_and_resize(
      image, box_indices=[0], boxes=boxes, crop_size=crop_size)
  return output_image

def run_inference(movenet, image, crop_region, crop_size):
  """Runs model inferece on the cropped region.

  The function runs the model inference on the cropped region and updates the
  model output to the original image coordinate system.
  """
  image_height, image_width, _ = image.shape
  input_image = crop_and_resize(
    tf.expand_dims(image, axis=0), crop_region, crop_size=crop_size)
  # Run model inference.
  keypoints_with_scores = movenet(input_image)
  # Update the coordinates.
  for idx in range(17):
    keypoints_with_scores[0, 0, idx, 0] = (
        crop_region['y_min'] * image_height +
        crop_region['height'] * image_height *
        keypoints_with_scores[0, 0, idx, 0]) / image_height
    keypoints_with_scores[0, 0, idx, 1] = (
        crop_region['x_min'] * image_width +
        crop_region['width'] * image_width *
        keypoints_with_scores[0, 0, idx, 1]) / image_width
  return keypoints_with_scores

In [11]:
  KEYPOINT_DICT = {
    'nose': 0,
    'left_eye': 1,
    'right_eye': 2,
    'left_ear': 3,
    'right_ear': 4,
    'left_shoulder': 5,
    'right_shoulder': 6,
    'left_elbow': 7,
    'right_elbow': 8,
    'left_wrist': 9,
    'right_wrist': 10,
    'left_hip': 11,
    'right_hip': 12,
    'left_knee': 13,
    'right_knee': 14,
    'left_ankle': 15,
    'right_ankle': 16
  }

In [6]:
def get_dataframe_cols():
  df_cols = []
  for keypoint_name in KEYPOINT_DICT:
    df_cols.append(f"{keypoint_name}_y")
    df_cols.append(f"{keypoint_name}_x")
    df_cols.append(f"{keypoint_name}_confidence")
  return df_cols

In [7]:
def get_video_frame_record(keypoints):
  record = []
  for keypoint in keypoints[0][0]:
    record.append(keypoint[0])
    record.append(keypoint[1])
    record.append(keypoint[2])
  return record

In [8]:
def process_video(exercise_rgb_dir, exercise_label_path, input_video_name, output_video_name):
  cap = cv2.VideoCapture(f"{exercise_rgb_dir}/rgb/{input_video_name}")
  frame_rate = int(cap.get(cv2.CAP_PROP_FPS))
  frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
  frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
  crop_region = init_crop_region(frame_height, frame_width)


  video_records = []
  while True:
    ret, frame = cap.read()
    if not ret:
      break

    keypoints_with_scores = run_inference(
        movenet, frame,
        crop_region,
        crop_size=[input_size, input_size])

    crop_region = determine_crop_region(keypoints_with_scores, frame_height, frame_width)

    frame_record = get_video_frame_record(keypoints_with_scores)
    video_records.append(frame_record)

  cap.release()
  cv2.destroyAllWindows()

  video_df_cols = get_dataframe_cols()
  video_df = pd.DataFrame(data=video_records, columns=video_df_cols)
  video_df.to_csv(f"{exercise_rgb_dir}/{output_video_name}.csv", index=False)
  cmd = f"cp {exercise_label_path} {exercise_rgb_dir}"
  subprocess.run(cmd, shell=True)

### Exercises processed:
Exercise 1 <br>
Processed 78 participants.<br>
CPU times: user 2h 8min 10s, sys: 2min 20s, total: 2h 10min 31s<br>
Wall time: 1h 35min 4s<br>
<br>
Exercise 4<br>
Processed 78 participants.<br>
CPU times: user 2h 16min 2s, sys: 2min 25s, total: 2h 18min 28s<br>
Wall time: 1h 39min 27s<br>
<br><br>

In [9]:
KIMORE_RGB = "/content/gdrive/MyDrive/PSUT/Graduation-Project2/rehab-ai-data/KiMoRe_rgb_movenet"
KIMORE = "/content/gdrive/MyDrive/PSUT/Graduation-Project2/rehab-ai-data/KiMoRe"
EXERCISE = "Es4"

In [12]:
%%time

print(f"Processing Exercise {EXERCISE} ...")
unprocessed_videos = []
processed_counter = 0
for group in os.listdir(KIMORE_RGB):
  for sub_group in os.listdir(f"{KIMORE_RGB}/{group}"):
    sub_group_path = f"{KIMORE_RGB}/{group}/{sub_group}"
    for participant in os.listdir(sub_group_path):
      processed_counter += 1
      print(f"\t{participant}")
      participant_dir = f"{sub_group_path}/{participant}"
      exercise_rgb_dir = f"{participant_dir}/{EXERCISE}"
      exercise_dir = f"{KIMORE}/{group}/{sub_group}/{participant}/{EXERCISE}/Label/"
      exercise_label_path = None

      for file in os.listdir(exercise_dir):
        if file.startswith("ClinicalAssessment"):
          exercise_label_path = f"{exercise_dir}/{file}"

      if not os.listdir(f"{exercise_rgb_dir}/rgb"):
        print(f"\t\t\tNo video found for exercise {EXERCISE} of {participant}.")
        unprocessed_videos.append(exercise_rgb_dir)
        continue

      input_video_name = None
      for file in os.listdir(f"{exercise_rgb_dir}/rgb"):
        if file.endswith(".mp4"):
          input_video_name = file
          output_video_name = f"{participant}_{EXERCISE}"
          process_video(exercise_rgb_dir, exercise_label_path, input_video_name, output_video_name)
          print(f"\t\t\tFinished processing {output_video_name}.csv")

print(f"Processed {processed_counter} participants.")

Processing Exercise Es4 ...
	P_ID11
			Finished processing P_ID11_Es4.csv
	P_ID16
			Finished processing P_ID16_Es4.csv
	P_ID10
			Finished processing P_ID10_Es4.csv
	P_ID4
			Finished processing P_ID4_Es4.csv
	P_ID3
			No video found for exercise Es4 of P_ID3.
	P_ID2
			Finished processing P_ID2_Es4.csv
	P_ID15
			Finished processing P_ID15_Es4.csv
	P_ID12
			Finished processing P_ID12_Es4.csv
	P_ID6
			Finished processing P_ID6_Es4.csv
	P_ID7
			Finished processing P_ID7_Es4.csv
	P_ID14
			Finished processing P_ID14_Es4.csv
	P_ID9
			Finished processing P_ID9_Es4.csv
	P_ID5
			Finished processing P_ID5_Es4.csv
	P_ID13
			Finished processing P_ID13_Es4.csv
	P_ID8
			Finished processing P_ID8_Es4.csv
	P_ID1
			No video found for exercise Es4 of P_ID1.
	S_ID2
			Finished processing S_ID2_Es4.csv
	S_ID5
			Finished processing S_ID5_Es4.csv
	S_ID3
			No video found for exercise Es4 of S_ID3.
	S_ID4
			Finished processing S_ID4_Es4.csv
	S_ID8
			Finished processing S_ID8_Es4.csv
	S_ID10
		