# Model file locations (模型位置)


In [1]:
from pathlib import Path

path = Path("../utils/models/")

# Walk every entry directly under the models directory in sorted order and
# report its name, the st_size reported by stat(), and the length of its name.
# NOTE(review): for sub-directories st_size looks like the size of the
# directory entry itself, not of the files inside it - confirm before relying on it.
entries = sorted(path.glob("*"))
for item in entries:
    entry_name = item.name
    entry_size = item.stat().st_size
    print(f"name : {entry_name},\t size : {entry_size}, len : {len(entry_name)}")


name : .DS_Store,	 size : 10244, len : 9
name : BlazePose,	 size : 160, len : 9
name : movenet,	 size : 192, len : 7
name : movenet.tflite,	 size : 4758512, len : 14
name : movenet_multipose_lightning,	 size : 192, len : 27
name : movenet_multipose_lightning_float16.tflite,	 size : 9585276, len : 42
name : movenet_t,	 size : 192, len : 9
name : movenet_t.tflite,	 size : 12584128, len : 16


# TensorFlow Lite (TFLite) benchmark

In [2]:
#################################################
##  Records (avg time = mean per-frame time in milliseconds; fps = 1 / avg time)
##  macbook pro intel cpu
##    single_thunder : avg time : 87, fps : 11
##    single_light   : avg time : 29, fps : 33
##    multi_light    : avg time : 81, fps : 12
##  macmini m2 pro cpu
##    single_thunder : avg time : 56, fps : 17
##    single_light   : avg time : 13, fps : 75
##    multi_light    : avg time : 36, fps : 27
##

import cv2
import time
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt


# --- Benchmark configuration -------------------------------------------------
video_path  = "Upright row bilateral dumbbells.mp4"
model_index = 1        # position of the chosen variant in model_type_list
show_index  = 10       # frame counter value at which the keypoints are plotted
show_flag   = False    # plotting only happens when this is True

# TFLite model file for each supported MoveNet variant (insertion order is the
# order exposed through model_type_list).
model_place_by_type = {
    "single_thunder": "../utils/models/movenet_t.tflite",
    "single_light":   "../utils/models/movenet.tflite",
    "multi_light":    "../utils/models/movenet_multipose_lightning_float16.tflite",
}

model_type_list = list(model_place_by_type)
model_type  = model_type_list[model_index]
model_place = model_place_by_type[model_type]


# Build the TFLite interpreter for the selected MoveNet variant. The interpreter
# is created the same way for every variant; only the multi-pose model needs
# its input tensor resized before allocation.
interpreter = tf.lite.Interpreter(model_path=model_place)

if model_type == "multi_light":
    target_size = 256     # multiple of 32 (per the original note)

    # A -1 in shape_signature marks a dimension the model leaves dynamic.
    input_details = interpreter.get_input_details()
    is_dynamic_shape_model = input_details[0]['shape_signature'][2] == -1
    if is_dynamic_shape_model:
        input_tensor_index = input_details[0]['index']
        input_shape = (1, target_size, target_size, 3)
        interpreter.resize_tensor_input(input_tensor_index, input_shape, strict=True)
    else:
        # Static-shape model: feed it the size it declares instead of the
        # hard-coded default, otherwise set_tensor would reject the frame.
        # assumes a [1, H, W, 3] input layout - TODO confirm
        target_size = int(input_details[0]['shape'][1])

interpreter.allocate_tensors()

# Query the tensor details only after resizing/allocation so that the reported
# 'shape' matches what the interpreter will actually accept.
input_details  = interpreter.get_input_details()
output_details = interpreter.get_output_details()


# Benchmark loop: read the video frame by frame, center-crop each frame to a
# square, run the TFLite interpreter on it, and report the mean per-frame time.
cap = cv2.VideoCapture(video_path)

count      = 0    # number of frames processed
count_time = 0    # accumulated resize + inference time, in seconds

if model_type == "single_thunder" or model_type == "single_light":
    # Loop-invariant side length of the model input
    # (original note: thunder 256, light 192).
    input_size = input_details[0]['shape'][1]

try:
    while True:
        # Grab the next frame; ret is falsy once no frame can be read.
        ret, frame = cap.read()
        if not ret:
            print("Can't receive frame (stream end?). Exiting ...")
            break

        # Uncomment to mirror the frame horizontally:
        # frame = cv2.flip(frame, 1)

        # Center-crop to a square whose side is the shorter frame dimension.
        heigh, width = frame.shape[:2]
        side = min(heigh, width)
        y = (heigh - side) // 2
        x = (width - side) // 2
        frame = frame[y:y+side, x:x+side]
        heigh = width = side

        # Time only the resize + inference step with a monotonic clock, so
        # that plotting never leaks into the benchmark numbers.
        start_time = time.perf_counter()
        if model_type == "single_thunder" or model_type == "single_light":
            # Resize to the model input and reverse the channel order.
            image = cv2.resize(frame, (input_size, input_size), interpolation=cv2.INTER_AREA)[:,:,::-1]
            image = tf.expand_dims(image, axis=0)
            interpreter.set_tensor(input_details[0]['index'], tf.cast(image, dtype=tf.uint8).numpy())
            interpreter.invoke()
            target_keypoints_with_scores = interpreter.get_tensor(output_details[0]['index'])
            keypoints_with_scores = target_keypoints_with_scores[0][0]
        elif model_type == "multi_light":
            image_target = cv2.resize(frame, (target_size, target_size), interpolation=cv2.INTER_AREA)[:,:,::-1]
            input_tensor = tf.expand_dims(image_target, axis=0)
            interpreter.set_tensor(input_details[0]['index'], input_tensor.numpy())
            interpreter.invoke()
            multi_keypoints_with_scores = interpreter.get_tensor(output_details[0]['index'])
            # Keep the first entry only (presumably the first detected person -
            # verify against the model card); its first 51 values are reshaped
            # into 17 rows of (y, x, score).
            target_keypoints_with_scores = multi_keypoints_with_scores[0][0]
            keypoints_with_scores = target_keypoints_with_scores[:51].reshape((17,3))
        end_time = time.perf_counter()
        count_time += (end_time - start_time)

        # Draw the keypoints of one selected frame (outside the timed region).
        if count == show_index and show_flag:
            for keypoint in keypoints_with_scores:
                # Keypoints are normalized; scale them to the cropped frame.
                y_coordinate = int( keypoint[0] * heigh )
                x_coordinate = int( keypoint[1] * width )
                score = keypoint[2]

                # Color encodes the confidence bucket (tuples are in the
                # frame's own channel order).
                if score > 0.8:
                    color = (255,0,0)
                elif score > 0.4:
                    color = (255,255,0)
                else:
                    color = (0,0,255)
                cv2.circle(frame, (x_coordinate, y_coordinate), 2, color, 2)

            plt.figure(figsize=(8, 8))
            plt.imshow(frame[:,:,::-1])
            plt.show()

        count += 1
finally:
    # Always free the capture handle, even if inference raised.
    cap.release()

if count == 0:
    # Nothing was decoded (e.g. wrong path or unreadable file); avoid dividing by zero.
    print(f"No frames were read from {video_path!r}; nothing to report.")
else:
    avg_time = count_time / count
    print(f"count : {count}, avg time : {int(avg_time * 1000)}, fps : {int(1/avg_time)}")

2024-02-22 11:03:54.468185: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-02-22 11:04:37.879012: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Can't receive frame (stream end?). Exiting ...
count : 54, avg time : 31, fps : 31


# TensorFlow (SavedModel) benchmark

In [3]:
#################################################
##  Records (avg time = mean per-frame time in milliseconds; fps = 1 / avg time)
##  macbook pro intel cpu
##    single_thunder : avg time : 81, fps : 12
##    single_light   : avg time : 52, fps : 18
##    multi_light    : avg time : 97, fps : 10
##  macmini m2 pro cpu
##    single_thunder : avg time : 49, fps : 20
##    single_light   : avg time : 45, fps : 21
##    multi_light    : avg time : 60, fps : 16
##


import cv2
import time
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt


# --- Benchmark configuration -------------------------------------------------
video_path  = "Upright row bilateral dumbbells.mp4"
model_index = 1        # position of the chosen variant in model_type_list
show_index  = 10       # frame counter value at which the keypoints are plotted
show_flag   = False    # plotting only happens when this is True

# SavedModel directory for each supported MoveNet variant (insertion order is
# the order exposed through model_type_list).
model_place_by_type = {
    "single_thunder": "../utils/models/movenet_t",
    "single_light":   "../utils/models/movenet",
    "multi_light":    "../utils/models/movenet_multipose_lightning",
}

model_type_list = list(model_place_by_type)
model_type  = model_type_list[model_index]
model_place = model_place_by_type[model_type]

# print(f"model path : {model_place}")


# Load the SavedModel and take its default serving signature as the callable.
model_load = tf.saved_model.load(model_place)
model = model_load.signatures['serving_default']

if model_type in ("single_thunder", "single_light"):
    # Single-pose variants: read the input height/width from the signature's
    # first input, whose shape unpacks into four dimensions.
    signature_input_shape = model.inputs[0].shape
    _, target_height, target_width, _ = signature_input_shape
elif model_type == "multi_light":
    # Multi-pose variant: a fixed 256 x 256 input is used.
    target_height = 256
    target_width = 256


# Benchmark loop: read the video frame by frame, center-crop each frame to a
# square, run the SavedModel signature on it, and report the mean per-frame time.
cap = cv2.VideoCapture(video_path)

count      = 0    # number of frames processed
count_time = 0    # accumulated resize + inference time, in seconds

try:
    while True:
        # Grab the next frame; ret is falsy once no frame can be read.
        ret, frame = cap.read()
        if not ret:
            print("Can't receive frame (stream end?). Exiting ...")
            break

        # Uncomment to mirror the frame horizontally:
        # frame = cv2.flip(frame, 1)

        # Center-crop to a square whose side is the shorter frame dimension.
        heigh, width = frame.shape[:2]
        side = min(heigh, width)
        y = (heigh - side) // 2
        x = (width - side) // 2
        frame = frame[y:y+side, x:x+side]
        heigh = width = side

        # Time only the resize + inference step with a monotonic clock, so
        # that plotting never leaks into the benchmark numbers.
        start_time = time.perf_counter()

        # Resize to the model input, reverse the channel order, add a batch
        # axis and cast to int32 before calling the signature.
        image_resize = cv2.resize(frame, (target_width, target_height), interpolation=cv2.INTER_AREA)[:,:,::-1]
        image_r0 = tf.expand_dims(image_resize, axis=0)
        input_image = tf.cast(image_r0, dtype=tf.int32)

        # Both variants expose their result under 'output_0'; only the first
        # entry of the first batch item is used.
        target_keypoints_with_scores = model(input_image)['output_0'][0][0].numpy()
        if model_type == "single_thunder" or model_type == "single_light":
            keypoints_with_scores = target_keypoints_with_scores
        elif model_type == "multi_light":
            # The first 51 values are reshaped into 17 rows of (y, x, score).
            # presumably this entry is the first detected person - verify
            # against the model card.
            keypoints_with_scores = target_keypoints_with_scores[:51].reshape((17,3))

        end_time = time.perf_counter()
        count_time += (end_time - start_time)

        # Draw the keypoints of one selected frame (outside the timed region).
        if count == show_index and show_flag:
            for keypoint in keypoints_with_scores:
                # Keypoints are normalized; scale them to the cropped frame.
                y_coordinate = int( keypoint[0] * heigh )
                x_coordinate = int( keypoint[1] * width )
                score = keypoint[2]

                # Color encodes the confidence bucket (tuples are in the
                # frame's own channel order).
                if score > 0.8:
                    color = (255,0,0)
                elif score > 0.4:
                    color = (255,255,0)
                else:
                    color = (0,0,255)
                cv2.circle(frame, (x_coordinate, y_coordinate), 2, color, 2)

            plt.figure(figsize=(8, 8))
            plt.imshow(frame[:,:,::-1])
            plt.show()

        count += 1
finally:
    # Always free the capture handle, even if inference raised.
    cap.release()

if count == 0:
    # Nothing was decoded (e.g. wrong path or unreadable file); avoid dividing by zero.
    print(f"No frames were read from {video_path!r}; nothing to report.")
else:
    avg_time = count_time / count
    print(f"count : {count}, avg time : {int(avg_time * 1000)}, fps : {int(1/avg_time)}")



Can't receive frame (stream end?). Exiting ...
count : 54, avg time : 53, fps : 18


In [None]:
# Scratch cell: IPython `??` introspection that displays the source of
# tf.lite.Interpreter. It relies on `tf` having been imported by an earlier
# cell and produces no value used elsewhere in the notebook.
??tf.lite.Interpreter