# In [None]:
import tensorflow as tf
import cv2
import os
import json


# Keypoint name -> integer index.
# The names are the keys looked up in the ground-truth keypoint dicts, and the
# integer is used as the index along the keypoint axis of the model output.
# Indices 3 and 4 are intentionally absent from this table
# (presumably the ear keypoints in MoveNet's ordering -- TODO confirm).
keypoint_mapping = dict(
    nose=0,
    left_eye_smplhf=1,
    right_eye_smplhf=2,
    left_shoulder=5,
    right_shoulder=6,
    left_elbow=7,
    right_elbow=8,
    left_wrist=9,
    right_wrist=10,
    left_hip=11,
    right_hip=12,
    left_knee=13,
    right_knee=14,
    left_ankle=15,
    right_ankle=16,
)

def draw_keypoints_and_connections(frame, detected_keypoints, ground_truth_keypoints):
    """Overlay detected and ground-truth keypoints on ``frame`` and return it.

    Args:
        frame: Image the OpenCV drawing calls are issued on; its ``shape``
            supplies the height (index 0) and width (index 1) used for scaling.
        detected_keypoints: Mapping of keypoint name to a dict with 'x' and
            'y'. The values are multiplied by the frame width / height before
            drawing.
        ground_truth_keypoints: Mapping of keypoint name to a dict with 'x'
            and 'y'. The values are used as pixel coordinates without scaling.
            Only names present in ``keypoint_mapping`` are drawn.

    Returns:
        The same ``frame`` object that was passed in.
    """
    detected_colour = (0, 0, 255)
    ground_truth_colour = (255, 0, 0)

    # Pairs of keypoint names joined by a line when both were detected.
    skeleton = (
        ('left_shoulder', 'right_shoulder'),
        ('left_shoulder', 'left_elbow'),
        ('right_shoulder', 'right_elbow'),
        ('left_elbow', 'left_wrist'),
        ('right_elbow', 'right_wrist'),
        ('left_hip', 'right_hip'),
        ('left_hip', 'left_knee'),
        ('right_hip', 'right_knee'),
        ('left_knee', 'left_ankle'),
        ('right_knee', 'right_ankle'),
        ('left_shoulder', 'left_hip'),
        ('right_shoulder', 'right_hip'),
        ('nose', 'left_eye_smplhf'),
        ('nose', 'right_eye_smplhf'),
    )

    def to_pixels(point):
        # Scale x by the frame width and y by the frame height, truncating to int.
        return (int(point['x'] * frame.shape[1]), int(point['y'] * frame.shape[0]))

    # Detected keypoints: one filled dot each.
    for point in detected_keypoints.values():
        cv2.circle(frame, to_pixels(point), 3, detected_colour, -1)

    # Detected skeleton: a line for every pair whose two ends are both present.
    for joint_a, joint_b in skeleton:
        if joint_a not in detected_keypoints or joint_b not in detected_keypoints:
            continue
        line_start = to_pixels(detected_keypoints[joint_a])
        line_end = to_pixels(detected_keypoints[joint_b])
        cv2.line(frame, line_start, line_end, detected_colour, 3)

    # Ground truth: smaller dots, restricted to the names the mapping knows.
    for name, point in ground_truth_keypoints.items():
        if name not in keypoint_mapping:
            continue
        cv2.circle(frame, (int(point['x']), int(point['y'])), 2, ground_truth_colour, -1)

    return frame



# ---------------------------------------------------------------------------
# Batch evaluation of a MoveNet TFLite model against annotated videos.
#
# For every `<root_folder>/<exercise>/data/*.mp4` with a sibling `.json`
# annotation file, each frame is run through the interpreter, the detected and
# ground-truth keypoints are drawn onto the frame, the annotated frame is
# appended to `<exercise>/MovenetVideos/<video>`, and one line per keypoint
# (Euclidean distance plus torso diameter) is written to `output_file`.
# ---------------------------------------------------------------------------
root_folder = 'TestDataset/Full'
interpreter = tf.lite.Interpreter(model_path="movenet_thunder.tflite")
interpreter.allocate_tensors()

# Tensor metadata is looked up once here instead of once per frame.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Size every frame is resized to before it is handed to the interpreter.
resized_width, resized_height = 256, 256


def _torso_diameter(real_keypoints):
    """Return the longer of the two shoulder-to-opposite-hip distances.

    Raises:
        KeyError: if a shoulder or hip entry (or its 'x'/'y') is missing.
    """
    def _distance(name_a, name_b):
        a, b = real_keypoints[name_a], real_keypoints[name_b]
        return ((a['x'] - b['x'])**2 + (a['y'] - b['y'])**2)**0.5

    return max(
        _distance('left_shoulder', 'right_hip'),
        _distance('right_shoulder', 'left_hip'),
    )


output_file = 'movenet_keypoint_distances.txt'
# The `with` block closes the report file; no explicit close() is needed.
with open(output_file, 'w') as distance_file:

    for exercise_folder in os.listdir(root_folder):
        exercise_folder_path = os.path.join(root_folder, exercise_folder)
        if not os.path.isdir(exercise_folder_path):
            continue
        print(f"Processing exercise folder: {exercise_folder}")

        data_folder_path = os.path.join(exercise_folder_path, 'data')
        if not os.path.exists(data_folder_path):
            continue

        for video_file in os.listdir(data_folder_path):
            if not video_file.endswith('.mp4'):
                continue
            video_file_path = os.path.join(data_folder_path, video_file)
            print(f"Processing video: {video_file}")

            # Annotations live next to the video: same stem, `.json` suffix.
            json_file_path = os.path.splitext(video_file_path)[0] + '.json'
            try:
                with open(json_file_path, 'r') as json_file:
                    annotations = json.load(json_file)
            except FileNotFoundError:
                # Skip unannotated videos the same way malformed ones are
                # skipped, instead of aborting the whole batch.
                print(f"Annotation file not found: {json_file_path}")
                continue
            except json.JSONDecodeError:
                print(f"Error decoding JSON in file: {json_file_path}")
                continue

            # Open the video exactly once; a second VideoCapture on the same
            # path would leak the first handle.
            cap = cv2.VideoCapture(video_file_path)
            if not cap.isOpened():
                print(f"Could not open video: {video_file_path}")
                cap.release()
                continue

            video_out_folder = os.path.join(exercise_folder_path, 'MovenetVideos')
            os.makedirs(video_out_folder, exist_ok=True)

            video_out_path = os.path.join(video_out_folder, video_file)
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            fps = int(cap.get(cv2.CAP_PROP_FPS))
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            out = cv2.VideoWriter(video_out_path, fourcc, fps, (frame_width, frame_height))
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            print(f"Total frames: {frame_count}")

            try:
                while True:
                    ret, frame = cap.read()
                    if not ret:
                        break

                    original_width, original_height = frame.shape[1], frame.shape[0]

                    # NOTE(review): CAP_PROP_POS_FRAMES is read *after* read(),
                    # so it is already 1 for the first frame and the first
                    # annotation index used is 3, not 0. Confirm this matches
                    # how the annotation list is sampled.
                    frame_number = int(cap.get(cv2.CAP_PROP_POS_FRAMES)) * 3

                    # Resolve the ground truth before running inference so
                    # frames without annotations cost no model invocation.
                    if frame_number >= len(annotations["annotations"]):
                        print(f"No annotation found for frame {frame_number} in video {video_file}")
                        continue
                    real_keypoints_frame_data = annotations["annotations"][frame_number]
                    if "armature_keypoints" not in real_keypoints_frame_data:
                        print(f"No keypoints found for frame {frame_number} in video {video_file}")
                        continue
                    real_keypoints = real_keypoints_frame_data["armature_keypoints"]

                    # Factors that map the resized model input back to the
                    # original frame size.
                    width_scaling_factor = original_width / resized_width
                    height_scaling_factor = original_height / resized_height

                    # The model input is built from an RGB copy; `frame`
                    # itself stays in OpenCV's BGR order for drawing/writing.
                    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    resized_frame = cv2.resize(
                        frame_rgb,
                        (resized_width, resized_height),
                        interpolation=cv2.INTER_LANCZOS4,
                    )
                    image = tf.convert_to_tensor(resized_frame, dtype=tf.uint8)
                    image = tf.expand_dims(image, axis=0)
                    input_image = tf.cast(image, dtype=tf.float32)

                    interpreter.set_tensor(input_details[0]['index'], input_image.numpy())
                    interpreter.invoke()
                    keypoints_with_scores = interpreter.get_tensor(output_details[0]['index'])

                    # Last axis of the output: index 0 is read as y, index 1 as x.
                    keypoints = {
                        keypoint_name: {
                            'x': keypoints_with_scores[0, 0, keypoint_index, 1],
                            'y': keypoints_with_scores[0, 0, keypoint_index, 0],
                        }
                        for keypoint_name, keypoint_index in keypoint_mapping.items()
                    }

                    frame_with_keypoints = draw_keypoints_and_connections(
                        frame,
                        keypoints,
                        real_keypoints,
                    )

                    print("Processing keypoints")

                    # The torso diameter is the same for every keypoint of a
                    # frame, so compute it at most once -- and only when a
                    # mapped keypoint exists, so frames without any mapped
                    # keypoint never touch the shoulder/hip entries.
                    normalization_factor = None
                    for keypoint_name in real_keypoints:
                        if keypoint_name not in keypoint_mapping:
                            continue
                        if normalization_factor is None:
                            normalization_factor = _torso_diameter(real_keypoints)

                        # Detected coordinates are scaled up to original-frame
                        # pixels; ground truth is used as stored.
                        detected_x = keypoints[keypoint_name]['x'] * resized_width * width_scaling_factor
                        detected_y = keypoints[keypoint_name]['y'] * resized_height * height_scaling_factor

                        ground_truth_x = real_keypoints[keypoint_name]['x']
                        ground_truth_y = real_keypoints[keypoint_name]['y']

                        euclidean_distance = ((detected_x - ground_truth_x) ** 2 + (detected_y - ground_truth_y) ** 2) ** 0.5

                        record = f'Video: {video_file_path}, Frame: {frame_number}, Keypoint {keypoint_name}, Euclidean Distance = {euclidean_distance},Torso Diameter = {normalization_factor}'
                        print(f'Writing to file: {record}')
                        distance_file.write(f'{record}\n')

                    # `frame` came straight from cap.read() and is already
                    # BGR, which is what VideoWriter expects; an RGB->BGR
                    # conversion here would swap the red and blue channels.
                    out.write(frame_with_keypoints)

                    # NOTE(review): no cv2.imshow call is visible in this
                    # script, so this key check likely never fires; kept as-is.
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
            finally:
                # Release both handles even if a frame fails; releasing the
                # writer is what finalises the output file.
                cap.release()
                out.release()
                cv2.destroyAllWindows()
