# Extracting discrete features from 3D Hand landmark coordinates

## This notebook aims to cover the following:

1. **Retrieving data from the "hand_gesture_data" folder in the current directory.**
    - Users have the option to collect their own data using the "data_collect.py" script or use the existing example data.
2. **Extracting discrete features from the coordinates** 
    1. **Palm Orientation**
    2. **Moving Direction**
    3. **Hand Pose**

3. **Classifying dynamic hand gestures using the sequences of discrete features (Optional)** 
    - These features can be used for classifying dynamic hand gestures or training a new model.

## 1. **Retrieving data from the "hand_gesture_data" folder**
- Inside the "hand_gesture_data" folder, there are subfolders named after the hand gestures.
- Each subfolder contains the data for a specific hand gesture.
- You can use the existing example data or collect your own data using the "data_collect.py" script.

#### 1.1 Load the existing example data

In [1]:
import numpy as np
import os

data = os.path.join(os.getcwd(), "hand_gesture_data")

swipe_right = os.path.join(data, "swipe_right")
swipe_right_files = [os.path.join(swipe_right, f) for f in os.listdir(swipe_right)]

swipe_right_data = {
    "features": [np.load(f) for f in swipe_right_files if ("_hands" not in f and "_raw" not in f)],
    "raw": [np.load(f) for f in swipe_right_files if "_raw" in f],
    "hands": [np.load(f, allow_pickle=True) for f in swipe_right_files if "_hands" in f]
}

print(f"Number of swipe right trials: {len(swipe_right_data['features'])}\n")

Number of swipe right trials: 2



#### 1.2 Store the data in some accessible format

In [153]:
trial = 1

swipe_right_trial = {
    "features": swipe_right_data["features"][trial],
    "raw": swipe_right_data["raw"][trial],
    "hands": swipe_right_data["hands"][trial]
}
print(f"Trial {trial} number of recorded frames: {len(swipe_right_trial['features'])}")

Trial 1 number of recorded frames: 227


### To visualise the data, run the cell below and adjust the slider to view the hand landmarks for different frames.

In [79]:
from matplotlib import pyplot as plt
from ipywidgets import interact, widgets
import cv2

from ultraleap_demo.hand_renderer import HandRenderer # This is a simple class to render the recorded hand gesture data frame by frame

# Load the hand data
hand_data = swipe_right_trial["hands"]
hands_colour = (255, 255, 255)

# Create the instances of the HandRenderer class for dots and skeleton formats
dots_renderer = HandRenderer(np.zeros((480, 640, 3), dtype=np.uint8), hands_format="Dots", hands_colour=hands_colour, circle_radius=3)
skeleton_renderer = HandRenderer(np.zeros((480, 640, 3), dtype=np.uint8), hands_format="Skeleton", hands_colour=hands_colour, circle_radius=3)

# Create a slider for frame navigation
frame_slider = widgets.IntSlider(min=0, max=len(hand_data)-1, step=1, value=0, layout=widgets.Layout(width='90%'), description='Frame')

def show_frames(frame_number):
    # Render the hand data for this frame
    dots_renderer.render_hand_data(hand_data[frame_number])
    skeleton_renderer.render_hand_data(hand_data[frame_number])

    # Convert the images from BGR to RGB
    dots_image_rgb = cv2.cvtColor(dots_renderer.output_image, cv2.COLOR_BGR2RGB)
    skeleton_image_rgb = cv2.cvtColor(skeleton_renderer.output_image, cv2.COLOR_BGR2RGB)

    # Create a subplot with 1 row and 2 columns
    plt.figure(figsize=(20, 6))
    plt.subplot(1, 2, 1)

    # Display the dots image
    plt.imshow(skeleton_image_rgb)
    plt.axis('off')  # Hide the axes
    plt.title("Skeleton")  # Set the title

    # Display the skeleton image
    plt.subplot(1, 2, 2)
    plt.imshow(dots_image_rgb)
    plt.axis('off')  # Hide the axes
    plt.title("Dots")  # Set the title

    plt.suptitle(f"Frame {frame_number}", fontsize=16)  # Set the title for the whole figure

    plt.show()

# Display the slider and the frame
interact(show_frames, frame_number=frame_slider)

interactive(children=(IntSlider(value=0, description='Frame', layout=Layout(width='90%'), max=225), Output()),…

<function __main__.show_frames(frame_number)>

## 2. **Extracting discrete features from the coordinates** 

### 2.1 **Palm Orientation**

#### 2.1.1 **Functions** to calculate the palm orientation:

In [None]:
from ultraleap_demo.prep_functions import *

def rotate_vector(vector, axis, theta):
    """
    Rotate a 3D vector around the x, y, or z axis by a given angle (theta).
    
    Parameters:
        vector (ndarray): The 3D vector to rotate.
        axis (str): The axis to rotate around ('x', 'y', or 'z').
        theta (float): The angle (in radians) to rotate by.
    
    Returns:
        ndarray: The rotated 3D vector.
    """
    rotation_matrix = np.eye(3)
    
    if axis == 'x':
        rotation_matrix[1, 1] = np.cos(theta)
        rotation_matrix[1, 2] = -np.sin(theta)
        rotation_matrix[2, 1] = np.sin(theta)
        rotation_matrix[2, 2] = np.cos(theta)
    elif axis == 'y':
        rotation_matrix[0, 0] = np.cos(theta)
        rotation_matrix[0, 2] = np.sin(theta)
        rotation_matrix[2, 0] = -np.sin(theta)
        rotation_matrix[2, 2] = np.cos(theta)
    elif axis == 'z':
        rotation_matrix[0, 0] = np.cos(theta)
        rotation_matrix[0, 1] = -np.sin(theta)
        rotation_matrix[1, 0] = np.sin(theta)
        rotation_matrix[1, 1] = np.cos(theta)
    else:
        raise ValueError("Invalid axis. Must be 'x', 'y', or 'z'.")
    
    rotated_vector = np.dot(rotation_matrix, vector)
    
    return rotated_vector


def get_average_landmarks(landmarks_frames):
    # From the frames list of hand landmarks get the average position for each landmark
    average_landmarks = np.zeros((22, 3))
    for frame in landmarks_frames:
        average_landmarks += frame
    average_landmarks /= len(landmarks_frames)
    return average_landmarks


def get_normal_vector(landmarks):
    # Extract the relevant landmarks (assuming 0-based indexing)
    index_base = np.array(landmarks[6])  # Base of the index finger
    pinky_base = np.array(landmarks[14])  # Base of the pinky finger
    wrist = np.array(landmarks[1])  # Wrist

    # Calculate the vectors between the points
    v1 = index_base - wrist
    v2 = pinky_base - wrist

    # Compute the normal vector of the palm plane
    normal_vector = np.cross(v1, v2)

    # Normalize the normal vector
    normal_vector = normal_vector / np.linalg.norm(normal_vector)

    return normal_vector

def classify_palm_rotation(palm_normal, camera_vector, threshold=0.666):
    # Calculate the dot product between the palm normal and camera vector
    dot_product = np.dot(palm_normal, camera_vector)

    second_dot_product = np.dot(palm_normal, rotate_vector(camera_vector, 'y', np.pi/2))

    third_dot_product = np.dot(palm_normal, rotate_vector(camera_vector, 'y', -np.pi/2))

    condition_1 = second_dot_product > -threshold and second_dot_product < threshold
    condition_2 = third_dot_product > -threshold and third_dot_product < threshold

    product = second_dot_product * third_dot_product

    # Classify the palm rotation based on the dot product value
    if dot_product > threshold:
        if condition_1 and condition_2:
            if product > 0:
                return "down"
            else:
                if (np.abs(product) < 0.1):
                    return "down"
                else:
                    return "up"
        else:
            return "down"
    elif dot_product < -threshold:
        return "up"
    elif second_dot_product > -threshold and second_dot_product < threshold:
        if third_dot_product < threshold:
            return "opposite"
        else:

            return "down"
    else:
        return "opposite"

#### 2.1.2 **Calculating** the palm orientation for each frame.

In [186]:
hand_landmarks_frame = swipe_right_trial["raw"][0]

normal_vector = get_normal_vector(hand_landmarks_frame)

result = classify_palm_rotation(normal_vector, np.array([0, 1, 0]))
print(f"The palm orientation for the first frame is facing: {result}")

# Now we can get the palm orientations for the rest of the frames
palm_orientation_features = []
for frame in swipe_right_trial["raw"]:
    normal_vector = get_normal_vector(frame)
    result = classify_palm_rotation(normal_vector, np.array([0, 1, 0]))
    palm_orientation_features.append(result)

The palm orientation for the first frame is facing: down


The 'down' orientation is also the same as towards the camera. Similarly, the 'up' orientation is the same as away from the camera. The 'opposite' orientation is all the other orientations.

<img src="https://i.ibb.co/nBf02Jh/orientation-sphere-cropped.png" width="600">

#### 2.1.3 **Visualising** the palm orientation for each frame.

In [187]:
from matplotlib import pyplot as plt
from ipywidgets import interact, widgets
import cv2

from ultraleap_demo.hand_renderer import HandRenderer # This is a simple class to render the recorded hand gesture data frame by frame

# Load the hand data
hand_data = swipe_right_trial["hands"]
hands_colour = (255, 255, 255)

# Create the instances of the HandRenderer class for dots and skeleton formats
dots_renderer = HandRenderer(np.zeros((480, 640, 3), dtype=np.uint8), hands_format="Dots", hands_colour=hands_colour, circle_radius=3)
skeleton_renderer = HandRenderer(np.zeros((480, 640, 3), dtype=np.uint8), hands_format="Skeleton", hands_colour=hands_colour, circle_radius=3)

# Create a slider for frame navigation
frame_slider = widgets.IntSlider(min=0, max=len(hand_data)-1, step=1, value=0, layout=widgets.Layout(width='90%'), description='Frame')

def show_frames(frame_number):
    # Render the hand data for this frame
    dots_renderer.render_hand_data(hand_data[frame_number])
    skeleton_renderer.render_hand_data(hand_data[frame_number])

    # Convert the images from BGR to RGB
    dots_image_rgb = cv2.cvtColor(dots_renderer.output_image, cv2.COLOR_BGR2RGB)
    skeleton_image_rgb = cv2.cvtColor(skeleton_renderer.output_image, cv2.COLOR_BGR2RGB)

    # Create a subplot with 1 row and 2 columns
    plt.figure(figsize=(20, 6))
    plt.subplot(1, 2, 1)

    # Display the dots image
    plt.imshow(skeleton_image_rgb)
    plt.axis('off')  # Hide the axes
    plt.title("Skeleton")  # Set the title

    # Display the skeleton image
    plt.subplot(1, 2, 2)
    plt.imshow(dots_image_rgb)
    plt.axis('off')  # Hide the axes
    plt.title("Dots")  # Set the title

    plt.suptitle(f"Frame {frame_number} - Palm Orientation: {palm_orientation_features[frame_number]}", fontsize=16)

    plt.show()

# Display the slider and the frame
interact(show_frames, frame_number=frame_slider)

interactive(children=(IntSlider(value=0, description='Frame', layout=Layout(width='90%'), max=225), Output()),…

<function __main__.show_frames(frame_number)>

### 2.2 **Moving Direction**

#### 2.2.1 **Functions** to calculate the moving direction:

In [152]:
def moving_directions_sorted(vec, avg_distance, stationary_threshold_ratio=0.8, dimensions=2):

    if dimensions == 3:
        categories = {'up': 0, 'down': 0, 'towards_camera': 0, 'away_from_camera': 0, 'left': 0, 'right': 0, 'stationary': 0}
        

        stationary_threshold = avg_distance * stationary_threshold_ratio
        x, y, z = vec
        
        if x > 0:
            categories['right'] = x
        else:
            categories['left'] = x
        if y > 0:
            categories['up'] = y
        else:
            categories['down'] = y
        if z > 0:
            categories['towards_camera'] = z
        else:
            categories['away_from_camera'] = z
        if np.linalg.norm(vec) < stationary_threshold:
            categories['stationary'] = np.linalg.norm(vec)

        # Get the key, value with the highest magnitude
        
    else:
        categories = {'up': 0, 'down': 0, 'left': 0, 'right': 0, 'stationary': 0}
        stationary_threshold = avg_distance * stationary_threshold_ratio
        x, y, z = vec
        if x > 0:
            categories['right'] = x
        else:
            categories['left'] = x
        if y > 0:
            categories['up'] = y
        else:
            categories['down'] = y
        if np.linalg.norm(vec) < stationary_threshold:
            categories['stationary'] = np.linalg.norm(vec)
    return sorted(categories.items(), key=lambda x: abs(x[1]), reverse=True)



def get_directions(centroids, L=1):
    directions = [None] * (len(centroids) - 1)
    for i in range(0, len(centroids) - L, L):
        direction = centroids[i + L] - centroids[i]
        for j in range(i, i + L):
            directions[j] = direction
    # Fill the remaining slots with the last calculated direction
    for i in range(len(centroids) - L, len(centroids) - 1):
        directions[i] = direction
    return np.array(directions)

def get_most_common(top_n_frames):
    return Counter(top_n_frames).most_common(1)[0][0]

def get_sorted_direction(sorted_moving_directions, n=2):

    first_direction = sorted_moving_directions[0][0]
    moving_direction_name = first_direction[0]
    
    directions = [moving_direction_name]
    top_n_directions = []
    for i in range(len(sorted_moving_directions)):
        top_n_direction = sorted_moving_directions[i][:n]
        top_n_directions.append(top_n_direction)
        top_n_direction_names = [direction[0] for direction in top_n_direction]
        if moving_direction_name not in top_n_direction_names:
            moving_direction_name = top_n_direction[0][0]
        directions.append(moving_direction_name)
    return np.array(directions), top_n_directions

def get_nonzero_top_n_directions(frames, idxs=None, L = 1, n=5, stationary_threshold_ratio=1.6, dimensions=2, reverse=False, avg_distance=None):
    if idxs is None:
        idxs = [i for i in range(1, 22)]
    frames = frames[::-1] if reverse else frames
    frames = [frame[idxs] for frame in frames]
    centroids = np.array([np.mean(frame, axis=0) for frame in frames])
    direction_vecs = get_directions(centroids)
    distances = np.array([np.linalg.norm(direction) for direction in direction_vecs])
    avg_distance = np.mean(distances) if avg_distance is None else avg_distance
    sorted_moving_directions = [moving_directions_sorted(direction_vec, avg_distance, stationary_threshold_ratio=stationary_threshold_ratio, dimensions=dimensions) for direction_vec in direction_vecs]
    directions, top_n_directions = get_sorted_direction(sorted_moving_directions, n)
    nonzero_top_n_directions = [[d for d in direction if abs(d[1]) > 0] for direction in top_n_directions]
    # add stationary direction to the second element of the list
    return [[('stationary', 0)]] + nonzero_top_n_directions

def exponential_decay(d):
    return 2 ** (-d)

def calculate_weights(distances, formula=lambda d: 1 / (d + 1)):
    return [formula(d) for d in distances]

def sort_key(item):
    direction, slope = item
    if direction in ['left', 'down', 'away_from_camera']:
        return -slope  # For 'left' and 'down', a negative slope is considered larger
    else:
        return slope  # For other directions, a positive slope is considered larger
    
def get_motion_directions(frame_data, top_dirs=1, window_size=3, weight_formula=exponential_decay):

    output = []
    distances = [0 for _ in frame_data]
    distances_history = [dict(left=0, right=0, up=0, down=0, stationary=0) for _ in frame_data]
    weighted_distances_history = [dict(left=0, right=0, up=0, down=0, stationary=0) for _ in frame_data]
    
    for i in range(len(frame_data)):
        frame = frame_data[i]

        if not frame:
            if i == 0:
                output.append('stationary')
            else:
                if len(output) < (window_size//2 + 1):
                    prev_frames = frame_data[max(0, i-window_size//2):i if i > 0 else 1]
                else:
                    # use the distances_history to calculate the previous frames
                    prev_distances = distances_history[max(0, i-window_size//2):i].copy()
                    prev_distances = [{k: v for k, v in sorted(prev_distance.items(), key=sort_key, reverse=True)} for prev_distance in prev_distances]
                    prev_frames = [list(prev_distance.items()) for prev_distance in prev_distances]
                
                prev_frames = [f for f in prev_frames if f]
                        
                next_frames = frame_data[i+1:min(len(frame_data), i+window_size//2+1)]
                next_frames = [f for f in next_frames if f]
                
                prev_distances = range(len(prev_frames), 0, -1)
                next_distances = range(1, len(next_frames) + 1)
                
                prev_weights = calculate_weights(prev_distances, weight_formula)
                next_weights = calculate_weights(next_distances, weight_formula)

                dir_weights = defaultdict(int)
                for prev_frame, weight in zip(prev_frames, prev_weights):
                    for dir, dist in prev_frame[:top_dirs]:
                        dir_weights[dir] += weight * dist
                        distances_history[i][dir] += dist
                        weighted_distances_history[i][dir] += weight * dist
                    
                for next_frame, weight in zip(next_frames, next_weights):
                    for dir, dist in next_frame[:top_dirs]:
                        dir_weights[dir] += weight * dist
                        distances_history[i][dir] += dist
                        weighted_distances_history[i][dir] += weight * dist

                common_dir = max(dir_weights, key=lambda x: abs(dir_weights[x]))
                output.append(common_dir)
                distances.append(dir_weights[common_dir])
                for prev_frame in prev_frames:
                    for dir, dist in prev_frame:
                        if dir == common_dir:
                            distances[i] += dist
                for next_frame in next_frames:
                    for dir, dist in next_frame:
                        if dir == common_dir:
                            distances[i] += dist

        else:
            if frame[0][0] == 'stationary' and len(frame) == 1:
                if len(output) < (window_size//2 + 1):
                    prev_frames = frame_data[max(0, i-window_size//2):i if i > 0 else 1]
                else:
                    # use the distances_history to calculate the previous frames
                    prev_distances = distances_history[max(0, i-window_size//2):i].copy()
                    prev_distances = [{k: v for k, v in sorted(prev_distance.items(), key=sort_key, reverse=True)} for prev_distance in prev_distances]
                    prev_frames = [list(prev_distance.items()) for prev_distance in prev_distances]

                prev_frames = [f for f in prev_frames if f]
                next_frames = frame_data[i+1:min(len(frame_data), i+window_size//2+1)]
                next_frames = [f for f in next_frames if f]
                prev_distances = range(len(prev_frames), 0, -1)
                next_distances = range(1, len(next_frames) + 1)
                
                prev_weights = calculate_weights(prev_distances, weight_formula)
                next_weights = calculate_weights(next_distances, weight_formula)

                dir_weights = defaultdict(int)
                for prev_frame, weight in zip(prev_frames, prev_weights):
                    for dir, dist in prev_frame[:top_dirs]:
                        dir_weights[dir] += weight
                        distances_history[i][dir] += dist
                        weighted_distances_history[i][dir] += weight * dist

                for next_frame, weight in zip(next_frames, next_weights):
                    for dir, dist in next_frame[:top_dirs]:
                        dir_weights[dir] += weight
                        distances_history[i][dir] += dist
                        weighted_distances_history[i][dir] += weight * dist

                common_dir = max(dir_weights, key=lambda x: abs(dir_weights[x]))
                output.append(common_dir)
                for prev_frame in prev_frames:
                    for dir, dist in prev_frame:
                        if dir == common_dir:
                            distances[i] += dist
                for next_frame in next_frames:
                    for dir, dist in next_frame:
                        if dir == common_dir:
                            distances[i] += dist
            else:
                if i == 0 or not output[-1]:
                    output.append(frame[0][0])
                else:
                    if len(output) < (window_size//2 + 1):
                        prev_frames = frame_data[max(0, i-window_size//2):i if i > 0 else 1]
                    else:
                        # use the distances_history to calculate the previous frames
                        prev_distances = distances_history[max(0, i-window_size//2):i].copy()
                        # sort the distances_history by the absolute value of the distance
                        prev_distances = [{k: v for k, v in sorted(prev_distance.items(), key=sort_key, reverse=True)} for prev_distance in prev_distances]
                        prev_frames = [list(prev_distance.items()) for prev_distance in prev_distances]
                    prev_top_dir = output[-1]
                    next_frames = frame_data[i+1:min(len(frame_data), i+window_size//2+1)]
                    if prev_top_dir in [dir for dir, _ in frame[:top_dirs]]:
                        output.append(prev_top_dir)
                    elif len(next_frames) > 0 and prev_top_dir in [dir for dir, _ in next_frames[0][:top_dirs]]:
                        output.append(prev_top_dir)
                    else:
                        output.append(frame[0][0])

                    for prev_frame in prev_frames:
                        for dir, dist in prev_frame:
                            distances_history[i][dir] += dist
                            weighted_distances_history[i][dir] += dist
                            if dir == output[-1]:
                                distances[i] += dist
                            
                    for next_frame in next_frames:
                        for dir, dist in next_frame:
                            distances_history[i][dir] += dist
                            weighted_distances_history[i][dir] += dist
                            if dir == output[-1]:
                                distances[i] += dist

    # Sort each dictionary in each of the history lists by absolute value (descending)
    for i in range(len(distances_history)):
        distances_history[i] = {k: v for k, v in sorted(distances_history[i].items(), key=sort_key, reverse=True)}
        weighted_distances_history[i] = {k: v for k, v in sorted(weighted_distances_history[i].items(), key=sort_key, reverse=True)}
    return output, distances, distances_history, weighted_distances_history

def get_frames_data(frames):
    return np.array([frames[i] for i in range(len(frames))])

def get_previous_frames_data(n, frames):
    return get_frames_data(frames[-n:])

def get_frames_directions(frames, idxs = None, L = 1, n=5, stationary_threshold_ratio=1.6, avg_distance = None):
    return get_nonzero_top_n_directions(get_frames_data(frames), idxs = idxs, L = L, n = n, stationary_threshold_ratio = stationary_threshold_ratio, avg_distance = avg_distance)

def get_frames_motion(frames_directions = None, top_dirs = 2, window_size = WINDOW_SIZE, weight_formula = exponential_decay, idxs = None, L = 1, n=5, stationary_threshold_ratio=1.6, avg_distance = None):
    frames_directions = get_frames_directions(idxs = idxs, L = L, n = n, stationary_threshold_ratio = stationary_threshold_ratio, avg_distance = avg_distance) if frames_directions is None else frames_directions
    return get_motion_directions(frames_directions, top_dirs = top_dirs, window_size = window_size, weight_formula = weight_formula)

def get_previous_frames_motion(frames, n, top_dirs = 2, window_size = WINDOW_SIZE, weight_formula = exponential_decay, idxs = None, L = 1, stationary_threshold_ratio=1.6, avg_distance = None):
    return get_frames_motion(get_frames_directions(frames[-n:], idxs = idxs, L = L, stationary_threshold_ratio = stationary_threshold_ratio), top_dirs = top_dirs, window_size = window_size, weight_formula = weight_formula, avg_distance=avg_distance)[0]



#### 2.2.2 **Calculating** the moving direction for each frame:

In [7]:

top_dirs = 3
window_size = 3
stationary_threshold = 1.3 # 1.418
moving_direction_indexes = [6, 18, 10, 14, 0, 1, 3, 7, 19] # Indexes of the landmarks to use for calculating the moving directions
L = 1
direction_mapping_2d = {'up': 0, 'down': 1, 'left': 2, 'right': 3, 'stationary': 4}
frames = swipe_right_trial["raw"]
window_size_moving_directions = get_previous_frames_motion(frames=frames, n = len(frames), top_dirs = top_dirs, window_size = window_size, weight_formula = exponential_decay, idxs = moving_direction_indexes, L = L, stationary_threshold_ratio = stationary_threshold)
prev_state = "stationary"
prev_frame_change = 0

interval_dict = {}
for i, state in enumerate(window_size_moving_directions):
    if prev_state is not None:
        if state != prev_state:
            interval_dict[(prev_frame_change, i)] = [prev_state, state]
            print(f"Frames {prev_frame_change}:{i} -> {prev_state}")

            print(f"\t Hand motion transition from: {prev_state} -> {state} at frame {i}\n")

            prev_frame_change = i


    prev_state = state

last_interval = list(interval_dict.keys())[-1]
last_state = interval_dict[last_interval][1]
interval_dict[(last_interval[1], len(frames)-1)] = [last_state, last_state]

Frames 0:4 -> stationary
	 Hand motion transition from: stationary -> left at frame 4

Frames 4:26 -> left
	 Hand motion transition from: left -> stationary at frame 26

Frames 26:41 -> stationary
	 Hand motion transition from: stationary -> right at frame 41

Frames 41:85 -> right
	 Hand motion transition from: right -> stationary at frame 85

Frames 85:105 -> stationary
	 Hand motion transition from: stationary -> left at frame 105

Frames 105:158 -> left
	 Hand motion transition from: left -> stationary at frame 158

Frames 158:181 -> stationary
	 Hand motion transition from: stationary -> right at frame 181



#### 2.2.3 **Visualising** the moving directions over frames:
- As a sanity check we can redraw the hand landmarks over the frames to see if the discretised moving directions are accurate.

In [99]:
from matplotlib import pyplot as plt
from ipywidgets import interact, widgets
import cv2

from ultraleap_demo.hand_renderer import HandRenderer # This is a simple class to render the recorded hand gesture data frame by frame

# Load the hand data
hand_data = swipe_right_trial["hands"]
hands_colour = (255, 255, 255)

# Create the instances of the HandRenderer class for dots and skeleton formats
dots_renderer = HandRenderer(np.zeros((480, 640, 3), dtype=np.uint8), hands_format="Dots", hands_colour=hands_colour, circle_radius=3)
skeleton_renderer = HandRenderer(np.zeros((480, 640, 3), dtype=np.uint8), hands_format="Skeleton", hands_colour=hands_colour, circle_radius=3)

for i in range(len(list(interval_dict.keys()))):
    start, end = list(interval_dict.keys())[i]
    print(f"Interval {i+1}: {start}:{end} -> {interval_dict[(start, end)][0]}")
    frame_slider = widgets.IntSlider(min=start, max=end-1, step=1, value=start, layout=widgets.Layout(width='50%'), description='Frame')
    def show_frames(frame_number):
        # Render the hand data for this frame
        dots_renderer.render_hand_data(hand_data[frame_number])
        skeleton_renderer.render_hand_data(hand_data[frame_number])

        # Convert the images from BGR to RGB
        dots_image_rgb = cv2.cvtColor(dots_renderer.output_image, cv2.COLOR_BGR2RGB)
        skeleton_image_rgb = cv2.cvtColor(skeleton_renderer.output_image, cv2.COLOR_BGR2RGB)

        # Create a subplot with 1 row and 2 columns
        plt.figure(figsize=(10, 5))
        plt.subplot(1, 2, 1)

        # Display the dots image
        plt.imshow(skeleton_image_rgb)
        plt.axis('off')  # Hide the axes
        plt.title("Skeleton")  # Set the title

        # Display the skeleton image
        plt.subplot(1, 2, 2)
        plt.imshow(dots_image_rgb)
        plt.axis('off')  # Hide the axes
        plt.title("Dots")  # Set the title
        plt.suptitle(f"Frame {frame_number} - Motion: {window_size_moving_directions[frame_number]}", fontsize=16)
        # decrease vertical space between subplots and suptitle
        plt.subplots_adjust(top=1.1)

        plt.show()
    # Display the slider and the frame
    interact(show_frames, frame_number=frame_slider)

Interval 1: 0:4 -> stationary


interactive(children=(IntSlider(value=0, description='Frame', layout=Layout(width='50%'), max=3), Output()), _…

Interval 2: 4:26 -> left


interactive(children=(IntSlider(value=4, description='Frame', layout=Layout(width='50%'), max=25, min=4), Outp…

Interval 3: 26:41 -> stationary


interactive(children=(IntSlider(value=26, description='Frame', layout=Layout(width='50%'), max=40, min=26), Ou…

Interval 4: 41:85 -> right


interactive(children=(IntSlider(value=41, description='Frame', layout=Layout(width='50%'), max=84, min=41), Ou…

Interval 5: 85:105 -> stationary


interactive(children=(IntSlider(value=85, description='Frame', layout=Layout(width='50%'), max=104, min=85), O…

Interval 6: 105:158 -> left


interactive(children=(IntSlider(value=105, description='Frame', layout=Layout(width='50%'), max=157, min=105),…

Interval 7: 158:181 -> stationary


interactive(children=(IntSlider(value=158, description='Frame', layout=Layout(width='50%'), max=180, min=158),…

Interval 8: 181:225 -> right


interactive(children=(IntSlider(value=181, description='Frame', layout=Layout(width='50%'), max=224, min=181),…

### 2.3 **Hand Pose**

#### 2.3.1 **Functions** to calculate the hand pose similarity using hand joint bending angles:

In [None]:
import numpy as np

from ultraleap_demo.classes import HandPose


def get_bending_joint_idxs():
    return [
        (0,2,4),(2,4,5), # thumb
        (0,6,8),(6,8,9), # index
        (0,10,12),(10,12,13), # middle
        (0,14,16),(14,16,17), # ring
        (0,18,20),(18,20,21) # little
    ]

def get_bending_angles(jointA, jointB, jointC):
    # Calculate vectors
    AB = np.subtract(jointA, jointB)
    BC = np.subtract(jointC, jointB)

    # Calculate dot product and norms
    dot_product = np.dot(AB, BC)
    norm_ab = np.linalg.norm(AB)
    norm_bc = np.linalg.norm(BC)

    # Calculate angle in radians and make sure no division by zero
    angle_rad = np.arccos(dot_product / (norm_ab * norm_bc + 1e-6))

    # Convert to degrees
    angle_deg = np.degrees(angle_rad)

    return angle_deg

def joint_bending_angles(coordinates):
    bending_joint_idxs = get_bending_joint_idxs()
    bending_angles = []
    for idxs in bending_joint_idxs:
        jointA = np.array(coordinates[idxs[0]])
        jointB = np.array(coordinates[idxs[1]])
        jointC = np.array(coordinates[idxs[2]])

        # Calculate the bending angle
        angle = get_bending_angles(jointA, jointB, jointC)
        bending_angles.append(angle)
    return np.array(bending_angles)

def evaluate_similarity_score(score, threshold = 0.5):
    if score < threshold:
        return 1
    else:
        return 0

#### 2.3.2 **Calculating** the hand pose similarity for each frame:

In [154]:
similarity_lookback = 5
similarity_threshold = 0.9

frames = swipe_right_trial["raw"]

handpose_model_dir = os.path.join(os.path.join(os.getcwd(), 'ultraleap_demo'), 'handpose_models')
handpose_model_paths = {os.path.join(handpose_model_dir, p).split("\\")[-1].split(".")[0].split("_")[0]:os.path.join(handpose_model_dir, p) for p in os.listdir(handpose_model_dir)}

handpose = HandPose(handpose_model_paths["filtereddhg"])
similarities = []
evaluated_scores = []
for i in range(len(frames) - similarity_lookback):
    lookback_frame = frames[i]
    current_frame = frames[i + similarity_lookback]

    lookback_frame_bending_angles = joint_bending_angles(lookback_frame)
    current_frame_bending_angles = joint_bending_angles(current_frame)

    similarity_score = handpose.get_similarity(current_frame_bending_angles, lookback_frame_bending_angles)
    evaluated_score = evaluate_similarity_score(similarity_score, threshold = similarity_threshold)

    similarities.append(similarity_score)
    evaluated_scores.append(evaluated_score)

#### 2.3.3 **Visualising** the hand pose similarity over frames:

In [150]:
from matplotlib import pyplot as plt
from ipywidgets import interact, widgets
import cv2

from ultraleap_demo.hand_renderer import HandRenderer # This is a simple class to render the recorded hand gesture data frame by frame

# Load the hand data
hand_data = swipe_right_trial["hands"]
hands_colour = (255, 255, 255)

# Create the instances of the HandRenderer class for dots and skeleton formats
skeleton_renderer_lookback = HandRenderer(np.zeros((480, 640, 3), dtype=np.uint8), hands_format="Skeleton", hands_colour=hands_colour, circle_radius=3)
skeleton_renderer_current = HandRenderer(np.zeros((480, 640, 3), dtype=np.uint8), hands_format="Skeleton", hands_colour=hands_colour, circle_radius=3)

# Create a slider for frame navigation
frame_slider = widgets.IntSlider(min=0, max=len(hand_data)-1-similarity_lookback, step=1, value=0, layout=widgets.Layout(width='90%'), description='Frame')

def show_frames(frame_number):
    # Render the hand data for this frame
    skeleton_renderer_lookback.render_hand_data(hand_data[frame_number])
    skeleton_renderer_current.render_hand_data(hand_data[frame_number + similarity_lookback])

    # Convert the images from BGR to RGB
    current_image_rgb = cv2.cvtColor(skeleton_renderer_lookback.output_image, cv2.COLOR_BGR2RGB)
    lookback_image_rgb = cv2.cvtColor(skeleton_renderer_current.output_image, cv2.COLOR_BGR2RGB)

    # Create a subplot with 1 row and 2 columns
    plt.figure(figsize=(20, 6))
    plt.subplot(1, 2, 1)

    # Display the dots image
    plt.imshow(current_image_rgb)
    plt.axis('off')  # Hide the axes
    plt.title(f"Current - Frame {frame_number}")  # Set the title

    # Display the skeleton image
    plt.subplot(1, 2, 2)
    plt.imshow(lookback_image_rgb)
    plt.axis('off')  # Hide the axes
    plt.title(f"Lookback - Frame {frame_number + similarity_lookback}")  # Set the title

    plt.suptitle(f"Hand Pose Similarity: {similarities[frame_number]:.4f} -> {'Similar' if evaluated_scores[frame_number] == 0 else 'Dissimilar'}", fontsize=16)

    plt.show()

# Display the slider and the frame
interact(show_frames, frame_number=frame_slider)

interactive(children=(IntSlider(value=0, description='Frame', layout=Layout(width='90%'), max=195), Output()),…

<function __main__.show_frames(frame_number)>

## 3. **Classifying dynamic hand gestures using the sequences of discrete features (optional)**

#### Ensure all features are same length as similarity scores (due to lookback window)

In [221]:
print(f"Length of moving directions: {len(window_size_moving_directions)}")
print(f"Length of palm orientations: {len(palm_orientation_features)}")
print(f"Length of similarity scores: {len(evaluated_scores)}\n")

moving_directions = window_size_moving_directions[similarity_lookback:]
palm_orientations = palm_orientation_features[similarity_lookback:]
evaluated_scores = evaluated_scores

print(f"Length of moving directions: {len(moving_directions)}")
print(f"Length of palm orientations: {len(palm_orientations)}")
print(f"Length of similarity scores: {len(evaluated_scores)}")

Length of moving directions: 226
Length of palm orientations: 226
Length of similarity scores: 221

Length of moving directions: 221
Length of palm orientations: 221
Length of similarity scores: 221


#### Mapping the feature labels to integer labels for classification:

In [244]:
orientation_mapping = {'up': 0, 'down': 1, 'opposite': 2}
inverted_orientation_mapping = {v: k for k, v in orientation_mapping.items()}
direction_mapping_2d = {'up': 0, 'down': 1, 'left': 2, 'right': 3, 'stationary': 4}
inverted_direction_mapping_2d = {v: k for k, v in direction_mapping_2d.items()}

print(f"First 5 moving directions {moving_directions[:5]}")
print(f"First 5 palm orientations {palm_orientations[:5]}")
print(f"First 5 hand pose scores {evaluated_scores[:5]}\n")

int_moving_directions = [direction_mapping_2d[direction] for direction in moving_directions]
int_palm_orientations = [orientation_mapping[orientation] for orientation in palm_orientations]
int_hand_pose = evaluated_scores

print(f"First 5 moving directions {int_moving_directions[:5]}")
print(f"First 5 palm orientations {int_palm_orientations[:5]}")
print(f"First 5 hand pose scores {int_hand_pose[:5]}")

First 5 moving directions ['left', 'left', 'left', 'left', 'left']
First 5 palm orientations ['down', 'down', 'down', 'down', 'down']
First 5 hand pose scores [0, 0, 0, 0, 0]

First 5 moving directions [2, 2, 2, 2, 2]
First 5 palm orientations [1, 1, 1, 1, 1]
First 5 hand pose scores [0, 0, 0, 0, 0]


#### Remapping the feature labels to same labels that were used for training the Transformer model:

In [245]:
state_mapping_dir = os.path.join('ultraleap_demo','state_mapping')

# load the state mapping for each of the features
moving_direction_state_mapping = pickle.load(open(os.path.join(state_mapping_dir, 'moving_direction_mapping.pkl'), 'rb'))
inverted_moving_direction_state_mapping = {v: k for k, v in moving_direction_state_mapping.items()}
palm_orientation_state_mapping = pickle.load(open(os.path.join(state_mapping_dir, 'palm_orientation_mapping.pkl'), 'rb'))
inverted_palm_orientation_state_mapping = {v: k for k, v in palm_orientation_state_mapping.items()}

mapped_moving_directions = np.array([moving_direction_state_mapping[str(direction)] for direction in int_moving_directions])
mapped_palm_orientations = np.array([palm_orientation_state_mapping[str(orientation)] for orientation in int_palm_orientations])
mapped_hand_pose = np.array(int_hand_pose)

#### **Note:** Follow this process (creating feature vector for each frame) for many different hand gesture examples in order to create the lookup table for classification.
- Below is an example of how to load the lookup table and prepare it to be used for classification.

In [417]:
from ultraleap_demo.load_demo import *
from data.dhg import DHG

test_size = 0.1 # The size of the test set (Smaller pct means slower classification but more accuracy. Larger pct means faster classification but less accuracy)
target_length = 64 # The target length to normalize the sequences to

data_dir = os.path.join(os.getcwd(), "data")
dhg_data = DHG(data_dir)

performer_states_dict = get_performer_states_dict(dhg_data, smoother=True)
normalized_performer_states_dict = normalize_performer_state(performer_states_dict, by_subject=True)
gestures = [f"gesture_{i}" for i in range(7, 11)]
train_lookup_table, test_lookup_table = make_train_test_lookup_table(normalized_performer_states_dict, test_size=test_size, gestures=gestures)

normalized_train_lookup_table = normalize_lookup_table(train_lookup_table, target_length)
prepared_train_lookup_table = lookup_table_tensor(normalized_train_lookup_table)

Loading pickle files...
Done loading pickle files


#### Function to make Transformer time series prediction, combine the prediction to the original sequence and then classify this new sequence:

In [418]:
from ultraleap_demo.load_demo import model_dict

def classify_gesture_sequence(mapped_moving_directions, mapped_palm_orientations, mapped_hand_poses, sequence_length, output_window, lookup_table, target_length, threshold=0.05, device='cpu'):
    gesture_mapped_names = {
        "gesture_7":"Swipe Right",
        "gesture_8":"Swipe Left",
        "gesture_9":"Swipe Up",
        "gesture_10":"Swipe Down",
    }

    combined = np.squeeze(combine_mapped_separated_sequences([mapped_moving_directions], [mapped_palm_orientations], [mapped_hand_poses]), axis=1)[0].T
    to_predict_sequence = make_predict_frame_sequence(list(combined), sequence_length, output_window)
    model_dict[sequence_length][output_window] = model_dict[sequence_length][output_window].to(device)
    model = model_dict[sequence_length][output_window]
    model.eval()
    to_predict_sequence = to_predict_sequence.to(device)
    
    predicted_states = combine_predicted_features(make_prediction(model, to_predict_sequence, output_window))
    performed_states = to_predict_sequence.tolist()[0][:-output_window]
    classify_sequence = performed_states + predicted_states

    gesture, _, scores = classify_gesture(classify_sequence=classify_sequence, lookup_table=lookup_table, target_length=target_length)

    gesture_scores = [l[1] for l in list(scores.values())]
    # Calculate the sum of all scores
    total_score = sum(gesture_scores)

    # Normalize the scores
    normalized_scores = [score / total_score for score in gesture_scores]

    # Sort the normalized scores in descending order
    sorted_scores = sorted(normalized_scores, reverse=True)

    # Calculate the difference between the best score and the second best score
    score_difference = sorted_scores[0] - sorted_scores[1]

    # Check if the score difference is below the threshold
    if score_difference < threshold:
        classified_gesture = "Unknown"
    else:
        classified_gesture = gesture_mapped_names[gesture]

    return classified_gesture, score_difference

#### Classifying the hand gesture sequences combined with the Transformer time series predictions using the lookup table:

In [432]:
from matplotlib import pyplot as plt
from ipywidgets import interact, widgets
import cv2

from ultraleap_demo.hand_renderer import HandRenderer # This is a simple class to render the recorded hand gesture data frame by frame

start_idx = 0
end_idx = len(mapped_moving_directions)

confidence_threshold = 0.05
sequence_length = 32
target_length = 64
output_window = 1
device = 'cpu'
normalize = True

intervals = []
classified_gestures = []
confidences = []
for i in range(1, end_idx):
    if i - start_idx >= target_length:
        start_idx += 1 
    mapped_moving_direction_sequence = mapped_moving_directions[start_idx:i]
    mapped_palm_orientation_sequence = mapped_palm_orientations[start_idx:i]
    mapped_hand_pose_sequence = mapped_hand_pose[start_idx:i]

    if normalize:
        mapped_moving_direction_sequence = normalize_sequence(mapped_moving_direction_sequence, target_length)
        mapped_palm_orientation_sequence = normalize_sequence(mapped_palm_orientation_sequence, target_length)
        mapped_hand_pose_sequence = normalize_sequence(mapped_hand_pose_sequence, target_length)

    classified_gesture,confidence = classify_gesture_sequence(mapped_moving_direction_sequence, mapped_palm_orientation_sequence, mapped_hand_pose_sequence, sequence_length, output_window, prepared_train_lookup_table, target_length, threshold=confidence_threshold, device=device)

    classified_gestures.append(classified_gesture)
    confidences.append(confidence)

#### **Visualising** the classification results:
- Confidence increases as the model sees more frames of the hand gesture.

In [440]:
from matplotlib import pyplot as plt
from ipywidgets import interact, widgets
import cv2

from ultraleap_demo.hand_renderer import HandRenderer # This is a simple class to render the recorded hand gesture data frame by frame

# Load the hand data
hand_data = swipe_right_trial["hands"]
hands_colour = (255, 255, 255)

# Create the instances of the HandRenderer class for dots and skeleton formats
dots_renderer = HandRenderer(np.zeros((480, 640, 3), dtype=np.uint8), hands_format="Dots", hands_colour=hands_colour, circle_radius=3)
skeleton_renderer = HandRenderer(np.zeros((480, 640, 3), dtype=np.uint8), hands_format="Skeleton", hands_colour=hands_colour, circle_radius=3)

frame_slider = widgets.IntSlider(min=0, max=end_idx-1-output_window, step=1, value=0, layout=widgets.Layout(width='90%'), description='Frame')
def show_frames(frame_number):
    # Render the hand data for this frame
    dots_renderer.render_hand_data(hand_data[frame_number])
    skeleton_renderer.render_hand_data(hand_data[frame_number])

    # Convert the images from BGR to RGB
    dots_image_rgb = cv2.cvtColor(dots_renderer.output_image, cv2.COLOR_BGR2RGB)
    skeleton_image_rgb = cv2.cvtColor(skeleton_renderer.output_image, cv2.COLOR_BGR2RGB)

    # Create a subplot with 1 row and 2 columns
    plt.figure(figsize=(20, 6))
    plt.subplot(1, 2, 1)

    # Display the dots image
    plt.imshow(skeleton_image_rgb)
    plt.axis('off')  # Hide the axes
    plt.title("Skeleton")  # Set the title

    # Display the skeleton image
    plt.subplot(1, 2, 2)
    plt.imshow(dots_image_rgb)
    plt.axis('off')  # Hide the axes
    plt.title("Dots")  # Set the title
    plt.suptitle(f"Frame {frame_number} - Classified Gesture: {classified_gestures[frame_number]} with a confidence of {confidences[frame_number]:.4f}", fontsize=16)
    # decrease vertical space between subplots and suptitle
    plt.subplots_adjust(top=0.9)

    plt.show()

# Display the slider and the frame
interact(show_frames, frame_number=frame_slider)

interactive(children=(IntSlider(value=0, description='Frame', layout=Layout(width='90%'), max=219), Output()),…

<function __main__.show_frames(frame_number)>