# Pose-based Approaches

In [1]:
import os
import glob
import numpy as np

data_path = "landmarks_mapped/landmarks_mapped"

# List all .npy files. glob returns paths in an arbitrary, filesystem-dependent
# order, so sort them to keep sample indices stable from one run to the next.
files = sorted(glob.glob(os.path.join(data_path, "*.npy")))

print("Total files found:", len(files))

# Fail early with a clear message instead of an IndexError on data[0] below.
if not files:
    raise FileNotFoundError(f"No .npy files found in {data_path!r}")

data = []    # one landmark array per video
labels = []  # one label per video, parallel to `data` (a list: repeats are kept)

for file in files:
    # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects,
    # which can execute code; only load trusted files. If the arrays are plain
    # numeric, allow_pickle=False should be enough — confirm before changing.
    arr = np.load(file, allow_pickle=True)
    data.append(arr)

    # Label = file name up to the first "_" (or up to the first "." when the
    # name has no "_").
    filename = os.path.basename(file)
    label = filename.split("_")[0].split(".")[0]
    labels.append(label)

print("Loaded samples:", len(data))
print("labels:", len(labels))
print("Number of labels:", len(labels))
print("First sample shape:", data[0].shape)



Total files found: 11980
Loaded samples: 11980
labels: 11980
Number of labels: 11980
First sample shape: (74, 438)


## Preprocessing Stage 

In [2]:


# Split every video along its feature axis (the last dimension) into the four
# landmark groups. Column ranges used below:
#   [0, 132) pose | [132, 312) face | [312, 375) left hand | [375, 438) right hand
# Basic slicing is used, so each piece is a view of the loaded array, not a copy.
pose_videos = [clip[:, 0:132] for clip in data]
face_videos = [clip[:, 132:312] for clip in data]
lh_videos = [clip[:, 312:375] for clip in data]
rh_videos = [clip[:, 375:438] for clip in data]

# Sanity check: per-group shapes of the first video
print("First video shapes:")
for group_name, group_videos in (
    ("Pose:", pose_videos),
    ("Face:", face_videos),
    ("Left hand:", lh_videos),
    ("Right hand:", rh_videos),
):
    print(group_name, group_videos[0].shape)




First video shapes:
Pose: (74, 132)
Face: (74, 180)
Left hand: (74, 63)
Right hand: (74, 63)


### Hand reconstruction

In [None]:

def initialize_first_last_flat(hand_frames):
    """
    Fill a missing first and/or last frame with the average detected hand.

    A frame counts as missing when all of its features are exactly zero.
    Giving the sequence valid end points lets a later interpolation step
    bracket every interior gap.

    Parameters
    ----------
    hand_frames : array-like, shape (T, F)
        Flattened hand features per frame (F = 63 in this notebook).

    Returns
    -------
    np.ndarray, shape (T, F)
        A new floating-point array; the input is never modified. If no frame
        has a detected hand (or T == 0) the values are returned unchanged.
    """
    # Work on a copy: callers often pass views into a larger array, and
    # writing through them would silently alter the source data.
    frames = np.array(hand_frames)
    if not np.issubdtype(frames.dtype, np.floating):
        # An integer buffer would truncate the averaged frame.
        frames = frames.astype(float)

    n_frames = frames.shape[0]
    if n_frames == 0:
        return frames

    # True for frames that contain at least one non-zero feature.
    detected = (frames != 0).reshape(n_frames, -1).any(axis=1)
    if not detected.any():
        return frames  # nothing to reconstruct from

    # Average over the detected frames only.
    avg_hand = frames[detected].mean(axis=0)

    if not detected[0]:
        frames[0] = avg_hand
    if not detected[-1]:
        frames[-1] = avg_hand

    return frames


def reconstruct_hands_flat(hand_frames):
    """
    Fill missing hand frames by linear interpolation over time.

    A frame counts as missing when all of its features are exactly zero.
    The first and last frames are initialised by
    ``initialize_first_last_flat``; every remaining run of missing frames
    that lies between two detected frames is then filled with a straight
    line (per feature) between those two frames. Missing frames that are not
    bracketed on both sides are left as zeros.

    Parameters
    ----------
    hand_frames : array-like, shape (T, F)
        Flattened hand features per frame (F = 63 in this notebook).

    Returns
    -------
    np.ndarray, shape (T, F)
        A new floating-point array; the input is never modified.
    """
    # Copy first so that views into the source data are not written through.
    frames = np.array(hand_frames)
    if not np.issubdtype(frames.dtype, np.floating):
        # Interpolated values are fractional; an integer buffer would truncate.
        frames = frames.astype(float)

    frames = initialize_first_last_flat(frames)
    n_frames = len(frames)
    if n_frames == 0:
        return frames

    # Detection mask computed once, from the frames as they are before
    # any gap is filled.
    detected = (frames != 0).any(axis=1)
    known = np.flatnonzero(detected)

    # Walk over consecutive detected frames and fill whatever lies between.
    for prev_idx, next_idx in zip(known[:-1], known[1:]):
        gap = next_idx - prev_idx
        if gap == 1:
            continue  # adjacent detections, nothing missing

        # steps[j] = distance of the j-th missing frame from prev_idx.
        steps = np.arange(1, gap)[:, None]
        frames[prev_idx + 1:next_idx] = (
            (gap - steps) * frames[prev_idx] + steps * frames[next_idx]
        ) / gap

    return frames


In [None]:
# Reconstruct missing left-hand frames, mirroring the right-hand cell.
# (Previously the raw lh_video was appended, so no reconstruction happened.)
lh_reconstructed = [reconstruct_hands_flat(lh_video) for lh_video in lh_videos]

In [5]:
# Reconstruct missing right-hand frames, one video at a time.
rh_reconstructed = [reconstruct_hands_flat(clip) for clip in rh_videos]

In [59]:

# --------------------------------------------------
# 1) Small random noise (jitter)
# --------------------------------------------------
def augment_jitter(seq, sigma=0.01):
    """Return ``seq`` plus zero-mean Gaussian noise of standard deviation ``sigma``.

    The noise has the same shape as ``seq`` and is drawn from NumPy's global
    random state. ``seq`` itself is not modified.
    """
    return seq + np.random.normal(loc=0, scale=sigma, size=seq.shape)

# --------------------------------------------------
# 2) Random scaling
# --------------------------------------------------
def augment_scaling(seq, scale_range=(0.95, 1.05)):
    """Multiply the whole sequence by one random factor.

    The factor is drawn uniformly from ``scale_range`` (low, high) using
    NumPy's global random state and applied to every value of ``seq``.
    A new array is returned; ``seq`` is not modified.
    """
    factor = np.random.uniform(*scale_range)
    scaled = seq * factor
    return scaled

# --------------------------------------------------
# 3) Small random rotation
# By default every consecutive pair of values is treated as (x, y).
# Pass `layout` to rotate only the real (x, y) columns of each landmark.
# --------------------------------------------------
def augment_rotation(seq, angle_range=(-5, 5), layout=None):
    """
    Rotate (x, y) coordinates of every frame by one random angle about the origin.

    Parameters
    ----------
    seq : array-like, shape (T, F)
        Sequence of flattened features per frame.
    angle_range : (low, high)
        Range in degrees from which the angle is drawn uniformly
        (NumPy global random state).
    layout : iterable of (start, stop, dims), optional
        Describes where landmarks live in the feature vector: columns
        ``start, start + dims, ...`` (below ``stop``) are x values and the
        column right after each is its y value; any further columns of a
        landmark are left untouched. When omitted, the legacy behaviour is
        kept: columns (0, 1), (2, 3), ... are all treated as (x, y) pairs.
        NOTE(review): for this notebook's column split the layout is
        presumably ((0, 132, 4), (132, 312, 3), (312, 375, 3), (375, 438, 3))
        — TODO confirm the per-landmark dimensionality before relying on it.

    Returns
    -------
    np.ndarray
        A new array with the same shape as ``seq``; the input is not modified.

    Raises
    ------
    ValueError
        If ``layout`` is omitted and the number of features per frame is odd.
    """
    angle = np.radians(np.random.uniform(*angle_range))
    cos_a, sin_a = np.cos(angle), np.sin(angle)

    # Rotate a C-contiguous copy so the caller's array is never written to.
    rotated = np.array(seq, order="C")
    if not np.issubdtype(rotated.dtype, np.floating):
        rotated = rotated.astype(float)

    n_frames = rotated.shape[0]
    n_cols = int(np.prod(rotated.shape[1:]))
    flat = rotated.reshape(n_frames, n_cols)  # view onto `rotated`

    if layout is None:
        if n_cols % 2:
            raise ValueError(
                f"cannot pair up {n_cols} features per frame as (x, y) values"
            )
        x_cols = np.arange(0, n_cols, 2)
    else:
        x_cols = np.asarray(
            [col for start, stop, dims in layout for col in range(start, stop, dims)],
            dtype=int,
        )
    y_cols = x_cols + 1

    # Fancy indexing returns copies, so both outputs are computed from the
    # un-rotated x and y (the old code overwrote x before computing y).
    x = flat[:, x_cols]
    y = flat[:, y_cols]
    flat[:, x_cols] = x * cos_a - y * sin_a
    flat[:, y_cols] = x * sin_a + y * cos_a

    return flat.reshape(rotated.shape)

# --------------------------------------------------
# 4) Time warping by nearest-frame resampling (frames are picked, values are not interpolated)
# --------------------------------------------------
def augment_time_warp(seq, speed_range=(0.9, 1.1)):
    """
    Re-time a sequence by a random speed factor, keeping its frame count.

    A factor is drawn uniformly from ``speed_range`` (NumPy global random
    state). The sequence is first resampled to ``int(T * factor)`` frames
    (at least 5) and then resampled back to ``T`` frames. Both passes pick
    existing frames at evenly spaced positions truncated to whole indices;
    no values are blended.

    Returns a new float64 array of shape ``(T, seq.shape[1])``.
    """
    n_frames, n_features = seq.shape[0], seq.shape[1]
    rate = np.random.uniform(*speed_range)

    # Length of the intermediate sequence, never fewer than 5 frames.
    stretched_len = max(5, int(n_frames * rate))

    # Pass 1: original -> intermediate length.
    src_rows = np.linspace(0, n_frames - 1, stretched_len).astype(int)
    stretched = np.zeros((stretched_len, n_features))
    stretched[:] = seq[src_rows]

    # Pass 2: intermediate -> original length.
    back_rows = np.linspace(0, stretched_len - 1, n_frames).astype(int)
    result = np.zeros((n_frames, n_features))
    result[:] = stretched[back_rows]

    return result

# --------------------------------------------------
# 5) Random frame drop (mild)
# --------------------------------------------------
def augment_frame_drop(seq, drop_rate=0.05):
    """
    Simulate dropped frames by repeating the preceding frame.

    ``int(T * drop_rate)`` distinct frame indices are drawn from NumPy's
    global random state. Each chosen frame is overwritten, in draw order,
    with the frame just before it in the working copy; index 0 has no
    predecessor and is left as it is. The input is not modified and the
    number of frames stays the same.
    """
    n_frames = seq.shape[0]
    dropped = seq.copy()

    n_to_drop = int(n_frames * drop_rate)
    chosen = np.random.choice(n_frames, n_to_drop, replace=False)

    for frame in chosen:
        if frame > 0:
            dropped[frame] = dropped[frame - 1]

    return dropped


# --------------------------------------------------
# MASTER FUNCTION: Randomly apply augmentations
# --------------------------------------------------
def augment_sequence(seq):
    """
    Apply a random subset of the augmentations to one sequence.

    The augmentations are considered in a fixed order; each one is applied
    only if a fresh uniform draw from NumPy's global random state falls
    below its probability. When none is selected, ``seq`` is returned as is.
    """
    # (probability, transform), in application order
    pipeline = (
        (0.5, augment_jitter),
        (0.3, augment_scaling),
        (0.3, augment_rotation),
        (0.4, augment_time_warp),
        (0.3, augment_frame_drop),
    )

    for probability, transform in pipeline:
        if np.random.rand() < probability:
            seq = transform(seq)

    return seq


In [None]:
# def pad_videos_list(videos, max_frames):
#     """
#     Pad or truncate a list of videos to the same number of frames.

#     videos: list of np.arrays, each (num_frames, num_points, dims) or (num_frames, feature_dim)
#     max_frames: int, target number of frames
#     # Returns: np.array of shape (num_videos, max_frames, ...)
#     # """
    # padded_videos = []

    # for video in videos:
    #     video = np.asarray(video, dtype=np.float32)
    #     num_frames = video.shape[0]
    #     pad_amount = max_frames - num_frames

    #     if pad_amount > 0:
    #         # Create zeros to pad
    #         if video.ndim == 3:  # (num_frames, num_points, dims)
    #             pad_shape = (pad_amount, video.shape[1], video.shape[2])
    #         elif video.ndim == 2:  # (num_frames, feature_dim)
    #             pad_shape = (pad_amount, video.shape[1])
    #         else:
    #             raise ValueError(f"Unsupported video shape: {video.shape}")

    #         video_padded = np.concatenate([video, np.zeros(pad_shape, dtype=np.float32)], axis=0)
    #     else:
    #         # Truncate if longer than max_frames
    #         video_padded = video[:max_frames]

    #     padded_videos.append(video_padded)

    # # Stack into a single NumPy array
    # return np.stack(padded_videos, axis=0)  # shape: (num_videos, max_frames, ...)



In [None]:
# pose_input = pad_videos_list(pose_norm, 100)
# lh_input   = pad_videos_list(lh_norm, 100)
# rh_input   = pad_videos_list(rh_norm, 100)
# face_input = pad_videos_list(face_norm, 100)

In [25]:
# The four per-video lists must line up one-to-one; derive the video count
# from the data instead of hard-coding it.
n_videos = len(pose_videos)
if not (len(face_videos) == len(lh_reconstructed) == len(rh_reconstructed) == n_videos):
    raise ValueError(
        "Feature lists differ in length: "
        f"pose={len(pose_videos)}, face={len(face_videos)}, "
        f"lh={len(lh_reconstructed)}, rh={len(rh_reconstructed)}"
    )

# For every video, put the feature columns side by side again in the order
# pose | face | left hand | right hand.
preprocessed_data = [
    np.concatenate(parts, axis=1)
    for parts in zip(pose_videos, face_videos, lh_reconstructed, rh_reconstructed)
]

print("Number of videos:", len(preprocessed_data))
print("Shape of first video:", preprocessed_data[0].shape)


Number of videos: 11980
Shape of first video: (74, 438)


In [60]:
# The augment_* functions draw from NumPy's global random state; seed it so
# that re-running this cell produces the same augmented set.
AUGMENT_SEED = 42
np.random.seed(AUGMENT_SEED)

# Hand each augmentation a copy: augment_sequence returns its argument as is
# when no augmentation is selected, and a copy guarantees that augmented_list
# never shares memory with preprocessed_data.
augmented_list = [augment_sequence(video.copy()) for video in preprocessed_data]

print("Number of augmented videos:", len(augmented_list))
print("Example video shape:", augmented_list[0].shape)


Number of augmented videos: 11980
Example video shape: (74, 438)
