### Notes for Modification

- Wake - Hands overlapping with face.
- Goose - Multiple variations of signs to represent the same thing.
- Nap - Hands overlapping with face/ Eyes close
- Give - Hands are closed.
- After - Hands overlapping
- Mouth - Hand closed, hand over mouth.

Commons Things:
- Closed hands
- Hands over face
- Overlapping landmarks

Solutions:
- Include eye landmarks.
- Backfill missing data with previous landmark data.
- Preserve relative distances between landmarks.

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_addons as tfa
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sn

from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split, GroupShuffleSplit 

from layers.PreprocessLayer import PreprocessLayer
from utils.Utils import print_shape_dtype, pd_read_s3_parquet, upload_file 

import glob
import sys
import os
import math
import gc
import sys
import sklearn
import scipy
import boto3
import io
import wandb
import json

2023-04-08 22:40:44.408766: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
 The versions of TensorFlow you are currently using is 2.12.0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


In [2]:
tf.get_logger().setLevel('INFO')

In [3]:
with open("./config.json") as fp:
    config = json.load(fp)

In [4]:
s3_client = boto3.client(
    "s3"
)

In [5]:
AWS_S3_BUCKET = "w251-asl-data"
TRAIN_CSV_FILE = "raw-data/train.csv"

In [6]:
train_file = s3_client.get_object(Bucket=AWS_S3_BUCKET, Key=TRAIN_CSV_FILE)

In [7]:
train = pd.read_csv(train_file.get("Body"))

N_SAMPLES = len(train)
print(f'N_SAMPLES: {N_SAMPLES}')

N_SAMPLES: 94477


In [8]:
# Get complete file path to file
def get_file_path(path):
    return f'{AWS_S3_BUCKET}/raw-data/{path}'

train['file_path'] = train['path'].apply(get_file_path)

In [9]:
# Add ordinally Encoded Sign (assign number to each sign name)
train['sign_ord'] = train['sign'].astype('category').cat.codes

# Dictionaries to translate sign <-> ordinal encoded sign
SIGN2ORD = train[['sign', 'sign_ord']].set_index('sign').squeeze().to_dict()
ORD2SIGN = train[['sign_ord', 'sign']].set_index('sign_ord').squeeze().to_dict()

In [None]:
display(train.head(30))
display(train.info())

In [10]:
# Source: https://www.kaggle.com/competitions/asl-signs/overview/evaluation
ROWS_PER_FRAME = 543  # number of landmarks per frame
#w251-asl-data/raw-data/train_landmark_files/28656/3311214787.parquet

def load_relevant_data_subset(pq_path):
    data_columns = ['x', 'y']
    data = pd_read_s3_parquet(pq_path[14:], AWS_S3_BUCKET, columns=data_columns)
    n_frames = int(len(data) / ROWS_PER_FRAME)
    data = data.values.reshape(n_frames, ROWS_PER_FRAME, len(data_columns))
    return data.astype(np.float32)

In [14]:
"""
    face: 0:468
    left_hand: 468:489
    pose: 489:522
    right_hand: 522:544
        
"""

# USE_TYPES = ['left_hand', 'pose', 'right_hand']
# START_IDX = 468
# LIPS_IDXS0 = np.array([
#         61, 185, 40, 39, 37, 0, 267, 269, 270, 409,
#         291, 146, 91, 181, 84, 17, 314, 405, 321, 375,
#         78, 191, 80, 81, 82, 13, 312, 311, 310, 415,
#         95, 88, 178, 87, 14, 317, 402, 318, 324, 308,
#     ])
# # Landmark indices in original data
# LEFT_HAND_IDXS0 = np.arange(468,489)
# RIGHT_HAND_IDXS0 = np.arange(522,543)
# POSE_IDXS0 = np.arange(502, 512)
# LANDMARK_IDXS0 = np.concatenate((LIPS_IDXS0, LEFT_HAND_IDXS0, RIGHT_HAND_IDXS0, POSE_IDXS0))

LIPS_IDXS0 = [0, 11, 12, 13, 14, 15, 17, 37, 38, 39, 40, 41, 42, 61, 62, 72, 73, 
        74, 76, 77, 78, 80, 81, 82, 84, 86, 87, 88, 89, 90, 91, 95, 96, 146, 
        178, 179, 180, 181, 183, 184, 185, 191, 267, 268, 269, 270, 271, 272, 
        291, 292, 302, 303, 304, 306, 307, 308, 310, 311, 312, 314, 316, 317, 
        318, 319, 320, 321, 324, 325, 375, 402, 403, 404, 405, 407, 408, 409, 415]

EYES_IDXS0 = [  6,   7,  22,  23,  24,  25,  26,  30,  31,  33,  56, 110, 112,
       113, 122, 128, 130, 133, 144, 145, 153, 154, 155, 157, 158, 159,
       160, 161, 163, 168, 173, 188, 189, 190, 193, 196, 197, 232, 233,
       243, 244, 245, 246, 247, 249, 252, 253, 254, 255, 256, 259, 260,
       263, 286, 339, 341, 351, 357, 359, 362, 373, 374, 380, 381, 382,
       384, 385, 386, 387, 388, 390, 398, 412, 413, 414, 417, 419, 453,
       463, 464, 465, 466, 467]

POSE_IDXS0 = np.arange(489, 514)
LEFT_HAND_IDXS0 = np.arange(468,489)
RIGHT_HAND_IDXS0 = np.arange(522,543)

LANDMARK_IDXS0 = np.concatenate((LIPS_IDXS0, LEFT_HAND_IDXS0, RIGHT_HAND_IDXS0, EYES_IDXS0, POSE_IDXS0))
N_COLS = LANDMARK_IDXS0.size

HAND_IDXS0 = np.concatenate((LEFT_HAND_IDXS0, RIGHT_HAND_IDXS0), axis=0)

# Landmark indices in processed data
# LIPS_IDXS = np.argwhere(np.isin(LANDMARK_IDXS0, LIPS_IDXS0)).squeeze()
# LEFT_HAND_IDXS = np.argwhere(np.isin(LANDMARK_IDXS0, LEFT_HAND_IDXS0)).squeeze()
# RIGHT_HAND_IDXS = np.argwhere(np.isin(LANDMARK_IDXS0, RIGHT_HAND_IDXS0)).squeeze()
# HAND_IDXS = np.argwhere(np.isin(LANDMARK_IDXS0, HAND_IDXS0)).squeeze()
# POSE_IDXS = np.argwhere(np.isin(LANDMARK_IDXS0, POSE_IDXS0)).squeeze()

print(f'# HAND_IDXS: {len(HAND_IDXS)}, N_COLS: {N_COLS}')

# HAND_IDXS: 42, N_COLS: 227


In [15]:
LIPS_START = 0
LEFT_HAND_START = LIPS_IDXS.size
RIGHT_HAND_START = LEFT_HAND_START + LEFT_HAND_IDXS.size
POSE_START = RIGHT_HAND_START + RIGHT_HAND_IDXS.size

print(f'LIPS_START: {LIPS_START}, LEFT_HAND_START: {LEFT_HAND_START}, RIGHT_HAND_START: {RIGHT_HAND_START}, POSE_START: {POSE_START}')

LIPS_START: 0, LEFT_HAND_START: 77, RIGHT_HAND_START: 98, POSE_START: 119


In [None]:
preprocess_layer = PreprocessLayer(config["N_ROWS"], config["N_DIMS"], HAND_IDXS0, LANDMARK_IDXS0, config["INPUT_SIZE"])

In [16]:
"""
    face: 0:468
    left_hand: 468:489
    pose: 489:522
    right_hand: 522:544
        
"""
def get_data(file_path):
    # Load Raw Data
    data = load_relevant_data_subset(file_path)
    # Process Data Using Tensorflow
    data = preprocess_layer(data)
    
    return data

In [None]:
version = config["DATA_VERSION"]

# Get the full dataset
def get_x_y():
    # Create arrays to save data
    X = np.zeros([N_SAMPLES, INPUT_SIZE, N_COLS, N_DIMS], dtype=np.float32)
    y = np.zeros([N_SAMPLES], dtype=np.int32)
    NON_EMPTY_FRAME_IDXS = np.full([N_SAMPLES, INPUT_SIZE], -1, dtype=np.float32)

    for row_idx, (file_path, sign_ord) in enumerate(tqdm(train[['file_path', 'sign_ord']].values)):
        if row_idx % 5000 == 0:
            print(f'Generated {row_idx}/{N_SAMPLES}')

        data, non_empty_frame_idxs = get_data(file_path)
        X[row_idx] = data
        y[row_idx] = sign_ord
        NON_EMPTY_FRAME_IDXS[row_idx] = non_empty_frame_idxs
        if np.isnan(data).sum() > 0:
            print(row_idx)
            return data
    
    # Save X/y
    np.save('X.npy', X)
    np.save('y.npy', y)
    np.save('NON_EMPTY_FRAME_IDXS.npy', NON_EMPTY_FRAME_IDXS)
    
    # Put to S3
    upload_file("./X.npy", AWS_S3_BUCKET, f'processed-data/v{version}/X.npy')
    upload_file("./y.npy", AWS_S3_BUCKET, f'processed-data/v{version}/y.npy')
    upload_file("./NON_EMPTY_FRAME_IDXS.npy", AWS_S3_BUCKET, f'processed-data/v{version}/NON_EMPTY_FRAME_IDXS.npy')
    
    return X, y, NON_EMPTY_FRAME_IDXS

In [None]:
if PREPROCESS_DATA:
    X, y, NON_EMPTY_FRAME_IDXS = get_x_y()
else:
    X = s3_client.get_object(Bucket=AWS_S3_BUCKET, Key=f'processed-data/v{version}/X.npy')
    X = np.load(io.BytesIO(X['Body'].read()))
    
    y = s3_client.get_object(Bucket=AWS_S3_BUCKET, Key=f'processed-data/v{version}/y.npy')
    y = np.load(io.BytesIO(y['Body'].read()))
    
    NON_EMPTY_FRAME_IDXS = s3_client.get_object(Bucket=AWS_S3_BUCKET, Key=f'processed-data/v{version}/NON_EMPTY_FRAME_IDXS.npy')
    NON_EMPTY_FRAME_IDXS = np.load(io.BytesIO(NON_EMPTY_FRAME_IDXS['Body'].read()))
    
print_shape_dtype([X, y, NON_EMPTY_FRAME_IDXS], ['X', 'y', 'NON_EMPTY_FRAME_IDXS'])
print(f'# NaN Values X: {np.isnan(X).sum()}')

In [None]:
display(pd.Series(y).value_counts().to_frame('Class Count').iloc[[0,1,2,3,4, -5,-4,-3,-2,-1]])

In [None]:
X = s3_client.get_object(Bucket=AWS_S3_BUCKET, Key=f'processed-data/v{version}/X.npy')
X = np.load(io.BytesIO(X['Body'].read()))

y = s3_client.get_object(Bucket=AWS_S3_BUCKET, Key=f'processed-data/v{version}/y.npy')
y = np.load(io.BytesIO(y['Body'].read()))

NON_EMPTY_FRAME_IDXS = s3_client.get_object(Bucket=AWS_S3_BUCKET, Key=f'processed-data/v{version}/NON_EMPTY_FRAME_IDXS.npy')
NON_EMPTY_FRAME_IDXS = np.load(io.BytesIO(NON_EMPTY_FRAME_IDXS['Body'].read()))

In [None]:
pp = PreprocessLayerMod(config["N_ROWS"], config["N_DIMS"], HAND_IDXS0, LANDMARK_IDXS0, config["INPUT_SIZE"])

In [64]:
# Execution when there are less frames than specificed input size. 
data0 = load_relevant_data_subset("w251-asl-data/raw-data/train_landmark_files/28656/1000106739.parquet")
N_FRAMES0 = tf.shape(data0)[0] # 11
gather = tf.gather(data0, HAND_IDXS0, axis=1) # Returns all frames with hand landmark data
frames_hands_nansum = tf.experimental.numpy.nanmean(gather, axis=[1,2]) # Get mean across X,Y sum them together
non_empty_frames_idxs = tf.where(frames_hands_nansum > 0) 
non_empty_frames_idxs = tf.squeeze(non_empty_frames_idxs, axis=1) # Get indicies where there are no NAN
data = tf.gather(data0, non_empty_frames_idxs, axis=0) # Put frames that have non-empty indicies into single tensor
non_empty_frames_idxs = tf.cast(non_empty_frames_idxs, tf.float32) # Cast
N_FRAMES = tf.shape(data)[0] # Number of frames in the video with non-empty hands
data = tf.gather(data, LANDMARK_IDXS0, axis=1) # Filters out landmarks that have not been selected
# Pads to the right of non_empty_frames_idxs with -1's
non_empty_frames_idxs = tf.pad(non_empty_frames_idxs, [[0, config["INPUT_SIZE"]-N_FRAMES]], constant_values=-1)
data = tf.pad(data, [[0, config["INPUT_SIZE"]-N_FRAMES], [0,0], [0,0]], constant_values=0) # Fill all the -1's with zero values.
data = tf.where(tf.math.is_nan(data), 0.0, data) # Fill all NANs with 0.

In [86]:
# LIPS_IDXS0 = [0, 11, 12, 13, 14, 15, 17, 37, 38, 39, 40, 41, 42, 61, 62, 72, 73, 
#         74, 76, 77, 78, 80, 81, 82, 84, 86, 87, 88, 89, 90, 91, 95, 96, 146, 
#         178, 179, 180, 181, 183, 184, 185, 191, 267, 268, 269, 270, 271, 272, 
#         291, 292, 302, 303, 304, 306, 307, 308, 310, 311, 312, 314, 316, 317, 
#         318, 319, 320, 321, 324, 325, 375, 402, 403, 404, 405, 407, 408, 409, 415]

# EYES_IDXS0 = [  6,   7,  22,  23,  24,  25,  26,  30,  31,  33,  56, 110, 112,
#        113, 122, 128, 130, 133, 144, 145, 153, 154, 155, 157, 158, 159,
#        160, 161, 163, 168, 173, 188, 189, 190, 193, 196, 197, 232, 233,
#        243, 244, 245, 246, 247, 249, 252, 253, 254, 255, 256, 259, 260,
#        263, 286, 339, 341, 351, 357, 359, 362, 373, 374, 380, 381, 382,
#        384, 385, 386, 387, 388, 390, 398, 412, 413, 414, 417, 419, 453,
#        463, 464, 465, 466, 467]

FACE_IDXS0 = [0, 6, 7, 11, 12, 13, 14, 15, 17, 22, 23, 24, 25, 26, 30, 31, 33, 37, 38, 39, 40, 41, 42, 56, 
             61, 62, 72, 73, 74, 76, 77, 78, 80, 81, 82, 84, 86, 87, 88, 89, 90, 91, 95, 96, 110, 112, 113, 
             122, 128, 130, 133, 144, 145, 146, 153, 154, 155, 157, 158, 159, 160, 161, 163, 168, 173, 178, 
             179, 180, 181, 183, 184, 185, 188, 189, 190, 191, 193, 196, 197, 232, 233, 243, 244, 245, 246, 
             247, 249, 252, 253, 254, 255, 256, 259, 260, 263, 267, 268, 269, 270, 271, 272, 286, 291, 292, 
             302, 303, 304, 306, 307, 308, 310, 311, 312, 314, 316, 317, 318, 319, 320, 321, 324, 325, 339, 
             341, 351, 357, 359, 362, 373, 374, 375, 380, 381, 382, 384, 385, 386, 387, 388, 390, 398, 402, 
             403, 404, 405, 407, 408, 409, 412, 413, 414, 415, 417, 419, 453, 463, 464, 465, 466, 467]


POSE_IDXS0 = np.arange(489, 514)
LEFT_HAND_IDXS0 = np.arange(468,489)
RIGHT_HAND_IDXS0 = np.arange(522,543)

LANDMARK_IDXS0 = np.concatenate((LIPS_IDXS0, LEFT_HAND_IDXS0, RIGHT_HAND_IDXS0, EYES_IDXS0, POSE_IDXS0))
N_COLS = LANDMARK_IDXS0.size

HAND_IDXS0 = np.concatenate((LEFT_HAND_IDXS0, RIGHT_HAND_IDXS0), axis=0)

# Landmark indices in processed data
# LIPS_IDXS = np.argwhere(np.isin(LANDMARK_IDXS0, LIPS_IDXS0)).squeeze()
# LEFT_HAND_IDXS = np.argwhere(np.isin(LANDMARK_IDXS0, LEFT_HAND_IDXS0)).squeeze()
# RIGHT_HAND_IDXS = np.argwhere(np.isin(LANDMARK_IDXS0, RIGHT_HAND_IDXS0)).squeeze()
# HAND_IDXS = np.argwhere(np.isin(LANDMARK_IDXS0, HAND_IDXS0)).squeeze()
# POSE_IDXS = np.argwhere(np.isin(LANDMARK_IDXS0, POSE_IDXS0)).squeeze()

FACE_START = 0
LEFT_HAND_START = len(FACE_IDXS0)
POSE_START = LEFT_HAND_START + LEFT_HAND_IDXS0.size
RIGHT_HAND_START = POSE_START + POSE_IDXS0.size

In [134]:
### Modified
data0 = load_relevant_data_subset("w251-asl-data/raw-data/train_landmark_files/28656/1000106739.parquet")
data0 = tf.stack([data0, tf.math.add(data0, 3)], 0)

N_FRAMES = tf.shape(data0)[1]
data = tf.gather(data0, LANDMARK_IDXS0, axis=2)

# TODO: Add batch dimension when slicing. 
x = data

face = tf.slice(x, [0, 0, FACE_START, 0], [-1, N_FRAMES, LEFT_HAND_START, 2])
face = tf.keras.utils.normalize(face, axis=1, order=2)

# left_hand = tf.slice(x, [0, 0, LEFT_HAND_START, 0], [-1, N_FRAMES, POSE_START, 2])
# left_hand = tf.keras.utils.normalize(left_hand, axis=1, order=2)

# pose = tf.slice(x, [0, 0, POSE_START, 0], [-1, N_FRAMES, RIGHT_HAND_START, 2])
# pose = tf.keras.utils.normalize(pose, axis=1, order=2)

# right_hand = tf.slice(x, [0, 0, RIGHT_HAND_START, 0], [-1, N_FRAMES, tf.shape(x)[1], 2])
# right_hand = tf.keras.utils.normalize(right_hand, axis=1, order=2)


# data = tf.keras.utils.normalize(
#     data0[0], axis=-1, order=2)

# # LIPS
# lips = tf.slice(x, [0,0,LIPS_START,0], [-1,config["INPUT_SIZE"], 40, 2])
# lips = tf.where(
#         tf.math.equal(lips, 0.0),
#         0.0,
#         (lips - LIPS_MEAN) / LIPS_STD,
#     )
# lips = tf.reshape(lips, [-1, config["INPUT_SIZE"], 40*2])
# # LEFT HAND
# left_hand = tf.slice(x, [0,0,40,0], [-1,config["INPUT_SIZE"], 21, 2])
# left_hand = tf.where(
#         tf.math.equal(left_hand, 0.0),
#         0.0,
#         (left_hand - LEFT_HANDS_MEAN) / LEFT_HANDS_STD,
#     )
# left_hand = tf.reshape(left_hand, [-1, config["INPUT_SIZE"], 21*2])
# # RIGHT HAND
# right_hand = tf.slice(x, [0,0,61,0], [-1,config["INPUT_SIZE"], 21, 2])
# right_hand = tf.where(
#         tf.math.equal(right_hand, 0.0),
#         0.0,
#         (right_hand - RIGHT_HANDS_MEAN) / RIGHT_HANDS_STD,
#     )
# right_hand = tf.reshape(right_hand, [-1, config["INPUT_SIZE"], 21*2])
# # POSE
# pose = tf.slice(x, [0,0,82,0], [-1,config["INPUT_SIZE"], 10, 2])
# pose = tf.where(
#         tf.math.equal(pose, 0.0),
#         0.0,
#         (pose - POSE_MEAN) / POSE_STD,
#     )
# pose = tf.reshape(pose, [-1, config["INPUT_SIZE"], 10*2])

# x = lips, left_hand, right_hand, pose


face

<tf.Tensor: shape=(2, 11, 160, 2), dtype=float32, numpy=
array([[[[0.3109382 , 0.30274147],
         [0.3108517 , 0.30263487],
         [0.31070736, 0.3025791 ],
         ...,
         [0.31003988, 0.29980758],
         [0.31016448, 0.29940003],
         [0.3101387 , 0.29969093]],

        [[0.30949372, 0.3010725 ],
         [0.30935383, 0.30120784],
         [0.3091575 , 0.30136055],
         ...,
         [0.3087556 , 0.29856545],
         [0.3087838 , 0.2983931 ],
         [0.30884987, 0.29858443]],

        [[0.30696207, 0.301286  ],
         [0.30691883, 0.3012452 ],
         [0.30694762, 0.30109647],
         ...,
         [0.30833372, 0.29799432],
         [0.3085047 , 0.2975407 ],
         [0.3084519 , 0.29800206]],

        ...,

        [[0.29300418, 0.30127618],
         [0.29308486, 0.30110818],
         [0.2932472 , 0.30086678],
         ...,
         [0.2930791 , 0.30263004],
         [0.2929298 , 0.30289853],
         [0.2930756 , 0.30265015]],

        [[0.29334256, 0.3

In [None]:
if N_FRAMES < 4: # Number of frames we want
    non_empty_frames_idxs = tf.pad(tf.range(0, N_FRAMES, 1), [[0, 32-N_FRAMES]], constant_values=-1)
    data = tf.pad(data, [[0, 32-N_FRAMES], [0,0], [0,0]], constant_values=-1)
    # Fill NaN Values With 0
    data = tf.where(tf.math.is_nan(data), -1., data)
    # return data, non_empty_frames_idxs

In [None]:
class PreprocessLayerMod(tf.keras.layers.Layer):
    def __init__(self, N_ROWS, N_DIMS, HAND_IDXS0, LANDMARK_IDXS0, INPUT_SIZE):
        super(PreprocessLayerMod, self).__init__()
        self.N_ROWS = N_ROWS
        self.N_DIMS = N_DIMS
        self.HAND_IDXS0 = HAND_IDXS0
        self.LANDMARK_IDXS0 = LANDMARK_IDXS0
        self.INPUT_SIZE = INPUT_SIZE
        self.N_COLS = LANDMARK_IDXS0.size
        
    def pad_edge(self, t, repeats, side):
        if side == 'LEFT':
            return tf.concat((tf.repeat(t[:1], repeats=repeats, axis=0), t), axis=0)
        elif side == 'RIGHT':
            return tf.concat((t, tf.repeat(t[-1:], repeats=repeats, axis=0)), axis=0)
    
    @tf.function(
        input_signature=(tf.TensorSpec(shape=[None, 543, 2], dtype=tf.float32),),
    )
    def call(self, data0):
        # Number of Frames in Video
        N_FRAMES0 = tf.shape(data0)[0]
        
        # Filter Out Frames With Empty Hand Data
        frames_hands_nansum = tf.experimental.numpy.nanmean(tf.gather(data0, self.HAND_IDXS0, axis=1), axis=[1,2])
        non_empty_frames_idxs = tf.where(frames_hands_nansum > 0)
        non_empty_frames_idxs = tf.squeeze(non_empty_frames_idxs, axis=1)
        data = tf.gather(data0, non_empty_frames_idxs, axis=0)
        
        # Cast Indices in float32 to be compatible with Tensorflow Lite
        non_empty_frames_idxs = tf.cast(non_empty_frames_idxs, tf.float32) 

        # Number of Frames in Filtered Video
        N_FRAMES = tf.shape(data)[0]
        
        # Gather Relevant Landmark Columns
        data = tf.gather(data, self.LANDMARK_IDXS0, axis=1)
        
        # Video fits in self.INPUT_SIZE
        if N_FRAMES < self.INPUT_SIZE:
            # Pad With -1 to indicate padding
            non_empty_frames_idxs = tf.pad(non_empty_frames_idxs, [[0, self.INPUT_SIZE-N_FRAMES]], constant_values=-1)
            # Pad Data With Zeros
            data = tf.pad(data, [[0, self.INPUT_SIZE-N_FRAMES], [0,0], [0,0]], constant_values=0)
            # Fill NaN Values With 0
            data = tf.where(tf.math.is_nan(data), 0.0, data)
            return data, non_empty_frames_idxs
        # Video needs to be downsampled to INPUT_SIZE
        else:
            # Repeat
            if N_FRAMES < self.INPUT_SIZE**2:
                repeats = tf.math.floordiv(self.INPUT_SIZE * self.INPUT_SIZE, N_FRAMES0)
                data = tf.repeat(data, repeats=repeats, axis=0)
                non_empty_frames_idxs = tf.repeat(non_empty_frames_idxs, repeats=repeats, axis=0)

            # Pad To Multiple Of Input Size
            pool_size = tf.math.floordiv(len(data), self.INPUT_SIZE)
            if tf.math.mod(len(data), self.INPUT_SIZE) > 0:
                pool_size += 1

            if pool_size == 1:
                pad_size = (pool_size * self.INPUT_SIZE) - len(data)
            else:
                pad_size = (pool_size * self.INPUT_SIZE) % len(data)

            # Pad Start/End with Start/End value
            pad_left = tf.math.floordiv(pad_size, 2) + tf.math.floordiv(self.INPUT_SIZE, 2)
            pad_right = tf.math.floordiv(pad_size, 2) + tf.math.floordiv(self.INPUT_SIZE, 2)
            if tf.math.mod(pad_size, 2) > 0:
                pad_right += 1

            # Pad By Concatenating Left/Right Edge Values
            data = self.pad_edge(data, pad_left, 'LEFT')
            data = self.pad_edge(data, pad_right, 'RIGHT')

            # Pad Non Empty Frame Indices
            non_empty_frames_idxs = self.pad_edge(non_empty_frames_idxs, pad_left, 'LEFT')
            non_empty_frames_idxs = self.pad_edge(non_empty_frames_idxs, pad_right, 'RIGHT')

            # Reshape to Mean Pool
            data = tf.reshape(data, [self.INPUT_SIZE, -1, self.N_COLS, self.N_DIMS])
            non_empty_frames_idxs = tf.reshape(non_empty_frames_idxs, [self.INPUT_SIZE, -1])

            # Mean Pool
            data = tf.experimental.numpy.nanmean(data, axis=1)
            non_empty_frames_idxs = tf.experimental.numpy.nanmean(non_empty_frames_idxs, axis=1)

            # Fill NaN Values With 0
            data = tf.where(tf.math.is_nan(data), 0.0, data)
            
            return data, non_empty_frames_idxs

In [None]:
LIPS_IDXS0 = [0, 11, 12, 13, 14, 15, 17, 37, 38, 39, 40, 41, 42, 61, 62, 72, 73, 
        74, 76, 77, 78, 80, 81, 82, 84, 86, 87, 88, 89, 90, 91, 95, 96, 146, 
        178, 179, 180, 181, 183, 184, 185, 191, 267, 268, 269, 270, 271, 272, 
        291, 292, 302, 303, 304, 306, 307, 308, 310, 311, 312, 314, 316, 317, 
        318, 319, 320, 321, 324, 325, 375, 402, 403, 404, 405, 407, 408, 409, 415]

EYES_IDXS0 = [  6,   7,  22,  23,  24,  25,  26,  30,  31,  33,  56, 110, 112,
       113, 122, 128, 130, 133, 144, 145, 153, 154, 155, 157, 158, 159,
       160, 161, 163, 168, 173, 188, 189, 190, 193, 196, 197, 232, 233,
       243, 244, 245, 246, 247, 249, 252, 253, 254, 255, 256, 259, 260,
       263, 286, 339, 341, 351, 357, 359, 362, 373, 374, 380, 381, 382,
       384, 385, 386, 387, 388, 390, 398, 412, 413, 414, 417, 419, 453,
       463, 464, 465, 466, 467]

POSE_IDXS0 = np.arange(489, 514)
LEFT_HAND_IDXS0 = np.arange(468,489)
RIGHT_HAND_IDXS0 = np.arange(522,543)

LANDMARK_IDXS0 = np.concatenate((LIPS_IDXS0, LEFT_HAND_IDXS0, RIGHT_HAND_IDXS0, EYES_IDXS0, POSE_IDXS0))

In [None]:
data = pd_read_s3_parquet("raw-data/train_landmark_files/28656/1000106739.parquet", AWS_S3_BUCKET)

In [None]:
data[(data['frame']==29) & (data["type"] == "pose")]

In [None]:
data[(data['frame']==29) & (data.index.isin(POSE_IDXS0))].plot.scatter(x='x',y='y', marker='.')