In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_addons as tfa
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sn

from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split, GroupShuffleSplit

import glob
import sys
import os
import math
import gc
import sys
import sklearn
import scipy

# Record the library and interpreter versions used for this run.
print(f'Tensorflow V{tf.__version__}')
print(f'Keras V{tf.keras.__version__}')
print(f'Python V{sys.version}')

2023-04-30 09:53:21.650063: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-04-30 09:53:21.876529: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-04-30 09:53:22.549730: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
2023-04-30 09:53:22.549825: W tensorflow/

Tensorflow V2.11.0
Keras V2.11.0
Python V3.7.11 (default, Jul 27 2021, 14:32:16) 
[GCC 7.5.0]


In [2]:
# Label of this preprocessing run; it names the output sub-directory.
target = "base_fix_32"
# Everything this run writes goes below this directory (trailing slash kept).
out_root = "./processed_data/" + target + "/"
os.makedirs(out_root, exist_ok=True)

In [3]:
import logging

def setLogger(output_path):
    """Configure file + console logging and return this module's logger.

    ``logging.basicConfig`` points the root logger at
    ``<output_path>/logger.log`` (opened with mode "w") and a console
    ``StreamHandler`` at INFO level is attached to the ``__name__`` logger.

    Calling this function again (e.g. by re-running the notebook cell) used to
    attach one more console handler each time, so every message was echoed
    repeatedly. The console handler is now attached only when the logger does
    not already carry one.

    Args:
        output_path: Directory that receives ``logger.log``.

    Returns:
        logging.Logger: The logger named ``__name__``.
    """
    log_format = "[%(asctime)s][%(name)s][%(funcName)s][%(levelname)s] %(message)s"
    logging_outfile = output_path + "/logger.log"
    # NOTE: basicConfig does nothing when the root logger already has handlers.
    logging.basicConfig(
        level=logging.INFO,
        format=log_format,
        filename=str(logging_outfile),
        filemode="w"
    )
    logger = logging.getLogger(__name__)

    # Exact type check on purpose: FileHandler subclasses StreamHandler and
    # must not be mistaken for the console handler.
    has_console_handler = any(type(h) is logging.StreamHandler for h in logger.handlers)
    if not has_console_handler:
        shandler = logging.StreamHandler()
        shandler.setLevel(logging.INFO)
        shandler.setFormatter(logging.Formatter(log_format))
        logger.addHandler(shandler)
    return logger

# Notebook-wide logger; setLogger places its log file inside out_root.
logger = setLogger(out_root)


# Plot Config

In [4]:
# Matplotlib global settings: reset to the library defaults, then enlarge the
# tick labels, axis labels and titles of every figure drawn afterwards.
mpl.rcParams.update(mpl.rcParamsDefault)
mpl.rcParams.update({
    'xtick.labelsize': 16,
    'ytick.labelsize': 16,
    'axes.labelsize': 18,
    'axes.titlesize': 24,
})

# Config

In [5]:
# ---- Pipeline switches ----
# True: process the data from scratch; False: load already preprocessed data.
PREPROCESS_DATA = False
TRAIN_MODEL = True
# NOTE(review): the previous comment on this flag read "True: use all data for
# training -> gives better LB result", which does not obviously match the flag
# name; confirm the intended meaning where USE_VAL is consumed.
USE_VAL = True
# Fixed value; the os.environ['KAGGLE_KERNEL_RUN_TYPE'] lookup is disabled.
IS_INTERACTIVE = False
VERBOSE = 1

# ---- Data layout ----
# One frame is N_ROWS landmarks with N_DIMS coordinates each.
N_ROWS, N_DIMS = 543, 3
DIM_NAMES = ['x', 'y', 'z']
NUM_CLASSES = 250
# Number of frames every sequence is padded or reduced to during preprocessing.
INPUT_SIZE = 32

# ---- Reproducibility ----
SEED = 4949

# ---- Training hyper-parameters ----
BATCH_ALL_SIGNS_N = 4
BATCH_SIZE = 256
N_EPOCHS = 100
LR_MAX = 1e-3
N_WARMUP_EPOCHS = 0
WD_RATIO = 0.05
MASK_VAL = 4237

In [6]:
# Snapshot of the settings above under lower-case keys, so the complete run
# configuration can be logged in a single call.
config = dict(
    preprocess_data=PREPROCESS_DATA,
    train_model=TRAIN_MODEL,
    use_val=USE_VAL,
    n_rows=N_ROWS,
    n_dims=N_DIMS,
    dim_names=DIM_NAMES,
    seed=SEED,
    num_classes=NUM_CLASSES,
    verbose=VERBOSE,
    is_interactive=IS_INTERACTIVE,
    input_size=INPUT_SIZE,
    batch_all_signs_n=BATCH_ALL_SIGNS_N,
    batch_size=BATCH_SIZE,
    n_epochs=N_EPOCHS,
    lr_max=LR_MAX,
    n_warmup_epochs=N_WARMUP_EPOCHS,
    wd_ratio=WD_RATIO,
    mask_val=MASK_VAL,
)

In [7]:
# Write the run configuration to the log for later reference.
logger.info(config)

[2023-04-30 09:53:24,374][__main__][<module>][INFO] {'preprocess_data': False, 'train_model': True, 'use_val': True, 'n_rows': 543, 'n_dims': 3, 'dim_names': ['x', 'y', 'z'], 'seed': 4949, 'num_classes': 250, 'verbose': 1, 'is_interactive': False, 'input_size': 32, 'batch_all_signs_n': 4, 'batch_size': 256, 'n_epochs': 100, 'lr_max': 0.001, 'n_warmup_epochs': 0, 'wd_ratio': 0.05, 'mask_val': 4237}


# Utils

In [8]:
# Prints Shape and Dtype For List Of Variables
def print_shape_dtype(l, names):
    """Print one "<name> shape: ..., dtype: ..." line per object.

    Args:
        l: Iterable of objects exposing ``.shape`` and ``.dtype``.
        names: Labels paired positionally with ``l``; pairing stops at the
            shorter of the two.
    """
    for arr, label in zip(l, names):
        line = f'{label} shape: {arr.shape}, dtype: {arr.dtype}'
        print(line)

# Train

In [9]:
# Metadata table of the training set; each row references one landmark file.
train = pd.read_csv(
    '/home/jovyan/pvc-nfs-skobayashi/competition/kaggle/2023_kaggle_gislr/data/kaggle/train.csv'
)

# Total number of rows in the metadata table.
N_SAMPLES = train.shape[0]
print(f'N_SAMPLES: {N_SAMPLES}')

N_SAMPLES: 94477


In [10]:
# Ordinal label of every sign: the category code of its sign name.
train['sign_ord'] = train['sign'].astype('category').cat.codes

# Lookup tables between sign name and ordinal label, built from the full table.
SIGN2ORD = train.set_index('sign')['sign_ord'].to_dict()
ORD2SIGN = train.set_index('sign_ord')['sign'].to_dict()

# Train / Validation Split

In [11]:
# Show the metadata table, which now carries the sign_ord column.
train

Unnamed: 0,path,participant_id,sequence_id,sign,sign_ord
0,train_landmark_files/26734/1000035562.parquet,26734,1000035562,blow,25
1,train_landmark_files/28656/1000106739.parquet,28656,1000106739,wait,232
2,train_landmark_files/16069/100015657.parquet,16069,100015657,cloud,48
3,train_landmark_files/25571/1000210073.parquet,25571,1000210073,bird,23
4,train_landmark_files/62590/1000240708.parquet,62590,1000240708,owie,164
...,...,...,...,...,...
94472,train_landmark_files/53618/999786174.parquet,53618,999786174,white,238
94473,train_landmark_files/26734/999799849.parquet,26734,999799849,have,108
94474,train_landmark_files/25571/999833418.parquet,25571,999833418,flower,86
94475,train_landmark_files/29302/999895257.parquet,29302,999895257,room,188


In [12]:
# Build the train / validation split and store it with the metadata table.
PARTICIPANT_IDS = train['participant_id'].values
X = train["path"].values
y = train['sign_ord'].values
indices = train.index.values

# Participant-grouped alternative, currently disabled:
# splitter = GroupShuffleSplit(test_size=0.20, n_splits=2, random_state=SEED)
# train_idxs, val_idxs = next(splitter.split(X, y, groups=PARTICIPANT_IDS))

# Random 80/20 split over individual rows. train_test_split returns a
# (train, test) pair per input array; only the pair for `indices` is kept.
train_idxs, val_idxs = train_test_split(
    X, y, indices, test_size=0.2, random_state=SEED
)[-2:]

# Flag the validation rows, then save the annotated table to the output folder.
train["valid"] = train.index.isin(val_idxs)

train.to_csv(f"{out_root}/train.csv")

In [13]:
# Sizes of the two index sets produced by the split.
len(train_idxs), len(val_idxs)

(75581, 18896)

# Add File Path

In [14]:
# Get complete file path to file
def get_file_path(path):
    """Return the absolute location of a file inside the competition data folder.

    Args:
        path: Location relative to the data folder (the notebook passes the
            values of the ``path`` column of the training table).

    Returns:
        str: The data folder prefix followed by ``path``.
    """
    data_root = '/home/jovyan/pvc-nfs-skobayashi/competition/kaggle/2023_kaggle_gislr/data/kaggle/'
    return f'{data_root}{path}'

# Absolute location of each sequence's landmark file.
train['file_path'] = train['path'].apply(get_file_path)

# Landmark Indices

In [15]:
# Landmark groups named for use by this notebook.
USE_TYPES = ['left_hand', 'pose', 'right_hand']
# Row of the first left-hand landmark inside one raw frame.
START_IDX = 468

# ---- Rows in the raw frame ("...0" suffix) ----
LIPS_IDXS0 = np.array([
    61, 185, 40, 39, 37, 0, 267, 269, 270, 409,
    291, 146, 91, 181, 84, 17, 314, 405, 321, 375,
    78, 191, 80, 81, 82, 13, 312, 311, 310, 415,
    95, 88, 178, 87, 14, 317, 402, 318, 324, 308,
])
LEYE_IDXS0 = np.array([
    263, 249, 390, 373, 374, 380, 381, 382, 362,
    466, 388, 387, 386, 385, 384, 398,
])
REYE_IDXS0 = np.array([
    33, 7, 163, 144, 145, 153, 154, 155, 133,
    246, 161, 160, 159, 158, 157, 173,
])
# 21 consecutive rows per hand.
LEFT_HAND_IDXS0 = np.arange(START_IDX, START_IDX + 21)
RIGHT_HAND_IDXS0 = np.arange(522, 522 + 21)
# Six pose rows per side: even rows 500..510 (left), odd rows 501..511 (right).
LEFT_POSE_IDXS0 = np.arange(500, 512, 2)
RIGHT_POSE_IDXS0 = np.arange(501, 513, 2)

# Per-frame landmark selection for a left- resp. right-dominant signer:
# lips, dominant hand, same-side pose rows, same-side eye.
LANDMARK_IDXS_LEFT_DOMINANT0 = np.concatenate(
    [LIPS_IDXS0, LEFT_HAND_IDXS0, LEFT_POSE_IDXS0, LEYE_IDXS0]
)
LANDMARK_IDXS_RIGHT_DOMINANT0 = np.concatenate(
    [LIPS_IDXS0, RIGHT_HAND_IDXS0, RIGHT_POSE_IDXS0, REYE_IDXS0]
)
HAND_IDXS0 = np.concatenate([LEFT_HAND_IDXS0, RIGHT_HAND_IDXS0], axis=0)
N_COLS = len(LANDMARK_IDXS_LEFT_DOMINANT0)

# ---- Positions inside the selected (left-dominant) landmark list ----
# The right-hand rows are not part of that list, so RIGHT_HAND_IDXS is empty
# and HAND_IDXS only holds the left-hand positions.
LIPS_IDXS = np.flatnonzero(np.isin(LANDMARK_IDXS_LEFT_DOMINANT0, LIPS_IDXS0))
LEFT_HAND_IDXS = np.flatnonzero(np.isin(LANDMARK_IDXS_LEFT_DOMINANT0, LEFT_HAND_IDXS0))
RIGHT_HAND_IDXS = np.flatnonzero(np.isin(LANDMARK_IDXS_LEFT_DOMINANT0, RIGHT_HAND_IDXS0))
HAND_IDXS = np.flatnonzero(np.isin(LANDMARK_IDXS_LEFT_DOMINANT0, HAND_IDXS0))
POSE_IDXS = np.flatnonzero(np.isin(LANDMARK_IDXS_LEFT_DOMINANT0, LEFT_POSE_IDXS0))
EYE_IDXS = np.flatnonzero(np.isin(LANDMARK_IDXS_LEFT_DOMINANT0, LEYE_IDXS0))

print(f'# HAND_IDXS: {len(HAND_IDXS)}, N_COLS: {N_COLS}')

# HAND_IDXS: 21, N_COLS: 83


In [16]:
# Velocity and acceleration each contribute one feature per selected landmark.
# The count is taken from the landmark list itself rather than from N_COLS:
# N_COLS is re-assigned by later cells, so `N_COLS*2` produced a different
# number whenever this cell was re-run after them.
N_MOTION_VEROCITY = LANDMARK_IDXS_LEFT_DOMINANT0.size * 2
print(N_MOTION_VEROCITY)

166


In [17]:
# Bone definitions: every row is a pair of raw-frame landmark rows.
# Hand bones are written with hand-local joint numbers (0-20) and shifted to
# the rows of the left hand (468..488); the right hand (522..542) uses the
# same skeleton shifted by the distance between the two hands.
LEFT_HAND_BONE_IDXS0 = np.array([
    [0, 1], [0, 5], [0, 17], [1, 2], [2, 3], [3, 4], [5, 6],
    [5, 9], [6, 7], [7, 8], [9, 10], [9, 13], [10, 11], [11, 12],
    [13, 14], [13, 17], [14, 15], [15, 16], [17, 18], [18, 19], [19, 20],
]) + 468
RIGHT_HAND_BONE_IDXS0 = LEFT_HAND_BONE_IDXS0 + (522 - 468)
# Pose bones: the right-side rows are the left-side rows plus one.
LEFT_POSE_BONE_IDXS0 = np.array([
    [500, 502], [502, 504], [504, 506], [504, 508], [504, 510], [506, 508],
])
RIGHT_POSE_BONE_IDXS0 = LEFT_POSE_BONE_IDXS0 + 1

# First / second endpoint of every bone as separate 1-D arrays.
LEFT_HAND_BONE_0, LEFT_HAND_BONE_1 = LEFT_HAND_BONE_IDXS0.T
RIGHT_HAND_BONE_0, RIGHT_HAND_BONE_1 = RIGHT_HAND_BONE_IDXS0.T
LEFT_POSE_BONE_0, LEFT_POSE_BONE_1 = LEFT_POSE_BONE_IDXS0.T
RIGHT_POSE_BONE_0, RIGHT_POSE_BONE_1 = RIGHT_POSE_BONE_IDXS0.T

# Feature columns of the bones: hand bones follow the selected landmarks,
# pose bones follow the hand bones. N_COLS is the running column total.
LEFT_HAND_BONE_IDXS = np.arange(len(LEFT_HAND_BONE_IDXS0)) + LANDMARK_IDXS_LEFT_DOMINANT0.size
N_COLS = LANDMARK_IDXS_LEFT_DOMINANT0.size + len(LEFT_HAND_BONE_IDXS0)
LEFT_POSE_BONE_IDXS = np.arange(len(LEFT_POSE_BONE_IDXS0)) + N_COLS
N_COLS += len(LEFT_POSE_BONE_IDXS0)
# Velocity + acceleration of the selected landmarks.
N_COLS += N_MOTION_VEROCITY

# Remaining per-bone features; consecutive bone pairs give one angle fewer
# than there are bones.
N_COLS += sum((
    len(RIGHT_HAND_BONE_IDXS0),      # hand: distance
    len(RIGHT_HAND_BONE_IDXS0) - 1,  # hand: angle
    len(RIGHT_HAND_BONE_IDXS0) - 1,  # hand: angle velocity
    len(LEFT_POSE_BONE_IDXS0),       # pose: distance
    len(LEFT_POSE_BONE_IDXS0) - 1,   # pose: angle
    len(LEFT_POSE_BONE_IDXS0) - 1,   # pose: angle velocity
))

print(f'# HAND_IDXS: {len(HAND_IDXS)}, POSE_IDXS: {len(POSE_IDXS)}, LIPS_IDXS: {len(LIPS_IDXS)}, HAND_BONE_IDXS: {len(LEFT_HAND_BONE_IDXS)}, POSE_BONE_IDXS: {len(LEFT_POSE_BONE_IDXS)}, N_COLS: {N_COLS}')

# HAND_IDXS: 21, POSE_IDXS: 6, LIPS_IDXS: 40, HAND_BONE_IDXS: 21, POSE_BONE_IDXS: 6, N_COLS: 353


In [18]:
# "Shape" pairs: raw-frame landmark rows whose difference vector becomes one
# feature column. The right hand uses the left-hand pairs shifted by the
# distance between the two hands (468 -> 522).
LEFT_HAND_SHAPE_IDXS0 = np.array([
    [470, 485], [472, 476], [476, 480], [480, 484], [484, 488], [468, 472],
    [468, 476], [468, 480], [468, 484], [468, 488], [472, 485],
])
RIGHT_HAND_SHAPE_IDXS0 = LEFT_HAND_SHAPE_IDXS0 + (522 - 468)

LEFT_POSE_SHAPE_IDXS0 = np.array([[500, 504], [504, 512], [504, 501], [504, 513]])
RIGHT_POSE_SHAPE_IDXS0 = np.array([[501, 505], [505, 513], [505, 500], [505, 512]])

# Feature columns of the shape vectors, appended after the current total.
# NOTE(review): N_COLS is extended in place, so running this cell twice
# without re-running the previous one counts these columns twice.
LEFT_HAND_SHAPE_IDXS = N_COLS + np.arange(len(LEFT_HAND_SHAPE_IDXS0))
N_COLS += len(LEFT_HAND_SHAPE_IDXS)
LEFT_POSE_SHAPE_IDXS = N_COLS + np.arange(len(LEFT_POSE_SHAPE_IDXS0))
N_COLS += len(LEFT_POSE_SHAPE_IDXS)


In [19]:
# Running total of per-frame feature columns after the shape features.
N_COLS

368

In [20]:
# Raw-frame row where the pose block begins: 21 rows after START_IDX.
POSE_OFFSET = 21 + START_IDX
# Raw rows of pose landmarks 11 and 12, stored as a single (1, 2) pair.
POSE_CENTER_IDXS0 = POSE_OFFSET + np.array([[11, 12]])


In [21]:
# First position of each landmark group inside the selected landmark list;
# every group starts where the previous one ends.
LIPS_START = 0
LEFT_HAND_START = LIPS_START + LIPS_IDXS.size
RIGHT_HAND_START = LEFT_HAND_IDXS.size + LEFT_HAND_START
POSE_START = RIGHT_HAND_IDXS.size + RIGHT_HAND_START

print(f'LIPS_START: {LIPS_START}, LEFT_HAND_START: {LEFT_HAND_START}, RIGHT_HAND_START: {RIGHT_HAND_START}, POSE_START: {POSE_START}')

LIPS_START: 0, LEFT_HAND_START: 40, RIGHT_HAND_START: 61, POSE_START: 61


In [22]:
# First column of the hand-bone and pose-bone features: hand bones follow the
# pose landmarks, pose bones follow the hand bones.
HAND_BONE_START = POSE_IDXS.size + POSE_START
POSE_BONE_START = LEFT_HAND_BONE_IDXS.size + HAND_BONE_START

print(f'LIPS_START: {LIPS_START}, LEFT_HAND_START: {LEFT_HAND_START}, RIGHT_HAND_START: {RIGHT_HAND_START}, POSE_START: {POSE_START}, HAND_BONE_START: {HAND_BONE_START}, POSE_BONE_START: {POSE_BONE_START}')

LIPS_START: 0, LEFT_HAND_START: 40, RIGHT_HAND_START: 61, POSE_START: 61, HAND_BONE_START: 67, POSE_BONE_START: 88


In [23]:
# Snapshot of the landmark index tables under lower-case keys, for logging.
config2 = dict(
    use_types=USE_TYPES,
    start_idx=START_IDX,
    lips_idxs0=LIPS_IDXS0,
    left_hand_idxs0=LEFT_HAND_IDXS0,
    right_hand_idxs0=RIGHT_HAND_IDXS0,
    left_pose_idxs0=LEFT_POSE_IDXS0,
    right_pose_idxs0=RIGHT_POSE_IDXS0,
    landmark_idxs_left_dominant0=LANDMARK_IDXS_LEFT_DOMINANT0,
    landmark_idxs_right_dominant0=LANDMARK_IDXS_RIGHT_DOMINANT0,
    hand_idxs0=HAND_IDXS0,
    n_cols=N_COLS,
    lips_idxs=LIPS_IDXS,
    left_hand_idxs=LEFT_HAND_IDXS,
    right_hand_idxs=RIGHT_HAND_IDXS,
    hand_idxs=HAND_IDXS,
    pose_idxs=POSE_IDXS,
)

In [24]:
# Write the landmark index configuration to the log.
logger.info(config2)

[2023-04-30 09:53:25,160][__main__][<module>][INFO] {'use_types': ['left_hand', 'pose', 'right_hand'], 'start_idx': 468, 'lips_idxs0': array([ 61, 185,  40,  39,  37,   0, 267, 269, 270, 409, 291, 146,  91,
       181,  84,  17, 314, 405, 321, 375,  78, 191,  80,  81,  82,  13,
       312, 311, 310, 415,  95,  88, 178,  87,  14, 317, 402, 318, 324,
       308]), 'left_hand_idxs0': array([468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480,
       481, 482, 483, 484, 485, 486, 487, 488]), 'right_hand_idxs0': array([522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534,
       535, 536, 537, 538, 539, 540, 541, 542]), 'left_pose_idxs0': array([500, 502, 504, 506, 508, 510]), 'right_pose_idxs0': array([501, 503, 505, 507, 509, 511]), 'landmark_idxs_left_dominant0': array([ 61, 185,  40,  39,  37,   0, 267, 269, 270, 409, 291, 146,  91,
       181,  84,  17, 314, 405, 321, 375,  78, 191,  80,  81,  82,  13,
       312, 311, 310, 415,  95,  88, 178,  87,  14, 317, 402,

# Process Data with TensorFlow

In [25]:
# Source: https://www.kaggle.com/competitions/asl-signs/overview/evaluation
ROWS_PER_FRAME = 543  # number of landmarks per frame

def load_relevant_data_subset(pq_path):
    """Read one landmark parquet file into a frame-major float32 array.

    Only the x, y and z columns are read. The rows are regrouped into blocks
    of ROWS_PER_FRAME consecutive rows, one block per frame.

    Args:
        pq_path: Location of the parquet file.

    Returns:
        np.ndarray: float32 array of shape (n_frames, ROWS_PER_FRAME, 3).
    """
    axes = ['x', 'y', 'z']
    frame_table = pd.read_parquet(pq_path, columns=axes)
    n_frames = len(frame_table) // ROWS_PER_FRAME
    stacked = frame_table.values.reshape(n_frames, ROWS_PER_FRAME, len(axes))
    return stacked.astype(np.float32)

In [26]:
"""
    Tensorflow layer to process data in TFLite
    Data needs to be processed in the model itself, so we can not use Python
""" 
def tf_acos(x):
    """Approximate arccos with a cubic polynomial built from basic TF ops.

    The polynomial (times sqrt(1 - |x|)) is evaluated on |x|; for negative
    inputs the result is reflected, i.e. pi minus the value obtained for |x|.

    Args:
        x: float32 tensor; callers in this notebook clip it to [-1, 1] first.

    Returns:
        Tensor of the same shape as ``x`` holding the approximate angle in radians.
    """
    is_negative = tf.cast(x < 0, dtype=tf.float32)
    magnitude = tf.abs(x)
    # Horner evaluation, highest-order coefficient first.
    poly = tf.constant(-0.0187293, dtype=tf.float32) * magnitude
    poly = (poly + tf.constant(0.0742610, dtype=tf.float32)) * magnitude
    poly = (poly - tf.constant(0.2121144, dtype=tf.float32)) * magnitude
    poly = poly + tf.constant(1.5707288, dtype=tf.float32)
    positive_branch = poly * tf.sqrt(1.0 - magnitude)
    # Flip the sign where the input was negative, then shift by pi below.
    mirrored = positive_branch - 2 * is_negative * positive_branch
    return is_negative * tf.constant(3.14159265358979, dtype=tf.float32) + mirrored

def angle_between_vectors_tf(v1, v2):
    """Row-wise angle between two batches of vectors.

    Args:
        v1: Rank-2 tensor, one vector per row.
        v2: Rank-2 tensor with the same shape as ``v1``.

    Returns:
        Rank-1 tensor with one angle (radians, via ``tf_acos``) per row. When
        the product of the two norms is zero the cosine is taken as 0.
    """
    dot = tf.einsum('ij,ij->i', v1, v2)
    norm_product = tf.norm(v1, axis=-1) * tf.norm(v2, axis=-1)
    cos_theta = tf.math.divide_no_nan(dot, norm_product)
    return tf_acos(tf.clip_by_value(cos_theta, -1, 1))

class PreprocessLayer(tf.keras.layers.Layer):
    def __init__(self):
        """Build the per-landmark mirroring offsets used for right-dominant data.

        ``self.normalisation_correction`` has one row per selected landmark and
        one column per coordinate (x, y, z). The x entry is 0.50 for the hand
        and pose landmarks and 0 for lips and eye; y and z entries are all 0.
        ``call`` mirrors a coordinate around its entry wherever the entry is
        non-zero and leaves it untouched where the entry is 0.
        """
        super().__init__()
        n_landmarks = len(LANDMARK_IDXS_LEFT_DOMINANT0)
        # X offsets, laid out in the same order as the selected landmarks:
        # lips, hand, pose, eye.
        x_offsets = (
            [0] * len(LIPS_IDXS)
            + [0.50] * len(LEFT_HAND_IDXS)
            + [0.50] * len(POSE_IDXS)
            + [0] * len(EYE_IDXS)
        )
        # Y and Z coordinates stay intact.
        y_offsets = [0] * n_landmarks
        z_offsets = [0] * n_landmarks
        per_axis = tf.constant([x_offsets, y_offsets, z_offsets], dtype=tf.float32)
        # (3, landmarks) -> (landmarks, 3) so it broadcasts against frame data.
        self.normalisation_correction = tf.transpose(per_axis, [1, 0])
        
    def pad_edge(self, t, repeats, side):
        """Pad ``t`` along axis 0 by repeating its first or last slice.

        Args:
            t: Tensor to pad.
            repeats: How many copies of the edge slice to add.
            side: 'LEFT' prepends copies of ``t[:1]``; 'RIGHT' appends copies
                of ``t[-1:]``.

        Returns:
            The padded tensor.

        Raises:
            ValueError: If ``side`` is neither 'LEFT' nor 'RIGHT'. Previously
                such a value fell through and the method returned ``None``.
        """
        if side == 'LEFT':
            return tf.concat((tf.repeat(t[:1], repeats=repeats, axis=0), t), axis=0)
        if side == 'RIGHT':
            return tf.concat((t, tf.repeat(t[-1:], repeats=repeats, axis=0)), axis=0)
        raise ValueError(f"side must be 'LEFT' or 'RIGHT', got {side!r}")
    
    @tf.function(
        input_signature=(tf.TensorSpec(shape=[None,N_ROWS,N_DIMS], dtype=tf.float32),),
    )
    def call(self, data0):
        # normalize points
        pose_center = (data0[:, POSE_OFFSET+11, :] + data0[:, POSE_OFFSET+12, :])/2
        pose_center = tf.reshape(pose_center, [-1, 1, pose_center.shape[1]])
        data0 = data0 - pose_center
        
        # Number of Frames in Video
        N_FRAMES0 = tf.shape(data0)[0]
        
        # Find dominant hand by comparing summed absolute coordinates
        left_hand_sum = tf.math.reduce_sum(tf.where(tf.math.is_nan(tf.gather(data0, LEFT_HAND_IDXS0, axis=1)), 0, 1))
        right_hand_sum = tf.math.reduce_sum(tf.where(tf.math.is_nan(tf.gather(data0, RIGHT_HAND_IDXS0, axis=1)), 0, 1))
        left_dominant = left_hand_sum >= right_hand_sum
        
        # Count non NaN Hand values in each frame for the dominant hand
        if left_dominant:
            frames_hands_non_nan_sum = tf.math.reduce_sum(
                    tf.where(tf.math.is_nan(tf.gather(data0, LEFT_HAND_IDXS0, axis=1)), 0, 1),
                    axis=[1, 2],
                )
        else:
            frames_hands_non_nan_sum = tf.math.reduce_sum(
                    tf.where(tf.math.is_nan(tf.gather(data0, RIGHT_HAND_IDXS0, axis=1)), 0, 1),
                    axis=[1, 2],
                )
        
        # Find frames indices with coordinates of dominant hand
        non_empty_frames_idxs = tf.where(frames_hands_non_nan_sum > 0)
        non_empty_frames_idxs = tf.squeeze(non_empty_frames_idxs, axis=1)
        # Filter frames
        data = tf.gather(data0, non_empty_frames_idxs, axis=0)
        
        # Cast Indices in float32 to be compatible with Tensorflow Lite
        non_empty_frames_idxs = tf.cast(non_empty_frames_idxs, tf.float32)
        # Normalize to start with 0
        non_empty_frames_idxs -= tf.reduce_min(non_empty_frames_idxs)
        
        # Number of Frames in Filtered Video
        N_FRAMES = tf.shape(data)[0]
        
        # Gather Relevant Landmark Columns
        if left_dominant:
            #######################################
            # bone
            #######################################
            hand_bone = tf.expand_dims(data[:, LEFT_HAND_BONE_IDXS0[0][1]] - data[:, LEFT_HAND_BONE_IDXS0[0][0]], axis=1)
            for i in range(1, len(LEFT_HAND_BONE_IDXS0)):
                hand_bone = tf.concat([hand_bone, tf.expand_dims(data[:, LEFT_HAND_BONE_IDXS0[i][1]] - data[:, LEFT_HAND_BONE_IDXS0[i][0]], axis=1)], axis=1)
                
            pose_bone = tf.expand_dims(data[:, LEFT_POSE_BONE_IDXS0[0][1]] - data[:, LEFT_POSE_BONE_IDXS0[0][0]], axis=1)
            for i in range(1, len(LEFT_POSE_BONE_IDXS0)):
                pose_bone = tf.concat([pose_bone, tf.expand_dims(data[:, LEFT_POSE_BONE_IDXS0[i][1]] - data[:, LEFT_POSE_BONE_IDXS0[i][0]], axis=1)], axis=1)

            hand_shape = tf.expand_dims(data[:, LEFT_HAND_SHAPE_IDXS0[0][1]] - data[:, LEFT_HAND_SHAPE_IDXS0[0][0]], axis=1)
            for i in range(1, len(LEFT_HAND_SHAPE_IDXS0)):
                hand_shape = tf.concat([hand_shape, tf.expand_dims(data[:, LEFT_HAND_SHAPE_IDXS0[i][1]] - data[:, LEFT_HAND_SHAPE_IDXS0[i][0]], axis=1)], axis=1)
                
            pose_shape = tf.expand_dims(data[:, LEFT_POSE_SHAPE_IDXS0[0][1]] - data[:, LEFT_POSE_SHAPE_IDXS0[0][0]], axis=1)
            for i in range(1, len(LEFT_POSE_SHAPE_IDXS0)):
                pose_shape = tf.concat([pose_shape, tf.expand_dims(data[:, LEFT_POSE_SHAPE_IDXS0[i][1]] - data[:, LEFT_POSE_SHAPE_IDXS0[i][0]], axis=1)], axis=1)
            #######################################
            # distance, angle, 
            #######################################
            # drop Z...?   
            # hand ----------
            # dist
            pose_data = data
            hand_joint_distances = []
            for i, j in LEFT_HAND_BONE_IDXS0:
                hand_joint_distances.append(tf.norm(pose_data[:, i] - pose_data[:, j], axis=-1))
            hand_joint_distances = tf.stack(hand_joint_distances, axis=-1)
            hand_joint_distances = tf.expand_dims(hand_joint_distances, axis=-1) # [163, 33, 1]にする
            hand_joint_distances = tf.tile(hand_joint_distances, [1, 1, 3]) # [163, 33, 3]にする
            
            # angle
            hand_relative_angles = []
            for (i1, j1), (i2, j2) in zip(LEFT_HAND_BONE_IDXS0[:-1], LEFT_HAND_BONE_IDXS0[1:]):
                v1 = pose_data[:, i1] - pose_data[:, j1]
                v2 = pose_data[:, i2] - pose_data[:, j2]

                hand_relative_angles.append(angle_between_vectors_tf(v1, v2))
            hand_relative_angles = tf.stack(hand_relative_angles, axis=-1)
            hand_relative_angles = tf.expand_dims(hand_relative_angles, axis=-1) # [163, 33, 1]にする
            hand_relative_angles = tf.tile(hand_relative_angles, [1, 1, 3]) # [163, 33, 3]にする            
            
            # body ----------
            # dist
            pose_data = data
            pose_joint_distances = []
            for i, j in LEFT_POSE_BONE_IDXS0:
                pose_joint_distances.append(tf.norm(pose_data[:, i] - pose_data[:, j], axis=-1))
            pose_joint_distances = tf.stack(pose_joint_distances, axis=-1)
            pose_joint_distances = tf.expand_dims(pose_joint_distances, axis=-1) # [163, 33, 1]にする
            pose_joint_distances = tf.tile(pose_joint_distances, [1, 1, 3]) # [163, 33, 3]にする         
            
            # angle
            pose_relative_angles = []
            for (i1, j1), (i2, j2) in zip(LEFT_POSE_BONE_IDXS0[:-1], LEFT_POSE_BONE_IDXS0[1:]):
                v1 = pose_data[:, i1] - pose_data[:, j1]
                v2 = pose_data[:, i2] - pose_data[:, j2]

                pose_relative_angles.append(angle_between_vectors_tf(v1, v2))
            pose_relative_angles = tf.stack(pose_relative_angles, axis=-1)    
            pose_relative_angles = tf.expand_dims(pose_relative_angles, axis=-1) # [163, 33, 1]にする
            pose_relative_angles = tf.tile(pose_relative_angles, [1, 1, 3]) # [163, 33, 3]にする     

            #######################################
            # angle motion
            #######################################
            if N_FRAMES > 1:
                padded_hand_relative_angles_velocity_data = tf.pad(hand_relative_angles[1:] - hand_relative_angles[:-1], [[0, 1], [0, 0], [0, 0]], "CONSTANT", constant_values=np.NaN)
            else:
                padded_hand_relative_angles_velocity_data = tf.zeros_like(hand_relative_angles)
            
            if N_FRAMES > 1:
                padded_pose_relative_angles_velocity_data = tf.pad(pose_relative_angles[1:] - pose_relative_angles[:-1], [[0, 1], [0, 0], [0, 0]], "CONSTANT", constant_values=np.NaN)
            else:
                padded_pose_relative_angles_velocity_data = tf.zeros_like(pose_relative_angles)
                
            #######################################
            # pose 実行順番に注意。ここでdataが削減されます。
            #######################################
            data = tf.gather(data, LANDMARK_IDXS_LEFT_DOMINANT0, axis=1)
            
            #######################################
            # motion
            #######################################
            if N_FRAMES > 1:
                padded_velocity_data = tf.pad(data[1:] - data[:-1], [[0, 1], [0, 0], [0, 0]], "CONSTANT", constant_values=np.NaN)
            else:
                padded_velocity_data = tf.zeros_like(data)
                
            #######################################
            # acceraration
            #######################################
            if N_FRAMES > 2:
                acceleration_data = (data[1:] - data[:-1])[1:] - (data[1:] - data[:-1])[:-1]
                padded_acceleration_data = tf.pad(acceleration_data, [[0, 2], [0, 0], [0, 0]], "CONSTANT", constant_values=np.NaN)
            else:
                padded_acceleration_data = tf.zeros_like(data)  
                

            
            #######################################
            # concat
            #######################################
            data = tf.concat([data,
                              hand_bone, pose_bone, 
                              padded_velocity_data, padded_acceleration_data, 
                              hand_joint_distances, hand_relative_angles, 
                              pose_joint_distances, pose_relative_angles, 
                              padded_hand_relative_angles_velocity_data, 
                              padded_pose_relative_angles_velocity_data,
                             hand_shape, pose_shape
                             ], axis=1)
            
        else:
            #######################################
            # bone
            #######################################
            hand_bone = tf.expand_dims(data[:, RIGHT_HAND_BONE_IDXS0[0][1]] - data[:, RIGHT_HAND_BONE_IDXS0[0][0]], axis=1)
            for i in range(1, len(RIGHT_HAND_BONE_IDXS0)):
                hand_bone = tf.concat([hand_bone, tf.expand_dims(data[:, RIGHT_HAND_BONE_IDXS0[i][1]] - data[:, RIGHT_HAND_BONE_IDXS0[i][0]], axis=1)], axis=1)
                
            pose_bone = tf.expand_dims(data[:, RIGHT_POSE_BONE_IDXS0[0][1]] - data[:, RIGHT_POSE_BONE_IDXS0[0][0]], axis=1)
            for i in range(1, len(RIGHT_POSE_BONE_IDXS0)):
                pose_bone = tf.concat([pose_bone, tf.expand_dims(data[:, RIGHT_POSE_BONE_IDXS0[i][1]] - data[:, RIGHT_POSE_BONE_IDXS0[i][0]], axis=1)], axis=1)

            hand_shape = tf.expand_dims(data[:, RIGHT_HAND_SHAPE_IDXS0[0][1]] - data[:, RIGHT_HAND_SHAPE_IDXS0[0][0]], axis=1)
            for i in range(1, len(RIGHT_HAND_SHAPE_IDXS0)):
                hand_shape = tf.concat([hand_shape, tf.expand_dims(data[:, RIGHT_HAND_SHAPE_IDXS0[i][1]] - data[:, RIGHT_HAND_SHAPE_IDXS0[i][0]], axis=1)], axis=1)
                
            pose_shape = tf.expand_dims(data[:, RIGHT_POSE_SHAPE_IDXS0[0][1]] - data[:, RIGHT_POSE_SHAPE_IDXS0[0][0]], axis=1)
            for i in range(1, len(RIGHT_POSE_SHAPE_IDXS0)):
                pose_shape = tf.concat([pose_shape, tf.expand_dims(data[:, RIGHT_POSE_SHAPE_IDXS0[i][1]] - data[:, RIGHT_POSE_SHAPE_IDXS0[i][0]], axis=1)], axis=1)
                
            hand_bone = hand_bone * tf.constant([-1., 1., 1.])
            pose_bone = pose_bone * tf.constant([-1., 1., 1.])
            hand_shape = hand_shape * tf.constant([-1., 1., 1.])
            pose_shape = pose_shape * tf.constant([-1., 1., 1.])

            #######################################
            # distance, angle, 
            #######################################
            # drop Z...?   
            # hand ----------
            # dist
            pose_data = tf.identity(data)
            pose_data = pose_data * tf.constant([-1., 1., 1.])
            hand_joint_distances = []
            for i, j in RIGHT_HAND_BONE_IDXS0:
                hand_joint_distances.append(tf.norm(pose_data[:, i] - pose_data[:, j], axis=-1))
            hand_joint_distances = tf.stack(hand_joint_distances, axis=-1)
            hand_joint_distances = tf.expand_dims(hand_joint_distances, axis=-1) # [163, 33, 1]にする
            hand_joint_distances = tf.tile(hand_joint_distances, [1, 1, 3]) # [163, 33, 3]にする
            
            # angle
            hand_relative_angles = []
            for (i1, j1), (i2, j2) in zip(RIGHT_HAND_BONE_IDXS0[:-1], RIGHT_HAND_BONE_IDXS0[1:]):
                v1 = pose_data[:, i1] - pose_data[:, j1]
                v2 = pose_data[:, i2] - pose_data[:, j2]

                hand_relative_angles.append(angle_between_vectors_tf(v1, v2))
            hand_relative_angles = tf.stack(hand_relative_angles, axis=-1)
            hand_relative_angles = tf.expand_dims(hand_relative_angles, axis=-1) # [163, 33, 1]にする
            hand_relative_angles = tf.tile(hand_relative_angles, [1, 1, 3]) # [163, 33, 3]にする            
            
            # body ----------
            # dist
            # pose_data = data
            pose_joint_distances = []
            for i, j in RIGHT_POSE_BONE_IDXS0:
                pose_joint_distances.append(tf.norm(pose_data[:, i] - pose_data[:, j], axis=-1))
            pose_joint_distances = tf.stack(pose_joint_distances, axis=-1)
            pose_joint_distances = tf.expand_dims(pose_joint_distances, axis=-1) # [163, 33, 1]にする
            pose_joint_distances = tf.tile(pose_joint_distances, [1, 1, 3]) # [163, 33, 3]にする         
            
            # angle
            pose_relative_angles = []
            for (i1, j1), (i2, j2) in zip(RIGHT_POSE_BONE_IDXS0[:-1], RIGHT_POSE_BONE_IDXS0[1:]):
                v1 = pose_data[:, i1] - pose_data[:, j1]
                v2 = pose_data[:, i2] - pose_data[:, j2]

                pose_relative_angles.append(angle_between_vectors_tf(v1, v2))
            pose_relative_angles = tf.stack(pose_relative_angles, axis=-1)    
            pose_relative_angles = tf.expand_dims(pose_relative_angles, axis=-1) # [163, 33, 1]にする
            pose_relative_angles = tf.tile(pose_relative_angles, [1, 1, 3]) # [163, 33, 3]にする     
            
            #######################################
            # angle motion
            #######################################
            if N_FRAMES > 1:
                padded_hand_relative_angles_velocity_data = tf.pad(hand_relative_angles[1:] - hand_relative_angles[:-1], [[0, 1], [0, 0], [0, 0]], "CONSTANT", constant_values=np.NaN)
            else:
                padded_hand_relative_angles_velocity_data = tf.zeros_like(hand_relative_angles)
            
            if N_FRAMES > 1:
                padded_pose_relative_angles_velocity_data = tf.pad(pose_relative_angles[1:] - pose_relative_angles[:-1], [[0, 1], [0, 0], [0, 0]], "CONSTANT", constant_values=np.NaN)
            else:
                padded_pose_relative_angles_velocity_data = tf.zeros_like(pose_relative_angles)
                
            #######################################
            # pose 実行順番に注意。ここでdataが削減されます。
            #######################################
            data = tf.gather(data, LANDMARK_IDXS_RIGHT_DOMINANT0, axis=1)
            data = (
                    self.normalisation_correction + (
                        (data - self.normalisation_correction) * tf.where(self.normalisation_correction != 0, -1.0, 1.0))
                )
            
            #######################################
            # motion
            #######################################
            if N_FRAMES > 1:
                padded_velocity_data = tf.pad(data[1:] - data[:-1], [[0, 1], [0, 0], [0, 0]], "CONSTANT", constant_values=np.NaN)
            else:
                padded_velocity_data = tf.zeros_like(data)
                
            #######################################
            # acceraration
            #######################################
            if N_FRAMES > 2:
                acceleration_data = (data[1:] - data[:-1])[1:] - (data[1:] - data[:-1])[:-1]
                padded_acceleration_data = tf.pad(acceleration_data, [[0, 2], [0, 0], [0, 0]], "CONSTANT", constant_values=np.NaN)
            else:
                padded_acceleration_data = tf.zeros_like(data)    
            
                
            #######################################
            # concat
            #######################################
            data = tf.concat([data,
                              hand_bone, pose_bone, 
                              padded_velocity_data, padded_acceleration_data, 
                              hand_joint_distances, hand_relative_angles, 
                              pose_joint_distances, pose_relative_angles, 
                              padded_hand_relative_angles_velocity_data, 
                              padded_pose_relative_angles_velocity_data,
                             hand_shape, pose_shape
                             ], axis=1)
            
            
        #######################################
        # resize
        #######################################       
        # Video fits in INPUT_SIZE
        if N_FRAMES < INPUT_SIZE:
            # Pad With -1 to indicate padding
            non_empty_frames_idxs = tf.pad(non_empty_frames_idxs, [[0, INPUT_SIZE-N_FRAMES]], constant_values=-1)
            # Pad Data With Zeros
            data = tf.pad(data, [[0, INPUT_SIZE-N_FRAMES], [0,0], [0,0]], constant_values=0)
            # Fill NaN Values With 0
            data = tf.where(tf.math.is_nan(data), 0.0, data)
            return data, non_empty_frames_idxs
        # Video needs to be downsampled to INPUT_SIZE
        else:
            # Repeat
            if N_FRAMES < INPUT_SIZE**2:
                repeats = tf.math.floordiv(INPUT_SIZE * INPUT_SIZE, N_FRAMES0)
                data = tf.repeat(data, repeats=repeats, axis=0)
                non_empty_frames_idxs = tf.repeat(non_empty_frames_idxs, repeats=repeats, axis=0)

            # Pad To Multiple Of Input Size
            pool_size = tf.math.floordiv(len(data), INPUT_SIZE)
            if tf.math.mod(len(data), INPUT_SIZE) > 0:
                pool_size += 1

            if pool_size == 1:
                pad_size = (pool_size * INPUT_SIZE) - len(data)
            else:
                pad_size = (pool_size * INPUT_SIZE) % len(data)

            # Pad Start/End with Start/End value
            pad_left = tf.math.floordiv(pad_size, 2) + tf.math.floordiv(INPUT_SIZE, 2)
            pad_right = tf.math.floordiv(pad_size, 2) + tf.math.floordiv(INPUT_SIZE, 2)
            if tf.math.mod(pad_size, 2) > 0:
                pad_right += 1

            # Pad By Concatenating Left/Right Edge Values
            data = self.pad_edge(data, pad_left, 'LEFT')
            data = self.pad_edge(data, pad_right, 'RIGHT')

            # Pad Non Empty Frame Indices
            non_empty_frames_idxs = self.pad_edge(non_empty_frames_idxs, pad_left, 'LEFT')
            non_empty_frames_idxs = self.pad_edge(non_empty_frames_idxs, pad_right, 'RIGHT')

            # Reshape to Mean Pool
            data = tf.reshape(data, [INPUT_SIZE, -1, N_COLS, N_DIMS])
            non_empty_frames_idxs = tf.reshape(non_empty_frames_idxs, [INPUT_SIZE, -1])

            # Mean Pool
            data = tf.experimental.numpy.nanmean(data, axis=1)
            non_empty_frames_idxs = tf.experimental.numpy.nanmean(non_empty_frames_idxs, axis=1)

            # Fill NaN Values With 0
            data = tf.where(tf.math.is_nan(data), 0.0, data)
            
            return data, non_empty_frames_idxs
    
# Module-level instance of the preprocessing layer; the get_data* helpers in
# later cells call it on every loaded sample.
preprocess_layer = PreprocessLayer()

2023-04-30 09:53:25.306227: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-04-30 09:53:25.882066: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22290 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:68:00.0, compute capability: 8.6


# Interpolate NaN Values

In [27]:
"""
    face: 0:468
    left_hand: 468:489
    pose: 489:522
    right_hand: 522:544
        
"""
def get_data(file_path):
    """Load one landmark file and run it through the shared preprocessing layer.

    Args:
        file_path: Path handed unchanged to ``load_relevant_data_subset``.

    Returns:
        Whatever ``preprocess_layer`` returns for the loaded sample; callers in
        this notebook unpack it as ``(data, non_empty_frame_idxs)``.
    """
    raw_landmarks = load_relevant_data_subset(file_path)
    return preprocess_layer(raw_landmarks)

# 目視確認

In [28]:
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from IPython.display import HTML

In [29]:
# Spot check: display the file path stored at row position 18 of `train`.
train["file_path"].iloc[18]

'/home/jovyan/pvc-nfs-skobayashi/competition/kaggle/2023_kaggle_gislr/data/kaggle/train_landmark_files/26734/1001145816.parquet'

In [30]:
# data, non_empty_frame_idxs = get_data("/home/jovyan/pvc-nfs-skobayashi/competition/kaggle/2023_kaggle_gislr/data/kaggle/train_landmark_files/32319/1000278229.parquet")

In [31]:
# def animation_frame_xy(f):
#     frame = data[f]
#     lip = frame[LIPS_START:LIPS_START+40,:]
#     left = frame[40:61,:]
#     pose = frame[61:, :]
    
#     ax.clear()
#     ax.plot(lip[:,0], lip[:,1], '.')
#     ax.plot(left[:,0], left[:,1], '.')
#     ax.plot(pose[:,0], pose[:,1], '.')
        
#     plt.xlim(0, 1)
#     plt.ylim(0, 1)
#     plt.title(f"frame:{f}")

In [32]:
# # print(f"The sign being shown here is: {train_df[train_df.path==f'{path_to_sign}'].sign.values[0]}")

# ## These values set the limits on the graph to stabilize the video
# xmin = np.nanmin(data[:,:,0])
# xmax = np.nanmax(data[:,:,0])
# ymin = np.nanmin(data[:,:,1])
# ymax = np.nanmax(data[:,:,1])

# fig, ax = plt.subplots()
# l, = ax.plot([], [])
# animation = FuncAnimation(fig, func=animation_frame_xy, frames=list(range(len(data))))
# HTML(animation.to_html5_video())

# Create Dataset

In [33]:
# Get the full dataset
def preprocess_data():
    """Build the un-augmented dataset for every row of ``train`` and save it.

    Each file path is processed with ``get_data`` and written into
    pre-allocated arrays, which are then saved under ``out_root`` as
    ``X.npy``, ``y.npy`` and ``NON_EMPTY_FRAME_IDXS.npy``.

    Returns:
        ``None`` after a complete run. If a processed sample contains NaN, its
        row index is printed and that sample is returned immediately; nothing
        is saved in that case.
    """
    # Output buffers; frame indices start at -1 everywhere
    features = np.zeros([N_SAMPLES, INPUT_SIZE, N_COLS, N_DIMS], dtype=np.float32)
    labels = np.zeros([N_SAMPLES], dtype=np.int32)
    frame_idxs = np.full([N_SAMPLES, INPUT_SIZE], -1, dtype=np.float32)

    samples = train[['file_path', 'sign_ord']].values
    for row_idx, (file_path, sign_ord) in enumerate(tqdm(samples)):
        # Coarse progress message in addition to the tqdm bar
        if not row_idx % 5000:
            print(f'Generated {row_idx}/{N_SAMPLES}')

        data, non_empty_frame_idxs = get_data(file_path)
        features[row_idx] = data
        labels[row_idx] = sign_ord
        frame_idxs[row_idx] = non_empty_frame_idxs

        # Sanity check: abort and hand back the offending sample for inspection
        if np.isnan(data).any():
            print(row_idx)
            return data

    # Persist the full arrays
    np.save(f'{out_root}X.npy', features)
    np.save(f'{out_root}y.npy', labels)
    np.save(F'{out_root}NON_EMPTY_FRAME_IDXS.npy', frame_idxs)
    

In [34]:
# def convert_row_parallel(row, right_handed=True):
#     data, non_empty_frame_idxs = get_data(row["file_path"])
#     return data, row["sign_ord"], non_empty_frame_idxs

# def func_parallel(idx):
#     row = train.iloc[idx]
#     (x,y, non_empty) = convert_row_parallel(row)
#     return [x,y, non_empty]

In [35]:
# from multiprocessing import Pool

# if not os.path.exists(f'{out_root}/X.npy'):     
#     length =len(train)
#     if False:
#         with Pool(8) as pool:
#             r = range(length)
#             imap = pool.imap(func_parallel, r)
#             result_all  = list(tqdm(imap, total=len(r)))        
#     else:
#         result_all = []
#         for idx in tqdm(range(length)):
#             metas = func_parallel(idx)
#             result_all.append(metas)

In [36]:
# if not os.path.exists(f'{out_root}/X.npy'):  
#     X = np.zeros([N_SAMPLES, INPUT_SIZE, N_COLS, N_DIMS], dtype=np.float32)
#     y = np.zeros([N_SAMPLES], dtype=np.int32)
#     NON_EMPTY_FRAME_IDXS = np.full([N_SAMPLES, INPUT_SIZE], -1, dtype=np.float32)
    
#     for i, (x_,y_,z_) in tqdm(enumerate(result_all), total=len(result_all)):
#         X[i,:] = x_
#         y[i] = y_
#         NON_EMPTY_FRAME_IDXS[i] = z_

#     np.save(f'{out_root}X.npy', X)
#     np.save(f'{out_root}y.npy', y)
#     np.save(F'{out_root}NON_EMPTY_FRAME_IDXS.npy', NON_EMPTY_FRAME_IDXS)

In [37]:
# Build and save the base dataset only when X.npy is not already on disk;
# an existing file acts as a cache and the (long) loop is skipped.
if not os.path.exists(f'{out_root}/X.npy'):
    preprocess_data()

  0%|          | 0/94477 [00:00<?, ?it/s]

Generated 0/94477
Generated 5000/94477
Generated 10000/94477
Generated 15000/94477
Generated 20000/94477
Generated 25000/94477
Generated 30000/94477
Generated 35000/94477
Generated 40000/94477
Generated 45000/94477
Generated 50000/94477
Generated 55000/94477
Generated 60000/94477
Generated 65000/94477
Generated 70000/94477
Generated 75000/94477
Generated 80000/94477
Generated 85000/94477
Generated 90000/94477


In [38]:
if not os.path.exists(f'{out_root}/y_val.npy'):

    # Load the full arrays written by preprocess_data()
    X = np.load(f'{out_root}/X.npy')
    y = np.load(f'{out_root}/y.npy')
    NON_EMPTY_FRAME_IDXS = np.load(f'{out_root}/NON_EMPTY_FRAME_IDXS.npy')

    # Slice both partitions up front with the precomputed index arrays
    X_train, X_val = X[train_idxs], X[val_idxs]
    y_train, y_val = y[train_idxs], y[val_idxs]
    NON_EMPTY_FRAME_IDXS_TRAIN = NON_EMPTY_FRAME_IDXS[train_idxs]
    NON_EMPTY_FRAME_IDXS_VAL = NON_EMPTY_FRAME_IDXS[val_idxs]

    # Persist the training partition
    np.save(f'{out_root}/X_train.npy', X_train)
    np.save(f'{out_root}/y_train.npy', y_train)
    np.save(f'{out_root}/NON_EMPTY_FRAME_IDXS_TRAIN.npy', NON_EMPTY_FRAME_IDXS_TRAIN)

    # Persist the validation partition (y_val.npy is the file the guard checks)
    np.save(f'{out_root}/X_val.npy', X_val)
    np.save(f'{out_root}/y_val.npy', y_val)
    np.save(f'{out_root}/NON_EMPTY_FRAME_IDXS_VAL.npy', NON_EMPTY_FRAME_IDXS_VAL)

    # Report participant overlap between the partitions and the resulting shapes
    print(f'Patient ID Intersection Train/Val: {set(PARTICIPANT_IDS[train_idxs]).intersection(PARTICIPANT_IDS[val_idxs])}')
    print(f'X_train shape: {X_train.shape}, X_val shape: {X_val.shape}')
    print(f'y_train shape: {y_train.shape}, y_val shape: {y_val.shape}')


Patient ID Intersection Train/Val: {37779, 61333, 36257, 49445, 37055, 32319, 16069, 34503, 22343, 55372, 30680, 27610, 25571, 18796, 26734, 4718, 28656, 53618, 29302, 2044, 62590}
X_train shape: (75581, 32, 368, 3), X_val shape: (18896, 32, 368, 3)
y_train shape: (75581,), y_val shape: (18896,)


# 水増し１

In [39]:
class NanInterpolation(tf.keras.layers.Layer):
    """Augmentation layer that fills NaN entries by interpolating with pandas.

    Only active when ``training`` is true; otherwise the input is passed
    through untouched. ``order`` and ``limit`` are stored on the instance but
    are not read by ``call``.
    """

    def __init__(self,  **kwargs):
        super().__init__(**kwargs)
        self.order = 3
        self.limit = 3

    def call(self, inputs, training=False):
        """Return ``inputs`` with NaNs interpolated (training only).

        Uses ``inputs.numpy()``, so it needs an eagerly evaluated tensor.
        """
        if not training:
            return inputs

        frames = inputs.numpy()

        # Handle each slice along the last axis separately: wrap it in a
        # DataFrame and let pandas interpolate down the rows, filling in both
        # directions.
        filled_slices = [
            pd.DataFrame(frames[..., axis_idx])
            .interpolate(limit_direction='both')
            .to_numpy()
            for axis_idx in range(frames.shape[-1])
        ]

        # Re-assemble along the last axis and convert back to the input dtype
        stacked = np.stack(filled_slices, axis=-1)
        return tf.convert_to_tensor(stacked, dtype=inputs.dtype)

    
class Scaling3D(tf.keras.layers.Layer):
    """Augmentation layer that multiplies the whole input by one random scalar.

    Args:
        scale_range: ``(low, high)`` bounds passed to ``tf.random.uniform`` as
            ``minval`` / ``maxval`` when drawing the scale factor.
    """

    def __init__(self, scale_range=(0.9, 1.1), **kwargs):
        super().__init__(**kwargs)
        self.scale_range = scale_range

    def call(self, inputs, training=False):
        """Scale ``inputs`` by a freshly drawn factor; identity when not training."""
        if not training:
            return inputs

        # One scalar factor per call, shared by every element of the input
        lower = self.scale_range[0]
        upper = self.scale_range[1]
        factor = tf.random.uniform((), minval=lower, maxval=upper)
        return inputs * factor

class TimeSeriesAugmentation(tf.keras.layers.Layer):
    """Augmentation layer that randomly stretches or compresses the first axis.

    The input is resampled with ``tf.image.resize`` to
    ``int(n_frames * factor)`` rows, where ``factor`` is drawn uniformly from
    ``framerate_factor_range``. The second-to-last axis keeps its size.

    Args:
        framerate_factor_range: ``(low, high)`` bounds for the random factor.
    """

    def __init__(self, framerate_factor_range=(0.8, 1.2),  **kwargs):
        super(TimeSeriesAugmentation, self).__init__(**kwargs)
        self.framerate_factor_range = framerate_factor_range

    def call(self, inputs, training=False):
        """Return ``inputs`` resampled along axis 0; identity when not training.

        Assumes ``inputs`` is 3-D (presumably frames x landmarks x coords), so
        that adding a batch axis yields the 4-D layout ``tf.image.resize``
        expects -- TODO confirm against ``load_relevant_data_subset``.
        """
        if training:
            # Draw the frame-rate change factor
            framerate_factor = tf.random.uniform(
                (), minval=self.framerate_factor_range[0], maxval=self.framerate_factor_range[1]
            )
            n_frames = tf.shape(inputs)[0]
            new_length = tf.cast(tf.cast(n_frames, tf.float32) * framerate_factor, tf.int32)
            # The float->int cast truncates, so a very short clip with a factor
            # below 1 would get length 0, which tf.image.resize rejects.
            # Always keep at least one frame.
            new_length = tf.maximum(new_length, 1)

            # Add a batch axis, resize only the first (frame) axis, drop the batch axis
            inputs_expanded = tf.expand_dims(inputs, axis=0)
            resized_inputs = tf.image.resize(inputs_expanded, (new_length, tf.shape(inputs)[-2]))
            inputs = resized_inputs[0]

        return inputs


In [40]:
# Output directory for the first augmented dataset: the out_root path with every
# occurrence of "base" replaced by "augment". Created if it does not exist yet.
out_root2 = out_root.replace("base", "augment")
os.makedirs(out_root2, exist_ok=True)

In [41]:
"""
    face: 0:468
    left_hand: 468:489
    pose: 489:522
    right_hand: 522:544
        
"""
def get_data2(file_path):
    """Load one landmark file, augment it, then run the shared preprocessing.

    A fresh instance of each augmentation layer is created per call and applied
    with ``training=True`` in this order: ``NanInterpolation``, ``Scaling3D``,
    ``TimeSeriesAugmentation``.

    Args:
        file_path: Path handed unchanged to ``load_relevant_data_subset``.

    Returns:
        Whatever ``preprocess_layer`` returns for the augmented sample; callers
        in this notebook unpack it as ``(data, non_empty_frame_idxs)``.
    """
    sample = load_relevant_data_subset(file_path)

    # Apply the augmentation chain in order
    for layer_cls in (NanInterpolation, Scaling3D, TimeSeriesAugmentation):
        sample = layer_cls()(sample, training=True)

    return preprocess_layer(sample)

In [42]:
# def convert_row_parallel(row, right_handed=True):
#     data, non_empty_frame_idxs = get_data2(row["file_path"])
#     return data, row["sign_ord"], non_empty_frame_idxs

# def func_parallel(idx):
#     row = train.iloc[idx]
#     (x,y, non_empty) = convert_row_parallel(row)
#     return [x,y, non_empty]

In [43]:
# from multiprocessing import Pool

# if not os.path.exists(f'{out_root2}/X.npy'):     
#     length =len(train)
#     if False:
#         with Pool(8) as pool: #os.cpu_count()
#             r = range(length)
#             imap = pool.imap(func_parallel, r)
#             result_all  = list(tqdm(imap, total=len(r)))        
#     else:
#         result_all = []
#         for idx in tqdm(range(length)):
#             metas = func_parallel(idx)
#             result_all.append(metas)

In [44]:
# if not os.path.exists(f'{out_root2}/X.npy'):  
#     X = np.zeros([N_SAMPLES, INPUT_SIZE, N_COLS, N_DIMS], dtype=np.float32)
#     y = np.zeros([N_SAMPLES], dtype=np.int32)
#     NON_EMPTY_FRAME_IDXS = np.full([N_SAMPLES, INPUT_SIZE], -1, dtype=np.float32)
    
#     for i, (x_,y_,z_) in tqdm(enumerate(result_all), total=len(result_all)):
#         X[i,:] = x_
#         y[i] = y_
#         NON_EMPTY_FRAME_IDXS[i] = z_

#     np.save(f'{out_root2}X.npy', X)
#     np.save(f'{out_root2}y.npy', y)
#     np.save(F'{out_root2}NON_EMPTY_FRAME_IDXS.npy', NON_EMPTY_FRAME_IDXS)

In [45]:
# Get the full dataset
def preprocess_data2():
    """Build the augmented dataset for every row of ``train`` and save it.

    Each file path is processed with ``get_data2`` and written into
    pre-allocated arrays, which are then saved under ``out_root2`` as
    ``X.npy``, ``y.npy`` and ``NON_EMPTY_FRAME_IDXS.npy``.

    Returns:
        ``None`` after a complete run. If a processed sample contains NaN, its
        row index is printed and that sample is returned immediately; nothing
        is saved in that case.
    """
    # Output buffers; frame indices start at -1 everywhere
    features = np.zeros([N_SAMPLES, INPUT_SIZE, N_COLS, N_DIMS], dtype=np.float32)
    labels = np.zeros([N_SAMPLES], dtype=np.int32)
    frame_idxs = np.full([N_SAMPLES, INPUT_SIZE], -1, dtype=np.float32)

    samples = train[['file_path', 'sign_ord']].values
    for row_idx, (file_path, sign_ord) in enumerate(tqdm(samples)):
        # Coarse progress message in addition to the tqdm bar
        if not row_idx % 5000:
            print(f'Generated {row_idx}/{N_SAMPLES}')

        data, non_empty_frame_idxs = get_data2(file_path)
        features[row_idx] = data
        labels[row_idx] = sign_ord
        frame_idxs[row_idx] = non_empty_frame_idxs

        # Sanity check: abort and hand back the offending sample for inspection
        if np.isnan(data).any():
            print(row_idx)
            return data

    # Persist the full arrays
    np.save(f'{out_root2}X.npy', features)
    np.save(f'{out_root2}y.npy', labels)
    np.save(F'{out_root2}NON_EMPTY_FRAME_IDXS.npy', frame_idxs)
    

In [46]:
# Build and save the augmented dataset only when its X.npy is not already on
# disk; an existing file acts as a cache and the (long) loop is skipped.
if not os.path.exists(f'{out_root2}/X.npy'):
    preprocess_data2()

  0%|          | 0/94477 [00:00<?, ?it/s]

Generated 0/94477
Generated 5000/94477
Generated 10000/94477
Generated 15000/94477
Generated 20000/94477
Generated 25000/94477
Generated 30000/94477
Generated 35000/94477
Generated 40000/94477
Generated 45000/94477
Generated 50000/94477
Generated 55000/94477
Generated 60000/94477
Generated 65000/94477
Generated 70000/94477
Generated 75000/94477
Generated 80000/94477
Generated 85000/94477
Generated 90000/94477


In [47]:
if not os.path.exists(f'{out_root2}/y_val.npy'):

    # Load the full augmented arrays written by preprocess_data2().
    # All three files must come from out_root2: the frame indices saved there
    # were computed on the augmented (resampled) sequences, so the ones from the
    # base dataset in out_root do not match this X.
    X = np.load(f'{out_root2}/X.npy')
    y = np.load(f'{out_root2}/y.npy')
    NON_EMPTY_FRAME_IDXS = np.load(f'{out_root2}/NON_EMPTY_FRAME_IDXS.npy')

    # Save Train
    X_train = X[train_idxs]
    NON_EMPTY_FRAME_IDXS_TRAIN = NON_EMPTY_FRAME_IDXS[train_idxs]
    y_train = y[train_idxs]
    np.save(f'{out_root2}/X_train.npy', X_train)
    np.save(f'{out_root2}/y_train.npy', y_train)
    np.save(f'{out_root2}/NON_EMPTY_FRAME_IDXS_TRAIN.npy', NON_EMPTY_FRAME_IDXS_TRAIN)

    # Save Validation (y_val.npy is the file the guard above checks)
    X_val = X[val_idxs]
    NON_EMPTY_FRAME_IDXS_VAL = NON_EMPTY_FRAME_IDXS[val_idxs]
    y_val = y[val_idxs]
    np.save(f'{out_root2}/X_val.npy', X_val)
    np.save(f'{out_root2}/y_val.npy', y_val)
    np.save(f'{out_root2}/NON_EMPTY_FRAME_IDXS_VAL.npy', NON_EMPTY_FRAME_IDXS_VAL)

    # Split Statistics
    print(f'Patient ID Intersection Train/Val: {set(PARTICIPANT_IDS[train_idxs]).intersection(PARTICIPANT_IDS[val_idxs])}')
    print(f'X_train shape: {X_train.shape}, X_val shape: {X_val.shape}')
    print(f'y_train shape: {y_train.shape}, y_val shape: {y_val.shape}')


Patient ID Intersection Train/Val: {37779, 61333, 36257, 49445, 37055, 32319, 16069, 34503, 22343, 55372, 30680, 27610, 25571, 18796, 26734, 4718, 28656, 53618, 29302, 2044, 62590}
X_train shape: (75581, 32, 368, 3), X_val shape: (18896, 32, 368, 3)
y_train shape: (75581,), y_val shape: (18896,)


In [48]:
# Deliberate stop: `assert False` always raises AssertionError, so a "Run All"
# halts at this cell and the cells below are not executed.
assert False

AssertionError: 

# 水増し２

In [None]:
# Output directory for the second augmented dataset: the out_root path with every
# occurrence of "base" replaced by "augment_v02". Created if it does not exist yet.
out_root3 = out_root.replace("base", "augment_v02")
os.makedirs(out_root3, exist_ok=True)

In [None]:
"""
    face: 0:468
    left_hand: 468:489
    pose: 489:522
    right_hand: 522:544
        
"""
def get_data3(file_path):
    """Load one landmark file, augment it, then run the shared preprocessing.

    The augmentation chain is NaN interpolation, random scaling, then random
    frame-rate resampling, each applied with ``training=True`` by a layer
    instance created for this call.

    Args:
        file_path: Path handed unchanged to ``load_relevant_data_subset``.

    Returns:
        Whatever ``preprocess_layer`` returns for the augmented sample; callers
        in this notebook unpack it as ``(data, non_empty_frame_idxs)``.
    """
    raw_landmarks = load_relevant_data_subset(file_path)

    # Augmentation chain; each stage feeds the next
    filled = NanInterpolation()(raw_landmarks, training=True)
    scaled = Scaling3D()(filled, training=True)
    resampled = TimeSeriesAugmentation()(scaled, training=True)

    return preprocess_layer(resampled)

In [None]:
# Get the full dataset
def preprocess_data3():
    """Build the second augmented dataset for every row of ``train`` and save it.

    Each file path is processed with ``get_data3`` and written into
    pre-allocated arrays, which are then saved under ``out_root3`` as
    ``X.npy``, ``y.npy`` and ``NON_EMPTY_FRAME_IDXS.npy``.

    Returns:
        ``None`` after a complete run. If a processed sample contains NaN, its
        row index is printed and that sample is returned immediately; nothing
        is saved in that case.
    """
    # Output buffers; frame indices start at -1 everywhere
    features = np.zeros([N_SAMPLES, INPUT_SIZE, N_COLS, N_DIMS], dtype=np.float32)
    labels = np.zeros([N_SAMPLES], dtype=np.int32)
    frame_idxs = np.full([N_SAMPLES, INPUT_SIZE], -1, dtype=np.float32)

    samples = train[['file_path', 'sign_ord']].values
    for row_idx, (file_path, sign_ord) in enumerate(tqdm(samples)):
        # Coarse progress message in addition to the tqdm bar
        if not row_idx % 5000:
            print(f'Generated {row_idx}/{N_SAMPLES}')

        data, non_empty_frame_idxs = get_data3(file_path)
        features[row_idx] = data
        labels[row_idx] = sign_ord
        frame_idxs[row_idx] = non_empty_frame_idxs

        # Sanity check: abort and hand back the offending sample for inspection
        if np.isnan(data).any():
            print(row_idx)
            return data

    # Persist the full arrays
    np.save(f'{out_root3}X.npy', features)
    np.save(f'{out_root3}y.npy', labels)
    np.save(F'{out_root3}NON_EMPTY_FRAME_IDXS.npy', frame_idxs)
    

In [None]:
# Build and save the second augmented dataset only when its X.npy is not already
# on disk; an existing file acts as a cache and the (long) loop is skipped.
if not os.path.exists(f'{out_root3}/X.npy'):
    preprocess_data3()

In [None]:
if not os.path.exists(f'{out_root3}/y_val.npy'):

    # Load the full augmented arrays written by preprocess_data3().
    # All three files must come from out_root3: the frame indices saved there
    # were computed on the augmented (resampled) sequences, so the ones from the
    # base dataset in out_root do not match this X.
    X = np.load(f'{out_root3}/X.npy')
    y = np.load(f'{out_root3}/y.npy')
    NON_EMPTY_FRAME_IDXS = np.load(f'{out_root3}/NON_EMPTY_FRAME_IDXS.npy')

    # Save Train
    X_train = X[train_idxs]
    NON_EMPTY_FRAME_IDXS_TRAIN = NON_EMPTY_FRAME_IDXS[train_idxs]
    y_train = y[train_idxs]
    np.save(f'{out_root3}/X_train.npy', X_train)
    np.save(f'{out_root3}/y_train.npy', y_train)
    np.save(f'{out_root3}/NON_EMPTY_FRAME_IDXS_TRAIN.npy', NON_EMPTY_FRAME_IDXS_TRAIN)

    # Save Validation (y_val.npy is the file the guard above checks)
    X_val = X[val_idxs]
    NON_EMPTY_FRAME_IDXS_VAL = NON_EMPTY_FRAME_IDXS[val_idxs]
    y_val = y[val_idxs]
    np.save(f'{out_root3}/X_val.npy', X_val)
    np.save(f'{out_root3}/y_val.npy', y_val)
    np.save(f'{out_root3}/NON_EMPTY_FRAME_IDXS_VAL.npy', NON_EMPTY_FRAME_IDXS_VAL)

    # Split Statistics
    print(f'Patient ID Intersection Train/Val: {set(PARTICIPANT_IDS[train_idxs]).intersection(PARTICIPANT_IDS[val_idxs])}')
    print(f'X_train shape: {X_train.shape}, X_val shape: {X_val.shape}')
    print(f'y_train shape: {y_train.shape}, y_val shape: {y_val.shape}')
