Hello Fellow Kagglers,

This notebook demonstrates the data processing and training process in Tensorflow.

I am excited about this competition, because my Master Thesis was on sign language recognition.

**Data Processing**

Only lips, hands and arm pose coordinates are used.

A custom TensorFlow layer handles the data processing. In short, it filters out all frames without hand coordinates and downsamples the input to 32 frames if it is too long.

**Model**

A transformer-based model is used. The embedding layer makes an embedding per landmark group (lips/left hand/right hand/arm pose) and merges these embeddings with fully connected layers. The transformer consists of just 2 blocks, followed by simple mean pooling and fully connected layers for classification.


**V2**

* Learnable attention weights for each landmark
* Removed layer normalisation in embedding to prevent double layer normalisation at the end of embedding and start of transformer
* Removed additional fully connected layer in head before classification layer

**V3**

* Using all data for training
* Increased final embedding size 384 -> 512
* Added 10% dropout in classification layer
* Increased number of epoch 50 -> 100
* Number of transformer heads 8 -> 4

If you have any feedback or questions, please feel free to leave a comment.

Expect updates in the coming weeks!

In [1]:
# !pip install -q tensorflow_addons
# !pip install -q wandb
# !pip install -q pyarrow
# !pip install -q fastparquet

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_addons as tfa
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sn
import datetime

from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split, GroupShuffleSplit

import layers
from utils.Utils import print_shape_dtype, pd_read_s3_parquet, upload_file, get_dataset_partitions_tf 

import glob
import sys
import os
import math
import gc
import sys
import sklearn
import scipy
import boto3
import io
import wandb
import json

2023-04-15 19:21:50.916102: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.

TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



In [2]:
# Set the level of TensorFlow's Python logger to INFO.
tf.get_logger().setLevel('INFO')

# Config

In [10]:
# Run configuration (input sizes, learning rate, epochs, ...) read by the cells below.
with open("./config/config.json") as config_file:
    config = json.load(config_file)

In [5]:
# # Setup Weights and Biases
# wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mromendiratta[0m ([33mw251-asl-fp[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [6]:
# LOG_DIR = './logs/fit'
# wandb.tensorboard.patch(root_logdir= LOG_DIR)
# wandb.init(project='w251-GISLR', 
#            config=config,
#           sync_tensorboard=True)

In [3]:
# Seed passed to the dataset split.
SEED = 42
# Coordinate dimension names.
DIM_NAMES = ['x', 'y']

# Gates the validation-prediction and classification-report cells.
USE_VAL = True

# Samples

In [8]:
# s3_client = boto3.client(
#     "s3"
# )

In [9]:
# AWS_S3_BUCKET = "w251-asl-data"
# TRAIN_CSV_FILE = "raw-data/train.csv"

In [10]:
# # data_version = config["DATA_VERSION"]

# data_version = 1


# X = s3_client.get_object(Bucket=AWS_S3_BUCKET, Key=f'processed-data/v{data_version}/X.npy')
# X = np.load(io.BytesIO(X['Body'].read()))

# y = s3_client.get_object(Bucket=AWS_S3_BUCKET, Key=f'processed-data/v{data_version}/y.npy')
# y = np.load(io.BytesIO(y['Body'].read()))

# NON_EMPTY_FRAME_IDXS = s3_client.get_object(Bucket=AWS_S3_BUCKET, Key=f'processed-data/v{data_version}/NON_EMPTY_FRAME_IDXS.npy')
# NON_EMPTY_FRAME_IDXS = np.load(io.BytesIO(NON_EMPTY_FRAME_IDXS['Body'].read()))
    
# print_shape_dtype([X, y, NON_EMPTY_FRAME_IDXS], ['X', 'y', 'NON_EMPTY_FRAME_IDXS'])
# print(f'# NaN Values X: {np.isnan(X).sum()}')

In [4]:
# The preprocessed arrays are read from the local filesystem because reading from S3 takes too long.
X = np.load("./X.npy")
y = np.load("./y.npy")
NON_EMPTY_FRAME_IDXS = np.load("./NON_EMPTY_FRAME_IDXS.npy")

# Report shape/dtype of every array, then the number of NaNs in the features.
loaded_arrays = [X, y, NON_EMPTY_FRAME_IDXS]
loaded_names = ['X', 'y', 'NON_EMPTY_FRAME_IDXS']
print_shape_dtype(loaded_arrays, loaded_names)
print(f'# NaN Values X: {np.isnan(X).sum()}')

X shape: (94477, 38, 454), dtype: float32
y shape: (94477,), dtype: int32
NON_EMPTY_FRAME_IDXS shape: (94477, 38), dtype: float32
# NaN Values X: 0


In [12]:
# def get_sample(X, y, NON_EMPTY_FRAME_IDXS, batch_size=config["BATCH_SIZE"]): #TODO: Change this in config.
    
#     # Arrays to store batch in
#     X_batch = np.zeros([batch_size, config["INPUT_SIZE"], config["N_COLS"], config["N_DIMS"]], dtype=np.float32)
#     y_batch = np.arange(0, batch_size, dtype=np.int32)
#     non_empty_frame_idxs_batch = np.zeros([batch_size, config["INPUT_SIZE"]], dtype=np.float32)
    
#     # Dictionary mapping ordinally encoded sign to corresponding sample indices
#     CLASS2IDXS = {}
#     for i in range(config["NUM_CLASSES"]):
#         CLASS2IDXS[i] = np.argwhere(y == i).squeeze().astype(np.int32)
            
#     while True:
#         # Fill batch arrays
#         for i in range(config["NUM_CLASSES"]):
#             idxs = np.random.choice(CLASS2IDXS[i], n)
#             X_batch[i*n:(i+1)*n] = X[idxs]
#             non_empty_frame_idxs_batch[i*n:(i+1)*n] = NON_EMPTY_FRAME_IDXS[idxs]
        
#         yield { 'frames': X_batch, 'non_empty_frame_idxs': non_empty_frame_idxs_batch }, y_batch

In [13]:
# # LIPS
# LIPS_MEAN_X = np.zeros([LIPS_IDXS.size], dtype=np.float32)
# LIPS_MEAN_Y = np.zeros([LIPS_IDXS.size], dtype=np.float32)
# LIPS_STD_X = np.zeros([LIPS_IDXS.size], dtype=np.float32)
# LIPS_STD_Y = np.zeros([LIPS_IDXS.size], dtype=np.float32)

# fig, axes = plt.subplots(3, 1, figsize=(15, config["N_DIMS"]*6))
   
# for col, ll in enumerate(tqdm( np.transpose(X[:,:,LIPS_IDXS], [2,3,0,1]).reshape([LIPS_IDXS.size, config["N_DIMS"], -1]) )):
#     for dim, l in enumerate(ll):
#         v = l[np.nonzero(l)]
#         if dim == 0: # X
#             LIPS_MEAN_X[col] = v.mean()
#             LIPS_STD_X[col] = v.std()
#         if dim == 1: # Y
#             LIPS_MEAN_Y[col] = v.mean()
#             LIPS_STD_Y[col] = v.std()
        
#         axes[dim].boxplot(v, notch=False, showfliers=False, positions=[col], whis=[5,95])
        
# for ax, dim_name in zip(axes, DIM_NAMES):
#     ax.set_title(f'Lips {dim_name.upper()} Dimension', size=24)
#     ax.tick_params(axis='x', labelsize=8)
#     ax.grid(axis='y')

# plt.subplots_adjust(hspace=0.50)
# plt.show()

# LIPS_MEAN = np.array([LIPS_MEAN_X, LIPS_MEAN_Y]).T
# LIPS_STD = np.array([LIPS_STD_X, LIPS_STD_Y]).T

In [14]:
# # LEFT HAND
# LEFT_HANDS_MEAN_X = np.zeros([LEFT_HAND_IDXS.size], dtype=np.float32)
# LEFT_HANDS_MEAN_Y = np.zeros([LEFT_HAND_IDXS.size], dtype=np.float32)
# LEFT_HANDS_STD_X = np.zeros([LEFT_HAND_IDXS.size], dtype=np.float32)
# LEFT_HANDS_STD_Y = np.zeros([LEFT_HAND_IDXS.size], dtype=np.float32)
# # RIGHT HAND
# RIGHT_HANDS_MEAN_X = np.zeros([RIGHT_HAND_IDXS.size], dtype=np.float32)
# RIGHT_HANDS_MEAN_Y = np.zeros([RIGHT_HAND_IDXS.size], dtype=np.float32)
# RIGHT_HANDS_STD_X = np.zeros([RIGHT_HAND_IDXS.size], dtype=np.float32)
# RIGHT_HANDS_STD_Y = np.zeros([RIGHT_HAND_IDXS.size], dtype=np.float32)

# fig, axes = plt.subplots(3, 1, figsize=(15, config["N_DIMS"]*6))
   
# for col, ll in enumerate(tqdm( np.transpose(X[:,:,HAND_IDXS], [2,3,0,1]).reshape([HAND_IDXS.size, config["N_DIMS"], -1]) )):
#     for dim, l in enumerate(ll):
#         v = l[np.nonzero(l)]
#         if dim == 0: # X
#             if col < RIGHT_HAND_IDXS.size: # LEFT HAND
#                 LEFT_HANDS_MEAN_X[col] = v.mean()
#                 LEFT_HANDS_STD_X[col] = v.std()
#             else:
#                 RIGHT_HANDS_MEAN_X[col - LEFT_HAND_IDXS.size] = v.mean()
#                 RIGHT_HANDS_STD_X[col - LEFT_HAND_IDXS.size] = v.std()
#         if dim == 1: # Y
#             if col < RIGHT_HAND_IDXS.size: # LEFT HAND
#                 LEFT_HANDS_MEAN_Y[col] = v.mean()
#                 LEFT_HANDS_STD_Y[col] = v.std()
#             else: # RIGHT HAND
#                 RIGHT_HANDS_MEAN_Y[col - LEFT_HAND_IDXS.size] = v.mean()
#                 RIGHT_HANDS_STD_Y[col - LEFT_HAND_IDXS.size] = v.std()
        
#         axes[dim].boxplot(v, notch=False, showfliers=False, positions=[col], whis=[5,95])
        
# for ax, dim_name in zip(axes, DIM_NAMES):
#     ax.set_title(f'Hands {dim_name.upper()} Dimension', size=24)
#     ax.tick_params(axis='x', labelsize=8)
#     ax.grid(axis='y')

# plt.subplots_adjust(hspace=0.50)
# plt.show()

# LEFT_HANDS_MEAN = np.array([LEFT_HANDS_MEAN_X, LEFT_HANDS_MEAN_Y]).T
# LEFT_HANDS_STD = np.array([LEFT_HANDS_STD_X, LEFT_HANDS_STD_Y]).T
# RIGHT_HANDS_MEAN = np.array([RIGHT_HANDS_MEAN_X, RIGHT_HANDS_MEAN_Y]).T
# RIGHT_HANDS_STD = np.array([RIGHT_HANDS_STD_X, RIGHT_HANDS_STD_Y]).T

In [15]:
# # POSE
# POSE_MEAN_X = np.zeros([POSE_IDXS.size], dtype=np.float32)
# POSE_MEAN_Y = np.zeros([POSE_IDXS.size], dtype=np.float32)
# POSE_STD_X = np.zeros([POSE_IDXS.size], dtype=np.float32)
# POSE_STD_Y = np.zeros([POSE_IDXS.size], dtype=np.float32)

# fig, axes = plt.subplots(3, 1, figsize=(15, config["N_DIMS"]*6))
   
# for col, ll in enumerate(tqdm( np.transpose(X[:,:,POSE_IDXS], [2,3,0,1]).reshape([POSE_IDXS.size, config["N_DIMS"], -1]) )):
#     for dim, l in enumerate(ll):
#         v = l[np.nonzero(l)]
#         if dim == 0: # X
#             POSE_MEAN_X[col] = v.mean()
#             POSE_STD_X[col] = v.std()
#         if dim == 1: # Y
#             POSE_MEAN_Y[col] = v.mean()
#             POSE_STD_Y[col] = v.std()
        
#         axes[dim].boxplot(v, notch=False, showfliers=False, positions=[col], whis=[5,95])
        
# for ax, dim_name in zip(axes, DIM_NAMES):
#     ax.set_title(f'Pose {dim_name.upper()} Dimension', size=24)
#     ax.tick_params(axis='x', labelsize=8)
#     ax.grid(axis='y')

# plt.subplots_adjust(hspace=0.50)
# plt.show()

# POSE_MEAN = np.array([POSE_MEAN_X, POSE_MEAN_Y]).T
# POSE_STD = np.array([POSE_STD_X, POSE_STD_Y]).T

# Model Config

In [5]:
# Selects the encoder built by get_model(): True -> layers.Transformer,
# False -> NUM_BLOCKS stacked layers.TransformerV2 blocks.
TRANSFORMERV1 = False

# Epsilon value for layer normalisation
LAYER_NORM_EPS = 1e-6

# Dense layer units for landmarks
FACE_UNITS = 384
HANDS_UNITS = 384
POSE_UNITS = 384
# final embedding and transformer embedding size
UNITS = 512

# Transformer
NUM_BLOCKS = 2
# Only passed to layers.Transformer (the TRANSFORMERV1 branch of get_model()).
MLP_RATIO = 2
# NOTE(review): the V3 changelog at the top of the notebook says the number of
# heads was reduced 8 -> 4, but the value here is 8 — confirm which is intended.
NUM_HEADS = 8

# Dropout
# NOTE(review): EMBEDDING_DROPOUT is not referenced by any other cell in this notebook.
EMBEDDING_DROPOUT = 0.00
MLP_DROPOUT_RATIO = 0.30
CLASSIFIER_DROPOUT_RATIO = 0.10

# Initializers
# INIT_HE_UNIFORM = tf.keras.initializers.he_uniform
INIT_GLOROT_UNIFORM = tf.keras.initializers.glorot_uniform
# INIT_ZEROS = tf.keras.initializers.constant(0.0)
# Activations
ACTIVATION = tf.keras.activations.gelu

# Passed as `verbose` to model.fit in the training cell.
VERBOSE = True

In [6]:
# Raw landmark indices kept for modelling, one collection per landmark group.
FACE_IDXS = [0, 6, 7, 11, 12, 13, 14, 15, 17, 22, 23, 24, 25, 26, 30, 31, 
                     33, 37, 38, 39, 40, 41, 42, 56, 61, 62, 72, 73, 74, 76, 77, 
                     78, 80, 81, 82, 84, 86, 87, 88, 89, 90, 91, 95, 96, 110, 112, 
                     113, 122, 128, 130, 133, 144, 145, 146, 153, 154, 155, 157, 158, 
                     159, 160, 161, 163, 168, 173, 178, 179, 180, 181, 183, 184, 185, 
                     188, 189, 190, 191, 193, 196, 197, 232, 233, 243, 244, 245, 246, 
                     247, 249, 252, 253, 254, 255, 256, 259, 260, 263, 267, 268, 269, 
                     270, 271, 272, 286, 291, 292, 302, 303, 304, 306, 307, 308, 310, 
                     311, 312, 314, 316, 317, 318, 319, 320, 321, 324, 325, 339, 341, 
                     351, 357, 359, 362, 373, 374, 375, 380, 381, 382, 384, 385, 386, 
                     387, 388, 390, 398, 402, 403, 404, 405, 407, 408, 409, 412, 413, 
                     414, 415, 417, 419, 453, 463, 464, 465, 466, 467]
POSE_IDXS = np.arange(489, 514)
LEFT_HAND_IDXS = np.arange(468, 489)
RIGHT_HAND_IDXS = np.arange(522, 543)

# Length of landmarks.
FACE_LEN = len(FACE_IDXS)
POSE_LEN = POSE_IDXS.size
LEFT_HAND_LEN = LEFT_HAND_IDXS.size
RIGHT_HAND_LEN = RIGHT_HAND_IDXS.size

# All landmarks that are used for modeling, in the column order that the
# *_START offsets below (and therefore get_model()) assume:
# face, left hand, pose, right hand.
# The concatenation previously listed pose before the left hand, which
# contradicted the offsets; the offsets consumed by the model are unchanged.
# NOTE(review): confirm that the preprocessing which produced X.npy stores the
# landmark groups in this order; if it used face/pose/left/right instead, the
# offsets (not this array) are what must change.
LANDMARK_IDXS = np.concatenate((FACE_IDXS, LEFT_HAND_IDXS, POSE_IDXS, RIGHT_HAND_IDXS))

# Start position of each group within the filtered landmarks (same order as LANDMARK_IDXS).
FACE_START = 0
LEFT_HAND_START = FACE_START + FACE_LEN
POSE_START = LEFT_HAND_START + LEFT_HAND_LEN
RIGHT_HAND_START = POSE_START + POSE_LEN

In [7]:
def get_model():
    """Build and compile the landmark transformer classifier.

    The model has two inputs: ``FRAMES`` (per-frame flattened landmark
    coordinates) and ``NON_EMPTY_FRAME_IDXS`` (entries equal to -1 are masked
    out of the pooling step). The face, left-hand, pose and right-hand columns
    are sliced out of ``FRAMES``, embedded, passed through the encoder selected
    by ``TRANSFORMERV1``, mean-pooled over the unmasked frames and classified
    with a softmax layer.

    Reads the notebook globals ``config`` and the model-config constants.

    Returns:
        A compiled ``tf.keras.Model`` using sparse categorical cross-entropy,
        AdamW, and top-1 / top-5 / top-10 accuracy metrics.
    """
    # Inputs
    frames = tf.keras.layers.Input([config["INPUT_SIZE"], config["N_COLS"] * config["N_DIMS"]], dtype=tf.float32, name='FRAMES')
    non_empty_frame_idxs = tf.keras.layers.Input([config["INPUT_SIZE"]], dtype=tf.float32, name='NON_EMPTY_FRAME_IDXS')
    
    # Attention Mask: 1.0 where the frame index is not -1, with a trailing axis for broadcasting
    mask = tf.cast(tf.math.not_equal(non_empty_frame_idxs, -1), tf.float32)
    mask = tf.expand_dims(mask, axis=2)
    
    # Each landmark contributes two consecutive columns, hence the "* 2" on offsets and lengths.
    # Slice out face indices
    face = tf.slice(frames, [0, 0, FACE_START], [-1, config["INPUT_SIZE"], FACE_LEN * 2])

    # Slice out left_hand indices
    left_hand = tf.slice(frames, [0, 0, LEFT_HAND_START * 2], [-1, config["INPUT_SIZE"], LEFT_HAND_LEN * 2])

    # Slice out pose indices
    pose = tf.slice(frames, [0, 0, POSE_START * 2], [-1, config["INPUT_SIZE"], POSE_LEN * 2])

    # Slice out right_hand indices
    right_hand = tf.slice(frames, [0, 0, RIGHT_HAND_START * 2], [-1, config["INPUT_SIZE"], RIGHT_HAND_LEN * 2])
    
    embedding_layer = layers.Embedding(config["INPUT_SIZE"], FACE_UNITS, HANDS_UNITS, POSE_UNITS, UNITS, ACTIVATION)
    x = embedding_layer(face, left_hand, right_hand, pose, non_empty_frame_idxs)
    transformer_input_shape = x.shape
    
    if (TRANSFORMERV1):
        # Encoder Transformer Blocks
        transformer_layer = layers.Transformer(NUM_BLOCKS, LAYER_NORM_EPS, UNITS, MLP_RATIO, MLP_DROPOUT_RATIO, ACTIVATION)
        x = transformer_layer(x, mask)
    else:
        # NOTE(review): the first block receives the full shape (batch axis
        # included) while later blocks receive x.shape[1:] — confirm which
        # form layers.TransformerV2 expects.
        encoder_input_shape = transformer_input_shape
        for _ in range(NUM_BLOCKS):
            x = layers.TransformerV2(encoder_input_shape, NUM_HEADS, UNITS, MLP_DROPOUT_RATIO, LAYER_NORM_EPS)(x)
            encoder_input_shape = x.shape[1:]  # Update the input shape for the next encoder
    
    # Pooling: masked mean over the frame axis. divide_no_nan returns 0 instead
    # of NaN for a sample whose mask sums to 0 (no valid frame at all).
    x = tf.math.divide_no_nan(tf.reduce_sum(x * mask, axis=1), tf.reduce_sum(mask, axis=1))
    # Classifier Dropout
    x = tf.keras.layers.Dropout(CLASSIFIER_DROPOUT_RATIO)(x)
    # Classification Layer
    x = tf.keras.layers.Dense(config["NUM_CLASSES"], activation=tf.keras.activations.softmax, kernel_initializer=INIT_GLOROT_UNIFORM)(x)
    
    outputs = x
    
    # Create Tensorflow Model
    model = tf.keras.models.Model(inputs=[frames, non_empty_frame_idxs], outputs=outputs)
    
    # Sparse categorical cross-entropy: labels are integer class ids
    loss = tf.keras.losses.SparseCategoricalCrossentropy()
    
    # Adam Optimizer with weight decay
    optimizer = tfa.optimizers.AdamW(learning_rate=config["LEARNING_RATE"], weight_decay=config["WEIGHT_DECAY"])
    
    # TopK Metrics
    metrics = [
        tf.keras.metrics.SparseCategoricalAccuracy(name='acc'),
        tf.keras.metrics.SparseTopKCategoricalAccuracy(k=5, name='top_5_acc'),
        tf.keras.metrics.SparseTopKCategoricalAccuracy(k=10, name='top_10_acc'),
    ]
    
    model.compile(loss=loss, optimizer=optimizer, metrics=metrics, run_eagerly=True)
    
    return model

In [8]:
# Load dataset
# Number of leading samples used to build the dataset. 100 keeps debugging runs
# fast; set it to X.shape[0] to use every sample.
N_SAMPLES = 100

with tf.device('CPU'):
    dataset = tf.data.Dataset.from_tensor_slices((
        {"FRAMES": X[:N_SAMPLES], "NON_EMPTY_FRAME_IDXS": NON_EMPTY_FRAME_IDXS[:N_SAMPLES]},
        y[:N_SAMPLES],
    ))

# Pass the size of the dataset that was actually built. Passing X.shape[0]
# while the dataset holds only a subset makes the requested train split larger
# than the whole dataset.
# NOTE(review): assumes get_dataset_partitions_tf derives the split sizes from
# this argument — confirm in utils.Utils.
dataset_size = len(y[:N_SAMPLES])
train, validation, test = get_dataset_partitions_tf(dataset, dataset_size, train_split=0.8, val_split=0.1, 
                                                test_split=0.1, shuffle=True, shuffle_size=10000, seed=SEED)


2023-04-15 19:22:21.035864: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-04-15 19:22:21.056867: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-04-15 19:22:21.058900: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

In [47]:
# inputs = X[:2]
# attn_output = tf.keras.layers.MultiHeadAttention(num_heads=8, key_dim=inputs.shape[-1])(inputs, inputs)
# attn_output = tf.keras.layers.Dropout(.10)(attn_output)
# out1 = tf.keras.layers.LayerNormalization(epsilon=.000001)(inputs + attn_output)
# ff_output = tf.keras.layers.Dense(512, activation='relu')(out1)
# ff_output = tf.keras.layers.Dropout(.10)(ff_output)
# ff_output = tf.keras.layers.Dense(inputs.shape[-1])(ff_output)  # Add this line to match the dimensions
# out2 = tf.keras.layers.LayerNormalization(epsilon=.000001)(out1 + ff_output)
# out2

<tf.Tensor: shape=(2, 38, 454), dtype=float32, numpy=
array([[[ 0.89509386, -0.9504263 ,  0.1310055 , ..., -0.1938196 ,
          1.9093127 , -0.4845211 ],
        [ 0.9115601 , -0.9515057 ,  0.13027714, ..., -0.20001315,
          1.9195338 , -0.4888235 ],
        [ 0.90753496, -0.95507544,  0.12991904, ..., -0.1993928 ,
          1.9261645 , -0.4882155 ],
        ...,
        [ 0.6213834 , -0.8534421 ,  0.59416854, ...,  1.3270121 ,
         -0.18978399,  0.66611487],
        [ 0.6213834 , -0.8534421 ,  0.59416854, ...,  1.3270121 ,
         -0.18978399,  0.66611487],
        [ 0.6213834 , -0.8534421 ,  0.59416854, ...,  1.3270121 ,
         -0.18978399,  0.66611487]],

       [[ 1.0769463 , -1.086969  ,  0.31157184, ...,  0.04079201,
          1.5522891 , -0.16474871],
        [ 1.0775402 , -1.0897982 ,  0.311565  , ...,  0.03974567,
          1.5606757 , -0.16549842],
        [ 1.0716054 , -1.0900133 ,  0.31049377, ...,  0.04017531,
          1.56293   , -0.1663104 ],
        ...,


In [11]:
# Smoke test: build a fresh (untrained) model and evaluate it on the dataset in batches of 2.
model = get_model()
model.evaluate(dataset.batch(2))

2023-04-15 19:23:07.336129: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_2' with dtype int32 and shape [100]
	 [[{{node Placeholder/_2}}]]
2023-04-15 19:23:07.996645: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:637] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.




[6.172268867492676,
 0.009999999776482582,
 0.03999999910593033,
 0.03999999910593033]

In [None]:
# Print the model summary, expanding nested models/layers
model.summary(expand_nested=True)

In [None]:
# Render the model graph with shapes, dtypes, layer names and activations.
tf.keras.utils.plot_model(model, show_shapes=True, show_dtype=True, show_layer_names=True, expand_nested=True, show_layer_activations=True)

# Weight Initialization

In [None]:
# N = 32
# y_pred = model.predict(dummy_dataset, verbose=VERBOSE, steps=N).flatten()

# plt.figure(figsize=(12,5))
# plt.title(f'Softmax Output Initialized Model | µ={y_pred.mean():.3f}, σ={y_pred.std():.3f}', pad=25)
# pd.Series(y_pred).plot(kind='hist', bins=128, label='Class Probability')
# plt.xlim(0, max(y_pred) * 1.1)
# plt.grid()
# plt.legend()
# plt.show()

# Learning Rate Scheduler

In [12]:
def lrfn(current_step, num_warmup_steps, lr_max, num_cycles=0.50, num_training_steps=config["N_EPOCHS"], warm_method='log'):
    """Return the learning rate for ``current_step``.

    During warm-up (``current_step < num_warmup_steps``) the rate grows
    exponentially towards ``lr_max``: by a factor of 10 per step when
    ``warm_method == 'log'``, otherwise by a factor of 2 per step.
    Afterwards it follows a cosine curve over the remaining steps, covering
    ``num_cycles`` full cosine periods, and is clamped at 0.
    """
    steps_until_peak = num_warmup_steps - current_step

    # Warm-up phase
    if steps_until_peak > 0:
        if warm_method == 'log':
            return lr_max * 0.10 ** steps_until_peak
        return lr_max * 2 ** -steps_until_peak

    # Cosine phase; max(1, ...) guards against a zero-length decay span
    decay_span = float(max(1, num_training_steps - num_warmup_steps))
    progress = float(current_step - num_warmup_steps) / decay_span
    cosine_factor = 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress))

    return max(0.0, cosine_factor) * lr_max

In [13]:
def plot_lr_schedule(lr_schedule, epochs):
    """Plot a per-epoch learning-rate schedule and annotate selected points.

    Args:
        lr_schedule: list of learning rates; element ``i`` is drawn at epoch ``i + 1``.
        epochs: number of epochs shown on the x axis.

    For more than 40 epochs only every fifth point (and the last epoch) is
    marked and labelled, to keep the figure readable.
    """
    plt.figure(figsize=(20, 10))
    # Pad with None on both sides so list index i + 1 lines up with epoch i + 1
    plt.plot([None] + lr_schedule + [None])
    # X Labels
    tick_positions = np.arange(1, epochs + 1)
    x_axis_labels = [i if epochs <= 40 or i % 5 == 0 or i == 1 else None for i in range(1, epochs + 1)]
    plt.xlim([1, epochs])
    plt.xticks(tick_positions, x_axis_labels) # set tick step to 1 and let x axis start at 1
    
    # Increase y-limit for better readability
    lr_max = max(lr_schedule)
    plt.ylim([0, lr_max * 1.1])
    
    # Title
    schedule_info = f'start: {lr_schedule[0]:.1E}, max: {lr_max:.1E}, final: {lr_schedule[-1]:.1E}'
    plt.title(f'Step Learning Rate Schedule, {schedule_info}', size=18, pad=12)
    
    # Plot Learning Rates
    offset_y = (lr_max - min(lr_schedule)) * 0.02
    last_idx = len(lr_schedule) - 1
    for idx, val in enumerate(lr_schedule):
        # `==` rather than `is`: identity comparison of ints is only reliable for small values
        if not (epochs <= 40 or idx % 5 == 0 or idx == epochs - 1):
            continue
        # Label alignment: right-aligned while the schedule is rising, else left-aligned.
        # The first point is handled first so it is never compared with
        # lr_schedule[-1] (the final value) through negative indexing.
        if idx == 0:
            ha = 'right'
        elif idx < last_idx and lr_schedule[idx - 1] < val:
            ha = 'right'
        else:
            ha = 'left'
        plt.plot(idx + 1, val, 'o', color='black');
        plt.annotate(f'{val:.1E}', xy=(idx + 1, val + offset_y), size=12, ha=ha)
    
    plt.xlabel('Epoch', size=16, labelpad=5)
    plt.ylabel('Learning Rate', size=16, labelpad=5)
    plt.grid()
    plt.show()

# Learning rate for every epoch: warm-up followed by half a cosine period
LR_SCHEDULE = [
    lrfn(
        epoch,
        num_warmup_steps=config["N_WARMUP_EPOCHS"],
        lr_max=config["LEARNING_RATE"],
        num_cycles=0.50,
    )
    for epoch in range(config["N_EPOCHS"])
]
# Plot Learning Rate Schedule
# plot_lr_schedule(LR_SCHEDULE, epochs=config["N_EPOCHS"])
# Learning Rate Callback: looks the epoch's rate up in the precomputed schedule
lr_callback = tf.keras.callbacks.LearningRateScheduler(
    lambda epoch: LR_SCHEDULE[epoch],
    verbose=1,
)

In [14]:
# Custom callback to update weight decay with learning rate
class WeightDecayCallback(tf.keras.callbacks.Callback):
    """Keep the optimizer's weight decay at a fixed ratio of its learning rate.

    At the start of every epoch the weight decay of the optimizer of the model
    being trained is set to ``learning_rate * wd_ratio`` and both values are printed.

    Args:
        wd_ratio: factor applied to the learning rate to obtain the weight decay.
    """
    def __init__(self, wd_ratio=config['WD_RATIO']):
        # Initialise the Keras base class so its own attributes are set up
        super().__init__()
        self.step_counter = 0
        self.wd_ratio = wd_ratio
    
    def on_epoch_begin(self, epoch, logs=None):
        # Use the model Keras attached to this callback rather than the
        # notebook-global `model`, so the callback follows whichever model is fitted.
        optimizer = self.model.optimizer
        optimizer.weight_decay = optimizer.learning_rate * self.wd_ratio
        print(f'learning rate: {optimizer.learning_rate.numpy():.2e}, weight decay: {optimizer.weight_decay.numpy():.2e}')

# Weight Decay Callback

# Performance Benchmark

In [None]:
# %%timeit -n 100
# # Verify model prediction is <<<100ms
# model.predict_on_batch({ 'FRAMES': X[:1], 'NON_EMPTY_FRAME_IDXS': NON_EMPTY_FRAME_IDXS[:1] })
# pass

# Training

# Train

In [15]:
# Reset Keras' global state before building a new model
tf.keras.backend.clear_session()

# Build a fresh model and print its summary as a sanity check
model = get_model()
model.summary()

# TensorBoard logs go to a timestamped directory per run
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "./logs/fit/" + run_stamp
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
# stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

# Order matters: the learning rate is scheduled before the weight decay is derived from it
training_callbacks = [
    lr_callback,
    WeightDecayCallback(config["WD_RATIO"]),
    tensorboard_callback,
]

# Actual Training
history = model.fit(
    train.batch(20),
    epochs=config["N_EPOCHS"],
    validation_data=validation.batch(config["BATCH_SIZE_VAL"]),
    callbacks=training_callbacks,
    verbose=VERBOSE,
)

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 FRAMES (InputLayer)            [(None, 38, 454)]    0           []                               
                                                                                                  
 tf.slice (TFOpLambda)          (None, 38, 320)      0           ['FRAMES[0][0]']                 
                                                                                                  
 tf.slice_1 (TFOpLambda)        (None, 38, 42)       0           ['FRAMES[0][0]']                 
                                                                                                  
 tf.slice_3 (TFOpLambda)        (None, 38, 42)       0           ['FRAMES[0][0]']                 
                                                                                              

2023-04-15 19:23:29.515256: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_2' with dtype int32 and shape [100]
	 [[{{node Placeholder/_2}}]]




2023-04-15 19:23:30.350806: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_2' with dtype int32 and shape [100]
	 [[{{node Placeholder/_2}}]]



Epoch 2: LearningRateScheduler setting learning rate to 0.0009997532801828658.
learning rate: 1.00e-03, weight decay: 5.00e-05
Epoch 2/100

Epoch 3: LearningRateScheduler setting learning rate to 0.0009990133642141358.
learning rate: 9.99e-04, weight decay: 5.00e-05
Epoch 3/100

Epoch 4: LearningRateScheduler setting learning rate to 0.00099778098230154.
learning rate: 9.98e-04, weight decay: 4.99e-05
Epoch 4/100

Epoch 5: LearningRateScheduler setting learning rate to 0.000996057350657239.
learning rate: 9.96e-04, weight decay: 4.98e-05
Epoch 5/100

Epoch 6: LearningRateScheduler setting learning rate to 0.0009938441702975688.
learning rate: 9.94e-04, weight decay: 4.97e-05
Epoch 6/100

Epoch 7: LearningRateScheduler setting learning rate to 0.0009911436253643444.
learning rate: 9.91e-04, weight decay: 4.96e-05
Epoch 7/100

Epoch 8: LearningRateScheduler setting learning rate to 0.0009879583809693738.
learning rate: 9.88e-04, weight decay: 4.94e-05
Epoch 8/100

Epoch 9: LearningRateS

KeyboardInterrupt: 

In [None]:
# Save Model Weights
# NOTE(review): `version`, `s3_client` and `AWS_S3_BUCKET` are not assigned by
# any active cell in this notebook (the S3 cells are commented out) — define
# them before running this cell.
weights_path = f'tf_models/{version}_model.h5'
# Make sure the output directory exists so saving works on a fresh checkout
os.makedirs(os.path.dirname(weights_path), exist_ok=True)
model.save_weights(weights_path)

# The same relative path is used as the S3 object key
s3_client.upload_file(Filename=weights_path,
                  Bucket=AWS_S3_BUCKET,
                  Key=weights_path)

In [None]:
# Dictionaries to translate sign <-> ordinal encoded sign
# NOTE(review): `s3_client`, `AWS_S3_BUCKET` and `TRAIN_CSV_FILE` are only
# assigned in commented-out cells above — define them before running this cell.
# NOTE(review): this rebinds `train`, which the dataset-split cell also assigns
# (there it is a dataset partition); from here on `train` is the CSV DataFrame.
train_file = s3_client.get_object(Bucket=AWS_S3_BUCKET, Key=TRAIN_CSV_FILE)
train = pd.read_csv(train_file.get("Body"))

# Add ordinally Encoded Sign (assign number to each sign name)
train['sign_ord'] = train['sign'].astype('category').cat.codes

# sign name -> ordinal code, and the inverse mapping
SIGN2ORD = train[['sign', 'sign_ord']].set_index('sign').squeeze().to_dict()
ORD2SIGN = train[['sign_ord', 'sign']].set_index('sign_ord').squeeze().to_dict()

In [None]:
if USE_VAL:
    # Validation Predictions
    # The dict keys must match the names of the model's Input layers
    # ('FRAMES' and 'NON_EMPTY_FRAME_IDXS', as defined in get_model()).
    # NOTE(review): X_val and NON_EMPTY_FRAME_IDXS_VAL are not created by any
    # cell in this notebook — they must be defined before this cell runs.
    y_val_pred = model.predict({ 'FRAMES': X_val, 'NON_EMPTY_FRAME_IDXS': NON_EMPTY_FRAME_IDXS_VAL }, verbose=2).argmax(axis=1)
    # Label: one display name per class id, with spaces replaced by underscores
    labels = [ORD2SIGN.get(i).replace(' ', '_') for i in range(config["NUM_CLASSES"])]

# Landmark Attention Weights

In [None]:
# Landmark Weights
# NOTE(review): index 15 presumably selects the learnable landmark weights in
# the weight list of the layer named 'embedding' — confirm against layers.Embedding.
weights = scipy.special.softmax(model.get_layer('embedding').weights[15])
landmarks = ['lips_embedding', 'left_hand_embedding', 'right_hand_embedding', 'pose_embedding']

# Print the softmax-normalised weight of each landmark embedding as a percentage
for lm, w in zip(landmarks, weights):
    print(f'{lm} weight: {(w*100):.1f}%')

# Classification Report

In [None]:
def print_classification_report():
    """Display a per-sign classification report for the validation predictions.

    Reads the notebook globals ``y_val``, ``y_val_pred``, ``labels``,
    ``SIGN2ORD`` and ``config``. The per-class rows are sorted by F1-score
    (best first) and the last three rows of the report are appended unsorted.
    Sets pandas' ``display.max_rows`` option to 999 so the table is shown in full.
    """
    # Classification report for all signs
    report_dict = sklearn.metrics.classification_report(
            y_val,
            y_val_pred,
            target_names=labels,
            output_dict=True,
        )
    # Round Data for better readability
    report = pd.DataFrame(report_dict).T
    report = report.round(2)
    # int64 rather than uint16: the aggregate rows carry the total number of
    # samples, which would silently wrap around above 65535.
    report = report.astype({
            'support': np.int64,
        })
    # Add signs; rows whose index is not a known sign get -1
    report['sign'] = [e if e in SIGN2ORD else -1 for e in report.index]
    report['sign_ord'] = report['sign'].apply(SIGN2ORD.get).fillna(-1).astype(np.int16)
    # Sort on F1-score
    report = pd.concat((
        report.head(config["NUM_CLASSES"]).sort_values('f1-score', ascending=False),
        report.tail(3),
    ))

    pd.options.display.max_rows = 999
    display(report)

In [None]:
# Only show the report when validation is enabled
if USE_VAL:
    print_classification_report()

# Training History

In [None]:
def plot_history_metric(metric, f_best=np.argmax, ylim=None, yscale=None, yticks=None):
    """Plot one metric from the global training ``history`` over the epochs.

    Args:
        metric: key in ``history.history`` (e.g. ``'loss'`` or ``'acc'``).
        f_best: function returning the index of the best value
            (``np.argmax`` for accuracies, ``np.argmin`` for losses).
        ylim: optional y-axis limits.
        yscale: optional y-axis scale passed to ``plt.yscale``.
        yticks: optional y tick positions.

    The validation curve is drawn as well when ``history.history`` contains
    ``'val_<metric>'``. The best epoch of each curve is marked.
    """
    plt.figure(figsize=(20, 10))
    
    values = history.history[metric]
    # Keep the epoch count local: writing it into config["N_EPOCHS"] would
    # change the configuration seen by every cell that runs afterwards.
    n_epochs = len(values)
    val_key = f'val_{metric}'
    has_val = val_key in history.history
    # Epoch Ticks: every epoch up to 20 epochs, otherwise 1, 5, 10, 15, ...
    if n_epochs <= 20:
        x = np.arange(1, n_epochs + 1)
    else:
        x = [1, 5] + [10 + 5 * idx for idx in range((n_epochs - 10) // 5 + 1)]

    x_ticks = np.arange(1, n_epochs + 1)

    # Validation
    if has_val:
        val_values = history.history[val_key]
        val_best = f_best(val_values)
        plt.plot(x_ticks, val_values, label=f'val')

    # Training curve and its best epoch
    plt.plot(x_ticks, values, label=f'train')
    train_best = f_best(values)
    plt.scatter(train_best + 1, values[train_best], color='red', s=75, marker='o', label=f'train_best')
    if has_val:
        plt.scatter(val_best + 1, val_values[val_best], color='purple', s=75, marker='o', label=f'val_best')

    plt.title(f'Model {metric}', fontsize=24, pad=10)
    plt.ylabel(metric, fontsize=20, labelpad=10)

    if ylim:
        plt.ylim(ylim)

    if yscale is not None:
        plt.yscale(yscale)
        
    if yticks is not None:
        plt.yticks(yticks, fontsize=16)

    plt.xlabel('epoch', fontsize=20, labelpad=10)        
    plt.tick_params(axis='x', labelsize=8)
    plt.xticks(x, fontsize=16) # set tick step to 1 and let x axis start at 1
    plt.yticks(fontsize=16)
    
    plt.legend(prop={'size': 10})
    plt.grid()
    plt.show()

In [None]:
# Loss: lower is better, so the best epoch is the argmin
plot_history_metric('loss', f_best=np.argmin)

In [None]:
# Top-1 accuracy on a fixed 0..1 axis
plot_history_metric('acc', ylim=[0,1], yticks=np.arange(0.0, 1.1, 0.1))

In [None]:
# Top-5 accuracy on a fixed 0..1 axis
plot_history_metric('top_5_acc', ylim=[0,1], yticks=np.arange(0.0, 1.1, 0.1))

In [None]:
# Top-10 accuracy on a fixed 0..1 axis
plot_history_metric('top_10_acc', ylim=[0,1], yticks=np.arange(0.0, 1.1, 0.1))

# Submission

Submission code loosely based on [this notebook](https://www.kaggle.com/code/dschettler8845/gislr-learn-eda-baseline#baseline) by [Darien Schettler](https://www.kaggle.com/dschettler8845).

In [None]:
def load_relevant_data_subset(pq_path):
    """Load the x/y landmark columns of one parquet file from S3.

    Args:
        pq_path: path of the parquet file; its first 14 characters are dropped
            before it is used as the S3 key.

    Returns:
        float32 array of shape ``(n_frames, config["N_ROWS"], 2)``.
    """
    data_columns = ['x', 'y']
    # Rows per frame. The previously referenced ROWS_PER_FRAME is not defined
    # in this notebook; config["N_ROWS"] is the per-frame row count that the
    # TFLite model's input signature expects for this function's output.
    rows_per_frame = config["N_ROWS"]
    # NOTE(review): pq_path[14:] strips a fixed-length prefix — confirm it
    # matches the path format stored in the training CSV.
    # NOTE(review): `s3_client` and `AWS_S3_BUCKET` are only assigned in
    # commented-out cells — define them before calling this function.
    data = pd_read_s3_parquet(pq_path[14:], AWS_S3_BUCKET, s3_client, columns=data_columns)
    n_frames = len(data) // rows_per_frame
    data = data.values.reshape(n_frames, rows_per_frame, len(data_columns))
    return data.astype(np.float32)

In [None]:
# Preprocessing layer applied to the raw frames inside TFLiteModel before the model is called.
# NOTE(review): PreprocessLayer, HAND_IDXS0 and LANDMARK_IDXS0 are not defined
# or imported by any cell in this notebook — confirm where they come from
# (possibly the local `layers` module).
preprocess_layer = PreprocessLayer(config["N_ROWS"], config["N_DIMS"], HAND_IDXS0, LANDMARK_IDXS0, config["INPUT_SIZE"])

In [None]:
# TFLite model for submission
class TFLiteModel(tf.Module):
    """Bundle preprocessing and the trained model into one exportable module.

    Calling the module on the raw landmarks of one sample runs the notebook's
    ``preprocess_layer``, adds a batch axis, calls the Keras model and returns
    ``{'outputs': <model output without the batch axis>}``.

    Args:
        model: the Keras model used for prediction.
    """
    def __init__(self, model):
        super(TFLiteModel, self).__init__()

        # Store the feature generation and main models on the instance.
        # (Assigning to a local named `preprocess_layer` here would shadow the
        # global and raise UnboundLocalError.)
        self.preprocess_layer = preprocess_layer
        self.model = model
    
    @tf.function(input_signature=[tf.TensorSpec(shape=[None, config["N_ROWS"], config["N_DIMS"]], dtype=tf.float32, name='inputs')])
    def __call__(self, inputs):
        # Preprocess Data
        x, non_empty_frame_idxs = self.preprocess_layer(inputs)
        # Add Batch Dimension
        x = tf.expand_dims(x, axis=0)
        non_empty_frame_idxs = tf.expand_dims(non_empty_frame_idxs, axis=0)
        # Make Prediction; keys match the names of the model's Input layers
        outputs = self.model({ 'FRAMES': x, 'NON_EMPTY_FRAME_IDXS': non_empty_frame_idxs })
        # Squeeze away the batch axis added above
        outputs = tf.squeeze(outputs, axis=0)

        # Return a dictionary with the output tensor
        return {'outputs': outputs}

# Wrap the trained model in the TFLiteModel module used for conversion below
tflite_keras_model = TFLiteModel(model)

In [None]:
# Sanity Check
# Use one index for both the sample that is predicted and the label it is
# compared with (the label was previously read from row 0 while row 5 was predicted).
DEMO_IDX = 5
demo_raw_data = load_relevant_data_subset(train['path'].values[DEMO_IDX])
print(f'demo_raw_data shape: {demo_raw_data.shape}, dtype: {demo_raw_data.dtype}')
demo_output = tflite_keras_model(demo_raw_data)["outputs"]
print(f'demo_output shape: {demo_output.shape}, dtype: {demo_output.dtype}')
demo_prediction = demo_output.numpy().argmax()
print(f'demo_prediction: {demo_prediction}, correct: {train.iloc[DEMO_IDX]["sign_ord"]}')

In [None]:
# Create Model Converter
keras_model_converter = tf.lite.TFLiteConverter.from_keras_model(tflite_keras_model)
# Convert Model
tflite_model = keras_model_converter.convert()
# Write Model
# NOTE(review): `version` is not assigned by any cell in this notebook — define it before running.
tflite_path = f'tflite_models/{version}_model.tflite'
# Make sure the output directory exists so writing works on a fresh checkout
os.makedirs(os.path.dirname(tflite_path), exist_ok=True)
with open(tflite_path, 'wb') as f:
    f.write(tflite_model)
    
# Zip Model
# !zip submission.zip /kaggle/working/model.tflite

In [None]:
# Verify TFLite model can be loaded and used for prediction
!pip install tflite-runtime
import tflite_runtime.interpreter as tflite

interpreter = tflite.Interpreter("tflite_models/model.tflite")
found_signatures = list(interpreter.get_signature_list().keys())
prediction_fn = interpreter.get_signature_runner("serving_default")

output = prediction_fn(inputs=demo_raw_data)
sign = output['outputs'].argmax()

print("PRED : ", ORD2SIGN.get(sign), f'[{sign}]')
print("TRUE : ", train.sign.values[0], f'[{train.sign_ord.values[0]}]')