In [1]:
import os
import math
import pandas as pd
import numpy as np
#Load parquet data into dataset_parquet for training.
import tensorflow as tf
from tensorflow import keras
from keras.regularizers import l2
from glob import glob
from tqdm import tqdm
from collections import Counter
import random
import time
import wandb

In [50]:
class CONFIG:
    """Notebook-wide settings shared by the dataset loaders and the training helper."""

    # Dataset root, relative to the notebook.
    # Kaggle alternative: os.path.join("/", "kaggle", "input", "asl-signs")
    root = os.path.join("..", "data", "ASL-ds")

    # Default for the loaders' `data_limit` argument (a negative limit keeps everything)
    DATA_LIMIT = 600
    # Default `train_size` fraction handed to train_test_split by the loaders
    TRAIN_VAL_SPLIT = 0.9

    BATCH_SIZE = 8
    # Number of frames every sample is cropped or zero-padded to
    VIDEO_LENGTH = 25

    # Passed to wandb.init as the project name
    WANDB_RUN = "mediapipe-asl-dataset"


In [34]:
# Row indices of the landmarks of interest inside one frame of the original data.
LIPS_IDXS0 = np.array(
    [61, 185, 40, 39, 37, 0, 267, 269, 270, 409]
    + [291, 146, 91, 181, 84, 17, 314, 405, 321, 375]
    + [78, 191, 80, 81, 82, 13, 312, 311, 310, 415]
    + [95, 88, 178, 87, 14, 317, 402, 318, 324, 308]
)

# NOTE(review): the last four entries of each eye list (469-472 and 474-477)
# fall inside the 468..488 range used for LEFT_HAND_IDXS0 below, so those rows
# are selected twice in `all_selection` -- confirm these are the intended rows.
EYE_LEFT = np.array(
    [33, 7, 246, 161, 160, 159, 158, 157, 173, 133]
    + [155, 154, 153, 145, 144, 163, 471, 470, 469, 472]
)
EYE_RIGHT = np.array(
    [362, 398, 384, 385, 386, 387, 388, 466, 263, 249]
    + [390, 373, 374, 380, 381, 382, 476, 475, 474, 477]
)

# Hands are contiguous runs of 21 rows; the pose subsets take every second row.
LEFT_HAND_IDXS0 = np.arange(468, 468 + 21)
RIGHT_HAND_IDXS0 = np.arange(522, 522 + 21)
LEFT_POSE_IDXS0 = np.arange(502, 511, 2)
RIGHT_POSE_IDXS0 = np.arange(503, 512, 2)

# Report the size of every group
print("LIPS : ", LIPS_IDXS0.size)
print("EYE_LEFT : ", EYE_LEFT.size)
print("EYE_RIGHT : ", EYE_RIGHT.size)
print("LEFT_HAND : ", LEFT_HAND_IDXS0.size)
print("RIGHT_HAND : ", RIGHT_HAND_IDXS0.size)
print("LEFT_POSE : ", LEFT_POSE_IDXS0.size)
print("RIGHT_POSE : ", RIGHT_POSE_IDXS0.size)

# Single selection vector: lips, eyes, hands, then pose subsets
all_selection = np.concatenate((
    LIPS_IDXS0,
    EYE_LEFT,
    EYE_RIGHT,
    LEFT_HAND_IDXS0,
    RIGHT_HAND_IDXS0,
    LEFT_POSE_IDXS0,
    RIGHT_POSE_IDXS0,
))
print(all_selection.size)

LIPS :  40
EYE_LEFT :  20
EYE_RIGHT :  20
LEFT_HAND :  21
RIGHT_HAND :  21
LEFT_POSE :  5
RIGHT_POSE :  5
132


In [5]:
# This cell parses a landmark parquet file and reorders each frame's rows to pose, face, left hand, right hand.
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import plotly.graph_objs as go
import plotly.io as pio

# Placeholder; nothing in the visible cells assigns or reads it afterwards.
ids = None

# Base sort weight per landmark group. process_parquet subtracts the keypoint
# number from the base and sorts descending, which yields the order
# pose, face, left_hand, right_hand with keypoints ascending inside each group.
order_global = dict(pose=10000, face=1000, left_hand=100, right_hand=10)

def visualize_keypoints(frames : np.ndarray, point_size : int):
    """Scatter-plot the x/y landmark positions of one or more frames with plotly.

    Args:
        frames: Either one flattened frame (1-D, values laid out as
            x, y, z per landmark) or a 2-D array holding one such frame per row.
        point_size: Marker size applied to every landmark.

    One figure is built, laid out and shown per frame. Previously the layout
    and ``show()`` calls ran once after the loop, so only the last frame was
    displayed and an empty ``frames`` array raised ``NameError``.
    """
    # Promote a single flattened frame to a batch of one
    if len(frames.shape) == 1:
        frames = np.array([frames])

    for frame in frames:
        frame = frame.reshape(-1, 3)  # one row per landmark: x, y, z
        sizes = point_size * np.ones(frame.shape[0])

        # y is mirrored inside the fixed 0..2.5 axis range, so larger y values
        # are drawn lower on the canvas
        fig = go.Figure(data=go.Scatter(x=frame[:, 0], y=2.5 - frame[:, 1], mode='markers',
                                        marker=dict(
                                            size=sizes
                                            )))

        # Customize the layout
        fig.update_layout(title='visualization of human keypoints',
                          xaxis_title='',
                          yaxis_title='',
                          width=1000,
                          height=1600)

        fig.update_xaxes(range=[-0.2, 1.4])  # fixed x-axis range
        fig.update_yaxes(range=[0, 2.5])     # fixed y-axis range (matches the mirroring above)

        # Show the plot for this frame
        fig.show()


def process_parquet(ds, idxes = None):
    """Convert a landmark table into one flattened coordinate row per frame.

    Args:
        ds: DataFrame with at least the columns ``frame``, ``row_id``, ``x``,
            ``y`` and ``z``. Every ``row_id`` must split on ``-`` into exactly
            three pieces ``<frame>-<part>-<keypoint>`` where ``part`` is a key
            of ``order_global`` and ``keypoint`` parses as an int.
        idxes: Optional row selection applied to each frame AFTER it has been
            reordered, i.e. the positions refer to the pose, face, left_hand,
            right_hand layout and not to the order of rows in ``ds``.

    Returns:
        ``np.ndarray`` with one row per distinct frame (ascending frame value),
        each row being the selected landmarks flattened as x, y, z.

    The input frame is never modified. The sort key is kept in a separate
    array instead of being written into the second column of a filtered slice,
    which depended on the column order and triggered chained-assignment
    warnings. ``groupby`` also avoids re-scanning the whole table per frame.
    """
    ret = []

    # sort=True makes groupby yield the frames in ascending order
    for _frame_id, frame_ds in ds.groupby("frame", sort=True):
        order = []
        for el in frame_ds["row_id"]:
            _frame, part, keypoint = el.split("-")
            order.append(order_global[part] - int(keypoint))

        # Largest key first; a stable sort keeps the input order on equal keys
        perm = np.argsort(-np.array(order), kind="stable")

        vals = frame_ds[["x", "y", "z"]].to_numpy()[perm]
        if idxes is not None:
            vals = vals[idxes]

        ret.append(vals.flatten())

    return np.array(ret)


def process_parquet2(ds, idxes = None, frame_size = 543):
    """Flatten a landmark table into one row per frame using fixed-size row chunks.

    The table is cut into consecutive chunks of ``frame_size`` rows in the
    order they appear in ``ds``; no sorting is performed.

    Args:
        ds: DataFrame with ``x``, ``y`` and ``z`` columns whose length is a
            multiple of ``frame_size``.
        idxes: Optional row positions (relative to the start of each frame)
            to keep, in the given order.
        frame_size: Number of rows that make up one frame (default 543).

    Returns:
        ``np.ndarray`` of shape ``(n_frames, n_selected_rows * 3)`` with the
        coordinates laid out as x, y, z per landmark.

    Raises:
        AssertionError: if ``len(ds)`` is not a multiple of ``frame_size``.

    Fixes the selection bug where ``idxes`` was applied to the whole table
    (``ds.iloc[idxes]``) instead of the current chunk, which made every output
    row a copy of the first frame's selection.
    """
    it = len(ds) // frame_size
    assert it * frame_size == len(ds), "table length is not a multiple of frame_size"

    # (n_frames, frame_size, 3) view of the coordinates
    coords = ds[["x", "y", "z"]].to_numpy().reshape(it, frame_size, 3)

    if idxes is not None:
        # select the same landmark positions inside every frame
        coords = coords[:, idxes]

    return coords.reshape(it, -1)

In [103]:
# Parse one sample file with the row_id-based parser and plot its first frame.
# The file name is resolved against the notebook's working directory.
# NOTE(review): `f` is rebound as a boolean flag by a later cell.
f = process_parquet(pd.read_parquet("79631423.parquet"))
visualize_keypoints(f[0], point_size=10)

In [104]:
# Parse the same sample file with the fixed-chunk parser, keeping only the
# landmark rows listed in `all_selection`, and plot the first frame.
f2 = process_parquet2(pd.read_parquet("79631423.parquet"), idxes=all_selection)
visualize_keypoints(f2[0], point_size=10)

In [52]:
# Load a single landmark file into `df` for ad-hoc inspection.
# NOTE(review): the path repeats the components of CONFIG.root by hand.
df = pd.read_parquet( os.path.join("..", "data","ASL-ds", "train_landmark_files", "16069", "695046.parquet"))

In [7]:
from sklearn.model_selection import train_test_split

#custom class to load data from Parquet files for training ML models.
class ParquetDataset(keras.utils.Sequence):
    """Keras ``Sequence`` that reads landmark parquet files on demand and serves batches.

    The CSV at ``csv_file`` must provide ``path`` and ``sign`` columns; each
    path is joined onto ``dataset_folder``. Samples are optionally ordered by
    label popularity, truncated to ``data_limit`` and divided with
    ``train_test_split(random_state=42)``, so instances created with the same
    arguments derive the same train/validation partition.

    Args:
        dataset_folder: Root folder the CSV paths are relative to.
        csv_file: Path of the CSV listing ``path`` and ``sign`` per sample.
        batch_size: Number of samples per batch returned by ``__getitem__``.
        data_limit: Keep only the first ``data_limit`` samples (after the
            optional popularity sort); a negative value keeps everything.
        check_if_file_exists: When true, paths missing on disk are skipped and
            recorded in ``not_exists``.
        preprocessing_func: Optional callable ``(DataFrame, frame_length)``
            applied to every loaded file.
        frame_length: Forwarded to ``preprocessing_func``.
        split: ``"train"`` or ``"val"`` (case-insensitive).
        train_val_split: ``train_size`` passed to ``train_test_split``.
        sort_by_counts: Order samples by descending label frequency first.
        **kwargs: Forwarded to ``keras.utils.Sequence``.

    Raises:
        Exception: if ``split`` is neither ``"train"`` nor ``"val"``. The check
            now runs before the CSV is read and the paths are scanned.
    """

    def __init__(self, dataset_folder, csv_file : str, batch_size=CONFIG.BATCH_SIZE,
                 data_limit : int = CONFIG.DATA_LIMIT, check_if_file_exists = True,
                 preprocessing_func=None, frame_length : int = CONFIG.VIDEO_LENGTH,
                 split : str = "train", train_val_split : float = CONFIG.TRAIN_VAL_SPLIT,
                 sort_by_counts : bool = True, **kwargs
                ):
        super().__init__(**kwargs)

        # Fail fast: reject a bad split name before the slow path scan below
        split = split.lower()
        if split not in ("train", "val"):
            raise Exception("please specify split to be either train or val")

        self.csv_path = csv_file
        self.root_folder = dataset_folder
        self.batch_size = batch_size
        # optional pre-processing function applied to every parquet file
        self.preprocessing_func = preprocessing_func
        self.frame_length = frame_length

        self.csv_data = pd.read_csv(self.csv_path)

        self.all_files = []   # (full path, label) pairs that will be used
        self.not_exists = []  # full paths listed in the CSV but missing on disk

        for path, label in tqdm(list(zip(self.csv_data["path"], self.csv_data["sign"]))):
            prop_path = os.path.join(self.root_folder, path)

            if check_if_file_exists and not os.path.exists(prop_path):
                self.not_exists.append(prop_path)
            else:
                self.all_files.append((prop_path, label))

        self.all_files = np.array(self.all_files)
        self.unique_labels = np.unique(self.all_files[:, 1])
        self.label_2_id = {key: i for i, key in enumerate(self.unique_labels)}

        # sort the samples by label popularity (most frequent label first);
        # the label id is added so that equally frequent labels stay grouped
        if sort_by_counts:
            cnt = Counter(self.all_files[:, 1])
            vals = []

            for i, row in enumerate(self.all_files):
                vals.append((int(1e6 * cnt[row[1]] + self.label_2_id[row[1]]), i))

            vals = np.array(sorted(vals)[::-1])
            self.all_files = self.all_files[vals[:, 1]]

        if data_limit < 0:
            train_ds, val_ds = train_test_split(self.all_files, train_size=train_val_split, random_state=42)
        else:
            train_ds, val_ds = train_test_split(self.all_files[:data_limit], train_size=train_val_split, random_state=42)
            # the label space shrinks to the labels that survive the limit
            self.unique_labels = np.unique(self.all_files[:data_limit, 1])
            self.label_2_id = {key: i for i, key in enumerate(self.unique_labels)}

        self.dataset = train_ds if split == "train" else val_ds

        np.random.shuffle(self.dataset)

    def __len__(self):
        """Return the number of batches per epoch (the last one may be partial)."""
        return math.ceil(len(self.dataset) / self.batch_size)

    def get_single(self, idx):
        """Load sample ``idx`` and return ``(features, one_hot_label)``.

        ``features`` is the output of ``preprocessing_func`` when one was
        given, otherwise the raw DataFrame read from the parquet file.
        """
        path, label = self.dataset[idx]

        df = pd.read_parquet(path)

        # Apply preprocessing if specified
        if self.preprocessing_func:
            df = self.preprocessing_func(df, self.frame_length)

        one_hot_encoded_label = np.zeros(len(self.unique_labels))
        one_hot_encoded_label[self.label_2_id[label]] = 1

        return df, one_hot_encoded_label

    def __getitem__(self, idx):
        """Return batch ``idx`` as ``(X, Y)`` arrays stacked over the samples."""
        X, Y = [], []

        low = idx * self.batch_size
        high = min(low + self.batch_size, len(self.dataset))

        for i in range(low, high):
            x, y = self.get_single(i)
            X.append(x)
            Y.append(y)

        return np.array(X), np.array(Y)

    def on_epoch_end(self):
        """Shuffle the sample order in place for the next epoch."""
        np.random.shuffle(self.dataset)

def my_preprocessing_func(df, frame_length):
    """Turn a landmark DataFrame into exactly ``frame_length`` rows of features.

    The table is parsed with ``process_parquet``. Clips with fewer frames than
    ``frame_length`` are zero-padded at the end; otherwise a window of
    ``frame_length`` consecutive frames is taken at a uniformly random start.
    NaNs are replaced via ``np.nan_to_num`` in both cases.
    """
    frames_mediapipe = process_parquet(df)
    current_length, num_features = frames_mediapipe.shape

    if current_length < frame_length:
        # too short: append all-zero frames until the target length is reached
        padding = np.zeros((frame_length - current_length, num_features))
        return np.concatenate([np.nan_to_num(frames_mediapipe), padding], axis=0)

    # long enough: random crop
    # TODO: a better than uniform value ? Could place gaussian in the middle
    random_start = random.randint(0, current_length - frame_length)
    window = frames_mediapipe[random_start : random_start + frame_length]
    return np.nan_to_num(window)
    
# Usage example
parquet_folder_path = CONFIG.root

# Both splits are built from the same CSV and limit so that they partition the
# same 1000 samples. Note that data_limit=1000 overrides CONFIG.DATA_LIMIT.
train_dataset_parquet = ParquetDataset(
    parquet_folder_path,
    csv_file=os.path.join(CONFIG.root, "train.csv"),
    batch_size=CONFIG.BATCH_SIZE,
    data_limit=1000,
    preprocessing_func=my_preprocessing_func,
    check_if_file_exists=True,
    split="train",
)

val_dataset_parquet = ParquetDataset(
    parquet_folder_path,
    csv_file=os.path.join(CONFIG.root, "train.csv"),
    batch_size=CONFIG.BATCH_SIZE,
    data_limit=1000,
    preprocessing_func=my_preprocessing_func,
    check_if_file_exists=True,
    split="val",
)

# len() of these loaders is the number of batches
print(f"cardinality of train : {len(train_dataset_parquet)}, cardinality of validation : {len(val_dataset_parquet)}")

100%|██████████| 94477/94477 [00:02<00:00, 40931.96it/s]
100%|██████████| 94477/94477 [00:02<00:00, 41566.14it/s]


cardinality of train : 113, cardinality of validation : 13


In [70]:
from sklearn.model_selection import train_test_split

#custom class to load data from Parquet files for training ML models.
class ParquetDatasetCached(keras.utils.Sequence):
    """Keras ``Sequence`` that parses every parquet file once and serves samples from memory.

    Sample selection mirrors ``ParquetDataset`` (CSV with ``path``/``sign``
    columns, optional popularity sort, ``data_limit`` truncation and a
    ``train_test_split(random_state=42)`` partition). Unlike that loader, each
    item is ONE sample, not a batch: ``batch_size`` is stored but not used for
    indexing, and ``preprocessing_func`` receives only the cached frame array.

    Args:
        dataset_folder: Root folder the CSV paths are relative to.
        csv_file: Path of the CSV listing ``path`` and ``sign`` per sample.
        batch_size: Stored on the instance; items are not batched here.
        data_limit: Keep only the first ``data_limit`` samples (after the
            optional popularity sort); a negative value keeps everything.
        check_if_file_exists: When true, paths missing on disk are skipped and
            recorded in ``not_exists``.
        preprocessing_func: Optional callable taking the cached frame array;
            when ``None`` the cached array is returned unchanged.
        frame_length: Stored on the instance (not passed to
            ``preprocessing_func`` by this class).
        split: ``"train"`` or ``"val"`` (case-insensitive).
        train_val_split: ``train_size`` passed to ``train_test_split``.
        sort_by_counts: Order samples by descending label frequency first.
        **kwargs: Forwarded to ``keras.utils.Sequence``.

    Raises:
        Exception: if ``split`` is neither ``"train"`` nor ``"val"``.
    """

    def __init__(self, dataset_folder, csv_file : str, batch_size=CONFIG.BATCH_SIZE,
                 data_limit : int = CONFIG.DATA_LIMIT, check_if_file_exists = True,
                 preprocessing_func=None, frame_length : int = CONFIG.VIDEO_LENGTH,
                 split : str = "train", train_val_split : float = CONFIG.TRAIN_VAL_SPLIT,
                 sort_by_counts : bool = True, **kwargs
                ):
        super().__init__(**kwargs)

        # Fail fast: reject a bad split name before any file is touched
        split = split.lower()
        if split not in ("train", "val"):
            raise Exception("please specify split to be either train or val")

        self.csv_path = csv_file
        self.root_folder = dataset_folder
        self.batch_size = batch_size
        # optional pre-processing function applied to every cached sample
        self.preprocessing_func = preprocessing_func
        self.frame_length = frame_length

        self.csv_data = pd.read_csv(self.csv_path)

        self.all_files = []   # (full path, label) pairs that will be used
        self.not_exists = []  # full paths listed in the CSV but missing on disk

        for path, label in tqdm(list(zip(self.csv_data["path"], self.csv_data["sign"]))):
            prop_path = os.path.join(self.root_folder, path)

            if check_if_file_exists and not os.path.exists(prop_path):
                self.not_exists.append(prop_path)
            else:
                self.all_files.append((prop_path, label))

        self.all_files = np.array(self.all_files)
        self.unique_labels = np.unique(self.all_files[:, 1])
        self.label_2_id = {key: i for i, key in enumerate(self.unique_labels)}

        # sort the samples by label popularity (most frequent label first);
        # the label id is added so that equally frequent labels stay grouped
        if sort_by_counts:
            cnt = Counter(self.all_files[:, 1])
            vals = []

            for i, row in enumerate(self.all_files):
                vals.append((int(1e6 * cnt[row[1]] + self.label_2_id[row[1]]), i))

            vals = np.array(sorted(vals)[::-1])
            self.all_files = self.all_files[vals[:, 1]]

        if data_limit < 0:
            train_ds, val_ds = train_test_split(self.all_files, train_size=train_val_split, random_state=42)
        else:
            train_ds, val_ds = train_test_split(self.all_files[:data_limit], train_size=train_val_split, random_state=42)
            # the label space shrinks to the labels that survive the limit
            self.unique_labels = np.unique(self.all_files[:data_limit, 1])
            self.label_2_id = {key: i for i, key in enumerate(self.unique_labels)}

        self.dataset = train_ds if split == "train" else val_ds

        self.cache_data()

    def cache_data(self):
        """Read every file of the split once and keep frames and one-hot labels in memory.

        ``cached_X[i]`` is the ``process_parquet2`` output for ``dataset[i]``
        and ``cached_Y[i]`` its one-hot label.
        """
        self.cached_X, self.cached_Y = [], []

        pb = tqdm(range(len(self.dataset)), desc="Cacheing")

        for i in pb:
            path, label = self.dataset[i]
            df = pd.read_parquet(path)

            one_hot_encoded_label = np.zeros(len(self.unique_labels))
            one_hot_encoded_label[self.label_2_id[label]] = 1

            self.cached_X.append(process_parquet2(df))
            self.cached_Y.append(one_hot_encoded_label)

    def __len__(self):
        """Return the number of samples (items are single samples, not batches)."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return ``(features, one_hot_label)`` for sample ``idx``."""
        frames = self.cached_X[idx]
        # preprocessing_func defaults to None; calling it unguarded would raise
        if self.preprocessing_func is not None:
            frames = self.preprocessing_func(frames)
        return frames, self.cached_Y[idx]

    def on_epoch_end(self):
        """Reorder the samples for the next epoch.

        Items are served from the cache, so the same permutation is applied to
        ``dataset``, ``cached_X`` and ``cached_Y``. Shuffling ``dataset`` alone
        would not change what ``__getitem__`` returns and would break the
        index correspondence between the three containers.
        """
        perm = np.random.permutation(len(self.dataset))
        self.dataset = self.dataset[perm]
        self.cached_X = [self.cached_X[i] for i in perm]
        self.cached_Y = [self.cached_Y[i] for i in perm]


def preprocess(frames):
    """Crop or zero-pad a ``(n_frames, n_features)`` array to ``CONFIG.VIDEO_LENGTH`` rows.

    Shorter clips get all-zero rows appended; clips that are long enough are
    cut to a window starting at a uniformly random frame. NaNs are replaced
    via ``np.nan_to_num`` in both cases.
    """
    target_length = CONFIG.VIDEO_LENGTH
    current_length, num_features = frames.shape

    if current_length < target_length:
        # too short: append all-zero frames until the target length is reached
        padding = np.zeros((CONFIG.VIDEO_LENGTH - current_length, num_features))
        return np.concatenate([np.nan_to_num(frames), padding], axis=0)

    # long enough: random crop
    # TODO: a better than uniform value ? Could place gaussian in the middle
    random_start = random.randint(0, current_length - CONFIG.VIDEO_LENGTH)
    window = frames[random_start : random_start + CONFIG.VIDEO_LENGTH]
    return np.nan_to_num(window)

    
# Usage example
parquet_folder_path = CONFIG.root

# Rebinds the loader names to the in-memory (cached) variant. Both splits use
# the same CSV and limit; data_limit=1000 overrides CONFIG.DATA_LIMIT.
train_dataset_parquet = ParquetDatasetCached(
    parquet_folder_path,
    csv_file=os.path.join(CONFIG.root, "train.csv"),
    batch_size=CONFIG.BATCH_SIZE,
    data_limit=1000,
    preprocessing_func=preprocess,
    check_if_file_exists=True,
    split="train",
)

val_dataset_parquet = ParquetDatasetCached(
    parquet_folder_path,
    csv_file=os.path.join(CONFIG.root, "train.csv"),
    batch_size=CONFIG.BATCH_SIZE,
    data_limit=1000,
    preprocessing_func=preprocess,
    check_if_file_exists=True,
    split="val",
)

# len() of the cached loaders is the number of samples, not batches
print(f"cardinality of train : {len(train_dataset_parquet)}, cardinality of validation : {len(val_dataset_parquet)}")

100%|██████████| 94477/94477 [00:02<00:00, 41181.96it/s]
Cacheing: 100%|██████████| 900/900 [00:26<00:00, 34.54it/s]
100%|██████████| 94477/94477 [00:02<00:00, 42228.25it/s]
Cacheing: 100%|██████████| 100/100 [00:02<00:00, 35.05it/s]

cardinality of train : 900, cardinality of validation : 100





In [87]:
# Probe the first item to learn the per-sample feature and label shapes that
# the tf.data pipeline below declares.
# Each indexing expression calls __getitem__ again (two separate loads).
X_shape = train_dataset_parquet[0][0].shape
Y_shape = train_dataset_parquet[0][1].shape
print(f"X_shape = {X_shape}, Y_shape = {Y_shape}")

X_shape = (25, 1629), Y_shape = (10,)


In [90]:
def dataset_train_generator():
    """Yield every ``(X, Y)`` item of ``train_dataset_parquet`` by index, in order."""
    total = len(train_dataset_parquet)  # evaluated once, when iteration starts
    position = 0
    while position < total:
        features, target = train_dataset_parquet[position]
        yield features, target
        position += 1
        
def dataset_val_generator():
    """Yield every ``(X, Y)`` item of ``val_dataset_parquet`` by index, in order."""
    total = len(val_dataset_parquet)  # evaluated once, when iteration starts
    position = 0
    while position < total:
        features, target = val_dataset_parquet[position]
        yield features, target
        position += 1


# Wrap the Python generators as tf.data pipelines.
# - `output_signature` replaces the deprecated `output_types`/`output_shapes`
#   pair and declares the same float32 tensors with the probed shapes.
# - `prefetch` comes last so that whole batches (not single samples) are
#   prepared ahead of the training step.
train_dataset = tf.data.Dataset.from_generator(
    dataset_train_generator,
    output_signature=(
        tf.TensorSpec(shape=X_shape, dtype=tf.float32),
        tf.TensorSpec(shape=Y_shape, dtype=tf.float32),
    ),
).batch(CONFIG.BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

val_dataset = tf.data.Dataset.from_generator(
    dataset_val_generator,
    output_signature=(
        tf.TensorSpec(shape=X_shape, dtype=tf.float32),
        tf.TensorSpec(shape=Y_shape, dtype=tf.float32),
    ),
).batch(CONFIG.BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

In [91]:
# Sanity pass over the training pipeline: time one full iteration, print the
# shapes of the first batch and stop early if any feature value is NaN.
start = time.time()
isnans =False

# `f` is a one-shot flag so that the shapes are printed for the first batch only
# (it rebinds the name `f` used by an earlier cell)
f = True
labels_batches = []  # label tensors of every visited batch
for el in tqdm(train_dataset):
    if f:
        print(el[0].shape, el[1].shape)
        f = False
    labels_batches.append(el[1])
        
    # el[0] holds the features of the batch
    isnans |= np.any(np.isnan(el[0]))
    if isnans:
        print("FOUND NAN!")
        break


print(f"Iterating through dataset took : {round( time.time() - start , 4)}s")

13it [00:00, 109.38it/s]

(8, 25, 1629) (8, 10)
Iterating through dataset took : 0.1245s





# Train experiment

In [92]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, LeakyReLU
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras import backend as K
from wandb.keras import WandbMetricsLogger
from keras.callbacks import Callback
import tensorflow as tf
import wandb


class CosineAnnealingLearningRateScheduler(Callback):
    """Set the optimizer learning rate at the start of every epoch on a cosine curve.

    For the 0-based epoch index ``e`` supplied by Keras the rate is

        lr = min_lr + 0.5 * (max_lr - min_lr) * (1 + cos(pi * (e % T_max) / T_max))

    so each cycle starts at ``max_lr`` and decays towards ``min_lr`` before
    jumping back to ``max_lr`` after ``T_max`` epochs.

    The schedule is driven by the ``epoch`` argument instead of a private
    counter that was incremented before use; with that counter the first epoch
    of every cycle never ran at ``max_lr``.

    Args:
        max_lr: Learning rate at the start of each cycle.
        min_lr: Lower bound the cosine curve decays towards.
        T_max: Number of epochs per cycle; must be positive.

    Raises:
        ValueError: if ``T_max`` is not positive.
    """

    def __init__(self, max_lr, min_lr, T_max):
        super(CosineAnnealingLearningRateScheduler, self).__init__()
        if T_max <= 0:
            raise ValueError("T_max must be a positive number of epochs")
        self.max_lr = max_lr  # Maximum learning rate (i.e., start learning rate)
        self.min_lr = min_lr  # Minimum learning rate
        self.T_max = T_max    # Specifies the number of epochs per cycle
        self.t = 0            # Epoch index most recently scheduled

    def on_epoch_begin(self, epoch, logs=None):
        """Compute the rate for ``epoch`` and write it into the optimizer."""
        self.t = epoch
        cos = np.cos(np.pi * (self.t % self.T_max) / self.T_max)
        lr = self.min_lr + 0.5 * (self.max_lr - self.min_lr) * (1 + cos)

        keras.backend.set_value(self.model.optimizer.learning_rate, lr)

def keras_train(model, filepath : str, max_lr = 1e-4, min_lr = 5e-5, T_max=50, epochs=100, run_name="",
                mediapipe_features = "all", USE_WANDB=True):
    """Compile ``model`` and fit it on the notebook-level ``train_dataset`` / ``val_dataset``.

    Args:
        model: Keras model to compile and train.
        filepath: Checkpoint destination; only the epoch with the best
            ``val_categorical_accuracy`` so far is saved.
        max_lr: Initial Adam learning rate and upper bound of the cosine schedule.
        min_lr: Lower bound of the cosine schedule.
        T_max: Length of one cosine cycle in epochs.
        epochs: Number of training epochs.
        run_name: Name of the wandb run.
        mediapipe_features: Free-form description logged to the wandb config.
        USE_WANDB: When true, a wandb run is opened and metrics are logged.

    Returns:
        The ``History`` object returned by ``model.fit``.

    The wandb config now records the ``min_lr`` actually used (it logged a
    hard-coded ``5e-5``). The run is closed in a ``finally`` block, so an
    interrupted or failed ``fit`` no longer leaves it open.
    """
    checkpoint = keras.callbacks.ModelCheckpoint(filepath,
                                                 monitor="val_categorical_accuracy",
                                                 verbose=0,
                                                 save_best_only=True,
                                                 mode="max",
                                                 save_freq="epoch")

    cosine_annealer = CosineAnnealingLearningRateScheduler(max_lr=max_lr,
                                                           min_lr=min_lr,
                                                           T_max=T_max)

    # Adam starts at max_lr; the cosine callback overwrites the rate every epoch.
    adam_optimizer = tf.keras.optimizers.Adam(learning_rate=max_lr, clipnorm=1.)

    model.compile(optimizer=adam_optimizer, loss='categorical_crossentropy', metrics=['categorical_accuracy'])

    callbacks = [checkpoint, cosine_annealer]

    if USE_WANDB:
        # NOTE(review): str(model) is the object's string form, not the layer
        # summary the label suggests -- confirm what should be logged here.
        wandb.init(project=CONFIG.WANDB_RUN,
                   name=run_name,
                   notes="Model summary : \n" + str(model),
                   config={"max_lr" : max_lr,
                           "min_lr" : min_lr,
                           "scheduler" : "cosineAnnealer",
                           "epochs" : epochs,
                           "T_max" : T_max,
                           "train_size" : len(train_dataset_parquet.dataset),
                           "val_size" : len(val_dataset_parquet.dataset),
                           "unique_classes" : len(train_dataset_parquet.unique_labels),
                           "video_length" : CONFIG.VIDEO_LENGTH,
                           "features" : mediapipe_features
                           })
        callbacks.append(WandbMetricsLogger())

    try:
        history = model.fit(train_dataset, epochs=epochs, validation_data = val_dataset, callbacks=callbacks)
    finally:
        # always close the run, also on KeyboardInterrupt or a training error
        if USE_WANDB:
            wandb.finish()

    return history

# Train simple LSTM

In [94]:
# Baseline: LSTM(128) -> LeakyReLU -> Dense(128) -> Dense(256) -> softmax over the labels.
# Input: CONFIG.VIDEO_LENGTH frames of 1629 features (543 landmark rows x 3 coordinates).
model = Sequential([
    LSTM(128, return_sequences=False, input_shape=(CONFIG.VIDEO_LENGTH, 1629),
         kernel_regularizer=l2(0.0001),
         activity_regularizer=l2(0.0001)),
    LeakyReLU(alpha=0.1),
    Dense(128, activation='relu'),
    Dense(256, activation='relu'),
    Dense(len(train_dataset_parquet.unique_labels), activation='softmax'),
])


keras_train(
    model,
    filepath=os.path.join("models", "LSTM1.tf"),
    run_name="LSTM128-Dense128-Dense256-allfeatures",
    USE_WANDB=False,
)

Epoch 1/100
     12/Unknown - 5s 57ms/step - loss: 2.3933 - categorical_accuracy: 0.0521INFO:tensorflow:Assets written to: models\LSTM1.tf\assets


INFO:tensorflow:Assets written to: models\LSTM1.tf\assets


Epoch 2/100


INFO:tensorflow:Assets written to: models\LSTM1.tf\assets


Epoch 3/100


INFO:tensorflow:Assets written to: models\LSTM1.tf\assets


Epoch 4/100


INFO:tensorflow:Assets written to: models\LSTM1.tf\assets


Epoch 5/100


INFO:tensorflow:Assets written to: models\LSTM1.tf\assets


Epoch 6/100


INFO:tensorflow:Assets written to: models\LSTM1.tf\assets


Epoch 7/100


INFO:tensorflow:Assets written to: models\LSTM1.tf\assets


Epoch 8/100


INFO:tensorflow:Assets written to: models\LSTM1.tf\assets


Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100


INFO:tensorflow:Assets written to: models\LSTM1.tf\assets


Epoch 13/100

KeyboardInterrupt: 

In [None]:
model = Sequential()
# Input: CONFIG.VIDEO_LENGTH frames of 1629 features (543 landmark rows x 3 coordinates)
model.add(LSTM(64, return_sequences=False, input_shape=(CONFIG.VIDEO_LENGTH, 1629),
               kernel_regularizer=l2(0.0001), 
               activity_regularizer=l2(0.0001)))
model.add(LeakyReLU(alpha=0.1))
model.add(Dense(128, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(len(train_dataset_parquet.unique_labels), activation='softmax'))


# The checkpoint path is named after the run, like the other experiment cells.
# It used to be "LSTM1.tf", which is also the LSTM128 baseline's path, so this
# run overwrote that checkpoint.
keras_train(model, filepath=os.path.join("models", "LSTM64-Dense128-Dense256-allfeatures.tf"),
            run_name="LSTM64-Dense128-Dense256-allfeatures")

In [None]:
# Wider recurrent layer: LSTM(256) -> LeakyReLU -> Dense(128) -> Dense(256) -> softmax.
# Input: CONFIG.VIDEO_LENGTH frames of 1629 features (543 landmark rows x 3 coordinates).
model = Sequential([
    LSTM(256, return_sequences=False, input_shape=(CONFIG.VIDEO_LENGTH, 1629),
         kernel_regularizer=l2(0.0001),
         activity_regularizer=l2(0.0001)),
    LeakyReLU(alpha=0.1),
    Dense(128, activation='relu'),
    Dense(256, activation='relu'),
    Dense(len(train_dataset_parquet.unique_labels), activation='softmax'),
])


keras_train(
    model,
    filepath=os.path.join("models", "LSTM256-Dense128-Dense256-allfeatures.tf"),
    run_name="LSTM256-Dense128-Dense256-allfeatures",
)

In [None]:
# Single hidden head: LSTM(128) -> LeakyReLU -> Dense(256) -> softmax.
# Input: CONFIG.VIDEO_LENGTH frames of 1629 features (543 landmark rows x 3 coordinates).
model = Sequential([
    LSTM(128, return_sequences=False, input_shape=(CONFIG.VIDEO_LENGTH, 1629),
         kernel_regularizer=l2(0.0001),
         activity_regularizer=l2(0.0001)),
    LeakyReLU(alpha=0.1),
    Dense(256, activation='relu'),
    Dense(len(train_dataset_parquet.unique_labels), activation='softmax'),
])


keras_train(
    model,
    filepath=os.path.join("models", "LSTM128-Dense256-allfeatures.tf"),
    run_name="LSTM128-Dense256-allfeatures",
)

In [None]:
# Single hidden head: LSTM(128) -> LeakyReLU -> Dense(128) -> softmax.
# Input: CONFIG.VIDEO_LENGTH frames of 1629 features (543 landmark rows x 3 coordinates).
model = Sequential([
    LSTM(128, return_sequences=False, input_shape=(CONFIG.VIDEO_LENGTH, 1629),
         kernel_regularizer=l2(0.0001),
         activity_regularizer=l2(0.0001)),
    LeakyReLU(alpha=0.1),
    Dense(128, activation='relu'),
    Dense(len(train_dataset_parquet.unique_labels), activation='softmax'),
])


keras_train(
    model,
    filepath=os.path.join("models", "LSTM128-Dense128-allfeatures.tf"),
    run_name="LSTM128-Dense128-allfeatures",
)

In [None]:
# ReLU-activated LSTM(128) with very light L2 penalties -> Dense(128) -> Dense(256) -> softmax.
# Input: CONFIG.VIDEO_LENGTH frames of 1629 features (543 landmark rows x 3 coordinates).
model = Sequential([
    LSTM(128, return_sequences=False, activation='relu', input_shape=(CONFIG.VIDEO_LENGTH, 1629),
         kernel_regularizer=l2(0.0000001),
         activity_regularizer=l2(0.0000001)),
    Dense(128, activation='relu'),
    Dense(256, activation='relu'),
    Dense(len(train_dataset_parquet.unique_labels), activation='softmax'),
])


keras_train(
    model,
    filepath=os.path.join("models", "LSTM128_l2-Dense128-Dense256-allfeatures.tf"),
    run_name="LSTM128_l2-Dense128-Dense256-allfeatures",
)

In [46]:
# Stronger regularisation: LSTM(64) with L2 0.005 -> LeakyReLU(0.2) -> Dense(128) -> Dense(256) -> softmax.
# Input: CONFIG.VIDEO_LENGTH frames of 1629 features (543 landmark rows x 3 coordinates).
model = Sequential([
    LSTM(64, return_sequences=False, input_shape=(CONFIG.VIDEO_LENGTH, 1629),
         kernel_regularizer=l2(0.005),
         activity_regularizer=l2(0.005)),
    LeakyReLU(alpha=0.2),
    Dense(128, activation='relu'),
    Dense(256, activation='relu'),
    Dense(len(train_dataset_parquet.unique_labels), activation='softmax'),
])


keras_train(
    model,
    filepath=os.path.join("models", "LSTM-L64-D128-D256-reg=0.005.tf"),
    run_name="LSTM64-Dense128-Dense256-allfeatures_bigger_reg",
)

VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch/categorical_accuracy,▁
epoch/epoch,▁
epoch/learning_rate,▁
epoch/loss,▁
epoch/val_categorical_accuracy,▁
epoch/val_loss,▁

0,1
epoch/categorical_accuracy,0.36889
epoch/epoch,0.0
epoch/learning_rate,0.0001
epoch/loss,2.82214
epoch/val_categorical_accuracy,0.45
epoch/val_loss,2.39018


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113070862160788, max=1.0…

Epoch 1/100


INFO:tensorflow:Assets written to: models/LSTM-L64-D128-D256-reg=0.005.tf/assets


Epoch 2/100


INFO:tensorflow:Assets written to: models/LSTM-L64-D128-D256-reg=0.005.tf/assets


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100


INFO:tensorflow:Assets written to: models/LSTM-L64-D128-D256-reg=0.005.tf/assets


Epoch 7/100


INFO:tensorflow:Assets written to: models/LSTM-L64-D128-D256-reg=0.005.tf/assets


Epoch 8/100
Epoch 9/100


INFO:tensorflow:Assets written to: models/LSTM-L64-D128-D256-reg=0.005.tf/assets


Epoch 10/100


INFO:tensorflow:Assets written to: models/LSTM-L64-D128-D256-reg=0.005.tf/assets


Epoch 11/100


INFO:tensorflow:Assets written to: models/LSTM-L64-D128-D256-reg=0.005.tf/assets


Epoch 12/100


INFO:tensorflow:Assets written to: models/LSTM-L64-D128-D256-reg=0.005.tf/assets


Epoch 13/100


INFO:tensorflow:Assets written to: models/LSTM-L64-D128-D256-reg=0.005.tf/assets


Epoch 14/100
Epoch 15/100


INFO:tensorflow:Assets written to: models/LSTM-L64-D128-D256-reg=0.005.tf/assets


Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100


INFO:tensorflow:Assets written to: models/LSTM-L64-D128-D256-reg=0.005.tf/assets


Epoch 20/100


INFO:tensorflow:Assets written to: models/LSTM-L64-D128-D256-reg=0.005.tf/assets


Epoch 21/100


INFO:tensorflow:Assets written to: models/LSTM-L64-D128-D256-reg=0.005.tf/assets


Epoch 22/100


INFO:tensorflow:Assets written to: models/LSTM-L64-D128-D256-reg=0.005.tf/assets


Epoch 23/100
Epoch 24/100
Epoch 25/100


INFO:tensorflow:Assets written to: models/LSTM-L64-D128-D256-reg=0.005.tf/assets


Epoch 26/100


INFO:tensorflow:Assets written to: models/LSTM-L64-D128-D256-reg=0.005.tf/assets


Epoch 27/100


INFO:tensorflow:Assets written to: models/LSTM-L64-D128-D256-reg=0.005.tf/assets


Epoch 28/100


INFO:tensorflow:Assets written to: models/LSTM-L64-D128-D256-reg=0.005.tf/assets


Epoch 29/100
Epoch 30/100
Epoch 31/100


INFO:tensorflow:Assets written to: models/LSTM-L64-D128-D256-reg=0.005.tf/assets


Epoch 32/100


INFO:tensorflow:Assets written to: models/LSTM-L64-D128-D256-reg=0.005.tf/assets


Epoch 33/100


INFO:tensorflow:Assets written to: models/LSTM-L64-D128-D256-reg=0.005.tf/assets


Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100


INFO:tensorflow:Assets written to: models/LSTM-L64-D128-D256-reg=0.005.tf/assets


Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100


INFO:tensorflow:Assets written to: models/LSTM-L64-D128-D256-reg=0.005.tf/assets


Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100


INFO:tensorflow:Assets written to: models/LSTM-L64-D128-D256-reg=0.005.tf/assets


Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100


INFO:tensorflow:Assets written to: models/LSTM-L64-D128-D256-reg=0.005.tf/assets


Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100


INFO:tensorflow:Assets written to: models/LSTM-L64-D128-D256-reg=0.005.tf/assets


Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100


INFO:tensorflow:Assets written to: models/LSTM-L64-D128-D256-reg=0.005.tf/assets


Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch/categorical_accuracy,▁▁▁▁▃▃▃▃▄▄▅▆▆▇▆▇▆▇▇▇▇▇▇█▇█▇▇▇█▇██▇▇████▇
epoch/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch/learning_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch/loss,█▅▃▃▃▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch/val_categorical_accuracy,▁▁▁▂▂▃▃▄▄▄▃▅▅▇▇▆▆▆▇▆▅▅▇▇▇▇▇▇▇█████▇████▇
epoch/val_loss,█▅▄▃▃▃▃▃▂▃▃▂▂▁▂▂▂▂▁▂▃▂▁▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▂

0,1
epoch/categorical_accuracy,0.84667
epoch/epoch,99.0
epoch/learning_rate,0.0001
epoch/loss,0.50475
epoch/val_categorical_accuracy,0.8
epoch/val_loss,0.66957


<keras.src.callbacks.History at 0x7fda6bc02690>

In [49]:
# Baseline architecture again, this time logged to wandb and checkpointed under its run name:
# LSTM(128) -> LeakyReLU -> Dense(128) -> Dense(256) -> softmax.
# Input: CONFIG.VIDEO_LENGTH frames of 1629 features (543 landmark rows x 3 coordinates).
model = Sequential([
    LSTM(128, return_sequences=False, input_shape=(CONFIG.VIDEO_LENGTH, 1629),
         kernel_regularizer=l2(0.0001),
         activity_regularizer=l2(0.0001)),
    LeakyReLU(alpha=0.1),
    Dense(128, activation='relu'),
    Dense(256, activation='relu'),
    Dense(len(train_dataset_parquet.unique_labels), activation='softmax'),
])


keras_train(
    model,
    filepath=os.path.join("models", "LSTM128-Dense128-Dense256-allfeatures.tf"),
    run_name="LSTM128-Dense128-Dense256-allfeatures",
)

VBox(children=(Label(value='0.006 MB of 0.010 MB uploaded\r'), FloatProgress(value=0.5380860274477296, max=1.0…

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01111298907134268, max=1.0)…

Epoch 1/100


INFO:tensorflow:Assets written to: models/LSTM128-Dense128-Dense256-allfeatures.tf/assets


Epoch 2/100
Epoch 3/100
Epoch 4/100


INFO:tensorflow:Assets written to: models/LSTM128-Dense128-Dense256-allfeatures.tf/assets


Epoch 5/100


INFO:tensorflow:Assets written to: models/LSTM128-Dense128-Dense256-allfeatures.tf/assets


Epoch 6/100


INFO:tensorflow:Assets written to: models/LSTM128-Dense128-Dense256-allfeatures.tf/assets


Epoch 7/100
Epoch 8/100


INFO:tensorflow:Assets written to: models/LSTM128-Dense128-Dense256-allfeatures.tf/assets


Epoch 9/100


INFO:tensorflow:Assets written to: models/LSTM128-Dense128-Dense256-allfeatures.tf/assets


Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100


INFO:tensorflow:Assets written to: models/LSTM128-Dense128-Dense256-allfeatures.tf/assets


Epoch 18/100


INFO:tensorflow:Assets written to: models/LSTM128-Dense128-Dense256-allfeatures.tf/assets


Epoch 19/100


INFO:tensorflow:Assets written to: models/LSTM128-Dense128-Dense256-allfeatures.tf/assets


Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100


INFO:tensorflow:Assets written to: models/LSTM128-Dense128-Dense256-allfeatures.tf/assets


Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100


INFO:tensorflow:Assets written to: models/LSTM128-Dense128-Dense256-allfeatures.tf/assets


Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100


INFO:tensorflow:Assets written to: models/LSTM128-Dense128-Dense256-allfeatures.tf/assets


Epoch 35/100
Epoch 36/100
Epoch 37/100


INFO:tensorflow:Assets written to: models/LSTM128-Dense128-Dense256-allfeatures.tf/assets


Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100


INFO:tensorflow:Assets written to: models/LSTM128-Dense128-Dense256-allfeatures.tf/assets


Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100


INFO:tensorflow:Assets written to: models/LSTM128-Dense128-Dense256-allfeatures.tf/assets


Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
  6/113 [>.............................] - ETA: 38s - loss: 0.2516 - categorical_accuracy: 0.9375

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [53]:
# LSTM(64) -> LeakyReLU -> Dense(64) -> softmax classifier.
# Each of the CONFIG.VIDEO_LENGTH frames is a flat 1629-value vector
# (presumably 543 landmarks * (x, y, z), matching the "allfeatures" run name -- TODO confirm).
model = Sequential([
    LSTM(
        64,
        return_sequences=False,  # only the final hidden state feeds the dense head
        input_shape=(CONFIG.VIDEO_LENGTH, 1629),
        kernel_regularizer=l2(0.0001),
        activity_regularizer=l2(0.0001),
    ),
    LeakyReLU(alpha=0.1),
    Dense(64, activation='relu'),
    # One output unit per distinct label in the training set
    Dense(len(train_dataset_parquet.unique_labels), activation='softmax'),
])

# T_max / max_lr / min_lr are forwarded to keras_train (defined elsewhere in
# the notebook), which owns the learning-rate schedule.
keras_train(
    model,
    filepath=os.path.join("models", "LSTM64-Dense64-allfeatures.tf"),
    run_name="LSTM64-Dense64-allfeatures",
    T_max=75,
    epochs=100,
    max_lr=1e-4,
    min_lr=2.5e-5,
)

VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch/categorical_accuracy,▁
epoch/epoch,▁
epoch/learning_rate,▁
epoch/loss,▁
epoch/val_categorical_accuracy,▁
epoch/val_loss,▁

0,1
epoch/categorical_accuracy,0.41667
epoch/epoch,0.0
epoch/learning_rate,0.0001
epoch/loss,1.13316
epoch/val_categorical_accuracy,0.44
epoch/val_loss,1.11618


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112615217765172, max=1.0…

Epoch 1/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense64-allfeatures.tf/assets


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense64-allfeatures.tf/assets


Epoch 8/100
Epoch 9/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense64-allfeatures.tf/assets


Epoch 10/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense64-allfeatures.tf/assets


Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense64-allfeatures.tf/assets


Epoch 20/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense64-allfeatures.tf/assets


Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense64-allfeatures.tf/assets


Epoch 25/100
Epoch 26/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense64-allfeatures.tf/assets


Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense64-allfeatures.tf/assets


Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense64-allfeatures.tf/assets


Epoch 36/100
Epoch 37/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense64-allfeatures.tf/assets


Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense64-allfeatures.tf/assets


Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense64-allfeatures.tf/assets


Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense64-allfeatures.tf/assets


Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch/categorical_accuracy,▁▁▁▂▃▃▃▃▃▄▅▆▇▆▇▇▇▇█▇▇█████████▇▇▇▇▇▇▇▇██
epoch/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch/learning_rate,█████▇▇▇▇▆▆▆▅▅▅▄▄▄▃▃▃▂▂▂▂▁▁▁▁▁█████▇▇▇▇▆
epoch/loss,██▇▇▆▆▅▅▅▅▅▃▃▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▁▁
epoch/val_categorical_accuracy,▁▁▁▂▃▃▃▃▃▄▆▆▆▆▆▆▅▆▆▆▇▇▇███▇███▇█▇██▇█▇▇▆
epoch/val_loss,██▇▇▆▅▅▅▅▅▄▃▃▄▃▃▄▄▄▅▃▂▂▂▂▂▂▁▂▂▄▂▂▁▂▂▁▂▂▅

0,1
epoch/categorical_accuracy,0.91111
epoch/epoch,99.0
epoch/learning_rate,8e-05
epoch/loss,0.30305
epoch/val_categorical_accuracy,0.74
epoch/val_loss,0.75053


<keras.src.callbacks.History at 0x7fda527cdad0>

In [54]:
# LSTM(64) -> LeakyReLU -> Dense(128) -> softmax classifier.
# Each of the CONFIG.VIDEO_LENGTH frames is a flat 1629-value vector
# (presumably 543 landmarks * (x, y, z), matching the "allfeatures" run name -- TODO confirm).
model = Sequential([
    LSTM(
        64,
        return_sequences=False,  # only the final hidden state feeds the dense head
        input_shape=(CONFIG.VIDEO_LENGTH, 1629),
        kernel_regularizer=l2(0.0001),
        activity_regularizer=l2(0.0001),
    ),
    LeakyReLU(alpha=0.1),
    Dense(128, activation='relu'),
    # One output unit per distinct label in the training set
    Dense(len(train_dataset_parquet.unique_labels), activation='softmax'),
])

# T_max / max_lr / min_lr are forwarded to keras_train (defined elsewhere in
# the notebook), which owns the learning-rate schedule.
# NOTE(review): the checkpoint directory name has no hyphen before
# "allfeatures", unlike the run name; left as-is so existing paths keep working.
keras_train(
    model,
    filepath=os.path.join("models", "LSTM64-Dense128allfeatures.tf"),
    run_name="LSTM64-Dense128-allfeatures",
    T_max=75,
    epochs=100,
    max_lr=1e-4,
    min_lr=2.5e-5,
)

Epoch 1/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense128allfeatures.tf/assets


Epoch 2/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense128allfeatures.tf/assets


Epoch 3/100
Epoch 4/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense128allfeatures.tf/assets


Epoch 5/100
Epoch 6/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense128allfeatures.tf/assets


Epoch 7/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense128allfeatures.tf/assets


Epoch 8/100
Epoch 9/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense128allfeatures.tf/assets


Epoch 10/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense128allfeatures.tf/assets


Epoch 11/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense128allfeatures.tf/assets


Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense128allfeatures.tf/assets


Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense128allfeatures.tf/assets


Epoch 27/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense128allfeatures.tf/assets


Epoch 28/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense128allfeatures.tf/assets


Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense128allfeatures.tf/assets


Epoch 34/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense128allfeatures.tf/assets


Epoch 35/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense128allfeatures.tf/assets


Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense128allfeatures.tf/assets


Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense128allfeatures.tf/assets


Epoch 99/100
Epoch 100/100


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch/categorical_accuracy,▁▁▂▂▃▃▃▃▄▄▅▆▆▇▇▇▇▇████████████▇▇██▇▇▇███
epoch/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch/learning_rate,█████▇▇▇▇▆▆▆▅▅▅▄▄▄▃▃▃▂▂▂▂▁▁▁▁▁█████▇▇▇▇▆
epoch/loss,██▇▇▆▅▅▅▅▅▄▃▃▂▂▂▂▂▁▁▂▁▂▁▁▁▁▁▁▁▂▂▂▂▂▂▂▁▁▁
epoch/val_categorical_accuracy,▁▁▂▂▄▃▃▃▃▄▄▇▇▇▇▇█▇▇██▆███████▇▇█▇█▆▇█▇▇▇
epoch/val_loss,██▇▇▅▅▅▅▅▅▄▃▂▂▂▂▁▂▃▂▂▄▂▂▂▁▂▂▂▂▄▂▃▂▄▂▂▂▂▂

0,1
epoch/categorical_accuracy,0.89778
epoch/epoch,99.0
epoch/learning_rate,8e-05
epoch/loss,0.3411
epoch/val_categorical_accuracy,0.86
epoch/val_loss,0.45587


<keras.src.callbacks.History at 0x7fda52b45090>

In [55]:
# LSTM(64) -> LeakyReLU -> Dense(256) -> softmax classifier.
model = Sequential()
# Each of the CONFIG.VIDEO_LENGTH frames is a flat 1629-value vector
# (presumably 543 landmarks * (x, y, z), matching the "allfeatures" run name -- TODO confirm).
model.add(LSTM(64, return_sequences=False, input_shape=(CONFIG.VIDEO_LENGTH, 1629),
               kernel_regularizer=l2(0.0001), 
               activity_regularizer=l2(0.0001)))
model.add(LeakyReLU(alpha=0.1))
model.add(Dense(256, activation='relu'))
# One output unit per distinct label in the training set
model.add(Dense(len(train_dataset_parquet.unique_labels), activation='softmax'))


# The run name now says Dense256 (it previously read "Dense257"), so the
# tracked run matches the Dense(256) layer above and the checkpoint name.
# The checkpoint path itself is left untouched so existing paths keep working.
keras_train(model, filepath=os.path.join("models", "LSTM64-Dense256allfeatures.tf"),
            run_name="LSTM64-Dense256-allfeatures", T_max=75, epochs=100, 
            max_lr = 1e-4, min_lr = 2.5e-5)

Epoch 1/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense256allfeatures.tf/assets


Epoch 2/100
Epoch 3/100
Epoch 4/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense256allfeatures.tf/assets


Epoch 5/100
Epoch 6/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense256allfeatures.tf/assets


Epoch 7/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense256allfeatures.tf/assets


Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense256allfeatures.tf/assets


Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense256allfeatures.tf/assets


Epoch 24/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense256allfeatures.tf/assets


Epoch 25/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense256allfeatures.tf/assets


Epoch 26/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense256allfeatures.tf/assets


Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense256allfeatures.tf/assets


Epoch 31/100
Epoch 32/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense256allfeatures.tf/assets


Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense256allfeatures.tf/assets


Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense256allfeatures.tf/assets


Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100


INFO:tensorflow:Assets written to: models/LSTM64-Dense256allfeatures.tf/assets


Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch/categorical_accuracy,▁▁▂▃▃▃▃▃▄▄▆▆▇▇▇▇▇▇▇▇██████████▇▇▇▇▇▇▇▇██
epoch/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch/learning_rate,█████▇▇▇▇▆▆▆▅▅▅▄▄▄▃▃▃▂▂▂▂▁▁▁▁▁█████▇▇▇▇▆
epoch/loss,██▇▆▆▆▅▅▅▅▄▃▃▃▃▃▂▂▂▂▂▁▂▂▁▁▁▁▁▁▃▂▃▂▂▂▃▂▂▂
epoch/val_categorical_accuracy,▁▁▃▃▃▂▃▄▃▅▇▆▆▇▇█▇█▇██▇▇████▇▇██▅█▇▇▅▇▇██
epoch/val_loss,██▆▆▅▅▅▅▅▄▃▃▅▃▂▂▂▂▃▃▂▃▂▃▂▂▂▃▂▂▂▅▁▂▄▆▂▃▂▁

0,1
epoch/categorical_accuracy,0.89556
epoch/epoch,99.0
epoch/learning_rate,8e-05
epoch/loss,0.34329
epoch/val_categorical_accuracy,0.88
epoch/val_loss,0.3723


<keras.src.callbacks.History at 0x7fda49b2fa50>

# Transformer

In [13]:
# Epsilon value intended for layer normalisation
LAYER_NORM_EPS = 1e-6

# Transformer: number of stacked blocks, and the multiplier applied to UNITS
# to get the hidden width of each block's MLP
NUM_BLOCKS = 2
MLP_RATIO = 2

# Dense layer units for each landmark group
LIPS_UNITS = 384
HANDS_UNITS = 384
POSE_UNITS = 384
# Final embedding size, also the width used inside the Transformer
UNITS = 512

# Dropout rates
EMBEDDING_DROPOUT = 0.0
MLP_DROPOUT_RATIO = 0.3
CLASSIFIER_DROPOUT_RATIO = 0.1

# Activations
GELU = tf.keras.activations.gelu

# Initializers
INIT_HE_UNIFORM = tf.keras.initializers.he_uniform
INIT_GLOROT_UNIFORM = tf.keras.initializers.glorot_uniform
INIT_ZEROS = tf.keras.initializers.constant(0.0)

print(f'UNITS: {UNITS}')

UNITS: 512


In [14]:
# Print the installed TensorFlow version so it is recorded in the cell output.
# (The same package is already imported as `tf` in the notebook's first cell.)
import tensorflow
print(tensorflow.__version__)

2.14.0


In [16]:
# Tensorflow port of Transformer, done by : https://www.kaggle.com/code/markwijkhuizen/gislr-tf-data-processing-transformer-training

def scaled_dot_product(q,k,v, softmax, attention_mask):
    """Scaled dot-product attention: ``softmax(q @ k^T / sqrt(depth)) @ v``.

    Args:
        q, k, v: query, key and value tensors. The size of the last axis of
            ``q`` is used as the depth for the scaling factor.
        softmax: callable invoked as ``softmax(scores, mask=attention_mask)``;
            ``MultiHeadAttention`` passes a ``tf.keras.layers.Softmax`` layer.
        attention_mask: forwarded unchanged as the ``mask`` argument of ``softmax``.

    Returns:
        The attention-weighted combination of ``v``
        (presumably (batch, Tx, depth), like q/k/v -- TODO confirm).

    NOTE(review): if ``attention_mask`` has shape (batch, Tx, 1), as the mask
    built in ``get_model`` appears to, it broadcasts across the key axis of the
    (batch, Tx, Tx) scores, i.e. it masks whole query rows rather than key
    columns. Confirm this is the intended masking.
    """
    # Raw attention scores: Q . K^T
    scores = tf.matmul(q, k, transpose_b=True)
    # Scale by sqrt(depth) before the softmax
    scale = tf.math.sqrt(tf.cast(q.shape[-1], dtype=tf.float32))
    attention_weights = softmax(scores / scale, mask=attention_mask)

    return tf.matmul(attention_weights, v)


class MultiHeadAttention(tf.keras.layers.Layer):
    """Multi-head self-attention with one Dense projection triple per head.

    Each head projects the input to ``d_model // num_of_heads`` features for
    its query, key and value, runs ``scaled_dot_product`` on them, and the
    concatenated head outputs are projected back to ``d_model`` features.
    """

    def __init__(self,d_model,num_of_heads):
        """Create the per-head Q/K/V projections, the output projection and the softmax.

        Args:
            d_model: output width of the layer.
            num_of_heads: number of attention heads; each head has width
                ``d_model // num_of_heads`` (integer division, so any
                remainder is dropped).
        """
        super().__init__()
        self.d_model = d_model
        self.num_of_heads = num_of_heads
        self.depth = d_model // num_of_heads
        # One independent Dense projection per head for queries, keys and values
        self.wq = [tf.keras.layers.Dense(self.depth) for _ in range(num_of_heads)]
        self.wk = [tf.keras.layers.Dense(self.depth) for _ in range(num_of_heads)]
        self.wv = [tf.keras.layers.Dense(self.depth) for _ in range(num_of_heads)]
        # Output projection applied to the concatenated heads
        self.wo = tf.keras.layers.Dense(d_model)
        # A single Softmax layer shared by all heads
        self.softmax = tf.keras.layers.Softmax()

    def call(self,x, attention_mask):
        """Apply self-attention to ``x`` (used as query, key and value source).

        Args:
            x: input tensor fed to every head's Q/K/V projections.
            attention_mask: passed through to ``scaled_dot_product`` for each head.

        Returns:
            The output projection of the concatenated head outputs.
        """
        head_outputs = [
            scaled_dot_product(
                query_proj(x), key_proj(x), value_proj(x), self.softmax, attention_mask
            )
            for query_proj, key_proj, value_proj in zip(self.wq, self.wk, self.wv)
        ]
        # Concatenate heads on the feature axis, then mix them with wo
        return self.wo(tf.concat(head_outputs, axis=-1))

# Full Transformer
class Transformer(tf.keras.Model):
    """Stack of residual attention + MLP blocks.

    Each block computes ``x = x + MHA(x, mask)`` followed by ``x = x + MLP(x)``.
    No layer normalisation is applied inside the blocks.

    Sizes come from the module-level constants: ``UNITS`` is the attention
    width and the MLP output width, ``UNITS * MLP_RATIO`` is the MLP hidden
    width, and ``MLP_DROPOUT_RATIO`` is the dropout rate inside the MLP.
    """

    def __init__(self, num_blocks, num_heads=8):
        """
        Args:
            num_blocks: number of attention + MLP blocks to stack.
            num_heads: attention heads per block. Defaults to 8, the value
                that was previously hard-coded.
        """
        super(Transformer, self).__init__(name='transformer')
        self.num_blocks = num_blocks
        self.num_heads = num_heads

    def build(self, input_shape):
        """Create the sub-layers of every block (``input_shape`` is not used)."""
        self.mhas = []
        self.mlps = []
        # Make Transformer Blocks
        for _ in range(self.num_blocks):
            # Multi Head Attention
            self.mhas.append(MultiHeadAttention(UNITS, self.num_heads))
            # Multi Layer Perceptron: expand to UNITS * MLP_RATIO, dropout, project back to UNITS
            self.mlps.append(tf.keras.Sequential([
                tf.keras.layers.Dense(UNITS * MLP_RATIO, activation=GELU, kernel_initializer=INIT_GLOROT_UNIFORM),
                tf.keras.layers.Dropout(MLP_DROPOUT_RATIO),
                tf.keras.layers.Dense(UNITS, kernel_initializer=INIT_HE_UNIFORM),
            ]))

    def call(self, x, attention_mask):
        """Run ``x`` through every block.

        Args:
            x: input tensor; its last axis must be compatible with ``UNITS``
                for the residual additions.
            attention_mask: forwarded to each block's attention layer.

        Returns:
            The tensor after the last block.
        """
        # Iterate input over transformer blocks (residual connection around each sub-layer)
        for mha, mlp in zip(self.mhas, self.mlps):
            x = x + mha(x, attention_mask)
            x = x + mlp(x)

        return x

In [None]:
from keras.layers import Dense, Embedding


def get_model():
    """Build and compile the landmark Transformer classifier.

    The model takes two inputs: ``frames`` of shape
    (INPUT_SIZE, N_COLS, N_DIMS) and ``non_empty_frame_idxs`` of shape
    (INPUT_SIZE,), where the value -1 marks a frame to be masked out.
    Lips, left-hand and pose columns are sliced out, standardised, flattened,
    embedded, passed through ``Transformer(NUM_BLOCKS)``, mean-pooled over the
    unmasked frames and classified into NUM_CLASSES with a softmax layer.

    Relies on names defined outside this cell (INPUT_SIZE, N_COLS, N_DIMS,
    LIPS_START, the *_MEAN / *_STD statistics, NUM_CLASSES, scce_with_ls, tfa).

    Returns:
        The compiled ``tf.keras.models.Model``.

    NOTE(review): ``Embedding()`` is called below with no constructor arguments
    and four positional inputs. The ``Embedding`` imported at the top of this
    cell is ``keras.layers.Embedding``, which does not look compatible with
    that usage -- presumably a custom embedding model is intended; confirm.
    NOTE(review): ``tfa`` (presumably tensorflow_addons) is not imported in the
    notebook's import cell -- confirm it is imported before this runs.
    """
    # Inputs
    frames = tf.keras.layers.Input([INPUT_SIZE, N_COLS, N_DIMS], dtype=tf.float32, name='frames')
    non_empty_frame_idxs = tf.keras.layers.Input([INPUT_SIZE], dtype=tf.float32, name='non_empty_frame_idxs')
    # Padding mask: 1.0 where the frame index is not -1, 0.0 otherwise;
    # expanded to (batch, INPUT_SIZE, 1)
    mask0 = tf.cast(tf.math.not_equal(non_empty_frame_idxs, -1), tf.float32)
    mask0 = tf.expand_dims(mask0, axis=2)
    # Random frame masking: a non-padded frame is kept only when its uniform
    # draw exceeds 0.25.
    # NOTE(review): the random draw is part of the model graph, so it appears
    # to run on every forward pass, not only during training -- confirm intended.
    mask = tf.where(
        (tf.random.uniform(tf.shape(mask0)) > 0.25) & tf.math.not_equal(mask0, 0.0),
        1.0,
        0.0,
    )
    # Samples whose frames were all masked out fall back to the padding mask
    mask = tf.where(
        tf.math.equal(tf.reduce_sum(mask, axis=[1,2], keepdims=True), 0.0),
        mask0,
        mask,
    )


    """
        left_hand: 468:489
        pose: 489:522
        right_hand: 522:543
    """
    x = frames
    # Keep only the first two values of the last axis (presumably x and y -- TODO confirm)
    x = tf.slice(x, [0,0,0,0], [-1,INPUT_SIZE, N_COLS, 2])
    # LIPS: 40 columns starting at LIPS_START; exact zeros are left at 0.0,
    # everything else is standardised with LIPS_MEAN / LIPS_STD
    lips = tf.slice(x, [0,0,LIPS_START,0], [-1,INPUT_SIZE, 40, 2])
    lips = tf.where(
            tf.math.equal(lips, 0.0),
            0.0,
            (lips - LIPS_MEAN) / LIPS_STD,
        )
    # LEFT HAND: 21 columns starting at the hard-coded column 40
    # (presumably right after the 40 lips columns -- TODO confirm)
    left_hand = tf.slice(x, [0,0,40,0], [-1,INPUT_SIZE, 21, 2])
    left_hand = tf.where(
            tf.math.equal(left_hand, 0.0),
            0.0,
            (left_hand - LEFT_HANDS_MEAN) / LEFT_HANDS_STD,
        )
    # POSE: 5 columns starting at the hard-coded column 61 (= 40 + 21)
    pose = tf.slice(x, [0,0,61,0], [-1,INPUT_SIZE, 5, 2])
    pose = tf.where(
            tf.math.equal(pose, 0.0),
            0.0,
            (pose - POSE_MEAN) / POSE_STD,
        )

    # Flatten each group's (columns, 2) trailing axes into one feature axis
    lips = tf.reshape(lips, [-1, INPUT_SIZE, 40*2])
    left_hand = tf.reshape(left_hand, [-1, INPUT_SIZE, 21*2])
    pose = tf.reshape(pose, [-1, INPUT_SIZE, 5*2])

    # Embedding (see the NOTE(review) in the docstring about which Embedding this resolves to)
    x = Embedding()(lips, left_hand, pose, non_empty_frame_idxs)

    # Encoder Transformer Blocks
    x = Transformer(NUM_BLOCKS)(x, mask)

    # Pooling: mean over axis 1 weighted by the mask.
    # NOTE(review): if a sample's mask sums to zero (which the fallback above
    # still allows when mask0 is all zeros) this divides by zero -- confirm
    # that such samples cannot occur.
    x = tf.reduce_sum(x * mask, axis=1) / tf.reduce_sum(mask, axis=1)
    # Classifier Dropout
    x = tf.keras.layers.Dropout(CLASSIFIER_DROPOUT_RATIO)(x)
    # Classification Layer
    x = tf.keras.layers.Dense(NUM_CLASSES, activation=tf.keras.activations.softmax, kernel_initializer=INIT_GLOROT_UNIFORM)(x)

    outputs = x

    # Create Tensorflow Model
    model = tf.keras.models.Model(inputs=[frames, non_empty_frame_idxs], outputs=outputs)

    # Sparse Categorical Cross Entropy With Label Smoothing (scce_with_ls is defined outside this cell)
    loss = scce_with_ls

    # Adam Optimizer with weight decay and gradient-norm clipping
    optimizer = tfa.optimizers.AdamW(learning_rate=1e-3, weight_decay=1e-5, clipnorm=1.0)

    # Accuracy plus top-5 / top-10 accuracy on sparse (integer) labels
    metrics = [
        tf.keras.metrics.SparseCategoricalAccuracy(name='acc'),
        tf.keras.metrics.SparseTopKCategoricalAccuracy(k=5, name='top_5_acc'),
        tf.keras.metrics.SparseTopKCategoricalAccuracy(k=10, name='top_10_acc'),
    ]

    model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

    return model