In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (LSTM, Dense, Dropout, BatchNormalization, 
                                     Bidirectional, LayerNormalization, GlobalAveragePooling1D)
from tensorflow.keras.callbacks import (ModelCheckpoint, EarlyStopping, 
                                       ReduceLROnPlateau, TensorBoard)
from tensorflow.keras import mixed_precision
import cv2
import numpy as np
import os
import glob
import json
import re
from sklearn.model_selection import train_test_split
from datetime import datetime

In [None]:
# Switch Keras to the 'mixed_float16' policy globally so layers created from
# here on use it; this trades some numeric precision for speed and memory.
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_global_policy(policy)

# Report which dtypes the policy resolved to.
print(
    f'Compute dtype: {policy.compute_dtype}',
    f'Variable dtype: {policy.variable_dtype}',
    sep='\n',
)

In [None]:
# --- Locations on disk -------------------------------------------------------
DATA_PATH = 'dataset/augmented'
LABEL_MAP_PATH = 'dataset/label/label_map.json'

# --- Input pipeline and split settings ---------------------------------------
BATCH_SIZE = 32
AUTOTUNE = tf.data.AUTOTUNE
VAL_SPLIT = 0.1   # fraction of all samples held out for validation
TEST_SPLIT = 0.1  # fraction of all samples held out for testing

# --- Label map ---------------------------------------------------------------
# The number of output classes is the number of entries in the label map.
with open(LABEL_MAP_PATH, mode='r', encoding='utf-8') as label_file:
    label_map = json.load(label_file)

NUM_CLASSES = len(label_map)
print(f"Number of classes: {NUM_CLASSES}")

In [None]:
# Collect every sample archive under DATA_PATH.
# glob returns matches in arbitrary (filesystem-dependent) order, so the list
# is sorted: a fixed input order is what makes a seeded split of these files
# reproducible across runs and machines.
file_pattern = os.path.join(DATA_PATH, '*.npz')
all_files = sorted(glob.glob(file_pattern))
print(f"Found {len(all_files)} samples.")

def get_video_prefix(filepath):
    """Return the prefix of a sample file name, used as its stratification key.

    The prefix is the shortest leading part of the base name that is followed
    by ``_orig`` or ``_aug_<digits>``. When the name carries neither suffix,
    the text before the first dot is returned instead.
    """
    name = os.path.basename(filepath)
    found = re.match(r'(.+?)_(orig|aug_\d+)', name)
    if found is None:
        # No recognised suffix: fall back to the name up to the first '.'
        stem, _, _ = name.partition('.')
        return stem
    return found.group(1)

# First cut: hold out VAL_SPLIT + TEST_SPLIT of all samples, stratified on the
# video prefix so each prefix keeps the same proportion on both sides.
# NOTE(review): stratifying on the prefix means samples that share a prefix
# (an original and its augmentations) can end up in different splits -- confirm
# this is intended and not a source of train/eval leakage.
holdout_fraction = VAL_SPLIT + TEST_SPLIT
stratify_labels = list(map(get_video_prefix, all_files))

train_files, temp_files = train_test_split(
    all_files,
    test_size=holdout_fraction,
    shuffle=True,
    random_state=42,
    stratify=stratify_labels,
)

# Second cut: divide the held-out pool into validation and test parts, again
# stratified on the prefix.
stratify_temp = list(map(get_video_prefix, temp_files))

val_files, test_files = train_test_split(
    temp_files,
    test_size=TEST_SPLIT / holdout_fraction,
    shuffle=True,
    random_state=42,
    stratify=stratify_temp,
)

print(f"Train: {len(train_files)} | Val: {len(val_files)} | Test: {len(test_files)}")

In [None]:
def _load_npz(path):
    """Read one sample (sequence and label) from an ``.npz`` archive.

    Args:
        path: UTF-8 encoded ``bytes`` path to the archive (it is decoded
            before being opened).

    Returns:
        A ``(seq, lbl)`` tuple: the ``'sequence'`` entry cast to float32 and
        the ``'label'`` entry as an ``np.int32`` scalar.

    Raises:
        ValueError: if the stored label does not hold exactly one element.
    """
    npz_path = path.decode('utf-8')
    # np.load keeps the archive open until it is closed; the context manager
    # releases the file handle as soon as both arrays have been read.
    with np.load(npz_path) as data:
        seq = data['sequence'].astype(np.float32)
        # .item() accepts a 0-d or single-element array, so the label always
        # comes back as a true scalar regardless of how it was saved.
        lbl = np.int32(data['label'].item())
    return seq, lbl

def apply_time_masking(seq, min_mask=5, max_mask=15):
    """Zero out one random contiguous span of time steps.

    The sequence length is read from ``seq`` itself rather than being
    hard-coded, so the function works for any ``(time, features)`` tensor.

    Args:
        seq: 2-D tensor whose first axis is time.
        min_mask: smallest span length (inclusive).
        max_mask: largest span length (exclusive).

    Returns:
        A tensor shaped like ``seq`` in which the rows of the chosen span are
        multiplied by zero and all other rows are unchanged.
    """
    num_steps = tf.shape(seq)[0]

    mask_length = tf.random.uniform([], min_mask, max_mask, dtype=tf.int32)
    # Keep the span strictly shorter than the sequence so the range sampled
    # for `start` below is never empty (a no-op for 60-step inputs).
    mask_length = tf.minimum(mask_length, num_steps - 1)
    start = tf.random.uniform([], 0, num_steps - mask_length, dtype=tf.int32)

    # Row i is kept when it lies outside [start, start + mask_length).
    steps = tf.range(num_steps)
    keep = tf.logical_or(steps < start, steps >= start + mask_length)

    # Add a trailing axis so the per-row mask broadcasts over the features.
    return seq * tf.cast(keep, seq.dtype)[:, tf.newaxis]

def apply_noise(seq):
    """Return ``seq`` with element-wise Gaussian noise (mean 0, stddev 0.01) added."""
    return seq + tf.random.normal(shape=tf.shape(seq), mean=0.0, stddev=0.01)

@tf.function
def augment_sequence(seq, label):
    """Randomly augment one ``(seq, label)`` pair; the label passes through.

    Time masking and Gaussian noise are each applied independently with a
    30% chance. The choice is made with ``tf.cond`` so that it is evaluated
    per element inside the graph.
    """
    def _with_probability(prob, transform, tensor):
        # Draw one uniform sample and apply `transform` only when it is below `prob`.
        return tf.cond(
            tf.random.uniform([]) < prob,
            lambda: transform(tensor),
            lambda: tensor,
        )

    seq = _with_probability(0.3, apply_time_masking, seq)
    seq = _with_probability(0.3, apply_noise, seq)
    return seq, label

def parse_fn(path):
    """Turn a file-path tensor into a ``(sequence, label)`` pair, unaugmented.

    The archive is read by ``_load_npz`` through ``tf.numpy_function``; the
    static shapes are then declared as ``[60, 201]`` for the sequence and
    scalar for the label.
    """
    sequence, label = tf.numpy_function(
        func=_load_npz,
        inp=[path],
        Tout=[tf.float32, tf.int32],
    )

    # Declare the static shapes expected by the rest of the pipeline.
    sequence.set_shape([60, 201])
    label.set_shape([])
    return sequence, label

def make_dataset(file_list, shuffle=False, repeat=False, augment=False,
                 drop_remainder=True):
    """Build the batched ``tf.data`` pipeline for a list of ``.npz`` files.

    Stage order matters and is: load -> cache -> shuffle -> repeat ->
    augment -> batch -> prefetch.

    - ``cache()`` comes directly after loading so that it stores one finite
      pass over the raw samples. Caching after ``repeat()`` would try to
      cache an endless stream, and caching after ``shuffle()`` would freeze
      the order.
    - ``shuffle()`` and the augmentation map run after the cache, so sample
      order and augmentation are re-drawn on every pass.

    Args:
        file_list: paths of the sample archives.
        shuffle: reshuffle the samples on every pass.
        repeat: repeat the dataset indefinitely.
        augment: apply ``augment_sequence`` to every sample.
        drop_remainder: drop a final batch smaller than ``BATCH_SIZE``.
            Pass ``False`` for evaluation sets when every sample must be
            scored.

    Returns:
        A ``tf.data.Dataset`` yielding ``(sequences, labels)`` batches.
    """
    ds = tf.data.Dataset.from_tensor_slices(file_list)

    # Step 1: load raw samples from disk.
    ds = ds.map(parse_fn, num_parallel_calls=AUTOTUNE)

    # Step 2: cache the finite set of raw samples.
    ds = ds.cache()

    # Step 3: shuffle after the cache, with a buffer covering every sample.
    if shuffle:
        ds = ds.shuffle(len(file_list), reshuffle_each_iteration=True)

    # Step 4: repeat after shuffling so each pass sees every sample once.
    if repeat:
        ds = ds.repeat()

    # Step 5: augment after the cache so the randomness is fresh each pass.
    if augment:
        ds = ds.map(augment_sequence, num_parallel_calls=AUTOTUNE)

    ds = ds.batch(BATCH_SIZE, drop_remainder=drop_remainder)
    ds = ds.prefetch(AUTOTUNE)

    return ds

In [None]:
def build_optimized_model(input_shape=(60, 201), num_classes=3315):
    """Assemble the bidirectional-LSTM sequence classifier.

    Architecture: two BiLSTM(128) + LayerNormalization stages, average and
    max pooling over time concatenated together, a Dense(256) head with
    dropout and layer normalization, then a ``num_classes``-way softmax.

    Args:
        input_shape: ``(time_steps, features)`` of one input sequence.
        num_classes: number of output classes.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    keras_layers = tf.keras.layers
    inputs = tf.keras.Input(shape=input_shape)

    # Recurrent encoder: two identical BiLSTM + LayerNorm stages.
    features = inputs
    for _ in range(2):
        features = Bidirectional(
            LSTM(128, return_sequences=True, dropout=0.2)
        )(features)
        features = LayerNormalization()(features)

    # Summarise the time axis with both mean and max statistics.
    pooled_mean = GlobalAveragePooling1D()(features)
    pooled_max = keras_layers.GlobalMaxPooling1D()(features)
    pooled = keras_layers.Concatenate()([pooled_mean, pooled_max])

    # Classification head.
    hidden = Dense(256, activation='relu')(pooled)
    hidden = Dropout(0.4)(hidden)
    hidden = LayerNormalization()(hidden)

    logits = Dense(num_classes, name='logits')(hidden)
    # The softmax layer is pinned to float32 via its dtype argument.
    outputs = keras_layers.Activation(
        'softmax', dtype='float32', name='predictions'
    )(logits)

    return tf.keras.Model(inputs=inputs, outputs=outputs)

# Build the network. The optimizer, learning-rate schedule and model.compile()
# are set up in the next cell: the schedule needs `steps_per_epoch`, which is
# only known once the datasets below exist. (A compile step used to sit here;
# it referenced `steps_per_epoch` before assignment and passed
# `label_smoothing` to SparseCategoricalCrossentropy, which that loss does not
# accept.)
model = build_optimized_model(num_classes=NUM_CLASSES)
model.summary()

print("\n" + "="*80)
print("CREATING DATASETS...")
print("="*80)

train_ds = make_dataset(train_files, shuffle=True, repeat=True, augment=True)
print("Train dataset: shuffle=True, repeat=True, augment=True")

val_ds = make_dataset(val_files, shuffle=False, repeat=False, augment=False)
print("Validation dataset: shuffle=False, repeat=False, augment=False")

test_ds = make_dataset(test_files, shuffle=False, repeat=False, augment=False)
print("Test dataset: shuffle=False, repeat=False, augment=False")

# Number of full batches in one pass over each split.
steps_per_epoch = len(train_files) // BATCH_SIZE
validation_steps = len(val_files) // BATCH_SIZE

print(f"\nSteps per epoch: {steps_per_epoch}")
print(f"Validation steps: {validation_steps}")

In [None]:
# Learning rate: cosine decay from 1e-4 over 50 epochs' worth of steps, down to
# a floor of alpha * initial rate.
# NOTE(review): model.fit below runs for up to 100 epochs, so the rate sits at
# its floor for the second half -- confirm the 50-epoch horizon is intended.
initial_learning_rate = 1e-4
lr_schedule = tf.keras.optimizers.schedules.CosineDecay(
    initial_learning_rate=initial_learning_rate,
    decay_steps=50 * steps_per_epoch,
    alpha=1e-6,
)

# Adam with gradient-norm clipping, wrapped for loss scaling under the
# mixed-precision policy.
optimizer = mixed_precision.LossScaleOptimizer(
    tf.keras.optimizers.Adam(learning_rate=lr_schedule, clipnorm=1.0)
)

# Labels are integer class ids, hence the sparse loss and sparse top-k metric.
top5_metric = tf.keras.metrics.SparseTopKCategoricalAccuracy(
    k=5, name='top5_accuracy'
)
model.compile(
    optimizer=optimizer,
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy', top5_metric],
)

In [None]:
# Output locations: checkpoints go to `models/`, TensorBoard logs to a
# timestamped sub-directory so separate runs do not overwrite each other.
checkpoint_dir = 'models'
log_dir = f'models/logs/{datetime.now().strftime("%Y%m%d-%H%M%S")}'
os.makedirs(checkpoint_dir, exist_ok=True)
os.makedirs(log_dir, exist_ok=True)

callbacks = [
    # Per-epoch scalars, weight histograms and the model graph.
    TensorBoard(
        log_dir=log_dir,
        histogram_freq=1,
        write_graph=True,
        update_freq='epoch'
    ),

    # Keep only the full model with the lowest validation loss so far.
    ModelCheckpoint(
        filepath=os.path.join(checkpoint_dir, 'best_model.keras'),
        monitor='val_loss',
        save_best_only=True,
        save_weights_only=False,
        verbose=1
    ),

    # Stop after 15 epochs without a val_loss improvement and roll the model
    # back to its best weights.
    EarlyStopping(
        monitor='val_loss',
        patience=15,
        restore_best_weights=True,
        verbose=1
    ),

    # ReduceLROnPlateau is intentionally not used: the optimizer's learning
    # rate is driven by a CosineDecay schedule, and Keras does not let a
    # callback overwrite the rate of an optimizer built from a
    # LearningRateSchedule (the attempt fails or is undone on the next step,
    # depending on the Keras version).
]

In [None]:
print("\n" + "=" * 80, "STARTING TRAINING", "=" * 80, sep="\n")

# train_ds was created with repeat=True, so steps_per_epoch is what delimits
# an epoch; validation_steps likewise bounds each validation pass.
history = model.fit(
    x=train_ds,
    validation_data=val_ds,
    epochs=100,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps,
    callbacks=callbacks,
    verbose=1,
)

In [None]:
print("\n" + "=" * 80, "EVALUATING ON TEST SET", "=" * 80, sep="\n")

# evaluate() returns the loss followed by the compiled metrics, in the order
# they were given to compile(): accuracy, then top-5 accuracy.
test_results = model.evaluate(test_ds, verbose=1)
test_loss, test_accuracy, test_top5 = test_results[:3]
print(f"\nTest Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test Top-5 Accuracy: {test_top5:.4f}")

# Persist the model in its end-of-training state (separate from the
# best-val_loss checkpoint written during training).
final_model_path = 'models/final_model.keras'
model.save(final_model_path)
print(f"\nModel saved at: {final_model_path}")
print(f"TensorBoard logs: tensorboard --logdir={log_dir}")