## 0. Preparing Input for Training Model

In [1]:
import os
# Run from the project root so the project-local imports below and the relative
# output paths used later in the notebook resolve.
# The root can be overridden with the VISL_PROJECT_ROOT environment variable so
# the notebook is not tied to one machine; the original path stays the default.
PROJECT_ROOT = os.environ.get(
    "VISL_PROJECT_ROOT",
    "/Users/xuannguyet/Documents/SUDOCODE25/ViSL-100-words-Recognition-Project",
)
os.chdir(PROJECT_ROOT)

from src.utils.utils import update_label_map
from config.paths import label_map_path, augmented_data_dir
import json
import glob
from sklearn.model_selection import train_test_split
import numpy as np
import tensorflow as tf

# update_label_map(augmented_data_dir, label_map_path)

2025-10-29 14:04:56.109489: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Fractions of the full dataset held out for validation and test.
VAL_SPLIT = 0.1
TEST_SPLIT = 0.1
DATA_PATH = augmented_data_dir

# label_map maps a class directory name to the id used as the training label.
with open(label_map_path, 'r', encoding='utf-8') as f:
    label_map = json.load(f)

# Collect one (file path, class id) pair per .npy sample.
all_files = []
for label, class_id in label_map.items():
    label_dir = os.path.join(DATA_PATH, label)
    # glob returns files in arbitrary, filesystem-dependent order. Sorting makes
    # the seeded split below reproducible across kernels and machines, so the
    # saved checkpoint is always evaluated on the same held-out files.
    for path in sorted(glob.glob(os.path.join(label_dir, '*.npy'))):
        all_files.append((path, class_id))

if not all_files:
    # Fail with a clear message instead of the unpacking error zip(*[]) would give.
    raise FileNotFoundError(
        f"No .npy samples found under {DATA_PATH!r} for the classes in {label_map_path!r}"
    )

print(f"Found {len(all_files)} samples across {len(label_map)} classes.")

# NOTE(review): the samples come from `augmented_data_dir`. If several files are
# augmentations of the same source recording, this per-file random split can put
# near-duplicates in both train and val/test and inflate the scores -- TODO
# confirm, and consider splitting by source recording instead.
paths, labels = zip(*all_files)

# First carve off val + test together, stratified by class ...
train_files, temp_files, y_train, y_temp = train_test_split(
    paths, labels,
    test_size=VAL_SPLIT + TEST_SPLIT,
    shuffle=True, stratify=labels, random_state=42
)
# ... then divide that held-out part into validation and test.
val_files, test_files, y_val, y_test = train_test_split(
    temp_files, y_temp,
    test_size=TEST_SPLIT / (VAL_SPLIT + TEST_SPLIT),
    shuffle=True, stratify=y_temp, random_state=42
)

print(f"Train samples: {len(train_files)}")
print(f"Val samples: {len(val_files)}")
print(f"Test samples: {len(test_files)}")

Found 20000 samples across 100 classes.
Train samples: 16000
Val samples: 2000
Test samples: 2000


In [3]:
# Samples per batch; also used below to derive steps_per_epoch / validation_steps.
BATCH_SIZE = 32
# Lets tf.data pick the parallelism and prefetch buffer size at runtime.
AUTOTUNE = tf.data.AUTOTUNE

def _load_npy(path, label):
    data = np.load(path.decode('utf-8'), allow_pickle=True)
    frames = []
    for frame in data:
        # frame = [left_hand, right_hand, pose]
        concat = np.concatenate(frame)  # (n_keypoints_total, 3)
        frames.append(concat.flatten())  # vector 1D
    seq = np.stack(frames)  # (20, 177)
    return seq.astype(np.float32), np.int32(label)

def parse_fn(path, label):
    """tf.data map function: load one sample through `_load_npy`.

    Args:
        path: Scalar string tensor holding the .npy file path.
        label: Scalar tensor holding the class id.

    Returns:
        ``(sequence, class_id)`` tensors with static shapes (20, 177) and ().
    """
    sequence, class_id = tf.numpy_function(
        func=_load_npy,
        inp=[path, label],
        Tout=[tf.float32, tf.int32],
    )
    # numpy_function outputs carry no static shape, so declare the fixed
    # (frames, features) shape here for the layers downstream.
    sequence.set_shape([20, 177])
    class_id.set_shape([])
    return sequence, class_id

def make_dataset(paths, labels, shuffle=False, repeat=False):
    """Build a batched, prefetched tf.data pipeline over sample files.

    Args:
        paths: Sequence of .npy file paths.
        labels: Sequence of class ids, aligned with ``paths``.
        shuffle: If True, shuffle with a buffer covering every sample and
            reshuffle on each iteration.
        repeat: If True, repeat the dataset indefinitely.

    Returns:
        A dataset yielding ``(sequences, labels)`` batches of ``BATCH_SIZE``.
    """
    dataset = tf.data.Dataset.from_tensor_slices((list(paths), list(labels)))
    if shuffle:
        # Shuffling happens on the cheap (path, label) pairs, before any file is read.
        dataset = dataset.shuffle(len(paths), reshuffle_each_iteration=True)
    if repeat:
        dataset = dataset.repeat()
    dataset = dataset.map(parse_fn, num_parallel_calls=AUTOTUNE)
    dataset = dataset.batch(BATCH_SIZE)
    return dataset.prefetch(AUTOTUNE)

# Training data is shuffled and repeated indefinitely; val/test are finite
# single-pass datasets in a fixed order.
train_ds = make_dataset(train_files, y_train, shuffle=True, repeat=True)
val_ds   = make_dataset(val_files, y_val)
test_ds  = make_dataset(test_files, y_test)

# train_ds never ends, so the number of batches per epoch must be given explicitly.
steps_per_epoch = len(train_files) // BATCH_SIZE
# Ceiling division: val_ds is finite, so count its final, smaller batch as well.
# Floor division silently left those samples out of every validation pass.
validation_steps = -(-len(val_files) // BATCH_SIZE)

In [4]:
# Show the (features, labels) type signature of the batched training pipeline.
print(train_ds.element_spec)

(TensorSpec(shape=(None, 20, 177), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))


In [5]:
# Sanity check: pull a single batch from the training pipeline and print its
# shapes together with the first sequence and its label.
for seq_batch, lbl_batch in train_ds.take(1):
    print("Sequence batch shape:", seq_batch.shape)
    print("Label batch shape:", lbl_batch.shape)
    print("First sequence sample:\n", seq_batch[0])
    print("First label:", lbl_batch[0].numpy())

Sequence batch shape: (32, 20, 177)
Label batch shape: (32,)
First sequence sample:
 tf.Tensor(
[[0.57240224 0.809707   0.         ... 0.4343589  0.83919317 0.        ]
 [0.5732106  0.8029092  0.         ... 0.42893115 0.8417214  0.        ]
 [0.5609208  0.76240504 0.         ... 0.43171892 0.8365464  0.        ]
 ...
 [0.52867615 0.66626966 0.         ... 0.43032545 0.86773926 0.        ]
 [0.54509044 0.7284521  0.         ... 0.42815924 0.8564395  0.        ]
 [0.56173635 0.7838249  0.         ... 0.43270388 0.8522289  0.        ]], shape=(20, 177), dtype=float32)
First label: 74


2025-10-29 14:05:57.901670: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


## 1. Build and Train Model

In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, Bidirectional,GlobalAveragePooling1D, Activation
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau


# Input: 20 frames x 177 features per sample, matching the static shape that
# parse_fn sets on every sequence.
inputs = tf.keras.Input(shape=(20, 177))

# First LSTM (keeps the full sequence for the next recurrent layer)
x = Bidirectional(LSTM(256, return_sequences=True, dropout=0.3))(inputs)
x = BatchNormalization()(x)

# Second LSTM
x = Bidirectional(LSTM(256, return_sequences=True, dropout=0.3))(x)
x = BatchNormalization()(x)

# Third LSTM (returns only the final state, collapsing the time axis)
x = Bidirectional(LSTM(256, dropout=0.3))(x)
x = BatchNormalization()(x)

# Dense layers
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
x = BatchNormalization()(x)

x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
x = BatchNormalization()(x)

# Output layer
# Sparse integer labels must lie in [0, num_classes). Size the output from the
# largest class id in label_map rather than from the number of distinct
# training labels, which is too small when the ids are not contiguous (for
# example when a class in label_map has no sample files).
num_classes = max(int(class_id) for class_id in label_map.values()) + 1
outputs = Dense(num_classes, activation='softmax')(x)
model = tf.keras.Model(inputs=inputs, outputs=outputs)
model.summary()

In [7]:
# Adam with a 1e-3 learning rate. The sparse cross-entropy loss takes the
# integer class ids produced by the data pipeline directly (no one-hot needed).
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [8]:
# Where the best model is written (relative to the current working directory).
checkpoint_dir = 'Models/checkpoints'
os.makedirs(checkpoint_dir, exist_ok=True)
checkpoint_path = os.path.join(checkpoint_dir, 'ViSL_model_v1.keras')

# Save the full model (not only the weights) whenever val_loss improves.
best_model_saver = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
)

# Stop after 5 epochs without a val_loss improvement and roll back to the
# best weights seen so far.
early_stopper = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

callbacks = [best_model_saver, early_stopper]

In [9]:
# Keep the returned History so the per-epoch loss/accuracy curves can be
# inspected or plotted afterwards. Binding it to a name also stops the bare
# History repr from being printed as the cell output.
history = model.fit(
    train_ds,
    epochs=100,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_ds,
    validation_steps=validation_steps,
    callbacks=callbacks
)

Epoch 1/100
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 772ms/step - accuracy: 0.0355 - loss: 4.5640
Epoch 1: val_loss improved from None to 2.23057, saving model to Models/checkpoints/ViSL_model_v1.keras
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m433s[0m 817ms/step - accuracy: 0.0815 - loss: 3.9480 - val_accuracy: 0.3513 - val_loss: 2.2306
Epoch 2/100
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 672ms/step - accuracy: 0.3184 - loss: 2.3600
Epoch 2: val_loss improved from 2.23057 to 1.04225, saving model to Models/checkpoints/ViSL_model_v1.keras
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m359s[0m 717ms/step - accuracy: 0.4075 - loss: 2.0243 - val_accuracy: 0.6895 - val_loss: 1.0423
Epoch 3/100
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 726ms/step - accuracy: 0.6204 - loss: 1.2457
Epoch 3: val_loss improved from 1.04225 to 0.53853, saving model to Models/checkpoints/ViSL_model_v1.keras
[1

<keras.src.callbacks.history.History at 0x1441bfd90>

## 2. Evaluate on Test Set

In [11]:
# Reload the model saved by the ModelCheckpoint callback. Reusing
# `checkpoint_path` keeps the save and load locations from drifting apart.
model = tf.keras.models.load_model(checkpoint_path)

test_loss, test_acc = model.evaluate(test_ds)
print(f"Test Accuracy: {test_acc:.4f}")
print(f"Test Loss: {test_loss:.4f}")
# NOTE(review): a perfect score on a 100-class problem is suspicious. The files
# come from the augmented data directory, so augmented variants of one recording
# may sit in both the train and test splits -- TODO confirm there is no leakage.

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 268ms/step - accuracy: 1.0000 - loss: 1.3878e-04
Test Accuracy: 1.0000
Test Loss: 0.0001
