In [1]:
import numpy as np 
import matplotlib.pyplot as plt 
import mediapipe as mp 
import cv2 as cv
import os 

In [2]:
file_path_Dataset='Top_Classes_Landmarks/Top_Classes_Landmarks'
file_path_Preprocessed='Top_Classes_Landmarks_Preprocessed_No_SlidingWindow_OR_Mask/Top_Classes_Landmarks_Preprocessed_No_SlidingWindow_OR_Mask'


def load_landmark_arrays(directory, label_separator):
    """Load every ``.npy`` file in ``directory`` together with its gloss label.

    Args:
        directory: Folder that is scanned (non-recursively) for ``.npy`` files.
        label_separator: Delimiter used in the file names; the label is the
            lower-cased text that precedes its first occurrence.

    Returns:
        A ``(arrays, glosses)`` pair of equally long lists, where ``arrays[i]``
        is the array loaded from the i-th file and ``glosses[i]`` its label.
    """
    arrays = []
    glosses = []

    # os.listdir() yields entries in arbitrary, filesystem-dependent order.
    # Sorting fixes the sample order so that the seeded train/val/test split
    # performed later selects the same samples on every machine and run.
    for file_name in sorted(os.listdir(directory)):
        if not file_name.endswith(".npy"):
            continue

        arrays.append(np.load(os.path.join(directory, file_name)))
        glosses.append(file_name.split(label_separator)[0].lower())

    return arrays, glosses


# Raw landmark files separate the gloss from the rest of the name with a space.
Dataset, Dataset_glosses = load_landmark_arrays(file_path_Dataset, ' ')

# Preprocessed files use an underscore instead.
Dataset_preprocessed, Dataset_preprocessed_glosses = load_landmark_arrays(
    file_path_Preprocessed, '_'
)

print(len(Dataset_preprocessed), len(Dataset_preprocessed_glosses))


5076 5076


### After preprocessing: train / validation / test split

In [3]:
from sklearn.model_selection import train_test_split

# Stack the list of per-sample arrays into a single array.
data = np.array(Dataset_preprocessed)

# Keep three column ranges of the last axis; columns 132:258 are dropped
# (presumably the face landmarks, judging by the name `data_no_face`).
right_hand = data[:, :, 321:384]  # right hand
left_hand = data[:, :, 258:321]   # left hand
pose = data[:, :, 0:132]          # pose

data_no_face = np.concatenate((pose, left_hand, right_hand), axis=-1)

# First cut: hold out 10% of the samples, the remaining 90% is the training set.
(
    X_train_preprocessed,
    X_temp,
    y_train_preprocessed,
    y_temp,
) = train_test_split(
    data_no_face,
    Dataset_preprocessed_glosses,
    random_state=42,
    test_size=0.10,
)

# Second cut: divide the held-out part evenly into validation and test sets.
(
    X_val_preprocessed,
    X_test_preprocessed,
    y_val_preprocessed,
    y_test_preprocessed,
) = train_test_split(
    X_temp,
    y_temp,
    random_state=42,
    test_size=0.50,
)

In [4]:

# Make sure every split (features and labels alike) is a NumPy array.
X_train_preprocessed, X_val_preprocessed, X_test_preprocessed = map(
    np.array,
    (X_train_preprocessed, X_val_preprocessed, X_test_preprocessed),
)
y_train_preprocessed, y_val_preprocessed, y_test_preprocessed = map(
    np.array,
    (y_train_preprocessed, y_val_preprocessed, y_test_preprocessed),
)

In [6]:
# Sanity check of the training tensor: (samples, frames, features).
# The last two dimensions must match the T and D defaults of the model builders below.
X_train_preprocessed.shape

(4568, 157, 258)

In [7]:
from sklearn.preprocessing import LabelEncoder


# Create label encoder
le = LabelEncoder()

# Fit on the labels of ALL splits, not just the training ones. The split above
# is not stratified, so a rare gloss can end up only in the validation or test
# set; an encoder fitted on the training labels alone would then raise
# ValueError ("previously unseen labels") in transform(). Mapping strings to
# integer ids uses no feature information, so this does not leak data.
# When the training set already contains every class the resulting codes are
# identical to fitting on the training labels only.
le.fit(np.concatenate([y_train_preprocessed, y_val_preprocessed, y_test_preprocessed]))

# Integer class ids for each split
y_train_encoded = le.transform(y_train_preprocessed)
y_val_encoded   = le.transform(y_val_preprocessed)
y_test_encoded  = le.transform(y_test_preprocessed)

In [8]:
import tensorflow as tf
from tensorflow.keras.layers import Layer


class PositionalEncoding(Layer):
    """Adds a fixed sinusoidal positional encoding to a sequence tensor.

    The encoding table has shape ``(1, max_len, d_model)``: even feature
    indices hold ``sin`` values and odd feature indices hold ``cos`` values of
    ``position / 10000 ** (2 * (i // 2) / d_model)``. The table is a constant,
    so the layer has no trainable weights.

    Args:
        max_len: Number of positions for which encodings are precomputed.
        d_model: Size of the feature (last) axis of the input.
        **kwargs: Standard Keras ``Layer`` arguments such as ``name`` or ``dtype``.
    """

    def __init__(self, max_len, d_model, **kwargs):
        super().__init__(**kwargs)
        # Kept so that get_config() can describe how to rebuild the layer.
        self.max_len = max_len
        self.d_model = d_model

        pos = np.arange(max_len)[:, np.newaxis]
        i = np.arange(d_model)[np.newaxis, :]
        # Feature pairs (2k, 2k+1) share one frequency, hence the i // 2.
        angle_rates = 1 / np.power(10000, (2 * (i // 2)) / d_model)
        angle_rads = pos * angle_rates

        angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])
        angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])

        # Leading axis of size 1 lets the table broadcast over the batch.
        self.pos_encoding = tf.constant(angle_rads[np.newaxis, ...], dtype=tf.float32)

    def call(self, x):
        """Return ``x`` plus the encodings of its first ``x.shape[1]`` positions."""
        return x + self.pos_encoding[:, :tf.shape(x)[1], :]

    def get_config(self):
        """Return the constructor arguments so a saved model can rebuild the layer."""
        config = super().get_config()
        config.update({"max_len": self.max_len, "d_model": self.d_model})
        return config


In [9]:
from tensorflow.keras.layers import MultiHeadAttention, Dense, Dropout, LayerNormalization

class TransformerEncoderBlock(Layer):
    """Post-norm Transformer encoder block: self-attention followed by a feed-forward net.

    Each sub-layer output goes through dropout, is added to its input
    (residual connection) and is then layer-normalised.

    Args:
        d_model: Size of the feature axis; also the output size of the
            feed-forward network.
        num_heads: Number of attention heads.
        ff_dim: Hidden size of the feed-forward network.
        dropout: Dropout rate applied to both sub-layer outputs.
        key_dim: Per-head size of the query/key projections. Defaults to
            ``d_model``, which reproduces the original behaviour.
            NOTE(review): the conventional choice is ``d_model // num_heads``;
            the default makes each head as wide as the whole model and
            inflates the parameter count accordingly.
        **kwargs: Standard Keras ``Layer`` arguments such as ``name`` or ``dtype``.
    """

    def __init__(self, d_model, num_heads, ff_dim, dropout=0.1, key_dim=None, **kwargs):
        super().__init__(**kwargs)
        # Kept so that get_config() can describe how to rebuild the layer.
        self.d_model = d_model
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.dropout_rate = dropout
        self.key_dim = d_model if key_dim is None else key_dim

        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=self.key_dim)
        self.ffn = tf.keras.Sequential([
            Dense(ff_dim, activation="relu"),
            Dense(d_model)
        ])
        self.norm1 = LayerNormalization(epsilon=1e-6)
        self.norm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(dropout)
        self.dropout2 = Dropout(dropout)

    def call(self, x, training=False):
        """Apply the block to ``x``; dropout is active only when ``training`` is true."""
        # Self-attention: the sequence is both query and value (and key).
        attn_output = self.att(x, x)
        x = self.norm1(x + self.dropout1(attn_output, training=training))
        ffn_output = self.ffn(x)
        return self.norm2(x + self.dropout2(ffn_output, training=training))

    def get_config(self):
        """Return the constructor arguments so a saved model can rebuild the layer."""
        config = super().get_config()
        config.update({
            "d_model": self.d_model,
            "num_heads": self.num_heads,
            "ff_dim": self.ff_dim,
            "dropout": self.dropout_rate,
            "key_dim": self.key_dim,
        })
        return config


In [10]:
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling1D
from tensorflow.keras.models import Model

def build_signbert_encoder(
    T=157,
    D=258,
    d_model=256,
    num_heads=8,
    ff_dim=512,
    num_layers=4
):
    """Build the sequence encoder as a Keras functional model.

    Args:
        T: Number of time steps in each input sequence.
        D: Number of features per time step.
        d_model: Width of the embedding and of every encoder block.
        num_heads: Attention heads per encoder block.
        ff_dim: Hidden width of each block's feed-forward network.
        num_layers: How many encoder blocks are stacked.

    Returns:
        A model named ``"SignBERT_Encoder"`` that maps ``(batch, T, D)`` inputs
        to ``(batch, T, d_model)`` outputs.
    """
    sequence_in = Input(shape=(T, D))

    # Project each frame to d_model features, then inject position information.
    hidden = Dense(d_model)(sequence_in)
    hidden = PositionalEncoding(T, d_model)(hidden)

    # Stack the encoder blocks, each consuming the previous block's output.
    for _ in range(num_layers):
        block = TransformerEncoderBlock(d_model, num_heads, ff_dim)
        hidden = block(hidden)

    return Model(sequence_in, hidden, name="SignBERT_Encoder")


In [11]:
def build_signbert_word_model(
    T=157,
    D=258,
    num_classes=132
):
    """Build the word classifier: encoder, temporal average pooling and a dense head.

    Args:
        T: Number of time steps in each input sequence.
        D: Number of features per time step.
        num_classes: Size of the softmax output layer.

    Returns:
        An uncompiled Keras model that maps ``(batch, T, D)`` inputs to
        ``(batch, num_classes)`` class probabilities.
    """
    encoder = build_signbert_encoder(T, D)

    # Collapse the time axis by averaging the encoder outputs.
    pooled = GlobalAveragePooling1D()(encoder.output)

    # Classification head: hidden layer, dropout, softmax over the classes.
    hidden = Dense(256, activation="relu")(pooled)
    hidden = Dropout(0.3)(hidden)
    class_probs = Dense(num_classes, activation="softmax")(hidden)

    return Model(encoder.input, class_probs)


In [12]:
# Size the output layer from the fitted label encoder rather than hard-coding
# the class count, so the softmax always has one unit per encoded label.
model = build_signbert_word_model(
    num_classes=len(le.classes_)
)

# The targets are integer class ids, hence the sparse variant of cross-entropy.
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-4),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

model.summary()


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 157, 258)]        0         
                                                                 
 dense (Dense)               (None, 157, 256)          66304     
                                                                 
 positional_encoding (Posit  (None, 157, 256)          0         
 ionalEncoding)                                                  
                                                                 
 transformer_encoder_block   (None, 157, 256)          2367488   
 (TransformerEncoderBlock)                                       
                                                                 
 transformer_encoder_block_  (None, 157, 256)          2367488   
 1 (TransformerEncoderBlock                                      
 )                                                           

In [13]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop once the validation loss has not improved for 6 epochs in a row and
# roll the model back to the weights of its best epoch.
early_stop = EarlyStopping(monitor="val_loss", patience=6, restore_best_weights=True)

# Train for at most 50 epochs; early stopping usually ends the run sooner.
history = model.fit(
    x=X_train_preprocessed,    # encoder input
    y=y_train_encoded,         # target word labels
    batch_size=32,
    epochs=50,
    validation_data=(X_val_preprocessed, y_val_encoded),
    callbacks=[early_stop],
)



Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50


In [14]:
# Score the trained model on the held-out test split; evaluate() returns the
# loss followed by the compiled metrics (here: accuracy).
test_loss, test_acc = model.evaluate(x=X_test_preprocessed, y=y_test_encoded)
print("Test accuracy:", test_acc)


Test accuracy: 0.586614191532135
