In [17]:
import os
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint

In [18]:
def load_data(data_path):
    """Load landmark sequences from a directory tree of per-class text files.

    Every immediate sub-directory of ``data_path`` is treated as one class.
    Each ``*.txt`` file inside a class directory is one sample: every
    non-blank line is a comma-separated list of floats (one row per line).

    Directory and file listings are sorted so that the integer label assigned
    to each class, and the order of the samples, are the same on every run
    and every machine (``os.listdir`` gives no ordering guarantee).

    Args:
        data_path: Root directory containing one sub-directory per class.

    Returns:
        A tuple ``(X, y, label_map)`` where ``X`` is ``np.array`` of the
        per-file row lists, ``y`` is ``np.array`` of integer labels aligned
        with ``X``, and ``label_map`` maps class-directory name to its label.
        Every class directory receives a label, even if it holds no
        ``.txt`` files.

    Raises:
        ValueError: If a non-blank line contains a field that cannot be
            parsed as a float.
    """
    X = []
    y = []
    label_map = {}

    # Sorted traversal -> reproducible label ids (alphabetical by folder name).
    for word_folder in sorted(os.listdir(data_path)):
        word_path = os.path.join(data_path, word_folder)
        if not os.path.isdir(word_path):
            continue  # stray files next to the class folders are ignored

        # Next free id for a new class; folder names are unique so this
        # always inserts, but setdefault keeps the intent explicit.
        label = label_map.setdefault(word_folder, len(label_map))

        for text_file in sorted(os.listdir(word_path)):
            if not text_file.endswith(".txt"):
                continue
            text_file_path = os.path.join(word_path, text_file)
            with open(text_file_path, "r") as f:
                # Skip blank lines (e.g. a trailing newline at end of file);
                # float("") would otherwise abort the whole load.
                sequence_data = [
                    [float(value) for value in line.strip().split(",")]
                    for line in f
                    if line.strip()
                ]
            X.append(sequence_data)
            y.append(label)

    X = np.array(X)
    y = np.array(y)
    return X, y, label_map


In [19]:
def create_model(input_shape, num_classes):
    """Build and compile a stacked-LSTM sequence classifier.

    Architecture: LSTM(128, returning the full sequence) -> LSTM(64) ->
    Dense(32, relu) -> Dense(num_classes, softmax). The model is compiled
    with the Adam optimizer, sparse categorical cross-entropy loss (so the
    targets are integer class ids, not one-hot vectors) and an accuracy metric.

    Args:
        input_shape: Shape of one sample, without the batch dimension.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    model = tf.keras.Sequential([
        # Declare the input with an explicit Input object instead of passing
        # input_shape= to the first layer; Keras 3 emits a UserWarning for
        # the latter form.
        tf.keras.Input(shape=input_shape),
        tf.keras.layers.LSTM(128, return_sequences=True),
        tf.keras.layers.LSTM(64),
        tf.keras.layers.Dense(32, activation="relu"),
        tf.keras.layers.Dense(num_classes, activation="softmax"),
    ])
    model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
    return model

In [20]:
def train_model(X, y, validation_split=0.2, epochs=50, batch_size=32, log_dir="logs", model_dir="models",
                random_state=None):
    """Split the data, train a fresh model, and checkpoint the best weights.

    Args:
        X: Array indexed as ``X[sample, step, feature]``; ``X.shape[1]`` and
            ``X.shape[2]`` define the model's input shape.
        y: Integer class labels aligned with ``X``.
        validation_split: Fraction of the samples held out for validation
            (passed to ``train_test_split`` as ``test_size``).
        epochs: Number of training epochs.
        batch_size: Mini-batch size passed to ``model.fit``.
        log_dir: Directory handed to the TensorBoard callback.
        model_dir: Directory in which ``best_model.weights.h5`` is written
            whenever validation accuracy improves.
        random_state: Seed forwarded to ``train_test_split``. The default
            ``None`` keeps the original unseeded behaviour; pass an int to
            get the same train/validation split on every run.

    Returns:
        The fitted model. It holds the weights from the final epoch; the
        best-scoring weights are only written to the checkpoint file.
    """
    input_shape = (X.shape[1], X.shape[2])
    # Size the softmax by the largest label id rather than the number of
    # distinct labels: if the ids have gaps (e.g. a class with no samples),
    # counting distinct values would leave the highest ids out of range for
    # the sparse cross-entropy loss.
    num_classes = int(np.max(y)) + 1

    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=validation_split, stratify=y, random_state=random_state
    )

    model = create_model(input_shape, num_classes)

    # Make sure the checkpoint directory exists before training starts
    # instead of depending on the callback to create it.
    os.makedirs(model_dir, exist_ok=True)

    tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)
    checkpoint_callback = ModelCheckpoint(
        filepath=os.path.join(model_dir, "best_model.weights.h5"),
        save_weights_only=True,
        monitor="val_accuracy",
        mode="max",
        save_best_only=True,
        verbose=1
    )

    model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=epochs, batch_size=batch_size,
              callbacks=[tensorboard_callback, checkpoint_callback])
    return model

In [21]:
def export_model(model, model_path):
    """Write ``model`` to ``model_path`` via its ``save`` method and report it.

    Args:
        model: Any object exposing ``save(path)``.
        model_path: Destination handed unchanged to ``model.save``.
    """
    destination = model_path
    model.save(destination)
    confirmation = "Model exported to {}".format(destination)
    print(confirmation)

In [22]:
# Example usage
# Relative paths consumed by the cells below.
data_path = "data/text_data"  # root folder passed to load_data()
model_path = "models/best_model.h5"  # destination passed to export_model()
log_dir = "logs"  # forwarded to train_model() as log_dir
model_dir = "models"  # forwarded to train_model() as model_dir

In [23]:
# Read every sample under data_path; label_map maps class-folder name -> integer label.
X, y, label_map = load_data(data_path)

In [24]:
# Train with the default split, epoch count and batch size; TensorBoard logs go
# to log_dir and the best-weights checkpoint is written under model_dir.
model = train_model(X, y, log_dir=log_dir, model_dir=model_dir)

  super().__init__(**kwargs)


Epoch 1/50
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 0.1594 - loss: 2.4921
Epoch 1: val_accuracy improved from -inf to 0.38500, saving model to models\best_model.weights.h5
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 42ms/step - accuracy: 0.1601 - loss: 2.4891 - val_accuracy: 0.3850 - val_loss: 1.7760
Epoch 2/50
[1m73/75[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - accuracy: 0.4426 - loss: 1.5031
Epoch 2: val_accuracy improved from 0.38500 to 0.64833, saving model to models\best_model.weights.h5
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 37ms/step - accuracy: 0.4454 - loss: 1.4955 - val_accuracy: 0.6483 - val_loss: 0.9506
Epoch 3/50
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - accuracy: 0.6644 - loss: 0.8804
Epoch 3: val_accuracy improved from 0.64833 to 0.71833, saving model to models\best_model.weights.h5
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [25]:
# Save the model returned by train_model() to model_path.
# NOTE(review): train_model() returns the model as it stands after the final
# epoch; the best-val_accuracy weights live only in the checkpoint file, so
# the exported file may not be the "best" model despite its name.
export_model(model, model_path)



Model exported to model.h5
