In [None]:
# Dataset locations, relative to the notebook's working directory:
# one data directory and one label CSV per split.
train_data, train_labels = './data/train', './data/train.csv'
val_data, val_labels = './data/val', './data/val.csv'

In [None]:
from utilities import (
    GestureDataGenerator,
    checks_and_balances,
    get_callbacks,
    plot_training_history,
    set_seed,
)

# Run the project's start-up helper first, then fix the random seed so that
# everything executed after this cell starts from seed 42.
# NOTE(review): what checks_and_balances() verifies is defined in utilities,
# not here — confirm before relying on it.
checks_and_balances()
set_seed(seed=42)

In [None]:
# Settings used when building the generators.
load_fraction = 1.0
batch_size = 64
image_size = (224, 224)

# Keyword arguments that are identical for the training and validation generators.
# Disabled options kept for reference: workers=4, use_multiprocessing=True,
# max_queue_size=10.
shared_generator_options = dict(
    batch_size=batch_size,
    image_size=image_size,
    debug=False,
    use_mediapipe=False,
)

# Augmentations are configured for the training split only.
train_augmentations = {
    'rotation': 5,        # Rotate up to ±5 degrees
    'brightness': True,   # Random brightness adjustment
    'contrast': True,     # Random contrast adjustment
    'scaling': True,      # Random scaling (zoom)
    'translation': True,  # Random translation (shift)
    'blur': True,         # Apply Gaussian blur
}

# Training generator: shuffled, augmented, loading `load_fraction` of the data.
train_generator = GestureDataGenerator(
    data_path=train_data,
    labels_csv=train_labels,
    augmentations=train_augmentations,
    shuffle=True,
    load_fraction=load_fraction,
    **shared_generator_options,
)

# Sanity check: pull the first batch and report what the generator yields.
X, y = train_generator[0]
print("Input batch shape (X):", X.shape)  # Expected shape: (batch_size, sequence_length, 224, 224, 3)
print("Labels batch shape (y):", y.shape)  # Expected shape: (batch_size, num_classes)
print("First label in batch (one-hot):", y[0])

# Validation generator: no augmentation, fixed order, always the full split.
val_generator = GestureDataGenerator(
    data_path=val_data,
    labels_csv=val_labels,
    augmentations=None,
    shuffle=False,
    load_fraction=1.0,
    **shared_generator_options,
)

In [None]:
# Model dimensions, read back from the training generator so that they follow
# whatever the generator was configured with.
sequence_length, image_size, num_classes = (
    train_generator.sequence_length,  # frames per video
    train_generator.image_size,       # image size (height, width)
    train_generator.num_classes,      # number of gesture classes
)

# Shape of one sample: (frames, height, width, 3 colour channels).
frame_height, frame_width = image_size[0], image_size[1]
input_shape = (sequence_length, frame_height, frame_width, 3)

In [None]:
from tensorflow.keras.applications import MobileNetV2

# ImageNet-pretrained MobileNetV2 without its classification head, used as a
# per-frame feature extractor.
cnn_base = MobileNetV2(
    input_shape=(image_size[0], image_size[1], 3),
    include_top=False,
    weights="imagenet",
)

# Freeze the whole backbone: none of its weights are updated during training.
cnn_base.trainable = False

cnn_base.summary()

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TimeDistributed, GRU, Dropout, Dense, MaxPooling2D, Flatten, Input

# CNN + GRU sequence classifier.
#
# The input shape and the number of output units come from `input_shape` and
# `num_classes` (derived from the training generator) rather than being
# hard-coded, so the model always matches the data the generators yield and
# the frame size `cnn_base` was built with.
model = Sequential([
    Input(shape=input_shape, name="Input_Layer"),             # (frames, height, width, 3) per sample
    TimeDistributed(cnn_base, name="CNN_Layer"),              # CNN base to process each frame independently
    TimeDistributed(MaxPooling2D(), name="MaxPooling_Layer"), # Max pooling to reduce spatial dimensions
    TimeDistributed(Flatten(), name="Flatten_Layer"),         # Flatten spatial dimensions into feature vectors
    GRU(32, return_sequences=False, name="GRU_Layer"),        # GRU over the frame axis; keeps only the final state
    Dropout(0.5, name="Dropout_Layer"),                       # Dropout for regularization
    Dense(num_classes, activation="softmax", name="Output_Layer")  # One softmax unit per gesture class
], name="CNN_GRU_Model")

# Compile the model. Categorical cross-entropy matches the one-hot labels
# produced by the generator.
model.compile(
    optimizer='adam',
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

# Model summary
model.summary()

In [None]:
# Checkpoint path: passed to get_callbacks(filepath=...) and later to
# model.load_weights(...) before evaluation.
model_save_location = 'best-models/pretrained-MobileNetV2+GRU.keras'

In [None]:
# Build the three training callbacks from the project helper; the checkpoint
# path is the only argument supplied here.
(
    checkpoint_callback,
    reduce_lr_callback,
    early_stopping_callback,
) = get_callbacks(filepath=model_save_location)

In [None]:
# Callbacks applied during fitting, in the order they are passed to Keras.
training_callbacks = [
    checkpoint_callback,
    reduce_lr_callback,
    early_stopping_callback,
]

# First training run: up to 30 epochs, validated on the validation generator.
history_1 = model.fit(
    x=train_generator,
    validation_data=val_generator,
    epochs=30,
    callbacks=training_callbacks,
    verbose=1,
)

# Plot the recorded history for this run.
plot_training_history(histories=[history_1])

In [None]:
# Optional continuation run (disabled). Uncomment to reload the checkpoint and
# keep training up to epoch 50, then plot both histories together.
# NOTE(review): initial_epoch=30 presumes the first run used all 30 epochs;
# if the early-stopping callback ended it sooner, adjust this value — confirm.

# model.load_weights(model_save_location)

# # Train the model for more epochs
# history_2 = model.fit(
#     x=train_generator,
#     validation_data=val_generator,
#     epochs=50,                   # Train for more epochs
#     initial_epoch=30,            # Start counting previous epochs
#     callbacks=[checkpoint_callback, reduce_lr_callback, early_stopping_callback],
#     verbose=1
# )

# plot_training_history(histories=[history_1, history_2])

In [None]:
# Restore the weights stored at the checkpoint path before evaluating.
model.load_weights(model_save_location)

# Ask Keras for a {metric_name: value} dict instead of pairing the returned
# list with model.metrics_names: on Keras 3, metrics_names reports
# "compile_metrics" rather than the individual metric names, so zipping the
# two mislabels (or drops) values.
evaluation_metrics = model.evaluate(val_generator, verbose=1, return_dict=True)

# Keep the list form under the original name for any later cell that indexes it.
evaluation_results = list(evaluation_metrics.values())

for metric, value in evaluation_metrics.items():
    print(f"{metric}: {value:.4f}")