**I think we can start here with preprocessing and building up a pipeline for classifying with a traditional model from pose estimation**

In [5]:
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


# Claases

- Walking
- Standing
- Fast walking / jogging?
- Running

In [7]:
# import kagglehub
# Download latest version from kagglehub:

import kagglehub

# Download latest version

path = kagglehub.dataset_download("easonlll/hmdb51")


print("Path to dataset files:", path)

Using Colab cache for faster access to the 'hmdb51' dataset.
Path to dataset files: /kaggle/input/hmdb51


# Mount to my Drive

I'm just doing this here because I got tired of downloading the data again and again each time the runtime environment resets.

In [13]:
import os
import cv2
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models, applications
import random
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam



# Dataset and training configuration
DATASET_PATH = path + "/HMDB51"
TARGET_CLASSES = ["run", "walk", "stand"]
FRAME_COUNT = 16
IMAGE_SIZE = (160, 160)
BATCH_SIZE = 8

# Define generator

In [14]:
class VideoSequenceGenerator(tf.keras.utils.Sequence):
    """
    Generates batches of videos for training.
    Handles short videos via last-frame padding.
    Returns integer labels (sparse) for sparse_categorical_crossentropy.
    """
    def __init__(self, sample_paths, labels, batch_size=8, frame_count=16, image_size=(160,160), shuffle=True):
        self.sample_paths = sample_paths
        self.labels = labels
        self.batch_size = batch_size
        self.frame_count = frame_count
        self.image_size = image_size
        self.shuffle = shuffle
        self.indexes = np.arange(len(self.sample_paths))
        self.on_epoch_end()

    def __len__(self):
        # number of batches per epoch
        return int(np.ceil(len(self.sample_paths) / self.batch_size))

    def __getitem__(self, index):
        batch_indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        X = np.zeros((len(batch_indexes), self.frame_count, *self.image_size, 3), dtype=np.float32)
        y = np.zeros((len(batch_indexes),), dtype=np.int32)

        for i, idx in enumerate(batch_indexes):
            video_path = self.sample_paths[idx]
            frames = sorted([f for f in os.listdir(video_path) if f.lower().endswith(('.jpg','.png','.jpeg'))])

            selected = []
            while len(selected) < self.frame_count and len(frames) > 0:
                selected += frames
            selected = selected[:self.frame_count]

            # load frames
            video_frames = []
            for fname in selected:
                img = cv2.imread(os.path.join(video_path, fname))
                img = cv2.resize(img, self.image_size)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                img = tf.keras.applications.efficientnet.preprocess_input(img)
                video_frames.append(img)

            video_frames = np.array(video_frames, dtype=np.float32)

            # pad if too short
            if len(video_frames) < self.frame_count:
                last_frame = video_frames[-1] if len(video_frames)>0 else np.zeros((*self.image_size,3))
                missing = self.frame_count - len(video_frames)
                pad = np.repeat(last_frame[np.newaxis, ...], missing, axis=0)
                video_frames = np.concatenate([video_frames, pad], axis=0)

            X[i] = video_frames
            y[i] = self.labels[idx]

        return X, y

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indexes)


# Data discovery

In [15]:
# --- STEP: DATASET SCANNING / GATHERING PATHS & LABELS ---
def gather_sample_dirs(dataset_root, target_classes):
    """
    Scans the dataset directory and returns lists of sample folders and their labels.

    Args:
        dataset_root (str): Root path to dataset (contains folders for each class)
        target_classes (list of str): Classes to include, e.g., ["run", "walk", "stand"]

    Returns:
        sample_paths (np.array): Paths to each video folder
        sample_labels (np.array): Integer labels corresponding to classes
    """
    sample_paths = []
    sample_labels = []
    class_map = {cls: i for i, cls in enumerate(target_classes)}

    print(f"Scanning directory: {dataset_root}")

    for class_name in target_classes:
        class_dir = os.path.join(dataset_root, class_name)

        if not os.path.isdir(class_dir):
            print(f"Warning: Class folder '{class_name}' NOT found at {class_dir}")
            continue

        label_id = class_map[class_name]

        # Every video is stored as a folder of frames
        for video_folder in os.listdir(class_dir):
            video_path = os.path.join(class_dir, video_folder)
            if os.path.isdir(video_path):
                sample_paths.append(video_path)
                sample_labels.append(label_id)

    sample_paths = np.array(sample_paths)
    sample_labels = np.array(sample_labels)

    print(f"\n--- RESULTS ---")
    print(f"Total samples found: {len(sample_paths)}")
    if len(sample_paths) > 0:
        print(f"Example path: {sample_paths[0]}")
        print(f"Example label: {sample_labels[0]} ({target_classes[sample_labels[0]]})")

    return sample_paths, sample_labels


# --- EXECUTE THE SCAN ---
X_all, y_all = gather_sample_dirs(DATASET_PATH, TARGET_CLASSES)


Scanning directory: /kaggle/input/hmdb51/HMDB51

--- RESULTS ---
Total samples found: 934
Example path: /kaggle/input/hmdb51/HMDB51/run/THE_PROTECTOR_run_f_cm_np1_le_med_42
Example label: 0 (run)


# Train test split

In [16]:
from sklearn.model_selection import train_test_split

# Ensure we have some data
if len(X_all) == 0:
    raise ValueError("No data found. Check your DATASET_PATH and class folders.")

# Stratified split ensures each class is proportionally represented
X_train, X_val, y_train, y_val = train_test_split(
    X_all, y_all,
    test_size=0.2,
    stratify=y_all,
    random_state=42
)

print(f"Training samples: {len(X_train)}")
print(f"Validation samples: {len(X_val)}")


Training samples: 747
Validation samples: 187


# Initialize generator and check shape

In [17]:
# Initialize
train_gen = VideoSequenceGenerator(
    X_train,
    y_train,
    batch_size=BATCH_SIZE,
    frame_count=FRAME_COUNT,
    image_size=IMAGE_SIZE  # <- pass a tuple (height, width)
)

val_gen = VideoSequenceGenerator(
    X_val,
    y_val,
    batch_size=BATCH_SIZE,
    frame_count=FRAME_COUNT,
    image_size=IMAGE_SIZE
)

# --- DEBUG CHECK ---
# Grab the first batch to verify shapes
try:
    X_sample, y_sample = train_gen.__getitem__(0)
    print("Generator Check Passed!")
    print(f"Input Shape (Batch, Frames, H, W, Ch): {X_sample.shape}")
    print(f"Labels Shape: {y_sample.shape}")
except Exception as e:
    print(f"Generator Failed: {e}")

Generator Check Passed!
Input Shape (Batch, Frames, H, W, Ch): (8, 16, 160, 160, 3)
Labels Shape: (8,)


# Wrap Generators as tf.data.Dataset

In [18]:
def convert_to_tf_dataset(generator):
    """
    Wraps a Keras Sequence into a tf.data.Dataset that repeats indefinitely.
    """
    def gen():
        for i in range(len(generator)):
            yield generator[i]

    output_signature = (
        tf.TensorSpec(shape=(None, FRAME_COUNT, IMAGE_SIZE[0], IMAGE_SIZE[1], 3), dtype=tf.float32),
        tf.TensorSpec(shape=(None,), dtype=tf.int32)
    )

    ds = tf.data.Dataset.from_generator(gen, output_signature=output_signature)
    ds = ds.repeat()  # repeat indefinitely for fit()
    ds = ds.prefetch(tf.data.AUTOTUNE)
    return ds


In [19]:
train_ds = convert_to_tf_dataset(train_gen)
val_ds   = convert_to_tf_dataset(val_gen)


# Model build (pre-trained efficientnet)

In [20]:
def build_tunable_model():
    video_input = layers.Input(shape=(FRAME_COUNT, *IMAGE_SIZE, 3))
    base_cnn = applications.EfficientNetB0(
        weights='imagenet',
        include_top=False,
        input_shape=(*IMAGE_SIZE, 3)
    )

    # fine-tune top 20 layers
    base_cnn.trainable = True
    for layer in base_cnn.layers[:-20]:
        layer.trainable = False

    encoded_frames = layers.TimeDistributed(base_cnn)(video_input)
    encoded_frames = layers.TimeDistributed(layers.GlobalAveragePooling2D())(encoded_frames)
    video_summary = layers.GlobalAveragePooling1D()(encoded_frames)

    output = layers.Dense(len(TARGET_CLASSES), activation='softmax')(video_summary)
    model = models.Model(inputs=video_input, outputs=output)
    model.compile(
        optimizer=Adam(1e-4),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

model = build_tunable_model()
model.summary()


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


# Train the model on our classes

In [22]:
from google.colab import drive
drive.mount('/content/drive')  # Mount Google Drive first

# steps per epoch = number of batches in generator
train_steps = len(train_gen)
val_steps = len(val_gen)

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=15,
    steps_per_epoch=train_steps,
    validation_steps=val_steps,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=4,
            restore_best_weights=True
        )
    ]
)

model.save("hmdb51_efficientnet.h5")
# Save the trained model to Drive
model.save("/content/drive/MyDrive/hmdb51_efficientnet.h5")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Epoch 1/15
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m805s[0m 5s/step - accuracy: 0.5275 - loss: 0.9817 - val_accuracy: 0.6043 - val_loss: 0.7801
Epoch 2/15
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 923ms/step - accuracy: 0.7297 - loss: 0.6474 - val_accuracy: 0.6578 - val_loss: 0.7294
Epoch 3/15
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 559ms/step - accuracy: 0.8096 - loss: 0.5100 - val_accuracy: 0.6791 - val_loss: 0.7165
Epoch 4/15
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 425ms/step - accuracy: 0.8739 - loss: 0.3977 - val_accuracy: 0.6738 - val_loss: 0.7261
Epoch 5/15
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 428ms/step - accuracy: 0.9246 - loss: 0.3031 - val_accuracy: 0.6631 - val_loss: 0.7498
Epoch 6/15
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37



In [None]:
from tensorflow.keras.models import load_model
model = load_model("hmdb51_efficientnet.h5")

In [None]:
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report

# --- 1. Preload all validation videos into memory ---
X_val_all = []
y_val_all = []

for i in range(len(val_gen)):
    X_batch, y_batch = val_gen[i]
    X_val_all.append(X_batch)
    y_val_all.append(y_batch)

X_val_all = np.concatenate(X_val_all, axis=0)
y_val_all = np.concatenate(y_val_all, axis=0)

# --- 2. Predict all at once ---
preds = model.predict(X_val_all, verbose=1)
y_pred = np.argmax(preds, axis=1)
y_true = y_val_all

# --- 3. Confusion matrix ---
cm = confusion_matrix(y_true, y_pred)
print("Confusion Matrix:\n", cm)

# Optional nicer display
import matplotlib.pyplot as plt
import seaborn as sns

plt.figure(figsize=(6,5))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=TARGET_CLASSES, yticklabels=TARGET_CLASSES)
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Validation Confusion Matrix")
plt.show()

# Classification report
print(classification_report(y_true, y_pred, target_names=TARGET_CLASSES))


NameError: name 'val_gen' is not defined