In [1]:
import os
import cv2
import numpy as np
import random
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, LSTM, TimeDistributed
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

In [2]:
# Set random seed for reproducibility
random.seed(42)

# Define the dataset path
dataset_path = "UCF11_updated_mpg"


In [3]:
# Function to load and preprocess video frames
def load_video_frames(video_path, frame_count=30):
    cap = cv2.VideoCapture(video_path)
    frames = []
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_interval = max(1, total_frames // frame_count)
    
    for i in range(0, total_frames, frame_interval):
        cap.set(cv2.CAP_PROP_POS_FRAMES, i)
        ret, frame = cap.read()
        if ret:
            frame = cv2.resize(frame, (64, 64))
            frames.append(frame)
        if len(frames) == frame_count:
            break
            
    cap.release()
    
    if len(frames) < frame_count:
        for _ in range(frame_count - len(frames)):
            frames.append(frames[-1])  # Pad with last frame if fewer frames
    
    return np.array(frames)

In [4]:
# Load dataset and preprocess
def load_dataset(dataset_path, frame_count=30):
    data = []
    labels = []
    
    for category in os.listdir(dataset_path):
        category_path = os.path.join(dataset_path, category)
        for video in os.listdir(category_path):
            video_path = os.path.join(category_path, video)
            frames = load_video_frames(video_path, frame_count)
            data.append(frames)
            labels.extend([category] * frame_count)  # Assign the same label to all frames in a video
    
    return np.array(data), np.array(labels)

In [5]:

# Load data and labels
data, labels = load_dataset(dataset_path)

# Normalize pixel values
data = data.astype("float32") / 255.0

# Convert labels to categorical
unique_labels = np.unique(labels)
label_map = {label: idx for idx, label in enumerate(unique_labels)}
labels = np.array([label_map[label] for label in labels])
labels = to_categorical(labels, num_classes=len(unique_labels))

# Check the original shape of data and labels
print("Original data shape:", data.shape)
print("Original labels shape:", labels.shape)

# Determine the number of samples
num_samples = len(labels) // 30

# Calculate the total number of frames
total_frames = num_samples * 30
print(f"Total frames: {total_frames}")

# Reshape data and labels
data = data.reshape((num_samples, 30, 64, 64, 3))
labels = labels.reshape((num_samples, 30, len(unique_labels)))

# Verify the reshaping
print("Reshaped data shape:", data.shape)
print("Reshaped labels shape:", labels.shape)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

Original data shape: (464, 30, 64, 64, 3)
Original labels shape: (13920, 4)
Total frames: 13920
Reshaped data shape: (464, 30, 64, 64, 3)
Reshaped labels shape: (464, 30, 4)


In [6]:
# Build a CNN-LSTM model using TimeDistributed
def build_cnn_lstm_model(input_shape, num_classes):
    model = Sequential()

    # TimeDistributed wrapper around a simple CNN
    model.add(TimeDistributed(Conv2D(32, (3, 3), activation='relu'), input_shape=input_shape))
    model.add(TimeDistributed(MaxPooling2D((2, 2))))
    model.add(TimeDistributed(Dropout(0.25)))

    model.add(TimeDistributed(Conv2D(64, (3, 3), activation='relu')))
    model.add(TimeDistributed(MaxPooling2D((2, 2))))
    model.add(TimeDistributed(Dropout(0.25)))

    model.add(TimeDistributed(Conv2D(128, (3, 3), activation='relu')))
    model.add(TimeDistributed(MaxPooling2D((2, 2))))
    model.add(TimeDistributed(Dropout(0.25)))

    # Flatten the output and feed into an LSTM
    model.add(TimeDistributed(Flatten()))
    model.add(LSTM(256, return_sequences=True))  # Return sequences for each time step
    model.add(Dropout(0.5))

    # Output layer for each time step
    model.add(TimeDistributed(Dense(num_classes, activation='softmax')))
    
    return model

# Define model parameters
input_shape = (30, 64, 64, 3)  # 30 frames of 64x64 RGB images
num_classes = len(unique_labels)

# Build and compile the model
model = build_cnn_lstm_model(input_shape, num_classes)
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])




  super().__init__(**kwargs)


In [7]:
model.summary()

In [8]:
# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=8, validation_data=(X_test, y_test))

Epoch 1/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m92s[0m 2s/step - accuracy: 0.3427 - loss: 1.3442 - val_accuracy: 0.6240 - val_loss: 0.9606
Epoch 2/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 2s/step - accuracy: 0.6792 - loss: 0.8640 - val_accuracy: 0.6233 - val_loss: 0.8440
Epoch 3/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 2s/step - accuracy: 0.7574 - loss: 0.6250 - val_accuracy: 0.7720 - val_loss: 0.5645
Epoch 4/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 2s/step - accuracy: 0.8015 - loss: 0.5229 - val_accuracy: 0.7753 - val_loss: 0.5598
Epoch 5/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 2s/step - accuracy: 0.8440 - loss: 0.4062 - val_accuracy: 0.8133 - val_loss: 0.4660
Epoch 6/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 2s/step - accuracy: 0.8523 - loss: 0.3814 - val_accuracy: 0.7606 - val_loss: 0.6015
Epoch 7/10
[1m47/47[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x24fc3b5e410>

In [9]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 319ms/step - accuracy: 0.9303 - loss: 0.3025
Test Accuracy: 92.37%
