In [3]:
import os
import random
import shutil

# Function to split the data into training and testing sets
def split_ucf11_dataset(data_dir, train_dir, test_dir, train_split=0.8, seed=None):
    """Move each class's entries from ``data_dir`` into train/test folders.

    Every immediate sub-directory of ``data_dir`` is treated as one class.
    The entries of a class directory are sorted, shuffled, and then the first
    ``int(n * train_split)`` are moved to ``train_dir/<class>`` while the rest
    are moved to ``test_dir/<class>``.

    Entries are *moved*, not copied: after a run the class directories under
    ``data_dir`` are empty, so running the function again on the same
    ``data_dir`` reports 0 videos for every class.

    Args:
        data_dir: Directory containing one sub-directory per class.
        train_dir: Destination root for the training portion.
        test_dir: Destination root for the testing portion.
        train_split: Fraction in [0, 1] of each class's entries sent to train.
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            module-level ``random`` state is used (the previous behavior).

    Raises:
        ValueError: If ``train_split`` is not within [0, 1].
        OSError: Propagated from ``os.listdir`` / ``shutil.move`` (for
            example ``FileNotFoundError`` when ``data_dir`` does not exist).
    """
    # Validate before touching the filesystem so a bad call has no side effects.
    if not 0.0 <= train_split <= 1.0:
        raise ValueError(f"train_split must be between 0 and 1, got {train_split!r}")

    # A private generator keeps a seeded run from disturbing global RNG state.
    rng = random if seed is None else random.Random(seed)

    # Never treat the destination roots as classes, in case they live inside
    # data_dir (e.g. on a second run).
    reserved = {os.path.abspath(train_dir), os.path.abspath(test_dir)}
    classes = sorted(
        d for d in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, d))
        and os.path.abspath(os.path.join(data_dir, d)) not in reserved
    )

    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    for cls in classes:
        cls_dir = os.path.join(data_dir, cls)

        # os.listdir order is arbitrary; sorting first makes the shuffle
        # depend only on the RNG state.
        videos = sorted(os.listdir(cls_dir))
        rng.shuffle(videos)

        train_size = int(len(videos) * train_split)
        train_videos = videos[:train_size]
        test_videos = videos[train_size:]

        partitions = (
            (os.path.join(train_dir, cls), train_videos),
            (os.path.join(test_dir, cls), test_videos),
        )
        for dst_dir, names in partitions:
            os.makedirs(dst_dir, exist_ok=True)
            for name in names:
                shutil.move(os.path.join(cls_dir, name), os.path.join(dst_dir, name))

        print(f"Class '{cls}': {len(train_videos)} videos in training, {len(test_videos)} videos in testing.")

    print("Data split complete.")

# Root of the extracted dataset; the split treats each sub-directory as a class.
# Update this to wherever the archive was extracted.
extracted_data_dir = r"UCF11_updated_mpg"

# Destination roots for the training and testing partitions.
train_path = "datasets/train"
test_path = "datasets/test"

# Seed the module-level RNG used by the split's shuffle so the partition is
# repeatable (given the same directory listing order).
random.seed(42)

# NOTE: the split *moves* files out of `extracted_data_dir`. Re-running this
# cell on an already-split dataset finds nothing left to move and reports
# 0 videos for every class.
split_ucf11_dataset(extracted_data_dir, train_path, test_path)


Class 'basketball': 0 videos in training, 0 videos in testing.
Class 'biking': 0 videos in training, 0 videos in testing.
Class 'diving': 0 videos in training, 0 videos in testing.
Class 'golf_swing': 0 videos in training, 0 videos in testing.
Class 'horse_riding': 0 videos in training, 0 videos in testing.
Class 'soccer_juggling': 0 videos in training, 0 videos in testing.
Class 'swing': 0 videos in training, 0 videos in testing.
Class 'tennis_swing': 0 videos in training, 0 videos in testing.
Class 'trampoline_jumping': 0 videos in training, 0 videos in testing.
Class 'volleyball_spiking': 0 videos in training, 0 videos in testing.
Class 'walking': 0 videos in training, 0 videos in testing.
Data split complete.


In [8]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv3D, MaxPooling3D, Flatten, Dense, Dropout, LSTM, TimeDistributed

def build_model(input_shape, num_classes):
    """Build an uncompiled Conv3D + LSTM classifier for video clips.

    Two Conv3D/MaxPooling3D stages are followed by a per-time-step flatten,
    an LSTM over the remaining time steps, a dense layer and a softmax output.

    Args:
        input_shape: Shape of one clip without the batch axis, given as
            (number of frames, height, width, channels).
        num_classes: Number of output classes (width of the softmax layer).

    Returns:
        A ``Sequential`` model; the caller is responsible for compiling it.
    """
    return Sequential([
        # Declare the input explicitly instead of passing `input_shape=` to the
        # first layer, which current Keras versions warn about for Sequential.
        tf.keras.Input(shape=input_shape),

        # First Conv3D stage (Dropout(0.2) after it is currently disabled).
        Conv3D(32, kernel_size=(3, 3, 3), activation='relu'),
        MaxPooling3D(pool_size=(2, 2, 2)),

        # Second Conv3D stage (Dropout(0.3) after it is currently disabled).
        Conv3D(64, kernel_size=(3, 3, 3), activation='relu'),
        MaxPooling3D(pool_size=(2, 2, 2)),

        # Flatten each remaining time step into a vector so the LSTM receives
        # a (time, features) sequence; only the final LSTM state is kept.
        TimeDistributed(Flatten()),
        LSTM(128, return_sequences=False),

        # Dense head (Dropout(0.5) after it is currently disabled).
        Dense(256, activation='relu'),

        # Class probabilities.
        Dense(num_classes, activation='softmax'),
    ])

# Clip geometry the network is built for: (frames, height, width, channels).
input_shape = (30, 112, 112, 3)

# UCF11 has 11 action classes.
num_classes = 11

# Instantiate the network, then attach the optimizer, loss and metric.
# The sparse loss expects integer class ids rather than one-hot vectors.
model = build_model(input_shape=input_shape, num_classes=num_classes)
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Show the layer-by-layer summary.
model.summary()


  super().__init__(


In [None]:
from sklearn.model_selection import train_test_split
import numpy as np

# Placeholder dimensions (replace with your actual data)
num_samples = 220   # Number of samples in the dataset
num_frames = 30     # Number of frames per video
height = 112        # Height of the frames
width = 112         # Width of the frames
channels = 3        # Number of color channels (RGB)

# Synthetic stand-in data drawn from a seeded generator so reruns match.
# float32 halves the footprint of this tensor (about 1 GB instead of the
# roughly 2 GB that a float64 array of this size needs).
rng = np.random.default_rng(42)
X = rng.random((num_samples, num_frames, height, width, channels), dtype=np.float32)
y = rng.integers(0, 11, size=num_samples)  # integer labels for 11 classes

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# NOTE: inputs and labels are random, so there is nothing to learn; accuracy
# hovering around chance (1/11, about 0.09) is the expected outcome here.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=25,
    batch_size=16,
)

# The file name is reused by the prediction cell, which reloads the model.
model.save('video_action_recognition_model.h5')


Epoch 1/25
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 1s/step - accuracy: 0.0899 - loss: 2.4593 - val_accuracy: 0.1136 - val_loss: 2.4484
Epoch 2/25
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1s/step - accuracy: 0.0819 - loss: 2.4270 - val_accuracy: 0.1136 - val_loss: 2.4081
Epoch 3/25
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1s/step - accuracy: 0.1143 - loss: 2.3802 - val_accuracy: 0.0682 - val_loss: 2.4083
Epoch 4/25
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 1s/step - accuracy: 0.1191 - loss: 2.3833 - val_accuracy: 0.0682 - val_loss: 2.4122
Epoch 5/25
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2s/step - accuracy: 0.1170 - loss: 2.3805 - val_accuracy: 0.1136 - val_loss: 2.4036
Epoch 6/25
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 1s/step - accuracy: 0.0815 - loss: 2.3761 - val_accuracy: 0.0682 - val_loss: 2.4144
Epoch 7/25
[1m11/11[0m [32m━━━━━━━━━━

In [None]:
from tensorflow.keras.models import load_model

# Reload the model from the .h5 file written by the training cell; this
# rebinds the notebook-level `model` name to the loaded copy.
model = load_model('video_action_recognition_model.h5')

def predict_action(video_path, model, label_map, target_size=(112, 112), max_frames=30):
    """Predict the action label for a single video.

    The clip is loaded with ``preprocess_video``, given a leading batch axis
    and passed to ``model.predict``; the class with the highest score is
    mapped back to its label through ``label_map``.

    NOTE(review): ``preprocess_video`` is not defined in the visible part of
    this notebook. It is assumed to return an array shaped
    (frames, height, width) or (frames, height, width, channels) - confirm.
    The channel count must still match what ``model`` was built for.

    Args:
        video_path: Path of the video to classify.
        model: Object exposing ``predict(batch)`` that returns per-class scores.
        label_map: Mapping from label to integer class index.
        target_size: Frame size forwarded to ``preprocess_video``.
        max_frames: Frame count forwarded to ``preprocess_video``.

    Returns:
        The label whose index in ``label_map`` equals the predicted class.

    Raises:
        IndexError: If no label in ``label_map`` has the predicted index.
    """
    frames = np.asarray(preprocess_video(video_path, target_size, max_frames))

    # Only a clip without a channel axis needs one appended. Appending it
    # unconditionally turns an RGB clip into a 6-D batch the model rejects.
    if frames.ndim == 3:
        frames = frames[..., np.newaxis]

    # Leading batch axis: the model predicts on batches, here of size one.
    batch = frames[np.newaxis, ...]

    scores = np.asarray(model.predict(batch))
    predicted_label = int(np.argmax(scores[0]))

    # Reverse lookup: first label whose index matches the predicted class.
    for label, idx in label_map.items():
        if idx == predicted_label:
            return label

    raise IndexError(
        f"predicted class index {predicted_label} has no entry in label_map"
    )

# Placeholder location of the clip to classify; point this at a real file.
video_path = '/path/to/new/video.mp4'

# NOTE(review): `label_map` is not defined in the cells visible here - confirm
# it is created before this cell runs.
predicted_action = predict_action(video_path, model=model, label_map=label_map)
print(f'The predicted action is: {predicted_action}')
