In [None]:
import os
import cv2
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ReduceLROnPlateau
from sklearn.utils.class_weight import compute_class_weight

In [2]:
import torch

# Report the installed PyTorch build and whether it can reach a CUDA device.
print("PyTorch version:", torch.__version__)
cuda_ok = torch.cuda.is_available()
print("CUDA available:", cuda_ok)

if not cuda_ok:
    print("No GPU detected by PyTorch.")
else:
    # Describe device 0.
    print("GPU Name:", torch.cuda.get_device_name(0))
    print("Current Device:", torch.cuda.current_device())
    total_bytes = torch.cuda.get_device_properties(0).total_memory
    print("Total GPU Memory (MB):", total_bytes // (1024 ** 2))

    # Smoke test: add two small tensors that live on the GPU.
    x = torch.tensor([1.0, 2.0], device="cuda")
    y = torch.tensor([3.0, 4.0], device="cuda")
    z = x + y
    print("Result from GPU:", z)


PyTorch version: 2.6.0+cu118
CUDA available: True
GPU Name: NVIDIA GeForce RTX 3050 Laptop GPU
Current Device: 0
Total GPU Memory (MB): 4095
Result from GPU: tensor([4., 6.], device='cuda:0')


In [None]:
import sys

# DLLs probed by this cell: the CUDA 11.x runtime and cuDNN 8 libraries.
CUDA_DLLS = ("cudart64_110.dll", "cudnn64_8.dll")

if sys.platform == "win32":
    # ctypes.WinDLL only exists on Windows, so it is imported inside the guard;
    # importing it unconditionally raises ImportError on Linux/macOS kernels.
    from ctypes import WinDLL

    # Succeeds silently if every DLL is found; raises OSError for a missing one.
    for dll_name in CUDA_DLLS:
        WinDLL(dll_name)
else:
    print("Skipping CUDA DLL check: ctypes.WinDLL is only available on Windows.")


In [1]:
import tensorflow as tf
# Show the TensorFlow build, then the GPU devices TensorFlow can see.
print("TensorFlow version:", tf.__version__)
visible_gpus = tf.config.list_physical_devices('GPU')
print("GPU Available:", visible_gpus)


TensorFlow version: 2.10.0
GPU Available: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]


In [None]:
def setup_gpu():
    """Enable on-demand ("growth") memory allocation on every visible GPU.

    Without memory growth TensorFlow reserves almost all GPU memory up front.
    The function reports honestly when there is nothing to configure, and it
    does not crash when the cell is re-run after TensorFlow has already
    initialized the GPUs (memory growth can only be set before that point).

    Returns:
        None. Status is reported via ``print``.
    """
    gpus = tf.config.list_physical_devices('GPU')
    if not gpus:
        print("⚠️ No GPU detected by TensorFlow; nothing to configure.")
        return

    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        # Raised when the runtime is already initialized, e.g. on a cell re-run.
        print(f"⚠️ Could not enable GPU memory growth: {err}")
        return

    print("✅ GPU setup complete.")


setup_gpu()

In [None]:
# Load and preprocess .mp4 videos
def load_video(video_path, max_frames=30, target_size=(224, 224)):
    """Decode the first ``max_frames`` frames of a video into a float32 RGB clip.

    Args:
        video_path: Location of the video file. Accepts a ``str``, UTF-8
            ``bytes``, or any object exposing ``.numpy()`` that returns bytes
            (such as the eager string tensor handed over by ``tf.py_function``).
        max_frames: Number of frames in the returned clip. Shorter videos are
            zero-padded at the end; longer videos are truncated.
        target_size: ``(width, height)`` each frame is resized to, i.e. the
            ``dsize`` convention of ``cv2.resize``.

    Returns:
        ``np.ndarray`` of shape ``(max_frames, height, width, 3)`` and dtype
        ``float32`` in RGB channel order. Pixel values are divided by 255, so
        8-bit frames map to ``[0, 1]``. If no frame can be read (for example
        the file is missing or cannot be decoded) the clip is all zeros.
    """
    # Normalize the path: tensor -> bytes -> str.
    if hasattr(video_path, "numpy"):
        video_path = video_path.numpy()
    if isinstance(video_path, bytes):
        video_path = video_path.decode("utf-8")

    width, height = target_size
    # Pre-filled with zeros, so any frames that are never read act as padding.
    # The array is (height, width), the layout cv2.resize produces; padding with
    # target_size + (3,) only matched the frames for square sizes.
    video = np.zeros((max_frames, height, width, 3), dtype=np.float32)

    cap = cv2.VideoCapture(str(video_path))
    try:
        for index in range(max_frames):
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.resize(frame, (width, height))
            # OpenCV decodes to BGR; image models pretrained on RGB data
            # (including MoViNet) need the channels swapped.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            video[index] = frame.astype(np.float32) / 255.0
    finally:
        # Always free the decoder handle, even if a frame fails to process.
        cap.release()

    return video

In [None]:
def get_class_names(data_dir):
    """Return the sorted sub-directory names of ``data_dir`` and print the mapping.

    Each immediate sub-directory is treated as one class; a name's position in
    the sorted list is its label index. Plain files are ignored.

    Args:
        data_dir: Directory whose immediate sub-directories are the classes.

    Returns:
        list[str]: Class names in ascending sorted order.
    """
    found = []
    for entry in os.listdir(data_dir):
        if os.path.isdir(os.path.join(data_dir, entry)):
            found.append(entry)
    found.sort()

    print("Label Mapping (index → class):")
    for idx, name in enumerate(found):
        print(f"{idx:2d} → {name}")

    return found


def create_dataset(directory, batch_size=8, shuffle=True, classes=None):
    """Build a batched ``tf.data`` pipeline of ``(video, label)`` pairs.

    ``directory`` must contain one sub-folder per class; every ``.mp4`` file in
    a class folder becomes one example, decoded lazily by ``load_video``.

    Args:
        directory: Root folder of one data split.
        batch_size: Number of clips per batch.
        shuffle: When True (default) the examples are reshuffled on every
            iteration. Pass False for a fixed, reproducible order.
        classes: Ordered class-folder names; a name's position is its integer
            label. Defaults to the notebook-level ``class_names`` list.

    Returns:
        ``tf.data.Dataset`` yielding ``(video, label)`` batches, where ``video``
        has shape ``(batch, 30, 224, 224, 3)`` and dtype ``float32``.

    Raises:
        ValueError: If no ``.mp4`` file is found under ``directory``.
        FileNotFoundError: If a class folder is missing from ``directory``.
    """
    if classes is None:
        # Fall back to the global mapping so existing calls keep working.
        classes = class_names

    video_paths = []
    labels = []
    for idx, class_name in enumerate(classes):
        class_dir = os.path.join(directory, class_name)
        # os.listdir order is arbitrary; sorting makes the file list reproducible.
        for f in sorted(os.listdir(class_dir)):
            if f.endswith(".mp4"):
                video_paths.append(os.path.join(class_dir, f))
                labels.append(idx)

    if not video_paths:
        raise ValueError(f"No .mp4 files found under {directory!r}")

    dataset = tf.data.Dataset.from_tensor_slices((video_paths, labels))

    if shuffle:
        # Shuffle the cheap (path, label) pairs BEFORE decoding, with a buffer
        # covering the whole list. Shuffling decoded clips with a 100-element
        # buffer kept ~100 x 18 MB of video in RAM and, because files are
        # listed class by class, produced batches dominated by one class.
        dataset = dataset.shuffle(len(video_paths))

    def _parse_function(filename, label):
        video = tf.py_function(load_video, [filename], tf.float32)
        # py_function loses static shape info; this must match load_video's
        # defaults (max_frames=30, target_size=(224, 224)).
        video.set_shape([30, 224, 224, 3])
        return video, label

    dataset = dataset.map(_parse_function, num_parallel_calls=tf.data.AUTOTUNE)
    return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)


In [None]:
# The class folders of the training split define the label mapping used everywhere.
class_names = get_class_names("../Dataset/split_data/train")
num_classes = len(class_names)

# One input pipeline per split, built in train / val / test order.
train_data, val_data, test_data = [
    create_dataset(split_dir, batch_size=8)
    for split_dir in (
        "../Dataset/split_data/train",
        "../Dataset/split_data/val",
        "../Dataset/split_data/test",
    )
]

In [None]:
# Rebuild the flat list of training labels from the per-class file counts.
train_labels = []
for idx, cls in enumerate(class_names):
    class_dir = os.path.join("../Dataset/split_data/train", cls)
    # Count only the .mp4 files, i.e. exactly the files create_dataset() loads.
    # Counting every directory entry would skew the weights whenever a class
    # folder holds anything else (thumbnails, metadata, sub-folders, ...).
    count = sum(1 for entry in os.listdir(class_dir) if entry.endswith(".mp4"))
    train_labels += [idx] * count

# "balanced" weights are inversely proportional to class frequency:
# n_samples / (n_classes * count_per_class).
weights = compute_class_weight(
    class_weight="balanced",
    classes=np.arange(len(class_names)),
    y=np.asarray(train_labels),
)
# Keras expects a {class_index: weight} mapping; plain floats keep it simple.
class_weights = {i: float(w) for i, w in enumerate(weights)}

In [None]:
# TensorFlow Hub handle of the MoViNet-A3 "base" Kinetics-600 classification model.
model_id, mode, version = 'a3', 'base', '3'
model_url = f"https://tfhub.dev/tensorflow/movinet/{model_id}/{mode}/kinetics-600/classification/{version}"

# Symbolic model input: clips of 30 frames, 224x224 pixels, 3 channels.
inputs = keras.Input(name="input_video", shape=(30, 224, 224, 3))

class MoViNetFeatureExtractor(tf.keras.layers.Layer):
    """Keras layer that wraps a TensorFlow Hub MoViNet model.

    The hub model takes a dict input keyed by ``'image'``; this wrapper lets it
    sit in a functional model that feeds a plain video tensor.

    NOTE(review): the handle used in this notebook points at a
    ``classification`` model, so the output is presumably the Kinetics-600
    logits rather than pooled features — confirm against the hub page.

    The layer implements ``get_config`` so that a model containing it can be
    serialized (e.g. ``model.save("...h5")``). Reload such a model with
    ``custom_objects={"MoViNetFeatureExtractor": MoViNetFeatureExtractor}``.

    Args:
        hub_url: TensorFlow Hub handle of the MoViNet model.
        trainable: Whether the hub model's weights are fine-tuned.
        **kwargs: Standard ``Layer`` arguments (``name``, ``dtype``, ...).
    """

    def __init__(self, hub_url, trainable=False, **kwargs):
        super().__init__(trainable=trainable, **kwargs)
        # Kept so get_config() can describe how to rebuild the layer.
        self.hub_url = hub_url
        self.movinet_layer = hub.KerasLayer(hub_url, trainable=trainable)

    def call(self, inputs, training=None):
        # Expecting input of shape (None, 30, 224, 224, 3)
        return self.movinet_layer({'image': inputs}, training=training)

    def get_config(self):
        """Return the constructor arguments needed to re-create this layer."""
        config = super().get_config()  # already carries name/trainable/dtype
        config.update({"hub_url": self.hub_url})
        return config

# Backbone: the hub MoViNet wrapped as a Keras layer, with its weights trainable.
x = MoViNetFeatureExtractor(model_url, trainable=True)(inputs)

# Head: two Dense + Dropout stages (512 then 256 units).
for units, drop_rate in ((512, 0.5), (256, 0.3)):
    x = layers.Dense(units, activation='relu')(x)
    x = layers.Dropout(drop_rate)(x)

# Final softmax over the dataset's own classes.
outputs = layers.Dense(num_classes, activation='softmax')(x)

model = keras.Model(inputs=inputs, outputs=outputs)
model.summary()

# Labels are integer class indices, hence the sparse cross-entropy loss.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    metrics=["accuracy"],
)

# Halve the learning rate after 3 epochs without val_loss improvement (floor 1e-6).
lr_callback = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=1e-6,
    verbose=1,
)

In [None]:
# Train with class weighting and learning-rate scheduling, validating each epoch.
fit_options = dict(
    validation_data=val_data,
    epochs=1,
    class_weight=class_weights,
    callbacks=[lr_callback],
)
model.fit(train_data, **fit_options)

# Persist the trained model in HDF5 format.
# NOTE(review): HDF5 serialization needs every custom layer to implement
# get_config() — confirm MoViNetFeatureExtractor does before a long run.
model.save("movinet_final_model.h5")

In [None]:
# Score the model on the test split; with one compiled metric, evaluate()
# returns the loss followed by the accuracy.
test_metrics = model.evaluate(test_data)
loss, acc = test_metrics
print(f"Test Accuracy: {acc * 100:.2f}%")