In [1]:
import cv2
import os
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import tensorflow as tf
from tensorflow.keras import layers, models

# for videos load 
def load_videos(video_dir, frame_size=(64, 64), max_frames=60):
    video_data = []
    labels = []
    
    for label_dir in os.listdir(video_dir):
        label_path = os.path.join(video_dir, label_dir)
        if os.path.isdir(label_path):
            label = label_dir
            for video_file in os.listdir(label_path):
                video_path = os.path.join(label_path, video_file)
                cap = cv2.VideoCapture(video_path)
                frames = []
                
                while len(frames) < max_frames:
                    ret, frame = cap.read()
                    if not ret:
                        break
                    frame = cv2.resize(frame, frame_size)
                    frames.append(frame)
                
                cap.release()
                
                while len(frames) < max_frames:
                    frames.append(np.zeros((frame_size[0], frame_size[1], 3), dtype=np.uint8))
                
                video_data.append(np.array(frames))
                labels.append(label)

    return np.array(video_data), np.array(labels)

video_dir = '../input/spin-ball'  
X, y = load_videos(video_dir)

# Preprocess labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

X = X / 255.0

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

#the 3D CNN model
model = models.Sequential()

model.add(layers.Conv3D(32, (3, 3, 3), activation='relu', input_shape=(60, 64, 64, 3)))
model.add(layers.MaxPooling3D((2, 2, 2)))

model.add(layers.Conv3D(64, (3, 3, 3), activation='relu'))
model.add(layers.MaxPooling3D((2, 2, 2)))

model.add(layers.Conv3D(128, (3, 3, 3), activation='relu'))
model.add(layers.MaxPooling3D((2, 2, 2)))

model.add(layers.Flatten())

model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dropout(0.5))  # Dropout for regularization

model.add(layers.Dense(len(label_encoder.classes_), activation='softmax'))

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

# Train the model
history = model.fit(X_train, y_train, epochs=20, validation_data=(X_test, y_test))

test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc}")

# classification Report
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
print(classification_report(y_test, y_pred_classes, target_names=label_encoder.classes_))

model.save('video_classification_3dcnn_model.h5')

# function to classify new videos
def classify_video(video_path, model, label_encoder, frame_size=(64, 64), max_frames=60):
    cap = cv2.VideoCapture(video_path)
    frames = []
    
    while len(frames) < max_frames:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.resize(frame, frame_size)
        frames.append(frame)
    
    cap.release()
    
    while len(frames) < max_frames:
        frames.append(np.zeros((frame_size[0], frame_size[1], 3), dtype=np.uint8))
    
    frames = np.array(frames) / 255.0
    frames = np.expand_dims(frames, axis=0) 
    
    predictions = model.predict(frames)
    predicted_class = np.argmax(predictions, axis=1)
    
    return label_encoder.inverse_transform(predicted_class)[0]

# example
video_path = '/kaggle/input/spin-ball/Left-arm unorthodox/71 (24).mp4' 
predicted_label = classify_video(video_path, model, label_encoder)
print(f'Predicted label: {predicted_label}')


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20


I0000 00:00:1725989965.205928     464 service.cc:145] XLA service 0x7f7bd8407390 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1725989965.205982     464 service.cc:153]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1725989965.205988     464 service.cc:153]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
2024-09-10 17:39:33.406072: E external/local_xla/xla/service/slow_operation_alarm.cc:65] Trying algorithm eng20{k2=2,k3=0} for conv (f32[32,3,3,3,3]{4,3,2,1,0}, u8[0]{0}) custom-call(f32[32,3,60,64,64]{4,3,2,1,0}, f32[32,32,58,62,62]{4,3,2,1,0}), window={size=3x3x3}, dim_labels=bf012_oi012->bf012, custom_call_target="__cudnn$convBackwardFilter", backend_config={"operation_queue_id":"0","wait_on_operation_queues":[],"cudnn_conv_backend_config":{"conv_result_scale":1,"activation_mode":"kNone","side_input_scale":0,"leakyrelu_alpha":0}} is taking a while...
2024-09-10 17:39:33.676718: E external/

[1m1/3[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m27s[0m 14s/step - accuracy: 0.6875 - loss: 0.6616

I0000 00:00:1725989977.055026     464 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 5s/step - accuracy: 0.6176 - loss: 3.3029 - val_accuracy: 0.4211 - val_loss: 0.7363
Epoch 2/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 196ms/step - accuracy: 0.4465 - loss: 0.7067 - val_accuracy: 0.5789 - val_loss: 0.6758
Epoch 3/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 195ms/step - accuracy: 0.6727 - loss: 0.6511 - val_accuracy: 0.5789 - val_loss: 0.6824
Epoch 4/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 195ms/step - accuracy: 0.6688 - loss: 0.6302 - val_accuracy: 0.5789 - val_loss: 0.6784
Epoch 5/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 192ms/step - accuracy: 0.6414 - loss: 0.6468 - val_accuracy: 0.5789 - val_loss: 0.6709
Epoch 6/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 195ms/step - accuracy: 0.6805 - loss: 0.6214 - val_accuracy: 0.5789 - val_loss: 0.6817
Epoch 7/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m