<a href="https://colab.research.google.com/github/neel26desai/data_cleaning_and_eda/blob/main/EDAVideo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

We will be loading performing EDA and cleaning the  a subset of the UCF101 dataset and build a simple classification model (classifying actions as Bowling or GolfSwing).

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [1]:
import os

def load_video_paths_and_labels(data_dir):
    video_paths = []
    labels = []
    categories = os.listdir(data_dir)

    for category in categories:
        category_path = os.path.join(data_dir, category)
        if os.path.isdir(category_path):
            for i,video_filename in enumerate(os.listdir(category_path)):
              if i<20:
                video_path = os.path.join(category_path, video_filename)
                video_paths.append(video_path)
                labels.append(category)

    return video_paths, labels


In [2]:
# Define the directory where your dataset is stored
data_dir = '/content/drive/MyDrive/UCF101'

# Load the video paths and their corresponding labels
video_paths, labels = load_video_paths_and_labels(data_dir)


In [3]:
import cv2

def print_video_info(video_path):
    cap = cv2.VideoCapture(video_path)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    cap.release()

    print(f"Video Path: {video_path}")
    print(f"Frame Count: {frame_count}")
    print(f"Resolution: {frame_width}x{frame_height}")
    print(f"FPS: {fps}")


In [4]:
# Now iterate over each video path and print its information
for video_path in video_paths:
    print_video_info(video_path)

Video Path: /content/drive/MyDrive/UCF101/Bowling/v_Bowling_g14_c04.avi
Frame Count: 127
Resolution: 320x240
FPS: 25.0
Video Path: /content/drive/MyDrive/UCF101/Bowling/v_Bowling_g12_c02.avi
Frame Count: 105
Resolution: 320x240
FPS: 25.0
Video Path: /content/drive/MyDrive/UCF101/Bowling/v_Bowling_g16_c01.avi
Frame Count: 113
Resolution: 320x240
FPS: 25.0
Video Path: /content/drive/MyDrive/UCF101/Bowling/v_Bowling_g04_c04.avi
Frame Count: 50
Resolution: 320x240
FPS: 25.0
Video Path: /content/drive/MyDrive/UCF101/Bowling/v_Bowling_g09_c02.avi
Frame Count: 135
Resolution: 320x240
FPS: 25.0
Video Path: /content/drive/MyDrive/UCF101/Bowling/v_Bowling_g14_c03.avi
Frame Count: 129
Resolution: 320x240
FPS: 25.0
Video Path: /content/drive/MyDrive/UCF101/Bowling/v_Bowling_g12_c04.avi
Frame Count: 90
Resolution: 320x240
FPS: 25.0
Video Path: /content/drive/MyDrive/UCF101/Bowling/v_Bowling_g13_c03.avi
Frame Count: 119
Resolution: 320x240
FPS: 25.0
Video Path: /content/drive/MyDrive/UCF101/Bowling/

## Data Cleaning
In this step you would go throught eh data to make sure all files are readble and labels are consistent

In [5]:
import numpy as np

def preprocess_video(video_path, num_frames=16):
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        for _ in range(num_frames):
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.resize(frame, (224, 224))  # Resize to match the model input
            frame = frame / 255.0  # Normalize pixel values
            frames.append(frame)
    finally:
        cap.release()

    # If the video is shorter than `num_frames`, we pad with zeros
    if len(frames) < num_frames:
        frames += [np.zeros_like(frames[0]) for _ in range(num_frames - len(frames))]

    return np.array(frames)


In [6]:
# Assume you have a list of video paths called video_paths
# Choose the number of frames you want to extract from each video
num_frames = 20

# Preprocess all videos
preprocessed_videos = [preprocess_video(video_path, num_frames=num_frames) for video_path in video_paths]

# Now preprocessed_videos is a list where each item is a numpy array of preprocessed frames from a video


# preprocessed_video now contains the preprocessed frames from the first video


In [7]:
from tensorflow.keras.layers import Dense, LSTM, TimeDistributed, Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.models import Sequential

def create_model(num_classes):
    model = Sequential([
        # Assuming that each frame is preprocessed to be 224x224 pixels
        TimeDistributed(Conv2D(32, (3, 3), activation='relu'), input_shape=(20, 224, 224, 3)),
        TimeDistributed(MaxPooling2D(2, 2)),
        TimeDistributed(Flatten()),
        LSTM(10),
        Dense(num_classes, activation='sigmoid')
    ])

    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model


In [9]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
# Initialize the LabelEncoder

from sklearn.preprocessing import LabelEncoder

# Sample labels list

# Initialize the LabelEncoder
label_encoder = LabelEncoder()

# Fit label encoder and return encoded labels
encoded_labels = label_encoder.fit_transform(labels)
# Fit label encoder and return encoded labels
num_classes = len(np.unique(labels))  # Determine the number of unique classes

# Convert labels to categorical one-hot encoding
labels_categorical = to_categorical(encoded_labels, num_classes=num_classes)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    np.array(preprocessed_videos), labels_categorical, test_size=0.2, random_state=42
)

# Create the model with the updated number of frames (20 in your case)
model = create_model(num_classes)

# Train the model
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=5,  # Adjust the number of epochs as necessary
    batch_size=10  # Adjust the batch size as necessary
)

# Evaluate the model on the test data
scores = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {scores[1]*100}%")



Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Accuracy: 62.5%
