In [None]:
import os
import cv2
import numpy as np
from tensorflow.keras.utils import to_categorical
from kaggle.api.kaggle_api_extended import KaggleApi

# Kaggle API credentials must not be hard-coded in this file.
# KaggleApi.authenticate() looks them up from the KAGGLE_USERNAME / KAGGLE_KEY
# environment variables or from ~/.kaggle/kaggle.json, so configure one of
# those outside the source tree before running this script.
# SECURITY: an API key was previously committed here. Treat it as leaked and
# revoke/regenerate it from the Kaggle account settings.

# Authenticate Kaggle API (raises if no credentials are configured)
api = KaggleApi()
api.authenticate()

# Download dataset using Kaggle API
dataset_name = "mohamedmustafa/real-life-violence-situations-dataset"
output_dir = "Our_Dataset"

# Download and unzip the dataset only when the output directory is missing;
# an existing directory is assumed to already hold the extracted data.
if not os.path.exists(output_dir):
    print("Downloading dataset...")
    api.dataset_download_files(dataset_name, path=output_dir, unzip=True)
    print("Dataset downloaded and extracted successfully.")

# Define preprocessing parameters
image_height, image_width = 96, 96  # Updated to match model input shape
sequence_length = 16  # Number of frames sampled from each video
class_list = ["Violence", "Non Violence"]  # Index in this list is the class label

def frame_extraction(video_path):
    """Sample ``sequence_length`` evenly spaced frames from a video file.

    Each sampled frame is resized to ``image_width`` x ``image_height`` and
    divided by 255.0. When fewer than ``sequence_length`` frames can be
    read, the last successfully read frame is repeated to pad the sequence.

    Args:
        video_path: Path of the video file to read.

    Returns:
        A NumPy array holding ``sequence_length`` frames along its first
        axis, or an empty array (``shape[0] == 0``) when the file cannot be
        opened or no frame can be decoded (for example a non-video file such
        as ``.DS_Store``). Callers can detect the failure by comparing
        ``shape[0]`` with ``sequence_length``.
    """
    video_reader = cv2.VideoCapture(video_path)
    frame_list = []

    try:
        if not video_reader.isOpened():
            print(f"Warning: Unable to open {video_path}. Skipping.")
            return np.array(frame_list)

        video_frame_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
        # Step between sampled frames; at least 1 so short videos still advance.
        skip_frames_window = max(video_frame_count // sequence_length, 1)

        for frame_counter in range(sequence_length):
            video_reader.set(cv2.CAP_PROP_POS_FRAMES, skip_frames_window * frame_counter)
            success, frame = video_reader.read()
            if not success or frame is None:
                print(f"Warning: Unable to read frame {frame_counter} from {video_path}")
                break
            # cv2.resize takes dsize as (width, height), not (height, width).
            resized_frame = cv2.resize(frame, (image_width, image_height))
            normalized_frame = resized_frame / 255.0  # Normalize pixel values
            frame_list.append(normalized_frame)
    finally:
        # Always free the capture handle, even if reading raised.
        video_reader.release()

    # Nothing was decoded: there is no "last frame" to pad with, so signal
    # failure with an empty array instead of indexing an empty list.
    if not frame_list:
        print(f"Warning: No frames could be read from {video_path}. Skipping.")
        return np.array(frame_list)

    # Ensure sequence length consistency
    if len(frame_list) < sequence_length:
        print(f"Warning: {video_path} has insufficient frames. Padding with last frame.")
        frame_list.extend([frame_list[-1]] * (sequence_length - len(frame_list)))

    return np.array(frame_list)

def create_dataset():
    """Extract frame sequences and one-hot labels for every class directory.

    For each name in ``class_list`` the directory
    ``<output_dir>/Real Life Violence Dataset/<class name>`` is scanned and
    every regular, non-hidden file is passed to ``frame_extraction``. Only
    videos that yield exactly ``sequence_length`` frames are kept.

    Returns:
        A tuple ``(features, labels, video_file_paths)`` where ``features``
        is an array of the kept frame sequences, ``labels`` is a one-hot
        array of shape ``(num_samples, len(class_list))`` and
        ``video_file_paths`` is the list of source paths in the same order.
    """
    features = []
    labels = []
    video_file_paths = []

    for class_index, class_name in enumerate(class_list):
        print(f"Processing class: {class_name}")
        class_path = os.path.join(output_dir, "Real Life Violence Dataset", class_name)

        if not os.path.isdir(class_path):
            print(f"Directory not found: {class_path}")
            continue

        # os.listdir order is arbitrary; sort so the sample order is reproducible.
        for file_name in sorted(os.listdir(class_path)):
            video_file_path = os.path.join(class_path, file_name)

            # Skip hidden/metadata entries (e.g. .DS_Store) and sub-directories,
            # which are not videos and cannot be decoded.
            if file_name.startswith(".") or not os.path.isfile(video_file_path):
                continue

            frames = frame_extraction(video_file_path)
            if frames.shape[0] == sequence_length:
                features.append(frames)
                labels.append(class_index)
                video_file_paths.append(video_file_path)

    features = np.asarray(features)

    # One-hot encode the integer class indices. No squeeze afterwards: it
    # would collapse a single-sample result from (1, num_classes) to
    # (num_classes,) and break the (num_samples, num_classes) contract.
    labels = to_categorical(
        np.asarray(labels, dtype="int64"), num_classes=len(class_list)
    )

    print("Final Labels shape:", labels.shape)  # Expected: (num_samples, 2)
    return features, labels, video_file_paths


if __name__ == "__main__":
    # Script entry point: build the dataset, report its shapes, persist it.
    print("Starting feature extraction...")
    features, labels, video_file_paths = create_dataset()

    # Report the shapes of the extracted arrays.
    # Expected: features (num_samples, 16, 96, 96, 3), labels (num_samples, 2)
    for caption, extracted in (
        ("Features shape:", features),
        ("Labels shape:", labels),
    ):
        print(caption, extracted.shape)

    # Persist every artifact under the Features/ directory.
    os.makedirs("Features", exist_ok=True)
    artifacts = (
        ("Features/features_Own.npy", features),
        ("Features/labels_Own.npy", labels),
        ("Features/video_file_paths_Own.npy", video_file_paths),
    )
    for target_path, payload in artifacts:
        np.save(target_path, payload)

    print("Feature extraction and saving completed successfully.")



Starting feature extraction...
Processing class: Violence


OpenCV: Couldn't read video stream from file "Our_Dataset/Real Life Violence Dataset/Violence/.DS_Store"


IndexError: list index out of range