In [None]:
import os
import cv2
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.applications import MobileNetV3Small
from tensorflow.keras.layers import GRU, Dense, BatchNormalization, Flatten, TimeDistributed, GlobalAveragePooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, roc_curve, auc
import time
import pickle
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, LearningRateScheduler, ReduceLROnPlateau

from google.colab import drive

In [None]:
# Mount Google Drive into the Colab runtime; every path used by this notebook
# lives under '/content/drive', so this must succeed before any other cell runs.
drive.mount('/content/drive')

In [None]:
# Root folder of the raw videos; process_and_save_dataset expects one
# sub-folder per class ('Normal', 'Shoplifting') directly beneath it.
DATASET_PATH = '/content/drive/My Drive/Shoplifting-Datasets/Shoplifting-Dataset-Merged-Balanced'
# Number of frames kept per video after uniform sampling / padding.
SEQUENCE_LENGTH = 30
# Size handed to cv2.resize for every frame (cv2.resize interprets it as (width, height)).
FRAME_SIZE = (224, 224)
# Folder that receives one pickle file per successfully processed video.
OUTPUT_PATH = '/content/drive/My Drive/Shoplifting-Datasets/preprocessed_data'

# Create the output folder up front; exist_ok=True keeps the cell re-runnable.
os.makedirs(OUTPUT_PATH, exist_ok=True)

# uniformly sample frames
def uniform_sampling(frames, num_samples):
    """Pick ``num_samples`` frames spread evenly over ``frames``.

    Positions are generated with ``np.linspace`` between index 0 and the last
    index and truncated to integers, so an index can occur more than once
    when ``num_samples`` is larger than ``len(frames)``.

    Args:
        frames: Indexable sequence of frames.
        num_samples: How many frames to return.

    Returns:
        list: The selected frames in ascending index order.
    """
    last_index = len(frames) - 1
    positions = np.linspace(0, last_index, num_samples, dtype=int)

    sampled = []
    for position in positions:
        sampled.append(frames[position])
    return sampled

# pad frames if a video is too short
def pad_frames(frames, num_samples):
    """Extend a short clip to ``num_samples`` frames by repeating its last frame.

    The input sequence is not modified; a new list is built and returned.

    Args:
        frames: Sequence of frames, possibly shorter than ``num_samples``.
        num_samples: Desired number of frames in the result.

    Returns:
        list: The frames followed by repeated references to the final frame,
        cut to ``num_samples`` entries.

    Raises:
        ValueError: If padding is needed but ``frames`` is empty, because
            there is no last frame to repeat.
    """
    # Work on a copy so the caller's list is never grown as a side effect.
    padded = list(frames)
    missing = num_samples - len(padded)

    if missing > 0:
        if not padded:
            raise ValueError(
                "cannot pad an empty frame list: there is no last frame to repeat"
            )
        padded.extend([padded[-1]] * missing)

    return padded[:num_samples]

# preprocess a single video
def preprocess_video(video_path, sequence_length, frame_size):
    """Decode a video into a fixed-length stack of RGB frames scaled to [0, 1].

    Every frame of the video is read, converted from BGR to RGB and resized
    to ``frame_size``. All decoded frames are held in memory before the clip
    is reduced to ``sequence_length`` frames: ``uniform_sampling`` is used
    when the video has at least that many frames, ``pad_frames`` otherwise.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        sequence_length: Number of frames in the returned array.
        frame_size: Target size handed to ``cv2.resize`` (interpreted by
            OpenCV as ``(width, height)``).

    Returns:
        np.ndarray: Floating-point array whose first axis has length
        ``sequence_length``; pixel values are the decoded values divided
        by 255.0.

    Raises:
        ValueError: If no frame could be read from ``video_path`` (missing,
            unreadable or empty file).
    """
    cap = cv2.VideoCapture(video_path)
    frames = []

    try:
        # Extract all frames; isOpened() is False for files OpenCV cannot open,
        # in which case the loop body never runs.
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # Convert to RGB
            frame = cv2.resize(frame, frame_size)  # Resize to target size
            frames.append(frame)
    finally:
        # Release the capture even if conversion/resizing raised.
        cap.release()

    # Fail with a message that names the file instead of letting the padding
    # step crash with "list index out of range".
    if not frames:
        raise ValueError(f"No frames could be read from video: {video_path}")

    # Handle short or long videos
    if len(frames) >= sequence_length:
        frames = uniform_sampling(frames, sequence_length)
    else:
        frames = pad_frames(frames, sequence_length)

    # Normalize pixel values to [0, 1] with a single vectorized division.
    return np.array(frames) / 255.0

# process and save the dataset incrementally
def process_and_save_dataset(dataset_path, sequence_length, frame_size, output_path,
                             class_names=('Normal', 'Shoplifting')):
    """Preprocess every video of every class and pickle each result separately.

    For each class folder under ``dataset_path`` the video files are passed
    through ``preprocess_video`` and written to ``output_path`` as
    ``"<class>_<video file name>_0.pkl"`` containing the tuple
    ``(frames, label)``. Writing one file per video keeps memory use bounded.

    Args:
        dataset_path: Folder containing one sub-folder per class.
        sequence_length: Frames per video, forwarded to ``preprocess_video``.
        frame_size: Frame size, forwarded to ``preprocess_video``.
        output_path: Existing folder that receives the pickle files.
        class_names: Class sub-folder names; the position of a name in this
            sequence is the integer label stored with its videos.

    Raises:
        OSError: If a class folder cannot be listed. Errors for individual
            videos are printed and counted, not raised.
    """
    video_extensions = ('.mp4', '.avi', '.mov', '.mkv')
    saved = 0
    failed = 0

    for label, class_name in enumerate(class_names):
        class_path = os.path.join(dataset_path, class_name)

        # os.listdir returns entries in arbitrary order; sort for a stable run log.
        for video_file in sorted(os.listdir(class_path)):
            # Compare case-insensitively so files such as "clip.MP4" are not skipped.
            if not video_file.lower().endswith(video_extensions):
                continue

            video_path = os.path.join(class_path, video_file)
            try:
                frames = preprocess_video(video_path, sequence_length, frame_size)

                # The trailing "_0" is the index of the (single) processed
                # variant; it is kept so file names match earlier runs.
                save_path = os.path.join(output_path, f"{class_name}_{video_file}_0.pkl")
                with open(save_path, 'wb') as file:
                    pickle.dump((frames, label), file)
                saved += 1

            except Exception as e:
                # Best effort: one broken video must not abort the whole pass.
                failed += 1
                print(f"Error processing video {video_path}: {e}")

    # Make skipped videos visible; silent failures would unbalance the dataset.
    print(f"Saved {saved} video(s); {failed} failed.")


# Run the preprocessing pass over the whole dataset. Failures for individual
# videos are printed by process_and_save_dataset rather than raised, so scan
# the output for "Error processing video" lines before trusting the result.
print("Processing and saving dataset...")
process_and_save_dataset(DATASET_PATH, SEQUENCE_LENGTH, FRAME_SIZE, OUTPUT_PATH)
print("Dataset processing complete!")





In [None]:
import os

OUTPUT_PATH = '/content/drive/My Drive/Shoplifting-Datasets/preprocessed_data'

# Sanity check: count the regular files (sub-directories are ignored) that
# the preprocessing step left in the output folder.
num_files = sum(
    1
    for entry in os.listdir(OUTPUT_PATH)
    if os.path.isfile(os.path.join(OUTPUT_PATH, entry))
)

print(f"Total number of files in the folder: {num_files}")



In [None]:
# Same folder the preprocessing step writes to; presumably repeated here so
# this cell can be run without re-executing the preprocessing cell.
OUTPUT_PATH = '/content/drive/My Drive/Shoplifting-Datasets/preprocessed_data'

# Load and split the dataset into train, validation, and test sets
def load_and_split_dataset(output_path):
    """Load every pickled sample and split it 80/10/10 into train/val/test.

    Each ``.pkl`` file in ``output_path`` must contain a ``(frames, label)``
    tuple. All samples are loaded into memory at once, stacked into arrays
    and split with two stratified ``train_test_split`` calls
    (``random_state=42``). Other files and sub-directories are ignored.

    Args:
        output_path: Folder holding the per-video pickle files.

    Returns:
        tuple: ``((X_train, y_train), (X_val, y_val), (X_test, y_test))``.

    Raises:
        ValueError: If ``output_path`` contains no ``.pkl`` files.
    """
    data = []
    labels = []

    # os.listdir order is arbitrary; a fixed random_state only gives a
    # reproducible split when the samples arrive in a fixed order, so sort.
    sample_files = sorted(
        name for name in os.listdir(output_path)
        if name.endswith('.pkl') and os.path.isfile(os.path.join(output_path, name))
    )

    for file in sample_files:
        file_path = os.path.join(output_path, file)
        with open(file_path, 'rb') as f:
            # NOTE: pickle.load can execute arbitrary code. Only point this
            # function at folders whose contents you produced yourself.
            frames, label = pickle.load(f)
        data.append(frames)
        labels.append(label)

    if not data:
        raise ValueError(f"No .pkl samples found in {output_path}")

    data = np.array(data)
    labels = np.array(labels)

    # 80 % train, then the remaining 20 % is halved into validation and test.
    X_train, X_temp, y_train, y_temp = train_test_split(
        data, labels, test_size=0.2, random_state=42, stratify=labels
    )
    X_val, X_test, y_val, y_test = train_test_split(
        X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
    )

    return (X_train, y_train), (X_val, y_val), (X_test, y_test)


# Load every preprocessed sample and split it into train/validation/test.
print("Loading and splitting dataset...")
(X_train, y_train), (X_val, y_val), (X_test, y_test) = load_and_split_dataset(OUTPUT_PATH)
print("Loading and splitting dataset completed")
# Print data shapes
print(f"Training data shape: {X_train.shape}, Training labels shape: {y_train.shape}")
print(f"Validation data shape: {X_val.shape}, Validation labels shape: {y_val.shape}")
print(f"Test data shape: {X_test.shape}, Test labels shape: {y_test.shape}")

In [None]:


def save_data(data, file_name):
    """Pickle ``data`` into ``file_name``, replacing any existing file.

    Args:
        data: Any picklable object.
        file_name: Destination path; opened in binary write mode.
    """
    with open(file_name, 'wb') as sink:
        pickle.Pickler(sink).dump(data)


# Drive folder that receives the three split files (train/val/test).
drive_save_path = '/content/drive/My Drive/Shoplifting-Datasets/processed_data_no_augmentation_splited801010'

import os
# exist_ok=True replaces the exists()-then-makedirs() check: it is the same
# idiom used for the preprocessing output folder and cannot fail if the
# folder appears between the check and the creation.
os.makedirs(drive_save_path, exist_ok=True)


# SEQUENCE_LENGTH is defined in the preprocessing configuration cell and is
# embedded in the file names, so that cell must have been run in this session.
print("Saving processed and split data...")
save_data((X_train, y_train), os.path.join(drive_save_path, f'train_data_{SEQUENCE_LENGTH}.pkl'))
save_data((X_val, y_val), os.path.join(drive_save_path, f'val_data_{SEQUENCE_LENGTH}.pkl'))
save_data((X_test, y_test), os.path.join(drive_save_path, f'test_data_{SEQUENCE_LENGTH}.pkl'))

print("Data saving complete!")
