In [1]:
pip install torch torchvision opencv-python


Note: you may need to restart the kernel to use updated packages.


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
import os


In [7]:
import os
from sklearn.model_selection import train_test_split
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image

# Step 1: Define the Dataset Class
class VideoFrameDataset(Dataset):
    def __init__(self, frame_paths, labels, transform=None):
        self.frame_paths = frame_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.frame_paths)

    def __getitem__(self, idx):
        # Load image
        frame_path = self.frame_paths[idx]
        image = Image.open(frame_path).convert("RGB")
        
        # Apply transformations
        if self.transform:
            image = self.transform(image)
        
        # Load label
        label = self.labels[idx]
        return image, label

# Step 2: Gather all frame paths and labels
root_dir = "C:\\Users\\Rohini\\capstone_Project\\prjct2\\frames"  # Replace with your base folder
classes = {"frames_abuse": 0, "frames_arrest": 1, "frames_normal": 2}
frame_paths = []
labels = []

for class_folder, label in classes.items():
    class_path = os.path.join(root_dir, class_folder)
    if not os.path.exists(class_path):
        print(f"Warning: Directory not found: {class_path}")
        continue

    for video_folder in os.listdir(class_path):
        video_path = os.path.join(class_path, video_folder)
        if not os.path.isdir(video_path):
            continue
        
        # Collect all frames within the video folder
        for frame_file in os.listdir(video_path):
            if frame_file.endswith(".jpg"):
                frame_paths.append(os.path.join(video_path, frame_file))
                labels.append(label)





In [8]:
sequence_length = 10  # Number of frames per sequence (adjust based on your data)
frame_sequences = []  # List to store sequences of frames
sequence_labels = []  # List to store labels for each sequence

for class_folder, label in classes.items():
    class_path = os.path.join(root_dir, class_folder)
    if not os.path.exists(class_path):
        print(f"Warning: Directory not found: {class_path}")
        continue

    print(f"\nClass: {class_folder} (Label: {label})")
    for video_folder in os.listdir(class_path):
        video_path = os.path.join(class_path, video_folder)
        if not os.path.isdir(video_path):
            continue
        
        print(f"  Video Folder: {video_folder}")  # Print video folder names

        # Collect all frames within the video folder
        video_frames = [
            os.path.join(video_path, frame_file)
            for frame_file in sorted(os.listdir(video_path))
            if frame_file.endswith(".jpg")
        ]

        # Split frames into sequences
        for i in range(0, len(video_frames) - sequence_length + 1, sequence_length):
            frame_sequences.append(video_frames[i:i + sequence_length])
            sequence_labels.append(label)



Class: frames_abuse (Label: 0)
  Video Folder: Abuse001_x264
  Video Folder: Abuse002_x264
  Video Folder: Abuse003_x264
  Video Folder: Abuse004_x264
  Video Folder: Abuse005_x264
  Video Folder: Abuse006_x264
  Video Folder: Abuse007_x264
  Video Folder: Abuse008_x264
  Video Folder: Abuse009_x264
  Video Folder: Abuse010_x264

Class: frames_arrest (Label: 1)
  Video Folder: Arrest001_x264
  Video Folder: Arrest002_x264
  Video Folder: Arrest003_x264
  Video Folder: Arrest004_x264
  Video Folder: Arrest005_x264
  Video Folder: Arrest006_x264
  Video Folder: Arrest007_x264
  Video Folder: Arrest008_x264
  Video Folder: Arrest009_x264
  Video Folder: Arrest010_x264

Class: frames_normal (Label: 2)
  Video Folder: Normal_Videos_003_x264
  Video Folder: Normal_Videos_006_x264
  Video Folder: Normal_Videos_010_x264
  Video Folder: Normal_Videos_014_x264
  Video Folder: Normal_Videos_015_x264
  Video Folder: Normal_Videos_018_x264
  Video Folder: Normal_Videos_019_x264
  Video Folder: Nor

In [12]:
class VideoSequenceDataset(Dataset):
    def __init__(self, frame_sequences, labels, transform=None):
        self.frame_sequences = frame_sequences
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.frame_sequences)

    def __getitem__(self, idx):
        # Get the sequence of frame paths
        sequence_paths = self.frame_sequences[idx]
        frames = []
        for frame_path in sequence_paths:
            frame = Image.open(frame_path).convert("RGB")
            if self.transform:
                frame = self.transform(frame)
            frames.append(frame)

        # Stack frames into a tensor: (sequence_length, C, H, W)
        frames_tensor = torch.stack(frames)
        label = self.labels[idx]
        return frames_tensor, label


In [16]:
def create_sequences(frame_paths, labels, sequence_length=10):
    """
    Groups frames into sequences for temporal modeling with debug information.

    Args:
        frame_paths (list): List of frame paths.
        labels (list): Corresponding labels for each frame.
        sequence_length (int): Number of frames in each sequence.

    Returns:
        sequences (list): List of sequences (each sequence is a list of frame paths).
        sequence_labels (list): Labels corresponding to each sequence.
    """
    sequences = []
    sequence_labels = []
    video_to_frames = {}
    video_to_label = {}

    # Debug: Print initial input sizes
    print(f"Number of frame paths: {len(frame_paths)}, Number of labels: {len(labels)}")

    # Group frames by video folder
    for frame_path, label in zip(frame_paths, labels):
        video_folder = os.path.dirname(frame_path)
        if video_folder not in video_to_frames:
            video_to_frames[video_folder] = []
            video_to_label[video_folder] = label
        video_to_frames[video_folder].append(frame_path)

    # Debug: Print number of videos identified
    print(f"Number of videos: {len(video_to_frames)}")

    # Create sequences
    for video_folder, frames in video_to_frames.items():
        frames = sorted(frames)  # Ensure frames are in temporal order
        num_sequences = (len(frames) - sequence_length + 1) // sequence_length

        # Debug: Print sequence information for each video
        print(f"Video: {video_folder}, Total Frames: {len(frames)}, Sequences: {num_sequences}")

        for i in range(0, len(frames) - sequence_length + 1, sequence_length):
            sequences.append(frames[i:i + sequence_length])
            sequence_labels.append(video_to_label[video_folder])

    # Debug: Print final counts
    print(f"Total sequences created: {len(sequences)}, Total sequence labels: {len(sequence_labels)}")
    return sequences, sequence_labels


In [17]:
# Debugging the train sequence creation
print("\nCreating sequences for training set...")
frame_sequences_train, sequence_labels_train = create_sequences(frame_train, label_train, sequence_length)
print("\nSample training sequence paths:")
print(frame_sequences_train[:2])  # Print the first two sequences
print("\nSample training labels:")
print(sequence_labels_train[:2])  # Print the first two labels

# Debugging the validation sequence creation
print("\nCreating sequences for validation set...")
frame_sequences_val, sequence_labels_val = create_sequences(frame_val, label_val, sequence_length)

# Debugging the testing sequence creation
print("\nCreating sequences for testing set...")
frame_sequences_test, sequence_labels_test = create_sequences(frame_test, label_test, sequence_length)



Creating sequences for training set...
Number of frame paths: 2151, Number of labels: 2151
Number of videos: 40
Video: C:\Users\Rohini\capstone_Project\prjct2\frames\frames_abuse\Abuse006_x264, Total Frames: 90, Sequences: 8
Video: C:\Users\Rohini\capstone_Project\prjct2\frames\frames_abuse\Abuse004_x264, Total Frames: 297, Sequences: 28
Video: C:\Users\Rohini\capstone_Project\prjct2\frames\frames_normal\Normal_Videos_033_x264, Total Frames: 37, Sequences: 2
Video: C:\Users\Rohini\capstone_Project\prjct2\frames\frames_arrest\Arrest008_x264, Total Frames: 144, Sequences: 13
Video: C:\Users\Rohini\capstone_Project\prjct2\frames\frames_abuse\Abuse003_x264, Total Frames: 75, Sequences: 6
Video: C:\Users\Rohini\capstone_Project\prjct2\frames\frames_abuse\Abuse008_x264, Total Frames: 165, Sequences: 15
Video: C:\Users\Rohini\capstone_Project\prjct2\frames\frames_normal\Normal_Videos_027_x264, Total Frames: 90, Sequences: 8
Video: C:\Users\Rohini\capstone_Project\prjct2\frames\frames_normal\

In [19]:
# Create datasets and dataloaders
train_dataset = VideoSequenceDataset(frame_sequences_train, sequence_labels_train, transform)
val_dataset = VideoSequenceDataset(frame_sequences_val, sequence_labels_val, transform)
test_dataset = VideoSequenceDataset(frame_sequences_test, sequence_labels_test, transform)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)


In [21]:
# Transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Create datasets and dataloaders
train_dataset = VideoSequenceDataset(frame_sequences_train, sequence_labels_train, transform)
val_dataset = VideoSequenceDataset(frame_sequences_val, sequence_labels_val, transform)
test_dataset = VideoSequenceDataset(frame_sequences_test, sequence_labels_test, transform)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)


In [22]:





import torch
import torch.nn as nn
from torchvision import models

class TemporalModel(nn.Module):
    def __init__(self, num_classes, hidden_dim=256):
        super(TemporalModel, self).__init__()
        # Pretrained DenseNet-121 as the spatial feature extractor
        self.feature_extractor = models.densenet121(pretrained=True)
        self.feature_extractor.classifier = nn.Identity()  # Remove the classifier
        
        # Temporal model: LSTM
        self.lstm = nn.LSTM(input_size=1024, hidden_size=hidden_dim, num_layers=1, batch_first=True)
        
        # Classifier for sequence-level predictions
        self.classifier = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        batch_size, seq_len, C, H, W = x.size()
        
        # Flatten sequences for DenseNet
        x = x.view(batch_size * seq_len, C, H, W)
        features = self.feature_extractor(x)  # Extract spatial features
        features = features.view(batch_size, seq_len, -1)  # Reshape for temporal model
        
        # Process with LSTM
        lstm_out, _ = self.lstm(features)
        output = self.classifier(lstm_out[:, -1, :])  # Use last LSTM output
        return output








































In [23]:
# Hyperparameters
num_classes = 3  # Adjust based on your dataset
hidden_dim = 256
learning_rate = 1e-4

# Initialize model
model = TemporalModel(num_classes=num_classes, hidden_dim=hidden_dim)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)




In [24]:
class EarlyStopping:
    def __init__(self, patience=3, verbose=False):
        """
        Args:
            patience (int): Number of epochs to wait before stopping if no improvement.
            verbose (bool): If True, prints messages when stopping.
        """
        self.patience = patience
        self.verbose = verbose
        self.best_accuracy = 0
        self.counter = 0
        self.early_stop = False

    def __call__(self, val_accuracy):
        if val_accuracy > self.best_accuracy:
            self.best_accuracy = val_accuracy
            self.counter = 0
        else:
            self.counter += 1
            if self.verbose:
                print(f"EarlyStopping counter: {self.counter} out of {self.patience}")
            if self.counter >= self.patience:
                self.early_stop = True


In [None]:
# Initialize early stopping
early_stopping = EarlyStopping(patience=3, verbose=True)

num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    correct_train = 0
    total_train = 0

    for frame_sequences, labels in train_loader:
        frame_sequences, labels = frame_sequences.to(device), labels.to(device)

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(frame_sequences)

        # Compute loss
        loss = criterion(outputs, labels)
        train_loss += loss.item()

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # Calculate training accuracy
        _, predicted = torch.max(outputs, 1)
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    train_accuracy = 100 * correct_train / total_train

    # Validate the model
    model.eval()
    correct_val = 0
    total_val = 0
    val_loss = 0.0
    with torch.no_grad():
        for frame_sequences, labels in val_loader:
            frame_sequences, labels = frame_sequences.to(device), labels.to(device)

            outputs = model(frame_sequences)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    val_accuracy = 100 * correct_val / total_val

    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, "
          f"Train Accuracy: {train_accuracy:.2f}%, "
          f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%")

    # Check early stopping
    early_stopping(val_accuracy)
    if early_stopping.early_stop:
        print("Early stopping triggered. Stopping training.")
        break
