In [75]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
import cv2
import pandas as pd
import numpy as np
import os
from sklearn.metrics import accuracy_score, precision_score, recall_score


In [76]:
# Pick the compute device once: the GPU when CUDA is usable, otherwise the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {DEVICE}")


Using device: cuda


In [77]:
# Per-frame preprocessing pipelines. Each one converts its input to a PIL
# image and ends with a normalized 3x224x224 tensor.

# Per-channel statistics shared by both pipelines (the values conventionally
# used with ImageNet-pretrained torchvision models).
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]

# Training: resize to 256x256, then augment with a random 224x224 crop and a
# random horizontal flip before tensor conversion and normalization.
train_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((256, 256)),
    transforms.RandomCrop((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
])

# Validation / test: deterministic resize straight to 224x224, no augmentation.
test_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
])

In [78]:
class VideoDataset(Dataset):
    """Dataset that turns each DataFrame row into a fixed-length clip of video frames.

    Every row must provide a ``clip_path`` column (path of a video file opened
    with ``cv2.VideoCapture``) and an ``encoded_label`` column (value that is
    converted to a ``torch.long`` tensor).

    Args:
        df: Table with one row per clip.
        num_frames: Number of frames returned for every clip.
        transform: Optional callable applied to each sampled RGB frame. It is
            invoked once per frame, so a transform with random components
            draws fresh parameters for every frame of the same clip.
    """

    # (C, H, W) of the all-zero placeholder frames returned when a video cannot
    # be decoded. This matches the output of the 224x224 transforms used in
    # this notebook; it is NOT adapted to the native frame size when
    # ``transform`` is None.
    _BLANK_FRAME_SHAPE = (3, 224, 224)

    def __init__(self, df, num_frames=16, transform=None):
        self.df = df
        self.num_frames = num_frames
        self.transform = transform

    def __len__(self):
        """Return the number of clips, i.e. the number of rows in ``df``."""
        return len(self.df)

    def _blank_clip(self):
        """Return an all-zero clip of ``num_frames`` placeholder frames."""
        return torch.zeros(self.num_frames, *self._BLANK_FRAME_SHAPE)

    def _read_sampled_frames(self, cap, video_path):
        """Decode the evenly spaced frames of an already opened capture.

        Returns a list with at most ``num_frames`` processed frames, in
        temporal order. The list is empty when the capture reports no frames
        or when nothing could be decoded. The caller owns ``cap`` and is
        responsible for releasing it.
        """
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames <= 0:
            print(f"Warning: Video {video_path} has 0 frames or cannot be read.")
            return []

        # Evenly spaced positions from the first to the last frame. Short
        # videos produce duplicate positions; the set keeps each one once and
        # gives O(1) membership tests inside the decode loop.
        wanted = set(torch.linspace(0, total_frames - 1, self.num_frames).long().tolist())
        last_wanted = max(wanted) if wanted else -1

        frames = []
        for i in range(total_frames):
            ret, frame = cap.read()
            if not ret:
                # The reported frame count can exceed what is decodable.
                break
            if i in wanted:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                if self.transform:
                    frame = self.transform(frame)
                else:
                    # HWC array -> CHW float tensor, divided by 255.
                    frame = torch.from_numpy(frame).permute(2, 0, 1).float() / 255.0
                frames.append(frame)
            if i >= last_wanted:
                # Nothing left to sample; skip decoding the remaining frames.
                break
        return frames

    def __getitem__(self, idx):
        """Return ``(clip, label)`` for the row at position ``idx``.

        ``clip`` stacks ``num_frames`` frames along a new first dimension and
        ``label`` is a scalar ``torch.long`` tensor. If fewer frames than
        requested could be decoded, the last decoded frame is repeated to fill
        the clip. If no frame could be decoded at all, an all-zero clip is
        returned instead of raising.
        """
        row = self.df.iloc[idx]
        video_path = row['clip_path']
        label = torch.tensor(row['encoded_label'], dtype=torch.long)

        # Paths are taken relative to the CWD; drop a leading "./" when the
        # path does not resolve as given.
        if not os.path.exists(video_path) and video_path.startswith("./"):
            video_path = video_path[2:]

        cap = cv2.VideoCapture(video_path)
        try:
            frames = self._read_sampled_frames(cap, video_path)
        finally:
            # Release the capture on every path, including the unreadable-video
            # early exit and exceptions raised by the transform.
            cap.release()

        if not frames:
            return self._blank_clip(), label

        # Pad by repeating the last decoded frame (no-op when already full).
        frames.extend([frames[-1]] * (self.num_frames - len(frames)))
        return torch.stack(frames), label



In [79]:
def evaluate_model(model, loader):
    """Print and return the classification accuracy of ``model`` over ``loader``.

    The model is switched to eval mode and is left in that mode on return.
    Gradients are disabled while iterating. Each batch is moved to ``DEVICE``
    and the predicted class is the index of the largest output along
    dimension 1.

    Args:
        model: Module called as ``model(videos)``.
        loader: Iterable yielding ``(videos, labels)`` tensor pairs.

    Returns:
        The accuracy computed by ``accuracy_score`` over all batches.
    """
    model.eval()
    predicted, expected = [], []

    with torch.no_grad():
        for batch_videos, batch_labels in loader:
            batch_videos = batch_videos.to(DEVICE)
            batch_labels = batch_labels.to(DEVICE)

            logits = model(batch_videos)
            batch_preds = torch.max(logits, 1).indices

            # Accumulate on the CPU so the metric sees plain NumPy values.
            predicted.extend(batch_preds.cpu().numpy())
            expected.extend(batch_labels.cpu().numpy())

    accuracy = accuracy_score(expected, predicted)

    print(f"Val/Test Accuracy: {accuracy:.4f}")
    return accuracy



In [80]:
def train_model(model, train_loader, val_loader, epochs=10, lr=1e-4,
                checkpoint_path='best_model.pth'):
    """Train ``model`` and checkpoint the weights with the best validation accuracy.

    Each epoch runs one pass over ``train_loader`` with cross-entropy loss and
    Adam (only parameters with ``requires_grad=True`` are optimized), then
    evaluates on ``val_loader`` with ``evaluate_model``. The validation
    accuracy drives a ``ReduceLROnPlateau`` scheduler (factor 0.1, patience 3)
    and decides whether the model's ``state_dict`` is written to
    ``checkpoint_path``.

    The first completed epoch always writes a checkpoint, so the file exists
    and belongs to this run even if validation accuracy stays at 0. Later
    epochs overwrite it only on a strict improvement, so ties keep the
    earliest best epoch.

    Args:
        model: Module to train; expected to already live on ``DEVICE``.
        train_loader: Iterable of ``(videos, labels)`` training batches.
        val_loader: Iterable of ``(videos, labels)`` validation batches.
        epochs: Number of passes over ``train_loader``.
        lr: Initial Adam learning rate.
        checkpoint_path: Destination of the best ``state_dict``.

    Returns:
        The best validation accuracy seen, or ``None`` if no epoch ran.
    """
    criterion = nn.CrossEntropyLoss()
    # Frozen parameters (requires_grad=False) are kept out of the optimizer.
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.Adam(trainable_params, lr=lr)

    # mode='max' because the monitored quantity is an accuracy.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=3)

    best_acc = None

    for epoch in range(epochs):
        # evaluate_model() leaves the model in eval mode, so re-enable
        # training mode at the start of every epoch.
        model.train()
        running_loss = 0.0
        num_batches = 0
        for i, (videos, labels) in enumerate(train_loader):
            videos, labels = videos.to(DEVICE), labels.to(DEVICE)

            outputs = model(videos)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

            if i % 10 == 0:
                print(f"Batch {i}, Loss: {loss.item():.4f}")

        # Count batches in the loop so loaders without __len__ work and an
        # empty loader does not divide by zero.
        epoch_loss = running_loss / max(num_batches, 1)

        # Validation
        print("Validating...")
        val_acc = evaluate_model(model, val_loader)

        # Step scheduler
        scheduler.step(val_acc)

        print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}, Val Acc: {val_acc:.4f}")

        if best_acc is None or val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), checkpoint_path)
            print(f"Saved best model with acc: {best_acc:.4f}")

    return best_acc


In [81]:
# End-to-end run: load the split tables, build datasets and loaders, train,
# then score the best checkpoint on the test split.

# Load DataFrames
print("Loading DataFrames...")
train_df = pd.read_csv("./dataset/splits/train.csv", index_col='index')
val_df = pd.read_csv("./dataset/splits/validation.csv", index_col='index')
test_df = pd.read_csv("./dataset/splits/test.csv", index_col='index')

# Create Datasets
# Only the training split gets the augmenting transforms.
print("Creating Datasets...")
dataset = VideoDataset(train_df, 16, transform=train_transforms)
val_dataset = VideoDataset(val_df, 16, transform=test_transforms)
test_dataset = VideoDataset(test_df, 16, transform=test_transforms)

# Create Loaders
# Batch size is 4; adjust as needed based on available VRAM.
train_loader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, num_workers=2)

# Initialize Model
# NOTE(review): VideoClassifier is not defined in the cells visible here; it
# must come from an earlier cell for a fresh Restart & Run All to work.
print("Initializing Model...")
model = VideoClassifier(num_classes=3).to(DEVICE)

# Train
print("Starting Training...")
train_model(model, train_loader, val_loader, epochs=10)

# Check best model on test set
# map_location lets a checkpoint saved on the GPU load in a CPU-only session.
print("Loading best model for testing...")
model.load_state_dict(torch.load('best_model.pth', map_location=DEVICE))
print("Evaluating on Test Set...")
evaluate_model(model, test_loader)


Loading DataFrames...
Creating Datasets...
Initializing Model...
Starting Training...
Batch 0, Loss: 1.1065
Batch 10, Loss: 0.1080
Batch 20, Loss: 0.1245
Batch 30, Loss: 0.1030
Batch 40, Loss: 0.2507
Batch 50, Loss: 0.0453
Validating...
Val/Test Accuracy: 1.0000
Epoch 1/10, Loss: 0.4526, Val Acc: 1.0000
Saved best model with acc: 1.0000
Batch 0, Loss: 0.1088
Batch 10, Loss: 0.0149
Batch 20, Loss: 0.0212
Batch 30, Loss: 0.0203
Batch 40, Loss: 0.0694
Batch 50, Loss: 0.0089
Validating...
Val/Test Accuracy: 1.0000
Epoch 2/10, Loss: 0.1298, Val Acc: 1.0000
Batch 0, Loss: 0.0718
Batch 10, Loss: 0.0112
Batch 20, Loss: 0.0066
Batch 30, Loss: 0.0935
Batch 40, Loss: 0.0790
Batch 50, Loss: 0.0060
Validating...
Val/Test Accuracy: 1.0000
Epoch 3/10, Loss: 0.1678, Val Acc: 1.0000
Batch 0, Loss: 0.0360
Batch 10, Loss: 0.0617
Batch 20, Loss: 0.0239
Batch 30, Loss: 0.0655
Batch 40, Loss: 0.0708
Batch 50, Loss: 0.0034
Validating...
Val/Test Accuracy: 1.0000
Epoch 4/10, Loss: 0.1350, Val Acc: 1.0000
Batc

1.0