In [1]:
import os
import random
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
from torchvision.models import efficientnet_b0, EfficientNet_B0_Weights
from torch.utils.data import Dataset, DataLoader, Subset
import numpy as np
from tqdm.auto import tqdm
from torch.amp import GradScaler, autocast

In [2]:
# Select the compute device: the GPU when CUDA is visible, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

Using device: cuda


In [3]:
# Colab-only step: mount Google Drive at /content/drive so the dataset
# directories configured below are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Backend switches aimed at GPU throughput.
# cuDNN: enable benchmark mode and allow TF32 in its kernels.
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.allow_tf32 = True
# CUDA matmul: allow TF32 as well.
torch.backends.cuda.matmul.allow_tf32 = True

## Preparing and Loading the Dataset

In [5]:
# Dataset split roots on the mounted Google Drive. Each directory is read with
# torchvision's ImageFolder in create_data_loaders().
train_dir = "/content/drive/MyDrive/Colab Notebooks/SP Cup 2025/Dataset/train"
valid_dir = "/content/drive/MyDrive/Colab Notebooks/SP Cup 2025/Dataset/valid"

In [6]:
# Hyperparameters shared by the data-loading and training cells.
BATCH_SIZE = 128  # images per batch for both loaders
NUM_WORKERS = 4  # DataLoader worker processes
PREFETCH_FACTOR = 2  # prefetch_factor passed to the training DataLoader
# Despite the name this is counted in batches: the training subset holds
# SAMPLES_PER_EPOCH * BATCH_SIZE images (see create_data_loaders()).
SAMPLES_PER_EPOCH = 100
EPOCHS = 10  # number of passes over the training subset
# NOTE: the learning rate is not configured here; train_model() passes
# lr=1e-4 directly to Adam.

In [7]:
# Preprocessing applied to every training and validation image.
transform = transforms.Compose([
    # Must match the input size FrequencyBranch is built for (3 x 160 x 160).
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
    # Per-channel normalisation; presumably the ImageNet statistics expected
    # by the pretrained EfficientNet weights -- TODO confirm.
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

## Model

In [8]:
class FrequencyBranch(nn.Module):
    """MLP over the 2-D Fourier amplitude and phase of an image batch.

    Each image is transformed with a 2-D FFT over its two trailing (spatial)
    axes, the spectrum is centred with ``fftshift``, and the flattened
    amplitude and phase are concatenated and fed through three linear layers
    with ReLU after the first two.

    Args:
        output_size: Width of the final linear layer.
        hidden_size1: Width of the first hidden layer.
        hidden_size2: Width of the second hidden layer.
        input_shape: ``(channels, height, width)`` of a single input image.
            The default corresponds to 160x160 RGB images.
    """

    def __init__(self, output_size=128, hidden_size1=512, hidden_size2=256,
                 input_shape=(3, 160, 160)):
        super().__init__()
        channels, height, width = input_shape
        # Plain attribute (not a buffer), so checkpoints are unaffected.
        self.input_shape = (channels, height, width)
        # Amplitude and phase each contribute channels * height * width values.
        input_size = channels * height * width * 2
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, output_size)
        self.relu = nn.ReLU()

    def forward(self, img):
        """Map a ``(batch, channels, height, width)`` tensor to ``(batch, output_size)``.

        Raises:
            ValueError: If the per-image shape differs from ``input_shape``.
        """
        if tuple(img.shape[1:]) != self.input_shape:
            raise ValueError(
                f"FrequencyBranch expects images of shape {self.input_shape}, "
                f"got {tuple(img.shape[1:])}"
            )

        spatial_dims = (-2, -1)
        spectrum = torch.fft.fft2(img, dim=spatial_dims)
        # Shift ONLY the spatial axes. Without `dim`, fftshift rolls every
        # axis, including batch and channel, which would pair each image's
        # spectrum with a different sample in the batch.
        spectrum = torch.fft.fftshift(spectrum, dim=spatial_dims)
        amplitude = torch.abs(spectrum)
        phase = torch.angle(spectrum)
        features = torch.cat((amplitude.flatten(1), phase.flatten(1)), dim=1)

        x = self.relu(self.fc1(features))
        x = self.relu(self.fc2(x))
        return self.fc3(x)

class PreTrainedBranch(nn.Module):
    """EfficientNet-B0 (ImageNet1K_V1 weights) with a replaced classifier head.

    The stock classifier is swapped for dropout followed by a linear layer
    that emits ``output_features`` values per image.

    Args:
        input_channels: Accepted for interface compatibility; not used by
            this class.
        output_features: Width of the replacement linear head.
    """

    def __init__(self, input_channels=3, output_features=128):
        super().__init__()
        backbone = efficientnet_b0(weights=EfficientNet_B0_Weights.IMAGENET1K_V1)
        # Read the width feeding the stock head before it is replaced.
        head_in_features = backbone.classifier[1].in_features
        backbone.classifier = nn.Sequential(
            nn.Dropout(p=0.2, inplace=True),
            nn.Linear(head_in_features, output_features),
        )
        self.efficientnet = backbone

    def forward(self, x):
        """Return the backbone's output for the batch ``x``."""
        features = self.efficientnet(x)
        return features

class CombinedModel(nn.Module):
    """Two-branch classifier that fuses frequency and CNN features.

    The 128-wide outputs of ``FrequencyBranch`` and ``PreTrainedBranch`` are
    concatenated (256 values) and reduced by a 256 -> 64 -> 32 -> 1 MLP with
    ReLU and dropout (p=0.5) after each of the first two layers. The final
    layer's output is returned without any activation.
    """

    def __init__(self):
        super().__init__()
        self.freq_branch = FrequencyBranch(output_size=128)
        self.conv_branch = PreTrainedBranch(output_features=128)
        self.fc1 = nn.Linear(256, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return a ``(batch, 1)`` tensor for the image batch ``x``."""
        branch_features = (self.freq_branch(x), self.conv_branch(x))
        fused = torch.cat(branch_features, dim=1)
        hidden = self.dropout(torch.relu(self.fc1(fused)))
        hidden = self.dropout(torch.relu(self.fc2(hidden)))
        return self.fc3(hidden)

## Dataloaders

In [9]:
def create_data_loaders():
    """Build the training and validation DataLoaders.

    Both splits are read with ``ImageFolder`` using the module-level
    ``transform``. Training uses a random subset of at most
    ``SAMPLES_PER_EPOCH * BATCH_SIZE`` images. The subset is drawn once, when
    this function is called, so every epoch iterates over the same images
    (in a reshuffled order).

    Returns:
        tuple: ``(train_loader, valid_loader)``.
    """
    train_dataset = datasets.ImageFolder(train_dir, transform=transform)
    valid_dataset = datasets.ImageFolder(valid_dir, transform=transform)

    # Plain Python ints keep Subset indexing independent of tensor semantics.
    subset_size = min(len(train_dataset), SAMPLES_PER_EPOCH * BATCH_SIZE)
    train_indices = torch.randperm(len(train_dataset))[:subset_size].tolist()
    train_subset = Subset(train_dataset, train_indices)

    # Pinned host memory only helps host-to-GPU copies.
    pin_memory = torch.cuda.is_available()

    # prefetch_factor and persistent_workers are only valid with worker
    # processes; DataLoader raises ValueError if they are set while
    # num_workers == 0.
    train_worker_options = {}
    if NUM_WORKERS > 0:
        train_worker_options["prefetch_factor"] = PREFETCH_FACTOR
        train_worker_options["persistent_workers"] = True

    train_loader = DataLoader(
        train_subset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=NUM_WORKERS,
        pin_memory=pin_memory,
        **train_worker_options,
    )

    valid_loader = DataLoader(
        valid_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=NUM_WORKERS,
        pin_memory=pin_memory,
    )

    return train_loader, valid_loader

## Training

In [None]:
def _train_one_epoch(model, train_loader, optimizer, loss_fn, scaler, device_type, use_amp):
    """Run one optimisation pass over ``train_loader``.

    A batch that raises is reported and skipped (best effort). The returned
    statistics cover only the batches that completed.

    Returns:
        tuple: ``(mean_loss, accuracy_percent)`` over the processed samples.

    Raises:
        RuntimeError: If no batch completed, so there is nothing to report.
    """
    model.train()
    loss_sum = 0.0
    correct = 0
    seen = 0
    failed_batches = 0
    last_error = None

    progress = tqdm(train_loader)
    for batch_idx, (data, target) in enumerate(progress):
        try:
            # One-off shape report for the first batch.
            if batch_idx == 0:
                print(f"\nBatch shape: {data.shape}")
                print(f"Target shape: {target.shape}")

            data, target = data.to(device), target.float().to(device)

            # torch.amp.autocast requires an explicit device_type.
            with autocast(device_type=device_type, enabled=use_amp):
                # squeeze(1) keeps the batch axis even for a batch of one,
                # so the logits always line up with `target`.
                output = model(data).squeeze(1)
                loss = loss_fn(output, target)

            optimizer.zero_grad(set_to_none=True)
            scaler.scale(loss).backward()
            # Unscale before clipping so max_norm applies to the true gradients.
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            scaler.step(optimizer)
            scaler.update()

            with torch.no_grad():
                pred = torch.sigmoid(output) >= 0.5
                batch_correct = pred.eq(target.view_as(pred)).sum().item()
                batch_loss = loss.item()

            batch_size = target.size(0)
            correct += batch_correct
            # Weight by batch size so a short final batch is not over-counted.
            loss_sum += batch_loss * batch_size
            seen += batch_size

            if batch_idx % 10 == 0:
                progress.set_postfix(loss=f"{batch_loss:.4f}")

        except Exception as e:
            failed_batches += 1
            last_error = e
            print(f"Error in batch {batch_idx}: {e}")
            continue

    if seen == 0:
        # Every batch failed (or the loader was empty): surface the problem
        # instead of reporting statistics for a model that never trained.
        raise RuntimeError("No training batch completed successfully") from last_error
    if failed_batches:
        print(f"Skipped {failed_batches} failed batch(es) this epoch")

    return loss_sum / seen, 100. * correct / seen


def _validate(model, valid_loader, loss_fn):
    """Evaluate ``model`` on ``valid_loader`` without gradient tracking.

    Returns:
        tuple: ``(mean_loss, accuracy_percent)`` over all validation samples.

    Raises:
        RuntimeError: If the loader yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    seen = 0

    with torch.no_grad():
        for data, target in valid_loader:
            data, target = data.to(device), target.float().to(device)
            output = model(data).squeeze(1)
            batch_size = target.size(0)
            loss_sum += loss_fn(output, target).item() * batch_size
            pred = torch.sigmoid(output) >= 0.5
            correct += pred.eq(target.view_as(pred)).sum().item()
            seen += batch_size

    if seen == 0:
        raise RuntimeError("Validation loader produced no samples")

    return loss_sum / seen, 100. * correct / seen


def train_model():
    """Train ``CombinedModel`` for ``EPOCHS`` epochs, checkpointing each epoch.

    Uses Adam (lr=1e-4), ``BCEWithLogitsLoss``, gradient-norm clipping at 1.0
    and a ``ReduceLROnPlateau`` scheduler driven by the validation loss.
    Mixed precision (autocast + gradient scaling) is enabled only when the
    module-level ``device`` is a CUDA device. After every epoch a checkpoint
    is written to ``checkpoint_epoch_<n>.pt`` in the current working directory.

    Returns:
        None. If the data loaders cannot be created, the error is printed and
        the function returns early.

    Raises:
        RuntimeError: If an epoch completes no training batch or validation
            yields no samples. Validation and checkpoint errors propagate to
            the caller instead of being skipped silently.
    """
    print("Initializing model...")
    model = CombinedModel().to(device)

    print("Setting up optimizer and loss function...")
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     mode='min',
                                                     patience=2,
                                                     factor=0.1)
    loss_fn = nn.BCEWithLogitsLoss()

    # Derive the AMP settings from the configured device instead of assuming CUDA.
    device_type = torch.device(device).type
    use_amp = device_type == "cuda"
    scaler = GradScaler(device_type, enabled=use_amp)

    print("Creating data loaders...")
    try:
        train_loader, valid_loader = create_data_loaders()
        print(f"Train loader length: {len(train_loader)}")
        print(f"Number of training samples: {len(train_loader.dataset)}")
    except Exception as e:
        print(f"Error in data loader creation: {e}")
        return

    print("\nStarting training loop...")
    for epoch in range(EPOCHS):
        print(f"\nEpoch {epoch+1}/{EPOCHS}")

        train_loss, train_acc = _train_one_epoch(
            model, train_loader, optimizer, loss_fn, scaler, device_type, use_amp
        )
        print(f'\nEpoch: {epoch+1}')
        print(f'Training Loss: {train_loss:.4f}, Training Accuracy: {train_acc:.2f}%')

        print("\nStarting validation...")
        valid_loss, valid_acc = _validate(model, valid_loader, loss_fn)
        print(f'Validation Loss: {valid_loss:.4f}, Validation Accuracy: {valid_acc:.2f}%\n')

        scheduler.step(valid_loss)

        # Save model checkpoint. Scheduler and scaler state are included so a
        # run can be resumed; the original keys are unchanged.
        checkpoint_path = f'checkpoint_epoch_{epoch+1}.pt'
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'scaler_state_dict': scaler.state_dict(),
            'train_loss': train_loss,
            'valid_loss': valid_loss,
        }, checkpoint_path)
        print(f"Saved checkpoint to {checkpoint_path}")

if __name__ == "__main__":
    try:
        # Report the accelerator before the training run starts.
        if torch.cuda.is_available():
            gpu_name = torch.cuda.get_device_name(0)
            print(f"GPU: {gpu_name}")
            total_memory_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
            print(f"GPU Memory: {total_memory_gb:.2f} GB")

        # Echo the dataset locations so the log records what was trained on.
        print(f"\nTraining directory: {train_dir}")
        print(f"Validation directory: {valid_dir}")

        train_model()
    except Exception as err:
        # Log the failure, then re-raise so the traceback stays visible.
        print(f"Fatal error: {err}")
        raise

GPU: NVIDIA A100-SXM4-40GB
GPU Memory: 42.48 GB

Training directory: /content/drive/MyDrive/Colab Notebooks/SP Cup 2025/Dataset/train
Validation directory: /content/drive/MyDrive/Colab Notebooks/SP Cup 2025/Dataset/valid
Initializing model...
Setting up optimizer and loss function...
Creating data loaders...
Train loader length: 100
Number of training samples: 12800

Starting training loop...

Epoch 1/10


  0%|          | 0/100 [00:00<?, ?it/s]