In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
import os
from collections import Counter
import matplotlib.pyplot as plt

In [2]:
class PTDataset(Dataset):
    """In-memory dataset of pre-saved ``.pt`` tensors stored in one folder per class.

    Every ``.pt`` file found under ``root_dir/<class name>/`` is loaded eagerly
    in ``__init__``, resized to ``target_size`` with bilinear interpolation and
    kept in CPU memory, so ``__getitem__`` is a plain list lookup.
    """

    def __init__(self, root_dir, target_size=(500, 500), transform=None):
        """
        Args:
            root_dir (str): Path to the dataset directory (e.g., Training folder).
            target_size (tuple): Desired output size (height, width).
            transform (callable, optional): Applied once, at load time, to the
                resized tensor while it still has its leading batch dimension
                (``[1, C, H, W]`` when the stored tensor is 3-D).
        """
        self.root_dir = root_dir
        self.target_size = target_size
        self.transform = transform
        # The position in this list is the integer label of the class.
        self.classes = ['undetectable', 'low', 'medium', 'high']

        # Collect all file paths and labels
        self.file_list = []
        for class_index, label in enumerate(self.classes):
            class_path = os.path.join(root_dir, label)
            if not os.path.isdir(class_path):
                continue  # Skip if folder doesn't exist
            # os.listdir returns entries in arbitrary order; sorting keeps the
            # sample order (and anything derived from it) reproducible.
            for file in sorted(os.listdir(class_path)):
                if file.endswith('.pt'):
                    self.file_list.append((os.path.join(class_path, file), class_index))

        # Pre-load everything into memory (CPU)
        self.data_list = [
            (self._load_sample(file_path), label)
            for file_path, label in self.file_list
        ]

    def _load_sample(self, file_path):
        """Load one ``.pt`` file and return its tensor resized to ``self.target_size``."""
        # weights_only=True restricts unpickling to tensors and plain
        # containers, so a tampered file cannot execute arbitrary code.
        tensor_data = torch.load(file_path, map_location='cpu', weights_only=True)
        if not torch.is_tensor(tensor_data):
            raise TypeError(
                f"Expected a tensor in {file_path!r}, got {type(tensor_data).__name__}"
            )

        # F.interpolate needs a batch dimension: [C, H, W] -> [1, C, H, W]
        if tensor_data.dim() == 3:
            tensor_data = tensor_data.unsqueeze(0)

        # Bilinear interpolation is not implemented for integer tensors.
        if not tensor_data.is_floating_point():
            tensor_data = tensor_data.float()

        resized_tensor = F.interpolate(
            tensor_data,
            size=self.target_size,
            mode='bilinear',
            align_corners=False
        )

        # Optional transform
        if self.transform:
            resized_tensor = self.transform(resized_tensor)

        # Drop the batch dimension again: [1, C, H, W] -> [C, H, W]
        if resized_tensor.shape[0] == 1:
            resized_tensor = resized_tensor.squeeze(0)

        return resized_tensor

    def __len__(self):
        """Return the number of pre-loaded samples."""
        return len(self.data_list)

    def __getitem__(self, idx):
        """Return the ``(tensor, label)`` pair stored at position ``idx``."""
        return self.data_list[idx]


In [None]:
def get_resnet_model(num_classes=4, input_channels=7, dropout_rate=0.1454303215712593):
    """
    Create a ResNet18 classifier adapted to multi-channel input.

    The network starts from torchvision's default ResNet18 weights. Its first
    convolution is swapped for a newly created one that takes `input_channels`
    planes, and its fully connected layer is swapped for Dropout followed by a
    Linear layer producing `num_classes` outputs.
    """
    backbone = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)

    # Width of the features feeding the classifier, read before the head is replaced.
    feature_dim = backbone.fc.in_features

    # New stem: same geometry as the stock ResNet stem, different channel count.
    stem = nn.Conv2d(
        in_channels=input_channels,
        out_channels=64,
        kernel_size=7,
        stride=2,
        padding=3,
        bias=False,
    )
    backbone.conv1 = stem

    # New head: regularise with dropout, then classify.
    head_layers = [
        nn.Dropout(p=dropout_rate),
        nn.Linear(feature_dim, num_classes),
    ]
    backbone.fc = nn.Sequential(*head_layers)

    return backbone


def evaluate_model(model, loader, criterion, device):
    """
    Compute the average loss and accuracy of `model` over `loader`.

    Args:
        model (nn.Module): Network to evaluate. It is switched to eval mode
            and left in that mode on return.
        loader (iterable): Yields `(inputs, labels)` batches.
        criterion (callable): Loss applied to `(outputs, labels)`. Its result
            is multiplied by the batch size, so it should be a per-batch mean.
        device (torch.device or str): Device the batches are moved to.

    Returns:
        tuple: `(avg_loss, avg_acc)`, the sample-weighted mean loss and the
        accuracy in percent.

    Raises:
        ValueError: If `loader` yields no samples.
    """
    # Accept both torch.device objects and plain strings such as "cpu".
    device = torch.device(device)
    use_amp = device.type == 'cuda'

    model.eval()
    running_loss = 0.0
    correct, total = 0, 0

    with torch.no_grad():
        for inputs, labels in loader:
            inputs = inputs.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)

            # Use AMP if on GPU
            with torch.amp.autocast(device_type='cuda', enabled=use_amp):
                outputs = model(inputs)
                loss = criterion(outputs, labels)

            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    if total == 0:
        # Without this guard the averages below fail with a bare ZeroDivisionError.
        raise ValueError("loader yielded no samples; cannot compute evaluation metrics.")

    avg_loss = running_loss / total
    avg_acc = 100.0 * correct / total
    return avg_loss, avg_acc


def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, device, num_epochs=25):
    """
    Basic training routine using CrossEntropyLoss
    for single-label, multi-class classification.

    Each epoch runs one pass over `train_loader` (with mixed precision when
    `device` is CUDA), evaluates on `val_loader`, steps `scheduler` once and
    prints a one-line summary.

    Args:
        model (nn.Module): Network to train, already placed on `device`.
        train_loader (iterable): Yields `(inputs, labels)` training batches.
        val_loader (iterable): Yields `(inputs, labels)` validation batches.
        criterion (callable): Loss applied to `(outputs, labels)`. Its result
            is multiplied by the batch size, so it should be a per-batch mean.
        optimizer (torch.optim.Optimizer): Optimizer updating `model`.
        scheduler: Learning-rate scheduler; `step()` is called once per epoch.
        device (torch.device or str): Device the batches are moved to.
        num_epochs (int): Number of passes over `train_loader`.

    Returns:
        dict: Per-epoch history with the keys `train_loss`, `train_acc`,
        `val_loss` and `val_acc`, each a list with one entry per epoch.

    Raises:
        ValueError: If `train_loader` yields no samples.
    """
    # Accept both torch.device objects and plain strings such as "cpu".
    device = torch.device(device)
    use_amp = device.type == 'cuda'

    # The scaler is a pass-through when mixed precision is disabled.
    scaler = torch.amp.GradScaler(enabled=use_amp)

    history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct, total = 0, 0

        for inputs, labels in train_loader:
            inputs = inputs.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)

            optimizer.zero_grad()

            with torch.amp.autocast(device_type='cuda', enabled=use_amp):
                outputs = model(inputs)
                loss = criterion(outputs, labels)

            # Backward on the scaled loss, then let the scaler drive the
            # optimizer step and adjust its scale factor.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

        if total == 0:
            # Without this guard the averages below fail with a bare ZeroDivisionError.
            raise ValueError("train_loader yielded no samples; cannot compute training metrics.")

        epoch_loss = running_loss / total
        epoch_acc = 100.0 * correct / total

        val_loss, val_acc = evaluate_model(model, val_loader, criterion, device)
        scheduler.step()

        history['train_loss'].append(epoch_loss)
        history['train_acc'].append(epoch_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)

        print(f"Epoch [{epoch+1}/{num_epochs}] "
              f"Train Loss: {epoch_loss:.4f}, Train Acc: {epoch_acc:.2f}%, "
              f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

    print("Training complete.")
    return history


# Root folder that contains the Training/, Validation/ and Testing/ splits.
# Set the PT_DATASET_ROOT environment variable to point at another location;
# when it is unset (or empty) the original hard-coded location is used.
_dataset_root = (os.environ.get('PT_DATASET_ROOT') or 'H:/Datasets/reduced').rstrip('/\\')

train_dataset_path = f'{_dataset_root}/Training/'
val_dataset_path   = f'{_dataset_root}/Validation/'
test_dataset_path  = f'{_dataset_root}/Testing/'


def main():
    """
    Train the 7-channel ResNet18 five times and keep the best run.

    Steps:
      1. Load the training, validation and testing datasets into memory.
      2. Build a class-balanced sampler for training (each sample is weighted
         by the inverse frequency of its class).
      3. Train five freshly built models and remember the weights of the run
         with the highest validation accuracy.
      4. Save those weights to 'resnet_model_Docker.pth' and report the loss
         and accuracy of that model on the testing set.
    """
    #Check device for CUDA or CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    train_dataset = PTDataset(root_dir=train_dataset_path, target_size=(500, 500))
    val_dataset   = PTDataset(root_dir=val_dataset_path,   target_size=(500, 500))
    test_dataset  = PTDataset(root_dir=test_dataset_path,  target_size=(500, 500))

    train_labels = [label for _, label in train_dataset.data_list]
    print("Labels in dataset:", set(train_labels))

    # Inverse class-frequency weights so every class is drawn equally often.
    class_counts = Counter(train_labels)
    weights = [1.0 / class_counts[label] for label in train_labels]

    train_sampler = WeightedRandomSampler(
        weights=weights,
        num_samples=len(weights),
        replacement=True
    )

    use_pin_memory = (device.type == 'cuda')
    batch_size = 32

    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        sampler=train_sampler,
        shuffle=False,  # ordering is delegated to the sampler
        num_workers=0,
        pin_memory=use_pin_memory
    )

    def make_eval_loader(dataset):
        """Build a sequential, non-shuffled loader for validation/testing."""
        return DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=False,
            num_workers=0,
            pin_memory=use_pin_memory
        )

    val_loader = make_eval_loader(val_dataset)
    test_loader = make_eval_loader(test_dataset)

    # Settings shared by every run, defined once instead of once per iteration.
    # NOTE(review): the long decimals look like hyperparameter-search results;
    # confirm their origin before changing them.
    num_runs = 5
    num_epochs = 18
    learning_rate = 0.0038825206157311557
    weight_decay  = 0.0001931053552153856
    gamma_rate    = 0.9388047294838997
    criterion = nn.CrossEntropyLoss()

    best_val_acc = -float('inf')
    best_model_state = None

    # Train/Evaluate 5 times
    for run_idx in range(num_runs):
        print(f"\n=== Training Run {run_idx+1} of {num_runs} ===")

        model = get_resnet_model(num_classes=4, input_channels=7)
        model.to(device)

        optimizer = optim.SGD(
            model.parameters(),
            lr=learning_rate,
            weight_decay=weight_decay,
            momentum=0.9
        )

        scheduler = optim.lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=gamma_rate)

        train_model(
            model=model,
            train_loader=train_loader,
            val_loader=val_loader,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            device=device,
            num_epochs=num_epochs
        )

        # Evaluate on validation set
        val_loss, val_acc = evaluate_model(model, val_loader, criterion, device)
        print(f"Run {run_idx+1} validation accuracy: {val_acc:.2f}%")

        # Keep track of best model so far
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            # state_dict() returns references to the live parameters. Taking
            # detached CPU copies lets the GPU model be released and keeps the
            # saved checkpoint loadable on machines without CUDA.
            best_model_state = {
                name: tensor.detach().cpu().clone()
                for name, tensor in model.state_dict().items()
            }
            print(f"New best model found with val_acc={val_acc:.2f}% (Run {run_idx+1}).")

    # After all 5 runs, save only the best model
    if best_model_state is not None:
        torch.save(best_model_state, 'resnet_model_Docker.pth')
        print(f"\nBest model saved with val_acc={best_val_acc:.2f}%")

        best_model = get_resnet_model(num_classes=4, input_channels=7)
        best_model.load_state_dict(best_model_state)
        best_model.to(device)

        test_loss, test_acc = evaluate_model(best_model, test_loader, criterion, device)
        print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.2f}% for the best model")

if __name__ == "__main__":
    main()


Using device: cuda
Labels in dataset: {0, 1, 2, 3}

=== Training Run 1 of 5 ===
Epoch [1/18] Train Loss: 1.2389, Train Acc: 42.55%, Val Loss: 1.4600, Val Acc: 20.41%
Epoch [2/18] Train Loss: 0.7317, Train Acc: 75.18%, Val Loss: 1.0365, Val Acc: 63.27%
Epoch [3/18] Train Loss: 0.3552, Train Acc: 86.52%, Val Loss: 0.6746, Val Acc: 73.47%


KeyboardInterrupt: 