In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
import os
from collections import Counter
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

## File Naming Convention

Each `.pt` file should follow this format:

```
<label>_<replicate>.pt
```

**Examples:**
- `150_1.pt` → Label: 150 (undetectable), Replicate: 1
- `500_2.pt` → Label: 500 (low), Replicate: 2
- `7000_3.pt` → Label: 7000 (medium), Replicate: 3
- `20000_5.pt` → Label: 20000 (high), Replicate: 5



## Class Definitions for Semi-Quantitative approach 

Infers clinical decision-making based on viral load counts (assuming 1:1 sample prep
)
1. `undetectable` → Label values `< 200`
2. `low` → Label values `200 ≤ label ≤ 1000`
3. `medium` → Label values `1000 < label ≤ 10000`
4. `high` → Label values `> 10000`

# Dataset Folder Structure

Dataset is organized into the following structure to ensure proper training, validation, and testing:

```
Datasets/
│-- SemiQuant/
│   │-- Training/             # Training dataset (60% of total data)
│   │   ├── undetectable/      # Class 0 (e.g., files with labels < 200)
│   │   │   ├── 20_1.pt
│   │   │   ├── 40_3.pt
│   │   │   └── ...
│   │   ├── low/               # Class 1 (200 ≤ label ≤ 1000)
|   |   |   ├── 300_2.pt
│   │   │   ├── 600_4.pt
│   │   │   └── ...
│   │   ├── medium/            # Class 2 (1000 < label ≤ 10000)
│   │   │   ├── 2000_1.pt
│   │   │   ├── 7000_2.pt
│   │   │   └── ...
│   │   ├── high/              # Class 3 (label > 10000)
│   │   │   ├── 10000_2.pt
│   │   │   ├── 90000_2.pt
│   │   │   └── ...
│
│   │-- Validation/            # Validation dataset (20% of total data)
│   │   ├── undetectable/
│   │   │   ├── 20_2.pt
│   │   │   ├── 40_4.pt
│   │   │   └── ...
│   │   ├── low/
│   │   ├── medium/
│   │   ├── high/
│
│   │-- Testing/               # Testing dataset (20% of total data)
│   │   ├── undetectable/
│   │   │   ├── 30_1.pt
│   │   │   ├── 50_2.pt
│   │   │   └── ...
│   │   ├── low/
│   │   ├── medium/
│   │   ├── high/
│
│-- torch_tensors/              # Original .pt files before splitting
│   │   ├── 100_1.pt
│   │   ├── 200_3.pt
│   │   ├── 5000_2.pt
│   │   ├── 15000_4.pt
│   │   └── ...
```

## Folder Descriptions

- **`Training/`** – Used to train the model (60% of total data).
- **`Validation/`** – Used to validate the model during training (20% of total data).
- **`Testing/`** – Used to evaluate the model after training (20% of total data).
- **`torch_tensors/`** – Stores the original `.pt` files before they were split.



In [2]:
class PTDataset(Dataset):
    def __init__(self, root_dir, target_size=(500, 500), transform=None):
        """
        Args:
            root_dir (str): Path to the dataset directory (e.g., Training folder).
            target_size (tuple): Desired output size (height, width).
            transform (callable, optional): Optional transformations (on CPU).
        """
        self.root_dir = root_dir
        self.target_size = target_size
        self.transform = transform
        self.classes = ['undetectable', 'low', 'medium', 'high']

        # Collect all file paths and labels
        self.file_list = []
        for label in self.classes:
            class_path = os.path.join(root_dir, label)
            if not os.path.exists(class_path):
                continue  # Skip if folder doesn't exist
            for file in os.listdir(class_path):
                if file.endswith('.pt'):
                    full_path = os.path.join(class_path, file)
                    class_index = self.classes.index(label)
                    self.file_list.append((full_path, class_index))

        # Pre-load everything into memory (CPU)
        self.data_list = []
        for file_path, label in self.file_list:
            # Load from disk to CPU memory
            tensor_data = torch.load(file_path, map_location='cpu')  # [C, T, H, W]

            # Ensure enough frames
            max_frames = tensor_data.shape[1]
            selected_frame_indices = [ 49, 59, 69, 79, 89, 99, 109, 119, 129, 139, 149, 159, 169, 179]
            # selected_frame_indices = [69, 89, 109, 129, 149, 179]
            selected_frame_indices = [i for i in selected_frame_indices if i < max_frames]
            if len(selected_frame_indices) < 6:
                raise ValueError(f"Not enough frames in {file_path}, available: {max_frames}, required: 180")

            # Compute average of the first 20 frames
            avg_first_20 = torch.mean(tensor_data[:, :20, :, :], dim=1, keepdim=True)  # [C, 1, H, W]
            selected_frames = tensor_data[:, selected_frame_indices, :, :]             # [C, 6, H, W]

            # Concatenate to form a 7-frame tensor
            final_tensor = torch.cat((avg_first_20, selected_frames), dim=1)  # [C, 7, H, W]
            final_tensor = final_tensor.squeeze(0) if final_tensor.shape[0] == 1 else final_tensor

            # Resize on CPU
            if final_tensor.dim() == 3:
                # shape [7, H, W]
                final_tensor = final_tensor.unsqueeze(0)  # -> [1, 7, H, W]

            resized_tensor = F.interpolate(
                final_tensor,
                size=self.target_size,
                mode='bilinear',
                align_corners=False
            )

            # Optional transform
            if self.transform:
                resized_tensor = self.transform(resized_tensor)

            # Model expects input_channels=7, flatten [C=1, frames=7, H, W] -> [7, H, W]
            if resized_tensor.shape[0] == 1:
                resized_tensor = resized_tensor.squeeze(0)  # shape [7, H, W]

            # Store (tensor, label)
            self.data_list.append((resized_tensor, label))

    def __len__(self):
        return len(self.data_list)

    def __getitem__(self, idx):
        return self.data_list[idx]

In [6]:
def get_resnet_model(num_classes=4, input_channels=15):
    """
    Build ResNet34 with a custom first conv layer
    that expects `input_channels`.
    """
    model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
    # Replace first conv to match your input_channels
    model.conv1 = nn.Conv2d(input_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
    # Replace FC layer to match num_classes
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    return model

def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, device, num_epochs=10):
    """
    Basic training routine using CrossEntropyLoss
    for single-label, multi-class classification.
    """
    scaler = torch.amp.GradScaler()

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct, total = 0, 0

        for inputs, labels in train_loader:
            inputs = inputs.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)

            optimizer.zero_grad()

            with torch.amp.autocast(device_type=device.type, enabled=(device.type == 'cuda')):
                outputs = model(inputs)
                loss = criterion(outputs, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

        epoch_loss = running_loss / total
        epoch_acc = 100.0 * correct / total

        val_loss, val_acc = evaluate_model(model, val_loader, criterion, device)

        scheduler.step()

        print(f"Epoch [{epoch+1}/{num_epochs}] "
              f"Train Loss: {epoch_loss:.4f}, Train Acc: {epoch_acc:.2f}%, "
              f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

    print("Training complete.")


def evaluate_model(model, loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct, total = 0, 0

    with torch.no_grad():
        for inputs, labels in loader:
            inputs = inputs.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)

            with torch.cuda.amp.autocast(enabled=(device.type == 'cuda')):
                outputs = model(inputs)
                loss = criterion(outputs, labels)

            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    avg_loss = running_loss / total
    avg_acc = 100.0 * correct / total
    return avg_loss, avg_acc

def plot_confusion_matrix(model, loader, device, class_names):
    """
    Generates and displays a confusion matrix for the model on the given loader.
    """
    model.eval()
    all_preds, all_labels = [], []

    with torch.no_grad():
        for inputs, labels in loader:
            inputs = inputs.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

    conf_matrix = confusion_matrix(all_labels, all_preds)
    disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix, display_labels=class_names)
    disp.plot(cmap=plt.cm.Blues)
    plt.title("Confusion Matrix")
    plt.show()

In [7]:
def main():
    # 1. Check device for cuda or cpu
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # 2. Define dataset paths
    train_dataset_path = 'H:/Datasets/int_split/Training/'
    val_dataset_path = 'H:/Datasets/int_split/Validation/'
    test_dataset_path = 'H:/Datasets/int_split/Testing/'

    # 3. Load datasets
    train_dataset = PTDataset(root_dir=train_dataset_path, target_size=(500, 500))
    val_dataset = PTDataset(root_dir=val_dataset_path, target_size=(500, 500))
    test_dataset = PTDataset(root_dir=test_dataset_path, target_size=(500, 500))

    # 4. Create WeightedRandomSampler for training
    train_labels = [label for _, label in train_dataset.data_list]
    class_counts = Counter(train_labels)
    weights = [1.0 / class_counts[label] for label in train_labels]

    train_sampler = WeightedRandomSampler(
        weights=weights,
        num_samples=len(weights),
        replacement=True
    )

    # 5. Create DataLoaders
    use_pin_memory = True if device.type == 'cuda' else False

    train_loader = DataLoader(
        train_dataset,
        batch_size=32,
        sampler=train_sampler,
        shuffle=False,
        num_workers=0,
        pin_memory=use_pin_memory
    )

    val_loader = DataLoader(
        val_dataset,
        batch_size=32,
        shuffle=False,
        num_workers=0,
        pin_memory=use_pin_memory
    )

    test_loader = DataLoader(
        test_dataset,
        batch_size=32,
        shuffle=False,
        num_workers=0,
        pin_memory=use_pin_memory
    )

    # 6. Build model, move to device
    model = get_resnet_model(num_classes=4, input_channels=15)
    model.to(device)

    # 7. Define loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)


    # 8. Train
    train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, device, num_epochs=13)

    # 9. Evaluate on test set
    test_loss, test_acc = evaluate_model(model, test_loader, criterion, device)
    print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.2f}%")

    # 10. Save model
    torch.save(model.state_dict(), 'resnet_model_18_10_true_15.pth')
    print("Model saved as resnet_model.pth")


if __name__ == "__main__":
    main()

#     # If you want to plot the confusion matrix after everything is done:
#     # (Just call the function from anywhere outside `main()`.)
#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#     class_names = ['Und.', 'Low', 'Med', 'High']

#     # Re-initialize your model, load state dict if needed, etc.
#     model = get_resnet_model(num_classes=4, input_channels=7)
#     model.load_state_dict(torch.load('resnet_model_gamma2.pth', map_location=device))
#     model.to(device)

    # # Re-create test_loader (or pass it around as a global var) # ran out of ram for this to be in the funct
    # test_dataset_path = 'M:/Datasets/int_split/Testing/'
    # test_dataset = PTDataset(root_dir=test_dataset_path, target_size=(500, 500))
    # test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)

    # Plot confusion matrix
    # plot_confusion_matrix(model, test_loader, device, class_names)

Using device: cuda


  with torch.cuda.amp.autocast(enabled=(device.type == 'cuda')):


Epoch [1/13] Train Loss: 1.3493, Train Acc: 41.84%, Val Loss: 65.4427, Val Acc: 28.57%
Epoch [2/13] Train Loss: nan, Train Acc: 72.34%, Val Loss: nan, Val Acc: 16.33%
Epoch [3/13] Train Loss: 0.5133, Train Acc: 82.98%, Val Loss: nan, Val Acc: 16.33%
Epoch [4/13] Train Loss: 0.5780, Train Acc: 81.56%, Val Loss: nan, Val Acc: 16.33%
Epoch [5/13] Train Loss: nan, Train Acc: 70.92%, Val Loss: nan, Val Acc: 16.33%
Epoch [6/13] Train Loss: nan, Train Acc: 61.70%, Val Loss: nan, Val Acc: 16.33%
Epoch [7/13] Train Loss: 0.4134, Train Acc: 80.14%, Val Loss: nan, Val Acc: 16.33%
Epoch [8/13] Train Loss: 0.2393, Train Acc: 91.49%, Val Loss: nan, Val Acc: 16.33%
Epoch [9/13] Train Loss: nan, Train Acc: 60.99%, Val Loss: nan, Val Acc: 16.33%
Epoch [10/13] Train Loss: nan, Train Acc: 85.82%, Val Loss: nan, Val Acc: 16.33%
Epoch [11/13] Train Loss: nan, Train Acc: 76.60%, Val Loss: nan, Val Acc: 16.33%
Epoch [12/13] Train Loss: nan, Train Acc: 75.18%, Val Loss: nan, Val Acc: 16.33%
Epoch [13/13] Trai

In [5]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Ensure the model is in evaluation mode
model.eval()

# Move model to appropriate device (CPU/GPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Collect all true labels and predictions
all_preds = []
all_labels = []

with torch.no_grad():
    for inputs, labels in test_loader:  
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = model(inputs)

        _, preds = torch.max(outputs, 1)

        all_preds.extend(preds.cpu().numpy()) 
        all_labels.extend(labels.cpu().numpy())  

conf_matrix = confusion_matrix(all_labels, all_preds)

class_names = ['Und.', 'Low', 'Med', 'High']  


disp = ConfusionMatrixDisplay(conf_matrix, display_labels=class_names)
disp.plot(cmap=plt.cm.Blues)
plt.title("Confusion Matrix")
plt.show()


NameError: name 'model' is not defined

# Effect of epoch, gamma, and Resnet complexity


In [3]:
import csv
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, WeightedRandomSampler
from collections import Counter

def get_resnet_model(model_depth=34, num_classes=4, input_channels=15):
    """Returns a ResNet model with the specified depth (18, 34, 50)."""
    if model_depth == 18:
        model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
    elif model_depth == 34:
        model = models.resnet34(weights=models.ResNet34_Weights.DEFAULT)
    elif model_depth == 50:
        model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
    else:
        raise ValueError("Invalid ResNet depth. Choose from 18, 34, or 50.")

    # Modify first convolution layer to handle 7 input channels
    model.conv1 = nn.Conv2d(input_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    
    return model

def run_experiment(
    train_dataset,
    val_dataset,
    test_dataset,
    model_depth=34,
    use_gamma_scheduler=True,
    num_epochs=10,
    batch_size=32,
    experiment_name="resnet_experiments.csv"
):
    """Runs an experiment and logs per-epoch loss & accuracy to CSV."""
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Weighted Sampler for class balancing
    train_labels = [label for _, label in train_dataset.data_list]
    class_counts = Counter(train_labels)
    weights = [1.0 / class_counts[label] for label in train_labels]
    train_sampler = WeightedRandomSampler(weights=weights, num_samples=len(weights), replacement=True)

    # DataLoaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler, num_workers=0, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

    # Initialize Model
    model = get_resnet_model(model_depth=model_depth).to(device)

    # Define Loss Function, Optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Optionally apply Gamma Scheduler
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95) if use_gamma_scheduler else None

    # CSV Header (if file doesn't exist)
    file_exists = os.path.isfile(experiment_name)
    with open(experiment_name, mode='a', newline='') as file:
        writer = csv.writer(file)
        if not file_exists:
            writer.writerow([
                "Epoch", "ResNet Depth", "Gamma Scheduler", "Batch Size", "Train Loss", 
                "Train Accuracy", "Val Loss", "Val Accuracy", "Model Parameters"
            ])

    # Training Loop
    for epoch in range(num_epochs):
        model.train()
        running_loss, correct, total = 0.0, 0, 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

        train_loss = running_loss / total
        train_acc = 100.0 * correct / total

        # Validation
        val_loss, val_acc = evaluate_model(model, val_loader, criterion, device)

        if scheduler:
            scheduler.step()

        print(f"Epoch {epoch+1}: Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

        # Log epoch data to CSV
        with open(experiment_name, mode='a', newline='') as file:
            writer = csv.writer(file)
            writer.writerow([
                epoch + 1, model_depth, use_gamma_scheduler, batch_size, 
                train_loss, train_acc, val_loss, val_acc, 
                sum(p.numel() for p in model.parameters())
            ])

    print(f"Experiment completed and results saved to {experiment_name}")
    return model

# Function to Evaluate Model
def evaluate_model(model, loader, criterion, device):
    model.eval()
    running_loss, correct, total = 0.0, 0, 0

    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    avg_loss = running_loss / total
    avg_acc = 100.0 * correct / total
    return avg_loss, avg_acc


In [5]:
train_dataset_path = 'H:/Datasets/int_split/Training/'
val_dataset_path = 'H:/Datasets/int_split/Validation/'
test_dataset_path = 'H:/Datasets/int_split/Testing/'

train_dataset = PTDataset(root_dir=train_dataset_path, target_size=(500, 500))
val_dataset = PTDataset(root_dir=val_dataset_path, target_size=(500, 500))
test_dataset = PTDataset(root_dir=test_dataset_path, target_size=(500, 500))

# # Experiment 1: ResNet-34, No gamma scheduler, 10 epochs
# run_experiment(
#     train_dataset, val_dataset, test_dataset,
#     model_depth=34,
#     use_gamma_scheduler=False,
#     num_epochs=10,
#     batch_size=32,
#     experiment_name="resnet_experiments.csv"
# )

# # Experiment 2: ResNet-34, No gamma scheduler, 10 epochs
# run_experiment(
#     train_dataset, val_dataset, test_dataset,
#     model_depth=34,
#     use_gamma_scheduler=False,
#     num_epochs=10,
#     batch_size=32,
#     experiment_name="resnet_experiments.csv"
# )

# # Experiment 3: ResNet-34, Gamma scheduler, 10 epochs
# run_experiment(
#     train_dataset, val_dataset, test_dataset,
#     model_depth=34,
#     use_gamma_scheduler= True,
#     num_epochs=10,
#     batch_size=32,
#     experiment_name="resnet_experiments.csv"
# )

# # Experiment 4: ResNet-34, Gamma scheduler, 10 epochs
# run_experiment(
#     train_dataset, val_dataset, test_dataset,
#     model_depth=34,
#     use_gamma_scheduler= True,
#     num_epochs=10,
#     batch_size=32,
#     experiment_name="resnet_experiments.csv"
# )

# # Experiment 5: ResNet-34, Gamma scheduler, 13 epochs
# run_experiment(
#     train_dataset, val_dataset, test_dataset,
#     model_depth=34,
#     use_gamma_scheduler= True,
#     num_epochs=13,
#     batch_size=32,
#     experiment_name="resnet_experiments.csv"
# )

# # Experiment 6: ResNet-34, Gamma scheduler, 13 epochs
# run_experiment(
#     train_dataset, val_dataset, test_dataset,
#     model_depth=34,
#     use_gamma_scheduler= True,
#     num_epochs=13,
#     batch_size=32,
#     experiment_name="resnet_experiments.csv"
# )

# # Experiment 7: ResNet-34, Gamma scheduler, 30 epochs
# run_experiment(
#     train_dataset, val_dataset, test_dataset,
#     model_depth=34,
#     use_gamma_scheduler= True,
#     num_epochs=30,
#     batch_size=32,
#     experiment_name="resnet_experiments.csv"
# )

# # Experiment 8: ResNet-34, Gamma scheduler, 30 epochs
# run_experiment(
#     train_dataset, val_dataset, test_dataset,
#     model_depth=34,
#     use_gamma_scheduler= True,
#     num_epochs=30,
#     batch_size=32,
#     experiment_name="resnet_experiments.csv"
# )

# # Experiment 9: ResNet-18, No amma scheduler, 10 epochs
# run_experiment(
#     train_dataset, val_dataset, test_dataset,
#     model_depth=18,
#     use_gamma_scheduler= False,
#     num_epochs=10,
#     batch_size=32,
#     experiment_name="resnet_experiments.csv"
# )

# # Experiment 10: ResNet-18, No amma scheduler, 10 epochs
# run_experiment(
#     train_dataset, val_dataset, test_dataset,
#     model_depth=18,
#     use_gamma_scheduler= False,
#     num_epochs=10,
#     batch_size=32,
#     experiment_name="resnet_experiments.csv"
# )

# # Experiment 11: ResNet-50, No gamma scheduler, 10 epochs
# run_experiment(
#     train_dataset, val_dataset, test_dataset,
#     model_depth=50,
#     use_gamma_scheduler=False,
#     num_epochs=20,
#     batch_size=32,
#     experiment_name="resnet_experiments.csv"
# )

# # Experiment 12: ResNet-50, No gamma scheduler, 10 epochs
# run_experiment(
#     train_dataset, val_dataset, test_dataset,
#     model_depth=50,
#     use_gamma_scheduler=False,
#     num_epochs=20,
#     batch_size=32,
#     experiment_name="resnet_experiments.csv"
# 
# Experiment 13: ResNet-50, No gamma scheduler, 10 epochs
# run_experiment(
#     train_dataset, val_dataset, test_dataset,
#     model_depth=50,
#     use_gamma_scheduler=False,
#     num_epochs=10,
#     batch_size=32,
#     experiment_name="resnet_experiments.csv"
# )

# # Experiment 14: ResNet-50, No gamma scheduler, 10 epochs
# run_experiment(
#     train_dataset, val_dataset, test_dataset,
#     model_depth=50,
#     use_gamma_scheduler=False,
#     num_epochs=10,
#     batch_size=32,
#     experiment_name="resnet_experiments.csv"
# )

# Experiment 15: ResNet-18, Gamma scheduler, 13 epochs
# run_experiment(
#     train_dataset, val_dataset, test_dataset,
#     model_depth=18,
#     use_gamma_scheduler=True,
#     num_epochs=10,
#     batch_size=13,
#     experiment_name="resnet_experiments.csv"
# )

# # Experiment 16: ResNet-18, Gamma scheduler, 13 epochs
# run_experiment(
#     train_dataset, val_dataset, test_dataset,
#     model_depth=18,
#     use_gamma_scheduler=True,
#     num_epochs=13,
#     batch_size=32,
#     experiment_name="resnet_experiments.csv"
# )

# # Experiment 17: ResNet-18, Gamma scheduler, 30 epochs
# run_experiment(
#     train_dataset, val_dataset, test_dataset,
#     model_depth=18,
#     use_gamma_scheduler=True,
#     num_epochs=30,
#     batch_size=32,
#     experiment_name="resnet_experiments.csv"
# )

# # Experiment 18: ResNet-18, Gamma scheduler, 30 epochs
# run_experiment(
#     train_dataset, val_dataset, test_dataset,
#     model_depth=18,
#     use_gamma_scheduler=True,
#     num_epochs=30,
#     batch_size=32,
#     experiment_name="resnet_experiments.csv"
# )

#  # Experiment 18: ResNet-18, Gamma scheduler, 30 epochs
# run_experiment(
#     train_dataset, val_dataset, test_dataset,
#     model_depth=18,
#     use_gamma_scheduler=True,
#     num_epochs=10,
#     batch_size=32,
#     experiment_name="resnet_experiments.csv"
# )

# # Experiment 18: ResNet-18, Gamma scheduler, 30 epochs
# run_experiment(
#     train_dataset, val_dataset, test_dataset,
#     model_depth=34,
#     use_gamma_scheduler=True,
#     num_epochs=10,
#     batch_size=32,
#     experiment_name="resnet_experiments.csv"
# )



Epoch 1: Train Loss: 0.8138, Train Acc: 66.67%, Val Loss: 13.0109, Val Acc: 40.82%
Epoch 2: Train Loss: 0.8864, Train Acc: 81.56%, Val Loss: 11.9270, Val Acc: 28.57%
Epoch 3: Train Loss: 0.4786, Train Acc: 82.98%, Val Loss: 4.5633, Val Acc: 48.98%
Epoch 4: Train Loss: 0.4273, Train Acc: 84.40%, Val Loss: 3.7922, Val Acc: 61.22%
Epoch 5: Train Loss: 0.2020, Train Acc: 95.04%, Val Loss: 1.6051, Val Acc: 73.47%
Epoch 6: Train Loss: 0.2837, Train Acc: 91.49%, Val Loss: 0.9432, Val Acc: 67.35%
Epoch 7: Train Loss: 0.1395, Train Acc: 95.04%, Val Loss: 1.4652, Val Acc: 46.94%
Epoch 8: Train Loss: 0.1921, Train Acc: 94.33%, Val Loss: 0.7314, Val Acc: 71.43%
Epoch 9: Train Loss: 0.1287, Train Acc: 95.74%, Val Loss: 0.5030, Val Acc: 89.80%
Epoch 10: Train Loss: 0.0314, Train Acc: 100.00%, Val Loss: 0.6346, Val Acc: 85.71%
Experiment completed and results saved to resnet_experiments.csv
Epoch 1: Train Loss: 1.2613, Train Acc: 49.65%, Val Loss: 78.5285, Val Acc: 34.69%
Epoch 2: Train Loss: 0.6463,

ResNet(
  (conv1): Conv2d(15, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
 

# Hyperparameter sweep

In [None]:
import os
import json
import torch
import torch.nn as nn
import torch.optim as optim
from itertools import product
from torch.utils.data import DataLoader, WeightedRandomSampler
from collections import Counter

def hyperparameter_search(output_dir, hyperparams, train_dataset, val_dataset, test_dataset):
    """
    Perform a grid search over the given hyperparameters.

    Args:
        output_dir (str): Directory to save results
        hyperparams (dict): Dictionary of hyperparameters to search
        train_dataset, val_dataset, test_dataset: Datasets for training, validation, and testing
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    # Get all hyperparameter combinations
    keys, values = zip(*hyperparams.items())
    param_combinations = [dict(zip(keys, v)) for v in product(*values)]
    
    for params in param_combinations:
        model_depth = params['model_depth']
        batch_size = params['batch_size']
        learning_rate = params['learning_rate']
        weight_decay = params['weight_decay']
        dropout = params['dropout']
        gamma = params['gamma']
        gamma_rate = params['gamma_rate']
        num_epochs = params['num_epochs']
        optimizer_type = params['optimizer']
        
        # Weighted sampler for balancing dataset
        train_labels = [label for _, label in train_dataset.data_list]
        class_counts = Counter(train_labels)
        weights = [1.0 / class_counts[label] for label in train_labels]
        train_sampler = WeightedRandomSampler(weights=weights, num_samples=len(weights), replacement=True)
        
        # DataLoaders
        train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler, num_workers=0, pin_memory=True)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)
        
        # Initialize model
        model = get_resnet_model(model_depth=model_depth, dropout=dropout).to(device)
        
        criterion = nn.CrossEntropyLoss()
        if optimizer_type == 'Adam':
            optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
        elif optimizer_type == 'SGD':
            optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay, momentum=0.9)
        elif optimizer_type == 'RMSprop':
            optimizer = optim.RMSprop(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
        else:
            raise ValueError("Unsupported optimizer type")
        
        scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=gamma_rate) if gamma else None
        
        logs = []
        for epoch in range(num_epochs):
            model.train()
            running_loss, correct, total = 0.0, 0, 0
            
            for inputs, labels in train_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                
                running_loss += loss.item() * inputs.size(0)
                _, preds = torch.max(outputs, 1)
                correct += (preds == labels).sum().item()
                total += labels.size(0)
            
            train_loss = running_loss / total
            train_acc = 100.0 * correct / total
            val_loss, val_acc = evaluate_model(model, val_loader, criterion, device)
            if scheduler:
                scheduler.step()
            
            logs.append({
                'epoch': epoch + 1,
                'train_loss': train_loss,
                'train_acc': train_acc,
                'val_loss': val_loss,
                'val_acc': val_acc
            })
        
        test_loss, test_acc = evaluate_model(model, test_loader, criterion, device)
        
        # Save results
        result_dir = os.path.join(output_dir, f"model_{model_depth}_batch_{batch_size}_lr_{learning_rate}_wd_{weight_decay}_dropout_{dropout}_gamma_{gamma}_gammaRate_{gamma_rate}_epochs_{num_epochs}_opt_{optimizer_type}")
        os.makedirs(result_dir, exist_ok=True)
        
        torch.save(model.state_dict(), os.path.join(result_dir, "model_weights.pth"))
        with open(os.path.join(result_dir, "logs.json"), 'w') as f:
            json.dump(logs, f, indent=4)
        with open(os.path.join(result_dir, "params.json"), 'w') as f:
            json.dump(params, f, indent=4)
        
        print(f"Finished training: {params}, Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%")

hyperparams = {
    'model_depth': [18, 34],
    'batch_size': [16, 32],
    'learning_rate': [0.001, 0.0001],
    'weight_decay': [0.0001, 0.001],
    'dropout': [0.2, 0.5],
    'gamma': [True, False],
    'gamma_rate': [0.95, 0.99],
    'num_epochs': [10, 20],
    'optimizer': ['Adam', 'SGD', 'RMSprop']
}

output_directory = "./grid_search_results"

datasets = (train_dataset, val_dataset, test_dataset)

hyperparameter_search(output_directory, hyperparams, *datasets)
