In [6]:
import os
import torch
import numpy as np
from torch import nn, optim
from torchvision import datasets, models
from torch.utils.data import DataLoader, Subset
from tqdm import tqdm
import random
import pandas as pd

device = torch.device("cuda")

# Function to calculate accuracy
def binary_accuracy(preds, y):
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float()
    acc = correct.sum() / len(correct)
    return acc

# Function to get the best epoch and accuracy from the nested list
def best_cv_val(nested_list):
    array = np.array(nested_list)
    means = np.mean(array, axis=0)
    best_epoch = np.argmax(means)
    return best_epoch + 1, means[best_epoch]

# ... [rest of your unchanged setup, like the RandomGaussianBlur class, etc.] ...

os.chdir('/home/kdoherty/spurge/data_release')
train_dir = './data/crop_39/train'

data_transforms = transforms.Compose([
    transforms.ToTensor(), 
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

lrs = [0.001, 0.0005, 0.0001, 0.00005, 0.00001]
all_lr_results = []

for lr in lrs:
    batch_size = 32
    n_epochs = 50
    full_dataset = datasets.ImageFolder(train_dir, transform=data_transforms)
    seeds = range(8)
    
    seed_epoch_accs = []

    for seed in seeds:
        torch.manual_seed(seed)
        np.random.seed(seed)
        random.seed(seed)
    
        dataset_size = len(full_dataset)
        indices = list(range(dataset_size))
        random.shuffle(indices)
        train_indices = indices[:128]
        val_indices = indices[128:256]
    
        train_subset = Subset(full_dataset, train_indices)
        val_subset = Subset(full_dataset, val_indices)
    
        train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_subset, batch_size=batch_size)
    
        model = models.resnet50(pretrained=True)
        num_ftrs = model.fc.in_features
        model.fc = nn.Sequential(nn.Linear(num_ftrs, 1))
    
        model = model.to(device)
        criterion = nn.BCEWithLogitsLoss().to(device)
        optimizer = optim.AdamW(model.parameters(), lr=lr)
    
        epoch_accs = []

        with tqdm(total=n_epochs*len(train_loader), unit="batch", desc=f"LR: {lr} Seed: {seed}") as pbar:
            for epoch in range(n_epochs):
                model.train()
                running_loss = 0
    
                for images, labels in train_loader:
                    images, labels = images.to(device), labels.to(device)
                    optimizer.zero_grad()
                    output = model(images)
                    loss = criterion(output.squeeze(), labels.float())
                    loss.backward()
                    optimizer.step()
                    running_loss += loss.item()
                    pbar.update(1)
    
                train_loss = running_loss/len(train_loader)
    
                # Validate the model
                model.eval()
                running_loss = 0
                running_acc = 0
    
                for images, labels in val_loader:
                    images, labels = images.to(device), labels.to(device)
                    output = model(images)
                    acc = binary_accuracy(output.squeeze(), labels.float())
                    running_loss += loss.item()
                    running_acc += acc.item()
    
                val_loss = running_loss/len(val_loader)
                val_acc = running_acc/len(val_loader)
                epoch_accs.append(val_acc)
                
                pbar.set_postfix({'Learning rate': lr,
                                  'Seed':seed,
                                  'Epoch': epoch+1, 
                                  'Validation Loss': f'{val_loss:.3f}', 
                                  'Validation Accuracy': f'{val_acc:.3f}'})
                
        seed_epoch_accs.append(epoch_accs)
    
    best_epoch, best_accuracy = best_cv_val(seed_epoch_accs)
    print(f'lr: {lr}, Best epoch: {best_epoch}, Accuracy {best_accuracy}')
    all_lr_results.append({
        'lr': lr,
        'best_epoch': best_epoch,
        'accuracy': best_accuracy
    })

# Create a DataFrame and save as CSV
df = pd.DataFrame(all_lr_results)
df.to_csv('./results/best_lr.csv', index=False)

print("Results saved to './results/best_lr.csv'")


LR: 0.001 Seed: 0: 100%|████████████████████████████████████████████████████████████| 40/40 [00:04<00:00,  9.75batch/s, Learning rate=0.001, Seed=0, Epoch=10, Validation Loss=0.392, Validation Accuracy=0.602]
LR: 0.001 Seed: 1: 100%|████████████████████████████████████████████████████████████| 40/40 [00:04<00:00,  9.98batch/s, Learning rate=0.001, Seed=1, Epoch=10, Validation Loss=0.104, Validation Accuracy=0.703]
LR: 0.001 Seed: 2: 100%|████████████████████████████████████████████████████████████| 40/40 [00:04<00:00,  9.99batch/s, Learning rate=0.001, Seed=2, Epoch=10, Validation Loss=0.052, Validation Accuracy=0.734]
LR: 0.001 Seed: 3: 100%|████████████████████████████████████████████████████████████| 40/40 [00:04<00:00,  9.98batch/s, Learning rate=0.001, Seed=3, Epoch=10, Validation Loss=0.014, Validation Accuracy=0.711]
LR: 0.001 Seed: 4: 100%|████████████████████████████████████████████████████████████| 40/40 [00:04<00:00,  9.85batch/s, Learning rate=0.001, Seed=4, Epoch=10, Valid

lr: 0.001, Best epoch: 8, Accuracy 0.7236328125


LR: 0.0005 Seed: 0: 100%|██████████████████████████████████████████████████████████| 40/40 [00:04<00:00,  9.48batch/s, Learning rate=0.0005, Seed=0, Epoch=10, Validation Loss=0.068, Validation Accuracy=0.742]
LR: 0.0005 Seed: 1: 100%|██████████████████████████████████████████████████████████| 40/40 [00:04<00:00,  9.65batch/s, Learning rate=0.0005, Seed=1, Epoch=10, Validation Loss=0.050, Validation Accuracy=0.672]
LR: 0.0005 Seed: 2: 100%|██████████████████████████████████████████████████████████| 40/40 [00:04<00:00,  9.90batch/s, Learning rate=0.0005, Seed=2, Epoch=10, Validation Loss=0.148, Validation Accuracy=0.531]
LR: 0.0005 Seed: 3: 100%|██████████████████████████████████████████████████████████| 40/40 [00:04<00:00,  9.91batch/s, Learning rate=0.0005, Seed=3, Epoch=10, Validation Loss=0.008, Validation Accuracy=0.648]
LR: 0.0005 Seed: 4: 100%|██████████████████████████████████████████████████████████| 40/40 [00:04<00:00,  9.94batch/s, Learning rate=0.0005, Seed=4, Epoch=10, Valid

lr: 0.0005, Best epoch: 6, Accuracy 0.703125


LR: 0.0001 Seed: 0: 100%|██████████████████████████████████████████████████████████| 40/40 [00:04<00:00,  9.75batch/s, Learning rate=0.0001, Seed=0, Epoch=10, Validation Loss=0.006, Validation Accuracy=0.703]
LR: 0.0001 Seed: 1: 100%|██████████████████████████████████████████████████████████| 40/40 [00:04<00:00,  9.89batch/s, Learning rate=0.0001, Seed=1, Epoch=10, Validation Loss=0.010, Validation Accuracy=0.719]
LR: 0.0001 Seed: 2: 100%|██████████████████████████████████████████████████████████| 40/40 [00:04<00:00,  9.90batch/s, Learning rate=0.0001, Seed=2, Epoch=10, Validation Loss=0.008, Validation Accuracy=0.719]
LR: 0.0001 Seed: 3: 100%|██████████████████████████████████████████████████████████| 40/40 [00:04<00:00,  9.90batch/s, Learning rate=0.0001, Seed=3, Epoch=10, Validation Loss=0.004, Validation Accuracy=0.664]
LR: 0.0001 Seed: 4: 100%|██████████████████████████████████████████████████████████| 40/40 [00:04<00:00,  9.64batch/s, Learning rate=0.0001, Seed=4, Epoch=10, Valid

lr: 0.0001, Best epoch: 10, Accuracy 0.703125


LR: 5e-05 Seed: 0: 100%|█████████████████████████████████████████████████████████████| 40/40 [00:03<00:00, 10.05batch/s, Learning rate=5e-5, Seed=0, Epoch=10, Validation Loss=0.016, Validation Accuracy=0.633]
LR: 5e-05 Seed: 1: 100%|█████████████████████████████████████████████████████████████| 40/40 [00:04<00:00,  9.92batch/s, Learning rate=5e-5, Seed=1, Epoch=10, Validation Loss=0.024, Validation Accuracy=0.656]
LR: 5e-05 Seed: 2: 100%|█████████████████████████████████████████████████████████████| 40/40 [00:04<00:00,  9.96batch/s, Learning rate=5e-5, Seed=2, Epoch=10, Validation Loss=0.025, Validation Accuracy=0.703]
LR: 5e-05 Seed: 3: 100%|█████████████████████████████████████████████████████████████| 40/40 [00:04<00:00,  9.90batch/s, Learning rate=5e-5, Seed=3, Epoch=10, Validation Loss=0.030, Validation Accuracy=0.688]
LR: 5e-05 Seed: 4: 100%|█████████████████████████████████████████████████████████████| 40/40 [00:04<00:00,  9.95batch/s, Learning rate=5e-5, Seed=4, Epoch=10, Valid

lr: 5e-05, Best epoch: 10, Accuracy 0.6845703125


LR: 1e-05 Seed: 0: 100%|█████████████████████████████████████████████████████████████| 40/40 [00:04<00:00,  9.96batch/s, Learning rate=1e-5, Seed=0, Epoch=10, Validation Loss=0.299, Validation Accuracy=0.609]
LR: 1e-05 Seed: 1: 100%|█████████████████████████████████████████████████████████████| 40/40 [00:04<00:00,  9.56batch/s, Learning rate=1e-5, Seed=1, Epoch=10, Validation Loss=0.326, Validation Accuracy=0.625]
LR: 1e-05 Seed: 2: 100%|█████████████████████████████████████████████████████████████| 40/40 [00:04<00:00,  9.12batch/s, Learning rate=1e-5, Seed=2, Epoch=10, Validation Loss=0.325, Validation Accuracy=0.562]
LR: 1e-05 Seed: 3: 100%|█████████████████████████████████████████████████████████████| 40/40 [00:04<00:00,  9.89batch/s, Learning rate=1e-5, Seed=3, Epoch=10, Validation Loss=0.315, Validation Accuracy=0.648]
LR: 1e-05 Seed: 4: 100%|█████████████████████████████████████████████████████████████| 40/40 [00:04<00:00,  9.96batch/s, Learning rate=1e-5, Seed=4, Epoch=10, Valid

lr: 1e-05, Best epoch: 10, Accuracy 0.5966796875
Results saved to './results/best_lr.csv'


In [9]:
df = pd.read_csv('./results/best_lr.csv')
best_row = df.loc[df['accuracy'].idxmax()]
best_lr = best_row['lr']
best_lr

0.001