In [None]:
!pip install wandb -Uq

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms
import wandb
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import time

In [2]:
import wandb
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33msimeonbetapudi[0m ([33msimeonbetapudi-belmont-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [3]:
# check that there's more than just the CPU available
device = torch.device('cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu')
print(f"Using device: {device}")
if device==torch.device('cpu'): print("You should probably restart this with a GPU. It will be slow otherwise.")

# Set random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)

Using device: cuda


In [4]:
# Available datasets with their normalization parameters
DATASETS = {
    'MNIST': {
        'dataset_class': torchvision.datasets.MNIST,
        'mean': (0.1307,), 'std': (0.3081,),
        'description': '28x28 grayscale handwritten digits (0-9)'
    },
    'FashionMNIST': {
        'dataset_class': torchvision.datasets.FashionMNIST,
        'mean': (0.2860,), 'std': (0.3530,),
        'description': '28x28 grayscale fashion items (10 classes: shirt, shoe, etc.)'
    },
    'CIFAR10': {
        'dataset_class': torchvision.datasets.CIFAR10,
        'mean': (0.4914, 0.4822, 0.4465), 'std': (0.2023, 0.1994, 0.2010),
        'description': '32x32 color images (10 classes: planes, cars, etc.)'
    },
    'CIFAR100': {
        'dataset_class': torchvision.datasets.CIFAR100,
        'mean': (0.5071, 0.4867, 0.4408), 'std': (0.2675, 0.2565, 0.2761),
        'description': '32x32 color images (100 classes)'
    }
}

    #### YOUR CHOICES for dataset & activation functions
DATASET_NAME = 'CIFAR10'  # Choose between 'MNIST', 'FashionMNIST', 'CIFAR10', or 'CIFAR100'
BATCH_SIZE = 256 # could make this bigger

In [5]:
def get_dataset_info(dataset_name):
    """Get dataset configuration and sample to determine properties."""
    config = DATASETS[dataset_name]
    basic_transform = transforms.Compose([transforms.ToTensor()])
    temp_dataset = config['dataset_class'](root='./data', train=True, download=True, transform=basic_transform)
    sample_img, _ = temp_dataset[0]

    return {
        'config': config,
        'num_channels': sample_img.shape[0],
        'img_height': sample_img.shape[1],
        'img_width': sample_img.shape[2],
        'num_classes': len(temp_dataset.classes),
        'class_names': temp_dataset.classes
    }

dataset_info = get_dataset_info(DATASET_NAME)

print(f"\nDataset Properties:")
print(f"  Image size: {dataset_info['num_channels']}x{dataset_info['img_height']}x{dataset_info['img_width']}")
print(f"  Number of classes: {dataset_info['num_classes']}")
print(f"  Classes: {dataset_info['class_names'][:10]}...")  # Show first 10 classes

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(dataset_info['config']['mean'], dataset_info['config']['std'])
])

train_dataset = dataset_info['config']['dataset_class'](root='./data', train=True, download=False, transform=transform)
test_dataset = dataset_info['config']['dataset_class'](root='./data', train=False, download=False, transform=transform)


Dataset Properties:
  Image size: 3x32x32
  Number of classes: 10
  Classes: ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']...


In [6]:
class CNN(nn.Module):
    def __init__(self, dataset_info, activation=torch.nn.ReLU(), base_channels=32, channel_mult=2, n_conv_layers=4, kernel_size=3, stride=2):
        super(CNN, self).__init__()

        # Build conv layers dynamically
        self.conv_layers = nn.ModuleList()
        in_channels = dataset_info['num_channels']
        for i in range(n_conv_layers):
            out_channels = base_channels * (channel_mult ** i)
            self.conv_layers.append(nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding=1))
            in_channels = out_channels

        self.activation = activation
        self.dropout1 = nn.Dropout(0.25)
        self.global_avg_pool = nn.AdaptiveAvgPool2d(1) # put near end: yields one value per channel

        # Final channels after all conv layers
        final_channels = base_channels * (channel_mult ** (n_conv_layers - 1))
        self.fc = nn.Linear(final_channels, dataset_info['num_classes'])

    def forward(self, x):
        for conv_layer in self.conv_layers:
            x = self.activation(conv_layer(x))
            x = self.dropout1(x)
        x = self.global_avg_pool(x)  # one value per channel
        return self.fc(  x.flatten(start_dim=1) )  # flatten and run through linear layer



cnn_model = CNN(dataset_info).to(device)
print(f"CNN Model Parameters: {sum(p.numel() for p in cnn_model.parameters()):,}")

# quick test to make sure the forward() runs w/o errors:
rnd_batch = torch.randn([128,dataset_info['num_channels'], dataset_info['img_height'], dataset_info['img_width']]).to(device)
result = cnn_model(rnd_batch)
del rnd_batch

CNN Model Parameters: 390,986


In [7]:
def train_model(model, train_loader, test_loader, epochs=10, lr=0.001):
    # Initialize wandb

    # Loss function:
    # Note: targets are just class indices (0-9), not one-hot vectors
    # nn.CrossEntropyLoss handles one-hot encoding internally for efficiency
    criterion = nn.CrossEntropyLoss()   # used for categorical variables, expects raw "logits"

    optimizer = optim.Adam(model.parameters(), lr=lr)

    train_losses = []
    train_accuracies = []
    test_accuracies = []

    for epoch in range(epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        # Use tqdm for progress bar
        pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{epochs}')

        for batch_idx, (data, target) in enumerate(pbar):
            data, target = data.to(device), target.to(device)

            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            pred = output.argmax(dim=1, keepdim=True)
            correct_train += pred.eq(target.view_as(pred)).sum().item()
            total_train += target.size(0)

            # Update progress bar
            pbar.set_postfix({
                'Loss': f'{loss.item():.4f}',
                'Acc': f'{100.*correct_train/total_train:.2f}%'
            })

        # Calculate epoch metrics
        epoch_loss = running_loss / len(train_loader)
        train_acc = 100. * correct_train / total_train

        # Evaluation phase
        model.eval()
        test_loss = 0
        correct_test = 0
        total_test = 0

        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                test_loss += criterion(output, target).item()
                pred = output.argmax(dim=1, keepdim=True)
                correct_test += pred.eq(target.view_as(pred)).sum().item()
                total_test += target.size(0)

        test_acc = 100. * correct_test / total_test

        # Store metrics
        train_losses.append(epoch_loss)
        train_accuracies.append(train_acc)
        test_accuracies.append(test_acc)

        # Log to wandb
        wandb.log({
            "epoch": epoch + 1,
            "train_loss": epoch_loss,
            "train_accuracy": train_acc,
            "test_accuracy": test_acc
        })

        print(f'Epoch {epoch+1}: Train Loss: {epoch_loss:.4f}, Train Acc: {train_acc:.2f}%, Test Acc: {test_acc:.2f}%')

    wandb.finish()

    return {
        'train_losses': train_losses,
        'train_accuracies': train_accuracies,
        'test_accuracies': test_accuracies,
        'final_test_acc': test_acc
    }

In [8]:
# Expanded sweep config
sweep_config = {
    'method': 'bayes',   # options: grid, random, bayes
    'metric': {
        'name': 'test_accuracy',
        'goal': 'maximize'
    },
    'parameters': {
        # Learning rate search space
        # Activation function
        'activation': {
            'values': ['ReLU', 'LeakyReLU', 'GELU', 'SiLU']
        },

        'base_channels': {
            'values': [16, 32, 64, 128]
        },

        'channel_mult': {
            'values': [1, 2, 4]
        },

        'n_conv_layers': {
            'values': [3, 4, 5]
        },

        'kernel_size': {
            'values': [1, 3, 5, (3,5), (5,3)]
        },

        'stride': {
            'values': [1, 2, 3]
        }

    }
}

# Define available activation functions
activations = {
    "ReLU": torch.nn.ReLU(),
    "LeakyReLU": torch.nn.LeakyReLU(),
    "GELU": torch.nn.GELU(),
    "SiLU": torch.nn.SiLU()
}

In [None]:
# Sweep-compatible training function
def sweep_train():

    wandb.init(project=f"convnet_{DATASET_NAME}_sweep", name=f"Idk Where this shows", reinit=True)
    config = wandb.config

    # Pick activation
    activation = activations[config.activation]

    # Build model
    model = CNN(dataset_info,
                activation=activation,
                base_channels=config.base_channels,
                channel_mult=config.channel_mult,
                n_conv_layers=config.n_conv_layers,
                kernel_size=config.kernel_size,
                stride=config.stride).to(device)


    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

    # Train model
    results = train_model(model,
                          train_loader,
                          test_loader)

    return results


# Initialize and launch sweep
sweep_id = wandb.sweep(sweep_config, project="convnet_CIFAR10_sweep")
wandb.agent(sweep_id, function=sweep_train, count=10)  # run 10 experiments

Create sweep with ID: klsyko7c
Sweep URL: https://wandb.ai/simeonbetapudi-belmont-university/convnet_CIFAR10_sweep/sweeps/klsyko7c


[34m[1mwandb[0m: Agent Starting Run: z686m9d9 with config:
[34m[1mwandb[0m: 	activation: GELU
[34m[1mwandb[0m: 	base_channels: 16
[34m[1mwandb[0m: 	channel_mult: 1
[34m[1mwandb[0m: 	kernel_size: 1
[34m[1mwandb[0m: 	n_conv_layers: 3
[34m[1mwandb[0m: 	stride: 3




Epoch 1/10: 100%|██████████| 196/196 [00:12<00:00, 16.26it/s, Loss=2.3036, Acc=10.69%]


Epoch 1: Train Loss: 2.3037, Train Acc: 10.69%, Test Acc: 14.13%


Epoch 2/10: 100%|██████████| 196/196 [00:11<00:00, 16.83it/s, Loss=2.1763, Acc=15.50%]


Epoch 2: Train Loss: 2.2479, Train Acc: 15.50%, Test Acc: 18.04%


Epoch 3/10: 100%|██████████| 196/196 [00:11<00:00, 16.76it/s, Loss=2.1413, Acc=17.55%]


Epoch 3: Train Loss: 2.1969, Train Acc: 17.55%, Test Acc: 19.01%


Epoch 4/10: 100%|██████████| 196/196 [00:11<00:00, 16.83it/s, Loss=2.2437, Acc=17.95%]


Epoch 4: Train Loss: 2.1852, Train Acc: 17.95%, Test Acc: 19.47%


Epoch 5/10: 100%|██████████| 196/196 [00:11<00:00, 16.74it/s, Loss=2.1229, Acc=18.08%]


Epoch 5: Train Loss: 2.1776, Train Acc: 18.08%, Test Acc: 19.42%


Epoch 6/10: 100%|██████████| 196/196 [00:11<00:00, 16.86it/s, Loss=2.1690, Acc=18.24%]


Epoch 6: Train Loss: 2.1703, Train Acc: 18.24%, Test Acc: 19.82%


Epoch 7/10: 100%|██████████| 196/196 [00:11<00:00, 16.82it/s, Loss=2.1459, Acc=18.51%]


Epoch 7: Train Loss: 2.1655, Train Acc: 18.51%, Test Acc: 19.65%


Epoch 8/10: 100%|██████████| 196/196 [00:11<00:00, 16.77it/s, Loss=2.0533, Acc=18.49%]


Epoch 8: Train Loss: 2.1587, Train Acc: 18.49%, Test Acc: 19.94%


Epoch 9/10: 100%|██████████| 196/196 [00:11<00:00, 16.79it/s, Loss=2.1565, Acc=18.77%]


Epoch 9: Train Loss: 2.1531, Train Acc: 18.77%, Test Acc: 20.06%


Epoch 10/10: 100%|██████████| 196/196 [00:11<00:00, 16.87it/s, Loss=2.2058, Acc=19.16%]


Epoch 10: Train Loss: 2.1510, Train Acc: 19.16%, Test Acc: 20.34%


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁▅▇▇▇▇▇███
train_accuracy,▁▅▇▇▇▇▇▇██
train_loss,█▅▃▃▂▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,20.34
train_accuracy,19.16
train_loss,2.15102


[34m[1mwandb[0m: Agent Starting Run: snfitu8t with config:
[34m[1mwandb[0m: 	activation: SiLU
[34m[1mwandb[0m: 	base_channels: 128
[34m[1mwandb[0m: 	channel_mult: 2
[34m[1mwandb[0m: 	kernel_size: [3, 5]
[34m[1mwandb[0m: 	n_conv_layers: 3
[34m[1mwandb[0m: 	stride: 3


Epoch 1/10: 100%|██████████| 196/196 [00:11<00:00, 16.58it/s, Loss=1.2446, Acc=45.48%]


Epoch 1: Train Loss: 1.5186, Train Acc: 45.48%, Test Acc: 54.22%


Epoch 2/10: 100%|██████████| 196/196 [00:11<00:00, 16.67it/s, Loss=1.0306, Acc=57.58%]


Epoch 2: Train Loss: 1.1904, Train Acc: 57.58%, Test Acc: 60.17%


Epoch 3/10: 100%|██████████| 196/196 [00:11<00:00, 16.72it/s, Loss=1.1087, Acc=63.96%]


Epoch 3: Train Loss: 1.0186, Train Acc: 63.96%, Test Acc: 63.15%


Epoch 4/10: 100%|██████████| 196/196 [00:11<00:00, 16.64it/s, Loss=0.7585, Acc=68.74%]


Epoch 4: Train Loss: 0.8840, Train Acc: 68.74%, Test Acc: 64.23%


Epoch 5/10: 100%|██████████| 196/196 [00:11<00:00, 16.78it/s, Loss=0.9738, Acc=72.98%]


Epoch 5: Train Loss: 0.7629, Train Acc: 72.98%, Test Acc: 65.53%


Epoch 6/10: 100%|██████████| 196/196 [00:11<00:00, 16.69it/s, Loss=0.6764, Acc=77.06%]


Epoch 6: Train Loss: 0.6512, Train Acc: 77.06%, Test Acc: 65.57%


Epoch 7/10: 100%|██████████| 196/196 [00:11<00:00, 16.61it/s, Loss=0.6262, Acc=80.10%]


Epoch 7: Train Loss: 0.5607, Train Acc: 80.10%, Test Acc: 66.29%


Epoch 8/10: 100%|██████████| 196/196 [00:11<00:00, 16.63it/s, Loss=0.5539, Acc=82.98%]


Epoch 8: Train Loss: 0.4798, Train Acc: 82.98%, Test Acc: 66.66%


Epoch 9/10: 100%|██████████| 196/196 [00:11<00:00, 16.74it/s, Loss=0.4468, Acc=85.08%]


Epoch 9: Train Loss: 0.4185, Train Acc: 85.08%, Test Acc: 66.65%


Epoch 10/10: 100%|██████████| 196/196 [00:11<00:00, 16.66it/s, Loss=0.2304, Acc=86.97%]


Epoch 10: Train Loss: 0.3652, Train Acc: 86.97%, Test Acc: 66.31%


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁▄▆▇▇▇████
train_accuracy,▁▃▄▅▆▆▇▇██
train_loss,█▆▅▄▃▃▂▂▁▁

0,1
epoch,10.0
test_accuracy,66.31
train_accuracy,86.972
train_loss,0.36519


[34m[1mwandb[0m: Agent Starting Run: 72qksw0r with config:
[34m[1mwandb[0m: 	activation: SiLU
[34m[1mwandb[0m: 	base_channels: 64
[34m[1mwandb[0m: 	channel_mult: 4
[34m[1mwandb[0m: 	kernel_size: [3, 5]
[34m[1mwandb[0m: 	n_conv_layers: 3
[34m[1mwandb[0m: 	stride: 2


Epoch 1/10: 100%|██████████| 196/196 [00:12<00:00, 16.19it/s, Loss=1.2930, Acc=40.10%]


Epoch 1: Train Loss: 1.6359, Train Acc: 40.10%, Test Acc: 48.23%


Epoch 2/10: 100%|██████████| 196/196 [00:12<00:00, 16.30it/s, Loss=1.2920, Acc=53.07%]


Epoch 2: Train Loss: 1.3031, Train Acc: 53.07%, Test Acc: 57.27%


Epoch 3/10: 100%|██████████| 196/196 [00:11<00:00, 16.38it/s, Loss=1.1605, Acc=61.01%]


Epoch 3: Train Loss: 1.1053, Train Acc: 61.01%, Test Acc: 62.81%


Epoch 4/10: 100%|██████████| 196/196 [00:11<00:00, 16.34it/s, Loss=0.8251, Acc=66.48%]


Epoch 4: Train Loss: 0.9497, Train Acc: 66.48%, Test Acc: 65.62%


Epoch 5/10: 100%|██████████| 196/196 [00:11<00:00, 16.40it/s, Loss=0.8296, Acc=71.75%]


Epoch 5: Train Loss: 0.8055, Train Acc: 71.75%, Test Acc: 67.80%


Epoch 6/10: 100%|██████████| 196/196 [00:11<00:00, 16.37it/s, Loss=0.5983, Acc=76.51%]


Epoch 6: Train Loss: 0.6722, Train Acc: 76.51%, Test Acc: 68.43%


Epoch 7/10: 100%|██████████| 196/196 [00:11<00:00, 16.42it/s, Loss=0.5848, Acc=81.10%]


Epoch 7: Train Loss: 0.5504, Train Acc: 81.10%, Test Acc: 69.45%


Epoch 8/10: 100%|██████████| 196/196 [00:11<00:00, 16.37it/s, Loss=0.4923, Acc=84.68%]


Epoch 8: Train Loss: 0.4428, Train Acc: 84.68%, Test Acc: 69.91%


Epoch 9/10: 100%|██████████| 196/196 [00:11<00:00, 16.36it/s, Loss=0.3800, Acc=87.87%]


Epoch 9: Train Loss: 0.3544, Train Acc: 87.87%, Test Acc: 70.31%


Epoch 10/10: 100%|██████████| 196/196 [00:12<00:00, 16.32it/s, Loss=0.3526, Acc=90.63%]


Epoch 10: Train Loss: 0.2781, Train Acc: 90.63%, Test Acc: 70.18%


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁▄▆▇▇▇████
train_accuracy,▁▃▄▅▅▆▇▇██
train_loss,█▆▅▄▄▃▂▂▁▁

0,1
epoch,10.0
test_accuracy,70.18
train_accuracy,90.626
train_loss,0.27815


[34m[1mwandb[0m: Agent Starting Run: h01owc2o with config:
[34m[1mwandb[0m: 	activation: SiLU
[34m[1mwandb[0m: 	base_channels: 128
[34m[1mwandb[0m: 	channel_mult: 4
[34m[1mwandb[0m: 	kernel_size: [3, 5]
[34m[1mwandb[0m: 	n_conv_layers: 3
[34m[1mwandb[0m: 	stride: 3


Epoch 1/10: 100%|██████████| 196/196 [00:12<00:00, 16.13it/s, Loss=1.6565, Acc=45.88%]


Epoch 1: Train Loss: 1.5083, Train Acc: 45.88%, Test Acc: 53.55%


Epoch 2/10: 100%|██████████| 196/196 [00:12<00:00, 16.26it/s, Loss=1.0060, Acc=58.21%]


Epoch 2: Train Loss: 1.1663, Train Acc: 58.21%, Test Acc: 60.41%


Epoch 3/10: 100%|██████████| 196/196 [00:12<00:00, 16.25it/s, Loss=0.9899, Acc=66.18%]


Epoch 3: Train Loss: 0.9517, Train Acc: 66.18%, Test Acc: 62.82%


Epoch 4/10: 100%|██████████| 196/196 [00:12<00:00, 16.25it/s, Loss=0.8496, Acc=74.04%]


Epoch 4: Train Loss: 0.7332, Train Acc: 74.04%, Test Acc: 65.48%


Epoch 5/10: 100%|██████████| 196/196 [00:11<00:00, 16.37it/s, Loss=0.5069, Acc=82.32%]


Epoch 5: Train Loss: 0.5068, Train Acc: 82.32%, Test Acc: 65.54%


Epoch 6/10: 100%|██████████| 196/196 [00:12<00:00, 16.19it/s, Loss=0.4334, Acc=88.44%]


Epoch 6: Train Loss: 0.3326, Train Acc: 88.44%, Test Acc: 65.55%


Epoch 7/10: 100%|██████████| 196/196 [00:12<00:00, 16.21it/s, Loss=0.3722, Acc=92.16%]


Epoch 7: Train Loss: 0.2234, Train Acc: 92.16%, Test Acc: 65.65%


Epoch 8/10: 100%|██████████| 196/196 [00:12<00:00, 16.26it/s, Loss=0.1615, Acc=94.48%]


Epoch 8: Train Loss: 0.1626, Train Acc: 94.48%, Test Acc: 65.84%


Epoch 9/10: 100%|██████████| 196/196 [00:12<00:00, 16.22it/s, Loss=0.0633, Acc=95.50%]


Epoch 9: Train Loss: 0.1288, Train Acc: 95.50%, Test Acc: 66.07%


Epoch 10/10: 100%|██████████| 196/196 [00:12<00:00, 16.23it/s, Loss=0.1162, Acc=96.14%]


Epoch 10: Train Loss: 0.1132, Train Acc: 96.14%, Test Acc: 66.27%


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁▅▆███████
train_accuracy,▁▃▄▅▆▇▇███
train_loss,█▆▅▄▃▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,66.27
train_accuracy,96.144
train_loss,0.11318


[34m[1mwandb[0m: Agent Starting Run: v6i5n8ic with config:
[34m[1mwandb[0m: 	activation: SiLU
[34m[1mwandb[0m: 	base_channels: 128
[34m[1mwandb[0m: 	channel_mult: 4
[34m[1mwandb[0m: 	kernel_size: [5, 3]
[34m[1mwandb[0m: 	n_conv_layers: 3
[34m[1mwandb[0m: 	stride: 1


Epoch 1/10: 100%|██████████| 196/196 [00:50<00:00,  3.89it/s, Loss=1.3473, Acc=39.39%]


Epoch 1: Train Loss: 1.6556, Train Acc: 39.39%, Test Acc: 47.27%


Epoch 2/10: 100%|██████████| 196/196 [00:50<00:00,  3.88it/s, Loss=1.2447, Acc=53.77%]


Epoch 2: Train Loss: 1.3000, Train Acc: 53.77%, Test Acc: 56.57%


Epoch 3/10: 100%|██████████| 196/196 [00:50<00:00,  3.88it/s, Loss=1.1316, Acc=60.76%]


Epoch 3: Train Loss: 1.1161, Train Acc: 60.76%, Test Acc: 64.30%


Epoch 4/10: 100%|██████████| 196/196 [00:50<00:00,  3.88it/s, Loss=1.1797, Acc=65.52%]


Epoch 4: Train Loss: 0.9876, Train Acc: 65.52%, Test Acc: 66.43%


Epoch 5/10: 100%|██████████| 196/196 [00:50<00:00,  3.87it/s, Loss=0.8937, Acc=69.33%]


Epoch 5: Train Loss: 0.8818, Train Acc: 69.33%, Test Acc: 67.36%


Epoch 6/10: 100%|██████████| 196/196 [00:50<00:00,  3.88it/s, Loss=0.6968, Acc=72.18%]


Epoch 6: Train Loss: 0.8018, Train Acc: 72.18%, Test Acc: 73.59%


Epoch 7/10: 100%|██████████| 196/196 [00:50<00:00,  3.88it/s, Loss=0.9175, Acc=74.62%]


Epoch 7: Train Loss: 0.7359, Train Acc: 74.62%, Test Acc: 74.22%


Epoch 8/10: 100%|██████████| 196/196 [00:50<00:00,  3.88it/s, Loss=0.7468, Acc=76.91%]


Epoch 8: Train Loss: 0.6687, Train Acc: 76.91%, Test Acc: 75.30%


Epoch 9/10: 100%|██████████| 196/196 [00:50<00:00,  3.88it/s, Loss=0.6464, Acc=78.95%]


Epoch 9: Train Loss: 0.6092, Train Acc: 78.95%, Test Acc: 75.12%


Epoch 10/10: 100%|██████████| 196/196 [00:50<00:00,  3.89it/s, Loss=0.4386, Acc=81.05%]


Epoch 10: Train Loss: 0.5519, Train Acc: 81.05%, Test Acc: 76.20%


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁▃▅▆▆▇████
train_accuracy,▁▃▅▅▆▇▇▇██
train_loss,█▆▅▄▃▃▂▂▁▁

0,1
epoch,10.0
test_accuracy,76.2
train_accuracy,81.048
train_loss,0.55189


[34m[1mwandb[0m: Agent Starting Run: lphsjk93 with config:
[34m[1mwandb[0m: 	activation: SiLU
[34m[1mwandb[0m: 	base_channels: 128
[34m[1mwandb[0m: 	channel_mult: 4
[34m[1mwandb[0m: 	kernel_size: [5, 3]
[34m[1mwandb[0m: 	n_conv_layers: 5
[34m[1mwandb[0m: 	stride: 2


Epoch 1/10:   0%|          | 0/196 [00:00<?, ?it/s]
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/wandb/agents/pyagent.py", line 297, in _run_job
    self._function()
  File "/tmp/ipython-input-1212523289.py", line 24, in sweep_train
    results = train_model(model,
              ^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-1911957378.py", line 29, in train_model
    output = model(data)
             ^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1784, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-1595289304.py", line 23, in forward
    x = self.activation(conv_layer(x))
                        ^^^^^^^^^^^^^
  File "/usr/local/lib/p

[34m[1mwandb[0m: [32m[41mERROR[0m Run lphsjk93 errored: Calculated padded input size per channel: (3 x 4). Kernel size: (5 x 3). Kernel size can't be greater than actual input size
[34m[1mwandb[0m: Agent Starting Run: aszwup2k with config:
[34m[1mwandb[0m: 	activation: SiLU
[34m[1mwandb[0m: 	base_channels: 128
[34m[1mwandb[0m: 	channel_mult: 4
[34m[1mwandb[0m: 	kernel_size: [5, 3]
[34m[1mwandb[0m: 	n_conv_layers: 3
[34m[1mwandb[0m: 	stride: 2


Epoch 1/10: 100%|██████████| 196/196 [00:12<00:00, 15.16it/s, Loss=1.5833, Acc=42.74%]


Epoch 1: Train Loss: 1.5765, Train Acc: 42.74%, Test Acc: 51.28%


Epoch 2/10: 100%|██████████| 196/196 [00:12<00:00, 15.12it/s, Loss=1.3074, Acc=57.15%]


Epoch 2: Train Loss: 1.2019, Train Acc: 57.15%, Test Acc: 59.34%


Epoch 3/10: 100%|██████████| 196/196 [00:12<00:00, 15.11it/s, Loss=0.9500, Acc=65.71%]


Epoch 3: Train Loss: 0.9785, Train Acc: 65.71%, Test Acc: 65.11%


Epoch 4/10: 100%|██████████| 196/196 [00:12<00:00, 15.15it/s, Loss=0.7808, Acc=73.04%]


Epoch 4: Train Loss: 0.7687, Train Acc: 73.04%, Test Acc: 66.87%


Epoch 5/10: 100%|██████████| 196/196 [00:12<00:00, 15.20it/s, Loss=0.5817, Acc=80.64%]


Epoch 5: Train Loss: 0.5568, Train Acc: 80.64%, Test Acc: 70.06%


Epoch 6/10: 100%|██████████| 196/196 [00:12<00:00, 15.12it/s, Loss=0.2371, Acc=87.96%]


Epoch 6: Train Loss: 0.3575, Train Acc: 87.96%, Test Acc: 70.46%


Epoch 7/10: 100%|██████████| 196/196 [00:12<00:00, 15.21it/s, Loss=0.2315, Acc=93.12%]


Epoch 7: Train Loss: 0.2131, Train Acc: 93.12%, Test Acc: 70.17%


Epoch 8/10: 100%|██████████| 196/196 [00:12<00:00, 15.18it/s, Loss=0.1269, Acc=95.69%]


Epoch 8: Train Loss: 0.1350, Train Acc: 95.69%, Test Acc: 70.98%


Epoch 9/10: 100%|██████████| 196/196 [00:12<00:00, 15.20it/s, Loss=0.1532, Acc=96.96%]


Epoch 9: Train Loss: 0.0963, Train Acc: 96.96%, Test Acc: 70.77%


Epoch 10/10: 100%|██████████| 196/196 [00:12<00:00, 15.15it/s, Loss=0.0584, Acc=97.59%]


Epoch 10: Train Loss: 0.0759, Train Acc: 97.59%, Test Acc: 70.45%


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁▄▆▇██████
train_accuracy,▁▃▄▅▆▇▇███
train_loss,█▆▅▄▃▂▂▁▁▁

0,1
epoch,10.0
test_accuracy,70.45
train_accuracy,97.586
train_loss,0.07593


[34m[1mwandb[0m: Agent Starting Run: 4mi6pvv7 with config:
[34m[1mwandb[0m: 	activation: SiLU
[34m[1mwandb[0m: 	base_channels: 128
[34m[1mwandb[0m: 	channel_mult: 4
[34m[1mwandb[0m: 	kernel_size: 5
[34m[1mwandb[0m: 	n_conv_layers: 3
[34m[1mwandb[0m: 	stride: 2


Epoch 1/10: 100%|██████████| 196/196 [00:13<00:00, 14.60it/s, Loss=1.3109, Acc=43.99%]


Epoch 1: Train Loss: 1.5502, Train Acc: 43.99%, Test Acc: 54.00%


Epoch 2/10: 100%|██████████| 196/196 [00:13<00:00, 14.62it/s, Loss=1.0922, Acc=60.71%]


Epoch 2: Train Loss: 1.1107, Train Acc: 60.71%, Test Acc: 63.47%


Epoch 3/10: 100%|██████████| 196/196 [00:13<00:00, 14.61it/s, Loss=0.7397, Acc=70.36%]


Epoch 3: Train Loss: 0.8454, Train Acc: 70.36%, Test Acc: 67.85%


Epoch 4/10: 100%|██████████| 196/196 [00:13<00:00, 14.62it/s, Loss=0.6481, Acc=79.98%]


Epoch 4: Train Loss: 0.5797, Train Acc: 79.98%, Test Acc: 70.33%


Epoch 5/10: 100%|██████████| 196/196 [00:13<00:00, 14.65it/s, Loss=0.4333, Acc=89.14%]


Epoch 5: Train Loss: 0.3213, Train Acc: 89.14%, Test Acc: 69.91%


Epoch 6/10: 100%|██████████| 196/196 [00:13<00:00, 14.53it/s, Loss=0.2324, Acc=94.69%]


Epoch 6: Train Loss: 0.1643, Train Acc: 94.69%, Test Acc: 70.25%


Epoch 7/10: 100%|██████████| 196/196 [00:13<00:00, 14.63it/s, Loss=0.0688, Acc=96.65%]


Epoch 7: Train Loss: 0.1036, Train Acc: 96.65%, Test Acc: 71.33%


Epoch 8/10: 100%|██████████| 196/196 [00:13<00:00, 14.57it/s, Loss=0.0437, Acc=97.75%]


Epoch 8: Train Loss: 0.0710, Train Acc: 97.75%, Test Acc: 72.05%


Epoch 9/10: 100%|██████████| 196/196 [00:13<00:00, 14.65it/s, Loss=0.0173, Acc=98.09%]


Epoch 9: Train Loss: 0.0599, Train Acc: 98.09%, Test Acc: 71.61%


Epoch 10/10: 100%|██████████| 196/196 [00:13<00:00, 14.58it/s, Loss=0.0490, Acc=98.11%]


Epoch 10: Train Loss: 0.0567, Train Acc: 98.11%, Test Acc: 71.33%


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁▅▆▇▇▇████
train_accuracy,▁▃▄▆▇█████
train_loss,█▆▅▃▂▂▁▁▁▁

0,1
epoch,10.0
test_accuracy,71.33
train_accuracy,98.106
train_loss,0.05669


[34m[1mwandb[0m: Agent Starting Run: hqbod7y6 with config:
[34m[1mwandb[0m: 	activation: SiLU
[34m[1mwandb[0m: 	base_channels: 128
[34m[1mwandb[0m: 	channel_mult: 4
[34m[1mwandb[0m: 	kernel_size: [5, 3]
[34m[1mwandb[0m: 	n_conv_layers: 3
[34m[1mwandb[0m: 	stride: 1


Epoch 1/10: 100%|██████████| 196/196 [00:50<00:00,  3.88it/s, Loss=1.3594, Acc=39.18%]


Epoch 1: Train Loss: 1.6720, Train Acc: 39.18%, Test Acc: 47.33%


Epoch 2/10: 100%|██████████| 196/196 [00:50<00:00,  3.87it/s, Loss=1.3056, Acc=52.85%]


Epoch 2: Train Loss: 1.3257, Train Acc: 52.85%, Test Acc: 56.83%


Epoch 3/10: 100%|██████████| 196/196 [00:50<00:00,  3.86it/s, Loss=0.9574, Acc=59.69%]


Epoch 3: Train Loss: 1.1492, Train Acc: 59.69%, Test Acc: 62.58%


Epoch 4/10: 100%|██████████| 196/196 [00:50<00:00,  3.87it/s, Loss=0.9637, Acc=64.84%]


Epoch 4: Train Loss: 1.0085, Train Acc: 64.84%, Test Acc: 65.92%


Epoch 5/10: 100%|██████████| 196/196 [00:50<00:00,  3.87it/s, Loss=0.9044, Acc=68.36%]


Epoch 5: Train Loss: 0.9058, Train Acc: 68.36%, Test Acc: 67.66%


Epoch 6/10: 100%|██████████| 196/196 [00:50<00:00,  3.87it/s, Loss=0.7647, Acc=71.71%]


Epoch 6: Train Loss: 0.8192, Train Acc: 71.71%, Test Acc: 70.84%


Epoch 7/10: 100%|██████████| 196/196 [00:50<00:00,  3.88it/s, Loss=0.7230, Acc=73.72%]


Epoch 7: Train Loss: 0.7560, Train Acc: 73.72%, Test Acc: 73.94%


Epoch 8/10: 100%|██████████| 196/196 [00:50<00:00,  3.88it/s, Loss=0.4720, Acc=76.35%]


Epoch 8: Train Loss: 0.6816, Train Acc: 76.35%, Test Acc: 75.37%


Epoch 9/10: 100%|██████████| 196/196 [00:50<00:00,  3.88it/s, Loss=0.5542, Acc=77.97%]


Epoch 9: Train Loss: 0.6330, Train Acc: 77.97%, Test Acc: 76.77%


Epoch 10/10: 100%|██████████| 196/196 [00:50<00:00,  3.88it/s, Loss=0.6941, Acc=80.39%]


Epoch 10: Train Loss: 0.5696, Train Acc: 80.39%, Test Acc: 75.53%


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁▃▅▅▆▇▇███
train_accuracy,▁▃▄▅▆▇▇▇██
train_loss,█▆▅▄▃▃▂▂▁▁

0,1
epoch,10.0
test_accuracy,75.53
train_accuracy,80.394
train_loss,0.56959


[34m[1mwandb[0m: Agent Starting Run: o99ji4gz with config:
[34m[1mwandb[0m: 	activation: SiLU
[34m[1mwandb[0m: 	base_channels: 128
[34m[1mwandb[0m: 	channel_mult: 4
[34m[1mwandb[0m: 	kernel_size: [5, 3]
[34m[1mwandb[0m: 	n_conv_layers: 3
[34m[1mwandb[0m: 	stride: 1


Epoch 1/10: 100%|██████████| 196/196 [00:50<00:00,  3.87it/s, Loss=1.5012, Acc=39.00%]


Epoch 1: Train Loss: 1.6563, Train Acc: 39.00%, Test Acc: 45.22%


Epoch 2/10: 100%|██████████| 196/196 [00:50<00:00,  3.87it/s, Loss=1.2005, Acc=53.02%]


Epoch 2: Train Loss: 1.3093, Train Acc: 53.02%, Test Acc: 57.31%


Epoch 3/10: 100%|██████████| 196/196 [00:50<00:00,  3.87it/s, Loss=1.1289, Acc=60.00%]


Epoch 3: Train Loss: 1.1294, Train Acc: 60.00%, Test Acc: 62.86%


Epoch 4/10: 100%|██████████| 196/196 [00:50<00:00,  3.88it/s, Loss=1.0329, Acc=65.53%]


Epoch 4: Train Loss: 0.9880, Train Acc: 65.53%, Test Acc: 65.25%


Epoch 5/10:  21%|██▏       | 42/196 [00:10<00:39,  3.89it/s, Loss=0.9106, Acc=67.37%]