<a href="https://colab.research.google.com/github/rosscampbell7/test-repo/blob/master/WandB_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import wandb

In [10]:
# Define the neural network model
class SimpleCNN(nn.Module):
    def __init__(self, dropout_rate=0.5):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.dropout = nn.Dropout(dropout_rate)
        self.fc1 = nn.Linear(64 * 8 * 8, 512)
        self.fc2 = nn.Linear(512, 100)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = x.view(-1, 64 * 8 * 8)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x



In [14]:
def train():
    # Initialize wandb with the current run
    wandb.init()

    # Hyperparameters
    batch_size = wandb.config.batch_size
    learning_rate = wandb.config.learning_rate
    num_epochs = 1#wandb.config.epochs
    optimizer_type = wandb.config.optimizer
    momentum = wandb.config.momentum
    weight_decay = wandb.config.weight_decay
    dropout_rate = wandb.config.dropout_rate
    scheduler_type = wandb.config.scheduler

    # Check if GPU is available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Data augmentation and normalization for training
    transform = transforms.Compose(
        [transforms.RandomHorizontalFlip(),
         transforms.RandomCrop(32, padding=4),
         transforms.ToTensor(),
         transforms.Normalize((0.5071, 0.4865, 0.4409), (0.2673, 0.2564, 0.2761))])

    # Load CIFAR100 dataset
    trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

    testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

    # Initialize the model, loss function, and optimizer
    model = SimpleCNN(dropout_rate).to(device)
    criterion = nn.CrossEntropyLoss()

    if optimizer_type == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum, weight_decay=weight_decay)
    elif optimizer_type == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    elif optimizer_type == 'RMSprop':
        optimizer = optim.RMSprop(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    else:
        raise ValueError(f"Unsupported optimizer type: {optimizer_type}")

    if scheduler_type == 'StepLR':
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
    elif scheduler_type == 'ExponentialLR':
        scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
    else:
        scheduler = None

    # Training loop
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        model.train()
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            if i % 100 == 99:  # Log every 100 mini-batches
                accuracy = 100 * correct / total
                wandb.log({"loss": running_loss / 100, "accuracy": accuracy, "epoch": epoch + 1})
                print(f"[{epoch + 1}, {i + 1}] loss: {running_loss / 100:.3f}, accuracy: {accuracy:.2f}%")
                running_loss = 0.0
                correct = 0
                total = 0

        if scheduler:
            scheduler.step()

    print("Finished Training")
    torch.save(model.state_dict(), "cifar100_model.pth")

if __name__ == "__main__":
    # Sweep configuration
    sweep_config = {
        'method': 'random',  # Random search
        'parameters': {
            'batch_size': {
                'values': [32, 64, 128]
            },
            'learning_rate': {
                'values': [0.1, 0.01, 0.001]
            },
            'optimizer': {
                'values': ['SGD', 'Adam', 'RMSprop']
            },
            'momentum': {
                'values': [0.9, 0.95, 0.99]
            },
            'weight_decay': {
                'values': [0.0001, 0.001, 0.01]
            },
            'dropout_rate': {
                'values': [0.3, 0.5, 0.7]
            },
            'scheduler': {
                'values': ['None', 'StepLR', 'ExponentialLR']
            }
        }
    }


    # Initialize the sweep
    sweep_id = wandb.sweep(sweep_config, project="cifar100-classifier")

    # Start the sweep
    wandb.agent(sweep_id, function=train,)

Create sweep with ID: sdws3n5m
Sweep URL: https://wandb.ai/themachine/cifar100-classifier/sweeps/sdws3n5m


[34m[1mwandb[0m: Agent Starting Run: nqi5r5z5 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout_rate: 0.7
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	momentum: 0.95
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	scheduler: None
[34m[1mwandb[0m: 	weight_decay: 0.0001


Files already downloaded and verified
Files already downloaded and verified
[1, 100] loss: 2.924, accuracy: 10.36%
[1, 200] loss: 2.392, accuracy: 10.39%
[1, 300] loss: 2.368, accuracy: 9.91%
[1, 400] loss: 2.348, accuracy: 10.30%
[1, 500] loss: 2.348, accuracy: 10.14%
[1, 600] loss: 2.336, accuracy: 10.20%
[1, 700] loss: 2.334, accuracy: 9.81%
Finished Training


0,1
accuracy,██▂▇▅▆▁
epoch,▁▁▁▁▁▁▁
loss,█▂▁▁▁▁▁

0,1
accuracy,9.8125
epoch,1.0
loss,2.33367


[34m[1mwandb[0m: Agent Starting Run: lht4vvef with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	scheduler: None
[34m[1mwandb[0m: 	weight_decay: 0.01


Files already downloaded and verified
Files already downloaded and verified
[1, 100] loss: 2.894, accuracy: 9.82%
[1, 200] loss: 2.312, accuracy: 13.34%
[1, 300] loss: 2.102, accuracy: 22.48%
Finished Training


0,1
accuracy,▁▃█
epoch,▁▁▁
loss,█▃▁

0,1
accuracy,22.47656
epoch,1.0
loss,2.10218


[34m[1mwandb[0m: Agent Starting Run: l6weya2j with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout_rate: 0.7
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	momentum: 0.95
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	scheduler: StepLR
[34m[1mwandb[0m: 	weight_decay: 0.001


Files already downloaded and verified
Files already downloaded and verified
[1, 100] loss: 2.773, accuracy: 13.97%
[1, 200] loss: 2.186, accuracy: 20.78%
[1, 300] loss: 2.037, accuracy: 24.47%
[1, 400] loss: 1.965, accuracy: 28.97%
[1, 500] loss: 1.954, accuracy: 29.47%
[1, 600] loss: 1.924, accuracy: 29.59%
[1, 700] loss: 1.916, accuracy: 29.84%
[1, 800] loss: 1.893, accuracy: 30.88%
[1, 900] loss: 1.811, accuracy: 34.41%
[1, 1000] loss: 1.813, accuracy: 34.94%
[1, 1100] loss: 1.785, accuracy: 36.31%
[1, 1200] loss: 1.803, accuracy: 34.03%
[1, 1300] loss: 1.756, accuracy: 37.34%
[1, 1400] loss: 1.746, accuracy: 37.19%
[1, 1500] loss: 1.750, accuracy: 36.03%
Finished Training


0,1
accuracy,▁▃▄▅▆▆▆▆▇▇█▇███
epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
loss,█▄▃▂▂▂▂▂▁▁▁▁▁▁▁

0,1
accuracy,36.03125
epoch,1.0
loss,1.74984


[34m[1mwandb[0m: Agent Starting Run: zaev033q with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout_rate: 0.7
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	scheduler: ExponentialLR
[34m[1mwandb[0m: 	weight_decay: 0.0001


Files already downloaded and verified
Files already downloaded and verified
[1, 100] loss: 3.104, accuracy: 10.09%
[1, 200] loss: 2.331, accuracy: 14.67%
[1, 300] loss: 2.199, accuracy: 18.27%
[1, 400] loss: 2.114, accuracy: 22.38%
[1, 500] loss: 2.058, accuracy: 24.03%
[1, 600] loss: 2.006, accuracy: 25.91%
[1, 700] loss: 1.995, accuracy: 27.52%
Finished Training


0,1
accuracy,▁▃▄▆▇▇█
epoch,▁▁▁▁▁▁▁
loss,█▃▂▂▁▁▁

0,1
accuracy,27.51562
epoch,1.0
loss,1.9946


[34m[1mwandb[0m: Agent Starting Run: hm5dxivp with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dropout_rate: 0.5
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	momentum: 0.99
[34m[1mwandb[0m: 	optimizer: RMSprop
[34m[1mwandb[0m: 	scheduler: StepLR
[34m[1mwandb[0m: 	weight_decay: 0.001


Files already downloaded and verified
Files already downloaded and verified
[1, 100] loss: 1476137.521, accuracy: 10.37%
[1, 200] loss: 3984.694, accuracy: 9.78%
[1, 300] loss: 1626.592, accuracy: 10.26%
Finished Training


0,1
accuracy,█▁▇
epoch,▁▁▁
loss,█▁▁

0,1
accuracy,10.25781
epoch,1.0
loss,1626.59161


[34m[1mwandb[0m: Agent Starting Run: 3j2s09b9 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout_rate: 0.5
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	scheduler: StepLR
[34m[1mwandb[0m: 	weight_decay: 0.001


Files already downloaded and verified
Files already downloaded and verified
[1, 100] loss: 2.631, accuracy: 14.47%
[1, 200] loss: 2.045, accuracy: 24.94%
[1, 300] loss: 1.892, accuracy: 29.78%
[1, 400] loss: 1.812, accuracy: 34.72%
[1, 500] loss: 1.709, accuracy: 37.62%
[1, 600] loss: 1.714, accuracy: 37.41%
[1, 700] loss: 1.653, accuracy: 38.66%
[1, 800] loss: 1.604, accuracy: 40.03%
[1, 900] loss: 1.544, accuracy: 44.72%
[1, 1000] loss: 1.580, accuracy: 42.88%
[1, 1100] loss: 1.526, accuracy: 44.66%
[1, 1200] loss: 1.497, accuracy: 45.28%
[1, 1300] loss: 1.466, accuracy: 47.00%
[1, 1400] loss: 1.455, accuracy: 47.25%
[1, 1500] loss: 1.420, accuracy: 48.38%
Finished Training


0,1
accuracy,▁▃▄▅▆▆▆▆▇▇▇▇███
epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
loss,█▅▄▃▃▃▂▂▂▂▂▁▁▁▁

0,1
accuracy,48.375
epoch,1.0
loss,1.41961


[34m[1mwandb[0m: Agent Starting Run: nd1i4yx0 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	momentum: 0.99
[34m[1mwandb[0m: 	optimizer: RMSprop
[34m[1mwandb[0m: 	scheduler: ExponentialLR
[34m[1mwandb[0m: 	weight_decay: 0.001


Files already downloaded and verified
Files already downloaded and verified
[1, 100] loss: 2.226, accuracy: 24.66%
[1, 200] loss: 1.772, accuracy: 35.19%
[1, 300] loss: 1.642, accuracy: 40.05%
Finished Training


0,1
accuracy,▁▆█
epoch,▁▁▁
loss,█▃▁

0,1
accuracy,40.04688
epoch,1.0
loss,1.64169


[34m[1mwandb[0m: Agent Starting Run: lr0zlpvq with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout_rate: 0.7
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	momentum: 0.95
[34m[1mwandb[0m: 	optimizer: RMSprop
[34m[1mwandb[0m: 	scheduler: ExponentialLR
[34m[1mwandb[0m: 	weight_decay: 0.0001


Files already downloaded and verified
Files already downloaded and verified
[1, 100] loss: 873157.970, accuracy: 9.62%
[1, 200] loss: 2.426, accuracy: 10.00%
[1, 300] loss: 333.389, accuracy: 9.41%
[1, 400] loss: 4449.225, accuracy: 10.28%
[1, 500] loss: 2.336, accuracy: 10.59%
[1, 600] loss: 2.396, accuracy: 8.84%
[1, 700] loss: 39.985, accuracy: 10.09%
[1, 800] loss: 2.338, accuracy: 8.78%
[1, 900] loss: 2.352, accuracy: 11.03%
[1, 1000] loss: 17.029, accuracy: 10.75%
[1, 1100] loss: 2.371, accuracy: 9.78%
[1, 1200] loss: 2.371, accuracy: 10.41%
[1, 1300] loss: 50.836, accuracy: 10.16%
[1, 1400] loss: 2.386, accuracy: 8.91%
[1, 1500] loss: 2.356, accuracy: 9.31%
Finished Training


0,1
accuracy,▄▅▃▆▇▁▅▁█▇▄▆▅▁▃
epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy,9.3125
epoch,1.0
loss,2.35582


[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


In [18]:
# Log the model as a wandb artifact
artifact = wandb.Artifact('cifar100', type='model')
artifact.add_file('/content/wandb/')
wandb.log_artifact(artifact)

ValueError: Path is not a file: '/content/wandb/'

In [None]:
wandb.finish()

In [None]:
from google.colab import drive
drive.mount('/content/drive')