In [30]:
import os
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = '0, 1, 2'
import wandb

from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
import math

device = "cuda" if torch.cuda.is_available() else "cpu"

In [50]:
sweep_config = {
    'method': 'random',
    'metric' : {
        'name': 'val_loss',
        'goal': 'minimize'   
    },
    'parameters' : {
        'optimizer': {
            'values': ['adam', 'sgd']
            },
        'dropout': {
            'values': [0.3, 0.4]
            },
        'learning_rate': {
            'distribution': 'uniform',
            'min': 0,
            'max': 0.1
            },
        'epochs': {
            'values': [5, 6]
            },
        'batch_size': {
            'distribution': 'q_log_uniform',
            'q': 1,
            'min': math.log(32),
            'max': math.log(256),
            }
        }
    }

In [51]:
import pprint
pprint.pprint(sweep_config)

{'method': 'random',
 'metric': {'goal': 'minimize', 'name': 'val_loss'},
 'parameters': {'batch_size': {'distribution': 'q_log_uniform',
                               'max': 5.545177444479562,
                               'min': 3.4657359027997265,
                               'q': 1},
                'dropout': {'values': [0.3, 0.4]},
                'epochs': {'values': [5, 6]},
                'learning_rate': {'distribution': 'uniform',
                                  'max': 0.1,
                                  'min': 0},
                'optimizer': {'values': ['adam', 'sgd']}}}


In [52]:
def SweepDataset(batch_size):
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307,), (0.3081,))])
    
    train_data = datasets.MNIST(".", 
                train=True, 
                download=True,
                transform=transform)
    
    test_data = datasets.MNIST(".", 
                train=False, 
                download=True,
                transform=transform)
    
    
    train_loader = DataLoader(train_data, batch_size=batch_size)
    test_loader = DataLoader(test_data, batch_size=batch_size)

    return train_loader, test_loader

In [53]:
class ConvNet(nn.Module):
    def __init__(self, dropout):
        super(ConvNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, 3, 1, 1), nn.ReLU(),
            nn.MaxPool2d(2, 2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, 3, 1, 1), nn.ReLU(),
            nn.MaxPool2d(2, 2))
        self.layer3 = nn.Sequential(
            nn.Linear(64 * 7 * 7, 128, bias=True), nn.ReLU(),
            nn.Dropout2d(p=dropout))
        self.layer4 = nn.Sequential(
            nn.Linear(128, 84), nn.ReLU(),
            nn.Dropout2d(p=dropout))
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = x.view(x.size(0),-1) 
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.fc3(x)
        return x

In [54]:
def build_optimizer(network, optimizer, learning_rate):
    if optimizer == "sgd":
        optimizer = optim.SGD(network.parameters(),
                              lr=learning_rate, momentum=0.9)
    elif optimizer == "adam":
        optimizer = optim.Adam(network.parameters(),
                               lr=learning_rate)
    return optimizer

In [55]:
def train(model, loader, criterion, optimizer, device, config, wandb):
    model.train()
    for epoch in range(config.epochs):
        cumu_loss = 0
        for images, labels in loader:
            images, labels  = images.to(device), labels.to(device)

            output = model(images)
            loss = criterion(output, labels)
            cumu_loss += loss.item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        avg_loss = cumu_loss / len(loader)
        wandb.log({"train_loss": avg_loss}, step=epoch)  
        print(f"TRAIN: EPOCH {epoch + 1:04d} / {config.epochs:04d} | Epoch LOSS {avg_loss:.4f}")

In [56]:
def vaild(model, loader, criterion, device,  wandb):
    model.eval()
    with torch.no_grad():
        correct, test_loss = 0, 0
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            
            test_loss += criterion(output, target).item()
            
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()
            
    
    val_loss = test_loss / len(loader)
    print(f"VALID: LOSS {val_loss:.4f} | Accuracy {val_loss:.4f} ")
    wandb.log({
        "val_acc": 100. * correct / len(loader.dataset),
        "val_loss": val_loss})

In [57]:
def run_sweep(config = None):
    wandb.init(config = config, name = 'test1')

    w_config = wandb.config
    criterion = nn.CrossEntropyLoss()
    train_loader, vaild_loader = SweepDataset(w_config.batch_size)
    model = ConvNet(w_config.dropout).to(device)
    optimizer = build_optimizer(model, w_config.optimizer, w_config.learning_rate)
    
    train(model, train_loader, criterion, optimizer, device, w_config, wandb)
    vaild(model, vaild_loader, criterion, device, wandb)

In [58]:
wandb.login()
sweep_id = wandb.sweep(sweep_config, project = "Sweep_demo", entity = 'wongi')
wandb.agent(sweep_id, run_sweep, count=5)



Create sweep with ID: 16qztovc
Sweep URL: https://wandb.ai/wongi/Sweep_demo/sweeps/16qztovc


[34m[1mwandb[0m: Agent Starting Run: asima2og with config:
[34m[1mwandb[0m: 	batch_size: 122
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	epochs: 6
[34m[1mwandb[0m: 	learning_rate: 0.0710534412908822
[34m[1mwandb[0m: 	optimizer: adam
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
2022-09-17 12:18:35.468240: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST/raw/train-images-idx3-ubyte.gz


0it [00:00, ?it/s]

Extracting ./MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST/raw/train-labels-idx1-ubyte.gz


0it [00:00, ?it/s]

Extracting ./MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/raw/t10k-images-idx3-ubyte.gz


0it [00:00, ?it/s]

Extracting ./MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/raw/t10k-labels-idx1-ubyte.gz


0it [00:00, ?it/s]

Extracting ./MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/raw
Processing...
Done!


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


TRAIN: EPOCH 0001 / 0006 | Epoch LOSS 5.5176
TRAIN: EPOCH 0002 / 0006 | Epoch LOSS 2.3042
TRAIN: EPOCH 0003 / 0006 | Epoch LOSS 2.3043
TRAIN: EPOCH 0004 / 0006 | Epoch LOSS 2.3043
TRAIN: EPOCH 0005 / 0006 | Epoch LOSS 2.3043
TRAIN: EPOCH 0006 / 0006 | Epoch LOSS 2.3044
VALID: LOSS 2.3026 | Accuracy 2.3026 



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train_loss,█▁▁▁▁▁
val_acc,▁
val_loss,▁

0,1
train_loss,2.30436
val_acc,10.1
val_loss,2.30264


[34m[1mwandb[0m: Agent Starting Run: 1pq3rc9h with config:
[34m[1mwandb[0m: 	batch_size: 174
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 6
[34m[1mwandb[0m: 	learning_rate: 0.057520677417754185
[34m[1mwandb[0m: 	optimizer: adam
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
2022-09-17 12:20:36.239697: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0


TRAIN: EPOCH 0001 / 0006 | Epoch LOSS 3.7205
TRAIN: EPOCH 0002 / 0006 | Epoch LOSS 2.3031
TRAIN: EPOCH 0003 / 0006 | Epoch LOSS 2.3032
TRAIN: EPOCH 0004 / 0006 | Epoch LOSS 2.3032
TRAIN: EPOCH 0005 / 0006 | Epoch LOSS 2.3033
TRAIN: EPOCH 0006 / 0006 | Epoch LOSS 2.3033
VALID: LOSS 2.3014 | Accuracy 2.3014 



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train_loss,█▁▁▁▁▁
val_acc,▁
val_loss,▁

0,1
train_loss,2.30327
val_acc,11.35
val_loss,2.30144


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ungwksc5 with config:
[34m[1mwandb[0m: 	batch_size: 68
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	epochs: 6
[34m[1mwandb[0m: 	learning_rate: 0.09747943452538484
[34m[1mwandb[0m: 	optimizer: adam
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
2022-09-17 12:22:30.546302: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0


TRAIN: EPOCH 0001 / 0006 | Epoch LOSS 4.6731
TRAIN: EPOCH 0002 / 0006 | Epoch LOSS 2.3074
TRAIN: EPOCH 0003 / 0006 | Epoch LOSS 2.3074
TRAIN: EPOCH 0004 / 0006 | Epoch LOSS 2.3074
TRAIN: EPOCH 0005 / 0006 | Epoch LOSS 2.3074
TRAIN: EPOCH 0006 / 0006 | Epoch LOSS 2.3074
VALID: LOSS 2.3044 | Accuracy 2.3044 



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train_loss,█▁▁▁▁▁
val_acc,▁
val_loss,▁

0,1
train_loss,2.30743
val_acc,9.58
val_loss,2.30439


[34m[1mwandb[0m: Agent Starting Run: cq1bdp18 with config:
[34m[1mwandb[0m: 	batch_size: 157
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 6
[34m[1mwandb[0m: 	learning_rate: 0.07004794321475875
[34m[1mwandb[0m: 	optimizer: adam
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
2022-09-17 12:24:42.497098: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0


TRAIN: EPOCH 0001 / 0006 | Epoch LOSS 6.0168
TRAIN: EPOCH 0002 / 0006 | Epoch LOSS 2.3036
TRAIN: EPOCH 0003 / 0006 | Epoch LOSS 2.3037
TRAIN: EPOCH 0004 / 0006 | Epoch LOSS 2.3038
TRAIN: EPOCH 0005 / 0006 | Epoch LOSS 2.3038
TRAIN: EPOCH 0006 / 0006 | Epoch LOSS 2.3038
VALID: LOSS 2.3025 | Accuracy 2.3025 



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train_loss,█▁▁▁▁▁
val_acc,▁
val_loss,▁

0,1
train_loss,2.30383
val_acc,10.1
val_loss,2.30251


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ehuyvko3 with config:
[34m[1mwandb[0m: 	batch_size: 159
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.03206483058379716
[34m[1mwandb[0m: 	optimizer: adam
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
2022-09-17 12:26:38.741725: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0


TRAIN: EPOCH 0001 / 0005 | Epoch LOSS 2.5500
TRAIN: EPOCH 0002 / 0005 | Epoch LOSS 2.3024
TRAIN: EPOCH 0003 / 0005 | Epoch LOSS 2.3024
TRAIN: EPOCH 0004 / 0005 | Epoch LOSS 2.3025
TRAIN: EPOCH 0005 / 0005 | Epoch LOSS 2.3025
VALID: LOSS 2.3015 | Accuracy 2.3015 



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train_loss,█▁▁▁▁
val_acc,▁
val_loss,▁

0,1
train_loss,2.30246
val_acc,10.28
val_loss,2.30151
