In [1]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
import os
import time
import torch
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, random_split
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import wandb
warnings.filterwarnings('ignore')

device = ("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
print(f"Using {device} device")

sweep_config = { "name": "Maximize Accuracy", "method": "bayes" }
sweep_config["metric"] = {
    "name": "val_accuracy",
    "goal": "maximize"
}
parameters = {
    "dropout": {"values": [0, 0.2, 0.3]},
    "learning_rate": { "values": [0.1, 0.05] },
    "growth_rate": { "values": [ 12, 18 ] },
    "bottleneck": { "values": [True, False]},
    "optimizer": { "values": ["SGD", "Adam", "RMSprop" ] },
    "lr_gamma": { "values": [ 0.1, 0.3 ] },
    "temperature": { "values": [ 0.3, 0.5 ] },
    "weight_decay": { "values": [4e-5, 1e-5] },
    "alpha": { "values": [ 0.5, 0.7, 0.9 ]}
}
sweep_config["parameters"] = parameters

additional_config = {
    "num_epochs": 5,
    "workers": 4,
    "batch_size": 96,
    "momentum": 0.9,
    "lr_step_size": 20
}

import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(0.5),       
    transforms.ToTensor(),    
    transforms.Normalize((0.5070754, 0.48655024, 0.44091907), (0.26733398, 0.25643876, 0.2761503))
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5070754, 0.48655024, 0.44091907), (0.26733398, 0.25643876, 0.2761503))
])

train_dataset = datasets.CIFAR100(root='./data', train=True, download=True, transform=train_transform)
test_dataset = datasets.CIFAR100(root='./data', train=False, download=True, transform=test_transform)

train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size

# Split the dataset into training and validation sets
train_dataset, test_dataset = random_split(train_dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)
val_loader = DataLoader(train_dataset, batch_size=128, shuffle=False, num_workers=2)

Using mps device
Files already downloaded and verified
Files already downloaded and verified


In [2]:
class DistillationLoss(nn.Module):
    def __init__(self, alpha, temperature):
        super(DistillationLoss, self).__init__()
        self.alpha = alpha
        self.temperature = temperature
        self.criterion = nn.CrossEntropyLoss()

    def forward(self, student_logits, teacher_logits, targets):
        student_loss = self.criterion(student_logits, targets)
        
        distillation_loss = F.kl_div(
            F.log_softmax(student_logits / self.temperature, dim=1),
            F.softmax(teacher_logits / self.temperature, dim=1),
            reduction='batchmean'
        ) * (self.temperature ** 2)

        loss = self.alpha * student_loss + (1 - self.alpha) * distillation_loss
        return loss

In [3]:
def distill(dataloader, teacher, model, epoch, config):
    size = len(dataloader.dataset)
    model.train()
    distillation_criterion = DistillationLoss(config["alpha"], config["temperature"])
    if config["optimizer"] != "Adam":
        optimizer = getattr(torch.optim, config["optimizer"])(model.parameters(), lr=config["learning_rate"], momentum=config["momentum"], weight_decay=config["weight_decay"])
    else:
        optimizer = getattr(torch.optim, config["optimizer"])(model.parameters(), lr=config["learning_rate"], weight_decay=config["weight_decay"])
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=config["lr_step_size"], gamma=config["lr_gamma"])

    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        with torch.no_grad():
            teacher_logits = teacher(X)
        student_logits = model(X)
        loss = distillation_criterion(student_logits, teacher_logits, y)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            print(f"loss: {loss.item():.6f} [{batch * len(X)}/{size}]")

    scheduler.step()

def train(dataloader, teacher, model, config):
    for epoch in range(config["num_epochs"]):
        distill(dataloader, teacher, model, epoch, config)
        test(val_loader, model, nn.CrossEntropyLoss(), epoch)
        

def test(dataloader, model, loss_fn, epoch):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    step = epoch * len(dataloader.dataset)
    correct /= size
    wandb.log({"epoch": epoch + 1, "val_loss": test_loss, "val_accuracy": 100*correct})
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f}\n")

In [4]:
from student import densenet
from teacher import wide_resnet

def train_test_model(config=None):
    with wandb.init(config = config):
        config = wandb.config
        config.update(additional_config)
        run_name_template = "{} / lr_{} / alpha_{} / dropout_{} / growth_{} / bottleneck_{} / lr_gamma_{} / temp_{} / weight_decay_{}"
        run_name = run_name_template.format(config["optimizer"], config["learning_rate"], config["alpha"],
                                            config["dropout"], config["growth_rate"],
                                            config["bottleneck"], config["lr_gamma"],
                                            config["temperature"], config["weight_decay"])
        print("Run name: ", run_name)
        wandb.run.name = run_name

        teacher_model = wide_resnet().to(device)
        teacher_model.eval()
        
        student_model = densenet().to(device)
        student_model.train()

        train(train_loader, teacher_model, student_model, config)


sweep_id = wandb.sweep(sweep_config, project="distillation")
wandb.agent(sweep_id, train_test_model, count=25)

Create sweep with ID: sk61moht
Sweep URL: https://wandb.ai/ns24z093/distillation/sweeps/sk61moht


[34m[1mwandb[0m: Agent Starting Run: 2crdyhfh with config:
[34m[1mwandb[0m: 	alpha: 0.9
[34m[1mwandb[0m: 	bottleneck: False
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	growth_rate: 18
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	lr_gamma: 0.3
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	temperature: 0.5
[34m[1mwandb[0m: 	weight_decay: 1e-05
[34m[1mwandb[0m: Currently logged in as: [33mns24z093[0m. Use [1m`wandb login --relogin`[0m to force relogin


Run name:  SGD / lr_0.1 / alpha_0.9 / dropout_0.2 / growth_18 / bottleneck_False / lr_gamma_0.3 / temp_0.5 / weight_decay_1e-05
loss: 4.343318 [0/40000]
loss: 3.859285 [6400/40000]
loss: 3.709062 [12800/40000]
loss: 3.670708 [19200/40000]
loss: 3.487759 [25600/40000]
loss: 3.308788 [32000/40000]
loss: 3.315468 [38400/40000]
Test Error: 
 Accuracy: 16.1%, Avg loss: 3.534078

loss: 3.041099 [0/40000]
loss: 3.071847 [6400/40000]
loss: 3.053597 [12800/40000]
loss: 2.859319 [19200/40000]


wandb: ERROR Error while calling W&B API: context deadline exceeded (<Response [500]>)
wandb: ERROR Error while calling W&B API: context deadline exceeded (<Response [500]>)
wandb: ERROR Error while calling W&B API: context deadline exceeded (<Response [500]>)


loss: 2.759206 [25600/40000]


[34m[1mwandb[0m: [32m[41mERROR[0m Error while calling W&B API: context deadline exceeded (<Response [500]>)


loss: 3.033726 [32000/40000]
loss: 2.613445 [38400/40000]
Test Error: 
 Accuracy: 26.3%, Avg loss: 2.936484

loss: 2.581277 [0/40000]
loss: 2.543010 [6400/40000]
loss: 2.460025 [12800/40000]
loss: 2.156973 [19200/40000]
loss: 2.468884 [25600/40000]
loss: 2.394630 [32000/40000]
loss: 2.399888 [38400/40000]
Test Error: 
 Accuracy: 37.3%, Avg loss: 2.388005

loss: 1.878180 [0/40000]
loss: 1.934017 [6400/40000]
loss: 1.873981 [12800/40000]
loss: 2.027224 [19200/40000]
loss: 1.825794 [25600/40000]
loss: 1.886956 [32000/40000]
loss: 1.921530 [38400/40000]
Test Error: 
 Accuracy: 43.4%, Avg loss: 2.113359

loss: 1.407056 [0/40000]
loss: 1.782660 [6400/40000]
loss: 1.575007 [12800/40000]
loss: 1.604043 [19200/40000]
loss: 1.985956 [25600/40000]
loss: 2.049354 [32000/40000]
loss: 1.890427 [38400/40000]
Test Error: 
 Accuracy: 49.1%, Avg loss: 1.843174



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
val_accuracy,▁▃▅▇█
val_loss,█▆▃▂▁

0,1
epoch,5.0
val_accuracy,49.1275
val_loss,1.84317


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: tb3khmyg with config:
[34m[1mwandb[0m: 	alpha: 0.5
[34m[1mwandb[0m: 	bottleneck: True
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	growth_rate: 12
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	lr_gamma: 0.3
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	temperature: 0.5
[34m[1mwandb[0m: 	weight_decay: 4e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011223609255587992, max=1.0…

Run name:  SGD / lr_0.05 / alpha_0.5 / dropout_0.3 / growth_12 / bottleneck_True / lr_gamma_0.3 / temp_0.5 / weight_decay_4e-05
loss: 2.959397 [0/40000]
loss: 2.658882 [6400/40000]
loss: 2.317399 [12800/40000]
loss: 2.654253 [19200/40000]
loss: 2.267381 [25600/40000]
loss: 2.257029 [32000/40000]
loss: 2.192196 [38400/40000]
Test Error: 
 Accuracy: 16.5%, Avg loss: 3.489289

loss: 2.038930 [0/40000]
loss: 2.115407 [6400/40000]
loss: 2.182278 [12800/40000]
loss: 2.018296 [19200/40000]
loss: 1.998276 [25600/40000]
loss: 2.020421 [32000/40000]
loss: 1.725979 [38400/40000]
Test Error: 
 Accuracy: 25.8%, Avg loss: 2.946883

loss: 1.783864 [0/40000]
loss: 1.635890 [6400/40000]
loss: 2.073668 [12800/40000]
loss: 1.805214 [19200/40000]
loss: 1.653458 [25600/40000]
loss: 1.591893 [32000/40000]
loss: 1.769951 [38400/40000]
Test Error: 
 Accuracy: 33.9%, Avg loss: 2.562788

loss: 1.638171 [0/40000]
loss: 1.499069 [6400/40000]
loss: 1.223448 [12800/40000]
loss: 1.389364 [19200/40000]
loss: 1.474752

VBox(children=(Label(value='0.001 MB of 0.013 MB uploaded\r'), FloatProgress(value=0.07366246348934957, max=1.…

0,1
epoch,▁▃▅▆█
val_accuracy,▁▃▅▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
val_accuracy,44.0475
val_loss,2.08006


[34m[1mwandb[0m: Agent Starting Run: 0mn1dne4 with config:
[34m[1mwandb[0m: 	alpha: 0.7
[34m[1mwandb[0m: 	bottleneck: False
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	growth_rate: 12
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	lr_gamma: 0.1
[34m[1mwandb[0m: 	optimizer: Adam
[34m[1mwandb[0m: 	temperature: 0.3
[34m[1mwandb[0m: 	weight_decay: 1e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011223418055548488, max=1.0…

Run name:  Adam / lr_0.1 / alpha_0.7 / dropout_0 / growth_12 / bottleneck_False / lr_gamma_0.1 / temp_0.3 / weight_decay_1e-05
loss: 3.393914 [0/40000]
loss: 3.261738 [6400/40000]
loss: 3.333615 [12800/40000]
loss: 3.190885 [19200/40000]
loss: 3.183601 [25600/40000]
loss: 3.117500 [32000/40000]
loss: 3.127318 [38400/40000]
Test Error: 
 Accuracy: 3.2%, Avg loss: 4.339481

loss: 3.182307 [0/40000]
loss: 3.063886 [6400/40000]
loss: 3.009709 [12800/40000]
loss: 3.007939 [19200/40000]
loss: 3.015509 [25600/40000]
loss: 3.074695 [32000/40000]
loss: 2.933464 [38400/40000]
Test Error: 
 Accuracy: 3.4%, Avg loss: 4.306521

loss: 3.203881 [0/40000]
loss: 3.170161 [6400/40000]
loss: 3.002965 [12800/40000]
loss: 3.109426 [19200/40000]
loss: 2.876787 [25600/40000]
loss: 2.890200 [32000/40000]
loss: 2.910312 [38400/40000]
Test Error: 
 Accuracy: 5.1%, Avg loss: 4.642931

loss: 2.869914 [0/40000]
loss: 2.906414 [6400/40000]
loss: 2.975931 [12800/40000]
loss: 3.009435 [19200/40000]
loss: 2.790966 [25

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
val_accuracy,▂▂▄▁█
val_loss,▁▁▂█▁

0,1
epoch,5.0
val_accuracy,7.735
val_loss,4.01055


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: s83363rh with config:
[34m[1mwandb[0m: 	alpha: 0.9
[34m[1mwandb[0m: 	bottleneck: True
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	growth_rate: 12
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	lr_gamma: 0.3
[34m[1mwandb[0m: 	optimizer: RMSprop
[34m[1mwandb[0m: 	temperature: 0.5
[34m[1mwandb[0m: 	weight_decay: 4e-05
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011218862500067593, max=1.0…

Run name:  RMSprop / lr_0.05 / alpha_0.9 / dropout_0.2 / growth_12 / bottleneck_True / lr_gamma_0.3 / temp_0.5 / weight_decay_4e-05
loss: 4.265295 [0/40000]


wandb: ERROR Error while calling W&B API: context deadline exceeded (<Response [500]>)


loss: 4.797584 [6400/40000]
loss: 4.322650 [12800/40000]
loss: 4.330664 [19200/40000]
loss: 4.698223 [25600/40000]
loss: 4.291952 [32000/40000]
loss: 4.447123 [38400/40000]
Test Error: 
 Accuracy: 1.2%, Avg loss: 9.054632

loss: 4.386692 [0/40000]
loss: 4.494320 [6400/40000]
loss: 4.220326 [12800/40000]
loss: 4.440357 [19200/40000]
loss: 4.406767 [25600/40000]
loss: 4.352552 [32000/40000]
loss: 4.430954 [38400/40000]
Test Error: 
 Accuracy: 1.0%, Avg loss: 350.516412

loss: 4.613198 [0/40000]
loss: 4.877012 [6400/40000]
loss: 4.426882 [12800/40000]
loss: 4.490308 [19200/40000]
loss: 4.314939 [25600/40000]
loss: 4.327379 [32000/40000]
loss: 4.391150 [38400/40000]
Test Error: 
 Accuracy: 1.3%, Avg loss: 367.473149

loss: 4.390847 [0/40000]
loss: 8.081509 [6400/40000]
loss: 4.348940 [12800/40000]
loss: 4.468610 [19200/40000]
loss: 4.356429 [25600/40000]
loss: 4.386706 [32000/40000]
loss: 4.406091 [38400/40000]
Test Error: 
 Accuracy: 1.0%, Avg loss: 19.416457

loss: 4.337761 [0/40000]
los

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
val_accuracy,▆▁█▂▁
val_loss,▁▃▃▁█

0,1
epoch,5.0
val_accuracy,1.0
val_loss,1471.9855


[34m[1mwandb[0m: Agent Starting Run: eaiyzer4 with config:
[34m[1mwandb[0m: 	alpha: 0.7
[34m[1mwandb[0m: 	bottleneck: False
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	growth_rate: 18
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	lr_gamma: 0.1
[34m[1mwandb[0m: 	optimizer: Adam
[34m[1mwandb[0m: 	temperature: 0.5
[34m[1mwandb[0m: 	weight_decay: 4e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011223675933272009, max=1.0…

Run name:  Adam / lr_0.05 / alpha_0.7 / dropout_0 / growth_18 / bottleneck_False / lr_gamma_0.1 / temp_0.5 / weight_decay_4e-05
loss: 3.660249 [0/40000]
loss: 3.413182 [6400/40000]
loss: 3.287546 [12800/40000]
loss: 3.380669 [19200/40000]
loss: 3.318784 [25600/40000]
loss: 3.256132 [32000/40000]
loss: 3.192126 [38400/40000]
Test Error: 
 Accuracy: 4.5%, Avg loss: 4.239957

loss: 3.262769 [0/40000]
loss: 3.244479 [6400/40000]
loss: 3.122771 [12800/40000]
loss: 3.246646 [19200/40000]
loss: 3.148272 [25600/40000]
loss: 3.063935 [32000/40000]
loss: 2.958948 [38400/40000]
Test Error: 
 Accuracy: 6.8%, Avg loss: 4.021687

loss: 3.076134 [0/40000]
loss: 3.377604 [6400/40000]
loss: 3.068630 [12800/40000]
loss: 3.181702 [19200/40000]
loss: 2.988222 [25600/40000]
loss: 2.974760 [32000/40000]
loss: 3.181623 [38400/40000]
Test Error: 
 Accuracy: 9.2%, Avg loss: 3.886974

loss: 2.757160 [0/40000]
loss: 2.892049 [6400/40000]
loss: 3.096702 [12800/40000]
loss: 2.873559 [19200/40000]
loss: 2.875634 [2

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
val_accuracy,▁▄▆█▅
val_loss,█▄▂▁▅

0,1
epoch,5.0
val_accuracy,7.8275
val_loss,4.04374


[34m[1mwandb[0m: Agent Starting Run: bmhh213z with config:
[34m[1mwandb[0m: 	alpha: 0.9
[34m[1mwandb[0m: 	bottleneck: True
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	growth_rate: 18
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	lr_gamma: 0.3
[34m[1mwandb[0m: 	optimizer: Adam
[34m[1mwandb[0m: 	temperature: 0.5
[34m[1mwandb[0m: 	weight_decay: 1e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011223493977740872, max=1.0…

Run name:  Adam / lr_0.1 / alpha_0.9 / dropout_0.3 / growth_18 / bottleneck_True / lr_gamma_0.3 / temp_0.5 / weight_decay_1e-05
loss: 4.278213 [0/40000]
loss: 4.121851 [6400/40000]
loss: 4.194000 [12800/40000]
loss: 4.144911 [19200/40000]
loss: 3.935740 [25600/40000]
loss: 4.109190 [32000/40000]
loss: 3.948375 [38400/40000]
Test Error: 
 Accuracy: 4.2%, Avg loss: 4.351579

loss: 4.081284 [0/40000]
loss: 3.721755 [6400/40000]
loss: 3.691903 [12800/40000]
loss: 3.755623 [19200/40000]
loss: 3.782952 [25600/40000]
loss: 3.666044 [32000/40000]
loss: 3.773466 [38400/40000]
Test Error: 
 Accuracy: 3.2%, Avg loss: 5.246836

loss: 3.794428 [0/40000]
loss: 3.651362 [6400/40000]
loss: 3.589965 [12800/40000]
loss: 3.797123 [19200/40000]
loss: 3.793657 [25600/40000]
loss: 3.591815 [32000/40000]
loss: 3.713839 [38400/40000]
Test Error: 
 Accuracy: 6.3%, Avg loss: 5.368992

loss: 3.454129 [0/40000]
loss: 3.500636 [6400/40000]
loss: 3.349344 [12800/40000]
loss: 3.949766 [19200/40000]
loss: 3.612420 [2

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
val_accuracy,▂▁▅▅█
val_loss,▃▇█▃▁

0,1
epoch,5.0
val_accuracy,8.1425
val_loss,4.03584


[34m[1mwandb[0m: Agent Starting Run: h1clptr5 with config:
[34m[1mwandb[0m: 	alpha: 0.9
[34m[1mwandb[0m: 	bottleneck: True
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	growth_rate: 18
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	lr_gamma: 0.1
[34m[1mwandb[0m: 	optimizer: RMSprop
[34m[1mwandb[0m: 	temperature: 0.5
[34m[1mwandb[0m: 	weight_decay: 4e-05


Run name:  RMSprop / lr_0.05 / alpha_0.9 / dropout_0.2 / growth_18 / bottleneck_True / lr_gamma_0.1 / temp_0.5 / weight_decay_4e-05
loss: 4.306247 [0/40000]
loss: 11.629231 [6400/40000]
loss: 7.207847 [12800/40000]
loss: 4.904413 [19200/40000]
loss: 4.448657 [25600/40000]
loss: 4.631292 [32000/40000]
loss: 4.376763 [38400/40000]
Test Error: 
 Accuracy: 1.6%, Avg loss: 5.971328

loss: 4.407309 [0/40000]
loss: 4.785070 [6400/40000]
loss: 4.752174 [12800/40000]
loss: 4.808119 [19200/40000]
loss: 4.654322 [25600/40000]
loss: 4.268241 [32000/40000]
loss: 4.317183 [38400/40000]
Test Error: 
 Accuracy: 0.7%, Avg loss: 20.591696

loss: 4.606661 [0/40000]
loss: 4.338557 [6400/40000]
loss: 4.441094 [12800/40000]
loss: 4.452987 [19200/40000]
loss: 4.422584 [25600/40000]
loss: 4.372622 [32000/40000]
loss: 4.367122 [38400/40000]
Test Error: 
 Accuracy: 0.9%, Avg loss: 168.877825

loss: 4.352106 [0/40000]
loss: 5.295310 [6400/40000]
loss: 4.708655 [12800/40000]
loss: 4.431078 [19200/40000]
loss: 4.6

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
val_accuracy,█▁▂▃▃
val_loss,▁▂█▁▃

0,1
epoch,5.0
val_accuracy,0.9725
val_loss,57.18993


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: wybv6jqx with config:
[34m[1mwandb[0m: 	alpha: 0.7
[34m[1mwandb[0m: 	bottleneck: True
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	growth_rate: 12
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	lr_gamma: 0.3
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	temperature: 0.3
[34m[1mwandb[0m: 	weight_decay: 4e-05


Run name:  SGD / lr_0.05 / alpha_0.7 / dropout_0 / growth_12 / bottleneck_True / lr_gamma_0.3 / temp_0.3 / weight_decay_4e-05
loss: 3.343158 [0/40000]
loss: 3.072527 [6400/40000]
loss: 2.803607 [12800/40000]
loss: 2.810993 [19200/40000]
loss: 3.029817 [25600/40000]
loss: 2.631893 [32000/40000]
loss: 2.651509 [38400/40000]
Test Error: 
 Accuracy: 15.4%, Avg loss: 3.531409

loss: 2.537847 [0/40000]
loss: 2.581179 [6400/40000]
loss: 2.388868 [12800/40000]
loss: 2.301183 [19200/40000]
loss: 2.230112 [25600/40000]
loss: 2.120758 [32000/40000]
loss: 1.988944 [38400/40000]
Test Error: 
 Accuracy: 27.0%, Avg loss: 2.853239

loss: 2.105553 [0/40000]
loss: 2.025631 [6400/40000]
loss: 2.171468 [12800/40000]
loss: 1.929065 [19200/40000]
loss: 2.069127 [25600/40000]
loss: 1.840333 [32000/40000]
loss: 1.562930 [38400/40000]
Test Error: 
 Accuracy: 33.5%, Avg loss: 2.538361

loss: 2.010887 [0/40000]
loss: 1.732815 [6400/40000]
loss: 1.865995 [12800/40000]
loss: 1.782575 [19200/40000]
loss: 1.654031 [

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
val_accuracy,▁▄▅▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
val_accuracy,43.97
val_loss,2.0695


[34m[1mwandb[0m: Agent Starting Run: 9lwzcd4o with config:
[34m[1mwandb[0m: 	alpha: 0.7
[34m[1mwandb[0m: 	bottleneck: True
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	growth_rate: 12
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	lr_gamma: 0.1
[34m[1mwandb[0m: 	optimizer: RMSprop
[34m[1mwandb[0m: 	temperature: 0.5
[34m[1mwandb[0m: 	weight_decay: 4e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011223597688755641, max=1.0…

Run name:  RMSprop / lr_0.05 / alpha_0.7 / dropout_0.2 / growth_12 / bottleneck_True / lr_gamma_0.1 / temp_0.5 / weight_decay_4e-05
loss: 3.595114 [0/40000]
loss: 3.995954 [6400/40000]
loss: 4.056948 [12800/40000]
loss: 3.632473 [19200/40000]
loss: 3.766059 [25600/40000]
loss: 3.596190 [32000/40000]
loss: 3.766304 [38400/40000]
Test Error: 
 Accuracy: 1.3%, Avg loss: 4.794957

loss: 3.663815 [0/40000]
loss: 3.759430 [6400/40000]
loss: 3.593078 [12800/40000]
loss: 3.637517 [19200/40000]
loss: 3.625576 [25600/40000]
loss: 3.670685 [32000/40000]
loss: 3.702896 [38400/40000]
Test Error: 
 Accuracy: 1.2%, Avg loss: 274.362918

loss: 3.998927 [0/40000]
loss: 4.482728 [6400/40000]
loss: 3.703066 [12800/40000]
loss: 3.916018 [19200/40000]
loss: 3.706710 [25600/40000]
loss: 3.640208 [32000/40000]
loss: 3.637635 [38400/40000]
Test Error: 
 Accuracy: 1.0%, Avg loss: 18.830607

loss: 3.729365 [0/40000]
loss: 4.179500 [6400/40000]
loss: 3.949374 [12800/40000]
loss: 3.866669 [19200/40000]
loss: 3.77

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
val_accuracy,█▆▂▂▁
val_loss,▁▃▁█▁

0,1
epoch,5.0
val_accuracy,0.9775
val_loss,4.73824


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 75m52hnk with config:
[34m[1mwandb[0m: 	alpha: 0.9
[34m[1mwandb[0m: 	bottleneck: False
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	growth_rate: 18
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	lr_gamma: 0.3
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	temperature: 0.5
[34m[1mwandb[0m: 	weight_decay: 4e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011223750466726617, max=1.0…

Run name:  SGD / lr_0.05 / alpha_0.9 / dropout_0.2 / growth_18 / bottleneck_False / lr_gamma_0.3 / temp_0.5 / weight_decay_4e-05
loss: 4.314622 [0/40000]
loss: 3.787286 [6400/40000]
loss: 3.690921 [12800/40000]
loss: 3.864851 [19200/40000]
loss: 3.481808 [25600/40000]
loss: 3.335868 [32000/40000]
loss: 3.464485 [38400/40000]
Test Error: 
 Accuracy: 17.0%, Avg loss: 3.404584

loss: 3.124171 [0/40000]
loss: 3.101824 [6400/40000]
loss: 3.207647 [12800/40000]
loss: 3.037912 [19200/40000]
loss: 2.720607 [25600/40000]
loss: 2.794789 [32000/40000]
loss: 2.623399 [38400/40000]
Test Error: 
 Accuracy: 27.4%, Avg loss: 2.866600

loss: 2.567890 [0/40000]
loss: 2.560680 [6400/40000]
loss: 2.644041 [12800/40000]
loss: 2.432384 [19200/40000]
loss: 2.296749 [25600/40000]
loss: 2.519578 [32000/40000]
loss: 2.504173 [38400/40000]
Test Error: 
 Accuracy: 31.9%, Avg loss: 2.631236

loss: 2.267893 [0/40000]
loss: 2.184081 [6400/40000]
loss: 2.196723 [12800/40000]
loss: 1.959490 [19200/40000]
loss: 1.88019

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
val_accuracy,▁▃▄▇█
val_loss,█▅▄▂▁

0,1
epoch,5.0
val_accuracy,47.4975
val_loss,1.90378


[34m[1mwandb[0m: Agent Starting Run: qnj1uh7o with config:
[34m[1mwandb[0m: 	alpha: 0.9
[34m[1mwandb[0m: 	bottleneck: False
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	growth_rate: 18
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	lr_gamma: 0.3
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	temperature: 0.5
[34m[1mwandb[0m: 	weight_decay: 4e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011223099533203317, max=1.0…

Run name:  SGD / lr_0.05 / alpha_0.9 / dropout_0.3 / growth_18 / bottleneck_False / lr_gamma_0.3 / temp_0.5 / weight_decay_4e-05
loss: 4.298599 [0/40000]
loss: 3.846824 [6400/40000]
loss: 3.528808 [12800/40000]
loss: 3.368920 [19200/40000]
loss: 3.578658 [25600/40000]
loss: 3.297917 [32000/40000]
loss: 3.299170 [38400/40000]
Test Error: 
 Accuracy: 16.2%, Avg loss: 3.473619

loss: 3.481165 [0/40000]
loss: 3.223503 [6400/40000]
loss: 3.238327 [12800/40000]
loss: 3.287432 [19200/40000]
loss: 3.070650 [25600/40000]
loss: 2.700750 [32000/40000]
loss: 2.441479 [38400/40000]
Test Error: 
 Accuracy: 27.6%, Avg loss: 2.820260

loss: 2.845160 [0/40000]
loss: 2.546674 [6400/40000]
loss: 2.528852 [12800/40000]
loss: 2.188390 [19200/40000]
loss: 2.304164 [25600/40000]
loss: 2.483366 [32000/40000]
loss: 2.338350 [38400/40000]
Test Error: 
 Accuracy: 38.0%, Avg loss: 2.348241

loss: 2.264133 [0/40000]
loss: 1.998160 [6400/40000]
loss: 2.256293 [12800/40000]
loss: 2.134583 [19200/40000]
loss: 2.31544

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
val_accuracy,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
val_accuracy,46.8525
val_loss,1.94869


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: w0pbisv9 with config:
[34m[1mwandb[0m: 	alpha: 0.7
[34m[1mwandb[0m: 	bottleneck: False
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	growth_rate: 18
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	lr_gamma: 0.3
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	temperature: 0.5
[34m[1mwandb[0m: 	weight_decay: 4e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011223321755662458, max=1.0…

Run name:  SGD / lr_0.05 / alpha_0.7 / dropout_0.3 / growth_18 / bottleneck_False / lr_gamma_0.3 / temp_0.5 / weight_decay_4e-05
loss: 3.629009 [0/40000]
loss: 3.285404 [6400/40000]
loss: 3.082648 [12800/40000]
loss: 2.983084 [19200/40000]
loss: 2.964281 [25600/40000]
loss: 2.726713 [32000/40000]
loss: 2.573498 [38400/40000]
Test Error: 
 Accuracy: 16.2%, Avg loss: 3.457644

loss: 2.768394 [0/40000]
loss: 2.915216 [6400/40000]
loss: 2.285378 [12800/40000]
loss: 2.730334 [19200/40000]
loss: 2.220136 [25600/40000]
loss: 2.425975 [32000/40000]
loss: 2.161753 [38400/40000]
Test Error: 
 Accuracy: 23.9%, Avg loss: 3.020901

loss: 2.240609 [0/40000]
loss: 2.006033 [6400/40000]
loss: 1.735214 [12800/40000]
loss: 2.202931 [19200/40000]
loss: 1.894111 [25600/40000]
loss: 2.145629 [32000/40000]
loss: 1.772503 [38400/40000]
Test Error: 
 Accuracy: 31.8%, Avg loss: 2.672827

loss: 1.913939 [0/40000]
loss: 1.857010 [6400/40000]
loss: 1.654243 [12800/40000]
loss: 1.576182 [19200/40000]
loss: 1.85157

VBox(children=(Label(value='0.001 MB of 0.013 MB uploaded\r'), FloatProgress(value=0.07358906839370863, max=1.…

0,1
epoch,▁▃▅▆█
val_accuracy,▁▃▅▇█
val_loss,█▆▄▂▁

0,1
epoch,5.0
val_accuracy,45.13
val_loss,2.00803


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: aed25owh with config:
[34m[1mwandb[0m: 	alpha: 0.9
[34m[1mwandb[0m: 	bottleneck: False
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	growth_rate: 18
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	lr_gamma: 0.3
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	temperature: 0.5
[34m[1mwandb[0m: 	weight_decay: 1e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011223912500039053, max=1.0…

Run name:  SGD / lr_0.05 / alpha_0.9 / dropout_0.3 / growth_18 / bottleneck_False / lr_gamma_0.3 / temp_0.5 / weight_decay_1e-05
loss: 4.271152 [0/40000]
loss: 3.852246 [6400/40000]
loss: 3.620556 [12800/40000]
loss: 3.465893 [19200/40000]
loss: 3.248549 [25600/40000]
loss: 3.440275 [32000/40000]
loss: 3.151579 [38400/40000]
Test Error: 
 Accuracy: 16.4%, Avg loss: 3.460166

loss: 3.167067 [0/40000]
loss: 2.903201 [6400/40000]
loss: 3.000974 [12800/40000]
loss: 2.701319 [19200/40000]
loss: 2.745051 [25600/40000]
loss: 2.580399 [32000/40000]
loss: 2.914092 [38400/40000]
Test Error: 
 Accuracy: 26.8%, Avg loss: 2.871243

loss: 2.556648 [0/40000]
loss: 2.634104 [6400/40000]
loss: 2.321227 [12800/40000]
loss: 2.651322 [19200/40000]
loss: 2.404831 [25600/40000]
loss: 2.372994 [32000/40000]
loss: 2.348025 [38400/40000]
Test Error: 
 Accuracy: 35.4%, Avg loss: 2.461956

loss: 2.378511 [0/40000]
loss: 2.288824 [6400/40000]
loss: 2.255254 [12800/40000]
loss: 2.230013 [19200/40000]
loss: 1.95176

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
val_accuracy,▁▄▆▆█
val_loss,█▅▃▂▁

0,1
epoch,5.0
val_accuracy,45.3325
val_loss,2.0345


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ts0ytjs3 with config:
[34m[1mwandb[0m: 	alpha: 0.9
[34m[1mwandb[0m: 	bottleneck: False
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	growth_rate: 18
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	lr_gamma: 0.3
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	temperature: 0.5
[34m[1mwandb[0m: 	weight_decay: 1e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011201765277979172, max=1.0…

Run name:  SGD / lr_0.05 / alpha_0.9 / dropout_0.3 / growth_18 / bottleneck_False / lr_gamma_0.3 / temp_0.5 / weight_decay_1e-05
loss: 4.306124 [0/40000]
loss: 3.848259 [6400/40000]
loss: 3.691851 [12800/40000]
loss: 3.692580 [19200/40000]
loss: 3.378096 [25600/40000]
loss: 3.394649 [32000/40000]
loss: 3.088682 [38400/40000]
Test Error: 
 Accuracy: 14.9%, Avg loss: 3.557024

loss: 3.263147 [0/40000]
loss: 3.323675 [6400/40000]
loss: 2.844238 [12800/40000]
loss: 3.288754 [19200/40000]
loss: 2.707074 [25600/40000]
loss: 2.755441 [32000/40000]
loss: 2.837940 [38400/40000]
Test Error: 
 Accuracy: 27.3%, Avg loss: 2.868462

loss: 2.698695 [0/40000]
loss: 2.690529 [6400/40000]
loss: 2.405286 [12800/40000]
loss: 2.443734 [19200/40000]
loss: 2.635622 [25600/40000]
loss: 2.510779 [32000/40000]
loss: 2.502064 [38400/40000]
Test Error: 
 Accuracy: 34.8%, Avg loss: 2.508545

loss: 2.588014 [0/40000]
loss: 2.342636 [6400/40000]
loss: 2.105269 [12800/40000]
loss: 2.165405 [19200/40000]
loss: 2.10421

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
val_accuracy,▁▄▅▆█
val_loss,█▅▄▂▁

0,1
epoch,5.0
val_accuracy,48.455
val_loss,1.87053


[34m[1mwandb[0m: Agent Starting Run: f8x8mtax with config:
[34m[1mwandb[0m: 	alpha: 0.7
[34m[1mwandb[0m: 	bottleneck: False
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	growth_rate: 18
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	lr_gamma: 0.3
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	temperature: 0.5
[34m[1mwandb[0m: 	weight_decay: 1e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011223398611117671, max=1.0…

Run name:  SGD / lr_0.05 / alpha_0.7 / dropout_0 / growth_18 / bottleneck_False / lr_gamma_0.3 / temp_0.5 / weight_decay_1e-05
loss: 3.572153 [0/40000]
loss: 3.265350 [6400/40000]
loss: 3.113343 [12800/40000]
loss: 3.157789 [19200/40000]
loss: 3.142596 [25600/40000]
loss: 2.851784 [32000/40000]
loss: 2.876231 [38400/40000]
Test Error: 
 Accuracy: 15.2%, Avg loss: 3.543154

loss: 2.790482 [0/40000]
loss: 2.634310 [6400/40000]
loss: 2.407294 [12800/40000]
loss: 2.884216 [19200/40000]
loss: 2.439633 [25600/40000]
loss: 2.388529 [32000/40000]
loss: 2.084669 [38400/40000]
Test Error: 
 Accuracy: 26.3%, Avg loss: 2.908301

loss: 2.211584 [0/40000]
loss: 2.334589 [6400/40000]
loss: 2.113340 [12800/40000]
loss: 2.203331 [19200/40000]
loss: 2.219975 [25600/40000]
loss: 1.834858 [32000/40000]
loss: 1.955353 [38400/40000]
Test Error: 
 Accuracy: 32.7%, Avg loss: 2.586802

loss: 2.002132 [0/40000]
loss: 1.889491 [6400/40000]
loss: 1.875852 [12800/40000]
loss: 1.802874 [19200/40000]
loss: 1.775884 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
val_accuracy,▁▄▅██
val_loss,█▅▃▁▁

0,1
epoch,5.0
val_accuracy,43.2175
val_loss,2.13904


[34m[1mwandb[0m: Agent Starting Run: x69696ex with config:
[34m[1mwandb[0m: 	alpha: 0.9
[34m[1mwandb[0m: 	bottleneck: False
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	growth_rate: 18
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	lr_gamma: 0.3
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	temperature: 0.5
[34m[1mwandb[0m: 	weight_decay: 1e-05


Run name:  SGD / lr_0.05 / alpha_0.9 / dropout_0.3 / growth_18 / bottleneck_False / lr_gamma_0.3 / temp_0.5 / weight_decay_1e-05
loss: 4.227479 [0/40000]
loss: 3.980433 [6400/40000]
loss: 3.624254 [12800/40000]
loss: 3.808121 [19200/40000]
loss: 3.325144 [25600/40000]
loss: 3.387464 [32000/40000]
loss: 3.131479 [38400/40000]
Test Error: 
 Accuracy: 16.0%, Avg loss: 3.469704

loss: 3.266784 [0/40000]
loss: 3.055146 [6400/40000]
loss: 3.032740 [12800/40000]
loss: 2.856745 [19200/40000]
loss: 2.878380 [25600/40000]
loss: 2.846563 [32000/40000]
loss: 2.696573 [38400/40000]
Test Error: 
 Accuracy: 26.6%, Avg loss: 2.864699

loss: 2.498207 [0/40000]
loss: 2.764250 [6400/40000]
loss: 2.408621 [12800/40000]
loss: 2.459475 [19200/40000]
loss: 2.586199 [25600/40000]
loss: 2.357044 [32000/40000]
loss: 2.307050 [38400/40000]
Test Error: 
 Accuracy: 34.5%, Avg loss: 2.490743

loss: 2.334879 [0/40000]
loss: 2.645073 [6400/40000]
loss: 2.290902 [12800/40000]
loss: 1.833744 [19200/40000]
loss: 2.19213

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
val_accuracy,▁▄▆▆█
val_loss,█▅▃▃▁

0,1
epoch,5.0
val_accuracy,43.74
val_loss,2.05847


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: s16ff9yc with config:
[34m[1mwandb[0m: 	alpha: 0.5
[34m[1mwandb[0m: 	bottleneck: True
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	growth_rate: 18
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	lr_gamma: 0.3
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	temperature: 0.5
[34m[1mwandb[0m: 	weight_decay: 1e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011223520366669012, max=1.0…

Run name:  SGD / lr_0.05 / alpha_0.5 / dropout_0.3 / growth_18 / bottleneck_True / lr_gamma_0.3 / temp_0.5 / weight_decay_1e-05
loss: 2.952329 [0/40000]
loss: 2.611945 [6400/40000]
loss: 2.471727 [12800/40000]
loss: 2.477351 [19200/40000]
loss: 2.259022 [25600/40000]
loss: 2.255201 [32000/40000]
loss: 2.209257 [38400/40000]
Test Error: 
 Accuracy: 17.8%, Avg loss: 3.402300

loss: 2.236706 [0/40000]
loss: 2.104675 [6400/40000]
loss: 1.876648 [12800/40000]
loss: 1.893790 [19200/40000]
loss: 1.741797 [25600/40000]
loss: 1.837792 [32000/40000]
loss: 1.740600 [38400/40000]
Test Error: 
 Accuracy: 27.7%, Avg loss: 2.888272

loss: 1.874713 [0/40000]
loss: 1.761022 [6400/40000]
loss: 1.978493 [12800/40000]
loss: 1.898691 [19200/40000]
loss: 1.590222 [25600/40000]
loss: 1.786619 [32000/40000]
loss: 1.515616 [38400/40000]
Test Error: 
 Accuracy: 30.8%, Avg loss: 2.665944

loss: 1.766292 [0/40000]
loss: 1.711910 [6400/40000]
loss: 1.362355 [12800/40000]
loss: 1.615754 [19200/40000]
loss: 1.718261

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
val_accuracy,▁▄▄▇█
val_loss,█▅▄▂▁

0,1
epoch,5.0
val_accuracy,44.92
val_loss,2.0525


[34m[1mwandb[0m: Agent Starting Run: 30sml0az with config:
[34m[1mwandb[0m: 	alpha: 0.5
[34m[1mwandb[0m: 	bottleneck: True
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	growth_rate: 18
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	lr_gamma: 0.3
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	temperature: 0.3
[34m[1mwandb[0m: 	weight_decay: 4e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011223344444361929, max=1.0…

Run name:  SGD / lr_0.05 / alpha_0.5 / dropout_0.3 / growth_18 / bottleneck_True / lr_gamma_0.3 / temp_0.3 / weight_decay_4e-05
loss: 2.552216 [0/40000]
loss: 2.447219 [6400/40000]
loss: 2.279592 [12800/40000]
loss: 2.117611 [19200/40000]
loss: 2.109565 [25600/40000]
loss: 2.256026 [32000/40000]
loss: 2.000983 [38400/40000]
Test Error: 
 Accuracy: 12.6%, Avg loss: 3.736635

loss: 2.092804 [0/40000]
loss: 1.954897 [6400/40000]
loss: 1.851500 [12800/40000]
loss: 1.972035 [19200/40000]
loss: 1.835198 [25600/40000]
loss: 1.703855 [32000/40000]
loss: 1.775987 [38400/40000]
Test Error: 
 Accuracy: 21.9%, Avg loss: 3.133246

loss: 1.733083 [0/40000]
loss: 1.530245 [6400/40000]
loss: 1.568824 [12800/40000]
loss: 1.592893 [19200/40000]
loss: 1.596741 [25600/40000]
loss: 1.879893 [32000/40000]
loss: 1.369430 [38400/40000]
Test Error: 
 Accuracy: 31.9%, Avg loss: 2.634771

loss: 1.526479 [0/40000]
loss: 1.535437 [6400/40000]
loss: 1.445748 [12800/40000]
loss: 1.224848 [19200/40000]
loss: 1.173769

VBox(children=(Label(value='0.001 MB of 0.013 MB uploaded\r'), FloatProgress(value=0.07367296045600286, max=1.…

0,1
epoch,▁▃▅▆█
val_accuracy,▁▃▅▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
val_accuracy,43.95
val_loss,2.10003


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: f7j5011f with config:
[34m[1mwandb[0m: 	alpha: 0.9
[34m[1mwandb[0m: 	bottleneck: True
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	growth_rate: 18
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	lr_gamma: 0.3
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	temperature: 0.5
[34m[1mwandb[0m: 	weight_decay: 4e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011223273144181197, max=1.0…

Run name:  SGD / lr_0.05 / alpha_0.9 / dropout_0.2 / growth_18 / bottleneck_True / lr_gamma_0.3 / temp_0.5 / weight_decay_4e-05
loss: 4.346490 [0/40000]
loss: 3.949597 [6400/40000]
loss: 4.012076 [12800/40000]
loss: 3.475628 [19200/40000]
loss: 3.366695 [25600/40000]
loss: 3.403450 [32000/40000]
loss: 3.146581 [38400/40000]
Test Error: 
 Accuracy: 16.1%, Avg loss: 3.453281

loss: 3.222385 [0/40000]
loss: 3.340034 [6400/40000]
loss: 2.798954 [12800/40000]
loss: 2.855089 [19200/40000]
loss: 3.130921 [25600/40000]
loss: 2.902505 [32000/40000]
loss: 2.543183 [38400/40000]
Test Error: 
 Accuracy: 25.9%, Avg loss: 2.962364

loss: 2.534345 [0/40000]
loss: 2.421487 [6400/40000]
loss: 2.032638 [12800/40000]
loss: 2.509492 [19200/40000]
loss: 2.317562 [25600/40000]
loss: 2.076983 [32000/40000]
loss: 2.417805 [38400/40000]
Test Error: 
 Accuracy: 35.7%, Avg loss: 2.433757

loss: 2.363886 [0/40000]
loss: 2.245544 [6400/40000]
loss: 2.335655 [12800/40000]
loss: 2.031116 [19200/40000]
loss: 2.299143

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
val_accuracy,▁▃▆██
val_loss,█▆▃▁▁

0,1
epoch,5.0
val_accuracy,44.5875
val_loss,2.03305


[34m[1mwandb[0m: Agent Starting Run: 7xjqzuq7 with config:
[34m[1mwandb[0m: 	alpha: 0.9
[34m[1mwandb[0m: 	bottleneck: False
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	growth_rate: 18
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	lr_gamma: 0.3
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	temperature: 0.3
[34m[1mwandb[0m: 	weight_decay: 1e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01122382037768451, max=1.0)…

Run name:  SGD / lr_0.05 / alpha_0.9 / dropout_0.2 / growth_18 / bottleneck_False / lr_gamma_0.3 / temp_0.3 / weight_decay_1e-05
loss: 4.231745 [0/40000]
loss: 3.834834 [6400/40000]


wandb: Network error (ConnectionError), entering retry loop.


loss: 3.549063 [12800/40000]


[34m[1mwandb[0m: Network error (ConnectionError), entering retry loop.


loss: 3.414405 [19200/40000]
loss: 3.266970 [25600/40000]
loss: 3.392025 [32000/40000]
loss: 3.233953 [38400/40000]
Test Error: 
 Accuracy: 14.4%, Avg loss: 3.638867

loss: 3.145591 [0/40000]
loss: 3.006533 [6400/40000]
loss: 2.961030 [12800/40000]
loss: 2.820025 [19200/40000]
loss: 2.588237 [25600/40000]
loss: 2.584831 [32000/40000]
loss: 2.432291 [38400/40000]
Test Error: 
 Accuracy: 27.5%, Avg loss: 2.827490

loss: 2.347822 [0/40000]
loss: 2.163829 [6400/40000]
loss: 2.555655 [12800/40000]
loss: 2.176779 [19200/40000]
loss: 2.355064 [25600/40000]
loss: 2.485780 [32000/40000]
loss: 2.296734 [38400/40000]
Test Error: 
 Accuracy: 34.7%, Avg loss: 2.478217

loss: 2.397866 [0/40000]
loss: 1.839357 [6400/40000]
loss: 2.111107 [12800/40000]
loss: 2.167364 [19200/40000]
loss: 2.302803 [25600/40000]
loss: 1.910862 [32000/40000]
loss: 2.020895 [38400/40000]
Test Error: 
 Accuracy: 42.2%, Avg loss: 2.132821

loss: 1.658976 [0/40000]
loss: 1.884217 [6400/40000]
loss: 1.853250 [12800/40000]
loss

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
val_accuracy,▁▄▅▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
val_accuracy,47.985
val_loss,1.87298


[34m[1mwandb[0m: Agent Starting Run: m10ylofi with config:
[34m[1mwandb[0m: 	alpha: 0.9
[34m[1mwandb[0m: 	bottleneck: True
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	growth_rate: 18
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	lr_gamma: 0.3
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	temperature: 0.5
[34m[1mwandb[0m: 	weight_decay: 1e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011223392588888399, max=1.0…

Run name:  SGD / lr_0.1 / alpha_0.9 / dropout_0.2 / growth_18 / bottleneck_True / lr_gamma_0.3 / temp_0.5 / weight_decay_1e-05
loss: 4.308201 [0/40000]
loss: 3.907932 [6400/40000]
loss: 3.572792 [12800/40000]
loss: 3.463505 [19200/40000]
loss: 3.615295 [25600/40000]
loss: 3.298095 [32000/40000]
loss: 3.078516 [38400/40000]
Test Error: 
 Accuracy: 19.0%, Avg loss: 3.348083

loss: 3.183243 [0/40000]
loss: 3.089086 [6400/40000]
loss: 2.628766 [12800/40000]
loss: 2.742583 [19200/40000]
loss: 2.685783 [25600/40000]
loss: 2.741322 [32000/40000]
loss: 2.722209 [38400/40000]
Test Error: 
 Accuracy: 29.5%, Avg loss: 2.728895

loss: 2.588046 [0/40000]
loss: 2.843583 [6400/40000]
loss: 2.401692 [12800/40000]
loss: 2.406142 [19200/40000]
loss: 2.259803 [25600/40000]
loss: 2.319631 [32000/40000]
loss: 2.206593 [38400/40000]
Test Error: 
 Accuracy: 35.5%, Avg loss: 2.468783

loss: 2.422567 [0/40000]
loss: 2.411598 [6400/40000]
loss: 1.990793 [12800/40000]
loss: 1.633236 [19200/40000]
loss: 2.014681 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
val_accuracy,▁▄▅▇█
val_loss,█▅▄▁▁

0,1
epoch,5.0
val_accuracy,47.1275
val_loss,1.95231


[34m[1mwandb[0m: Agent Starting Run: ctci5i0k with config:
[34m[1mwandb[0m: 	alpha: 0.9
[34m[1mwandb[0m: 	bottleneck: False
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	growth_rate: 18
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	lr_gamma: 0.3
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	temperature: 0.5
[34m[1mwandb[0m: 	weight_decay: 1e-05


Run name:  SGD / lr_0.1 / alpha_0.9 / dropout_0.3 / growth_18 / bottleneck_False / lr_gamma_0.3 / temp_0.5 / weight_decay_1e-05
loss: 4.261663 [0/40000]
loss: 3.934130 [6400/40000]
loss: 3.608915 [12800/40000]
loss: 3.743326 [19200/40000]
loss: 3.341441 [25600/40000]
loss: 3.227503 [32000/40000]
loss: 3.417485 [38400/40000]
Test Error: 
 Accuracy: 15.3%, Avg loss: 3.490589

loss: 2.812170 [0/40000]
loss: 3.190781 [6400/40000]
loss: 2.936044 [12800/40000]
loss: 2.924950 [19200/40000]
loss: 3.130913 [25600/40000]
loss: 2.701691 [32000/40000]
loss: 2.872542 [38400/40000]
Test Error: 
 Accuracy: 29.4%, Avg loss: 2.744645

loss: 2.398973 [0/40000]
loss: 2.407709 [6400/40000]
loss: 2.515553 [12800/40000]
loss: 2.337357 [19200/40000]
loss: 2.138769 [25600/40000]
loss: 2.329593 [32000/40000]
loss: 2.023126 [38400/40000]
Test Error: 
 Accuracy: 36.7%, Avg loss: 2.380362

loss: 2.397195 [0/40000]
loss: 2.158615 [6400/40000]
loss: 2.073389 [12800/40000]
loss: 1.907106 [19200/40000]
loss: 1.965130

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
val_accuracy,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
val_accuracy,46.24
val_loss,1.97397


[34m[1mwandb[0m: Agent Starting Run: 582owxij with config:
[34m[1mwandb[0m: 	alpha: 0.9
[34m[1mwandb[0m: 	bottleneck: True
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	growth_rate: 18
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	lr_gamma: 0.3
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	temperature: 0.5
[34m[1mwandb[0m: 	weight_decay: 1e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011223477322427141, max=1.0…

Run name:  SGD / lr_0.05 / alpha_0.9 / dropout_0.3 / growth_18 / bottleneck_True / lr_gamma_0.3 / temp_0.5 / weight_decay_1e-05
loss: 4.311179 [0/40000]
loss: 3.919492 [6400/40000]
loss: 3.668296 [12800/40000]
loss: 3.328015 [19200/40000]
loss: 3.347681 [25600/40000]
loss: 3.370659 [32000/40000]
loss: 2.893643 [38400/40000]
Test Error: 
 Accuracy: 16.6%, Avg loss: 3.407986

loss: 3.236651 [0/40000]
loss: 3.057373 [6400/40000]
loss: 2.905179 [12800/40000]
loss: 2.809231 [19200/40000]
loss: 2.314089 [25600/40000]
loss: 3.037251 [32000/40000]
loss: 2.760852 [38400/40000]
Test Error: 
 Accuracy: 27.8%, Avg loss: 2.814843

loss: 2.451884 [0/40000]
loss: 2.719365 [6400/40000]
loss: 2.513929 [12800/40000]
loss: 2.098386 [19200/40000]
loss: 2.331665 [25600/40000]
loss: 2.302519 [32000/40000]
loss: 2.323035 [38400/40000]
Test Error: 
 Accuracy: 34.9%, Avg loss: 2.454741

loss: 2.305219 [0/40000]
loss: 1.922729 [6400/40000]
loss: 2.006063 [12800/40000]
loss: 2.140831 [19200/40000]
loss: 1.956712

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
val_accuracy,▁▄▅▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
val_accuracy,45.5075
val_loss,1.96991


[34m[1mwandb[0m: Agent Starting Run: ygp4jah6 with config:
[34m[1mwandb[0m: 	alpha: 0.9
[34m[1mwandb[0m: 	bottleneck: True
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	growth_rate: 18
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	lr_gamma: 0.3
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	temperature: 0.5
[34m[1mwandb[0m: 	weight_decay: 1e-05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01114390694452191, max=1.0)…

Run name:  SGD / lr_0.1 / alpha_0.9 / dropout_0.3 / growth_18 / bottleneck_True / lr_gamma_0.3 / temp_0.5 / weight_decay_1e-05
loss: 4.218929 [0/40000]
loss: 3.811833 [6400/40000]
loss: 3.771649 [12800/40000]
loss: 3.607079 [19200/40000]
loss: 3.624594 [25600/40000]
loss: 3.457611 [32000/40000]
loss: 3.083219 [38400/40000]
Test Error: 
 Accuracy: 12.1%, Avg loss: 3.733317

loss: 3.021126 [0/40000]
loss: 3.290119 [6400/40000]
loss: 2.910751 [12800/40000]
loss: 2.954216 [19200/40000]
loss: 2.877865 [25600/40000]
loss: 3.066600 [32000/40000]
loss: 2.294977 [38400/40000]
Test Error: 
 Accuracy: 26.2%, Avg loss: 2.944865

loss: 2.651605 [0/40000]
loss: 2.574262 [6400/40000]
loss: 2.223831 [12800/40000]
loss: 2.405437 [19200/40000]
loss: 2.508388 [25600/40000]
loss: 2.716457 [32000/40000]
loss: 1.895089 [38400/40000]
Test Error: 
 Accuracy: 36.0%, Avg loss: 2.440603

loss: 2.072308 [0/40000]
loss: 2.170182 [6400/40000]
loss: 2.356375 [12800/40000]
loss: 2.192872 [19200/40000]
loss: 2.114227 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
val_accuracy,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
val_accuracy,48.7525
val_loss,1.85554


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: b2c9gbbb with config:
[34m[1mwandb[0m: 	alpha: 0.7
[34m[1mwandb[0m: 	bottleneck: False
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	growth_rate: 18
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	lr_gamma: 0.3
[34m[1mwandb[0m: 	optimizer: SGD
[34m[1mwandb[0m: 	temperature: 0.3
[34m[1mwandb[0m: 	weight_decay: 4e-05


Run name:  SGD / lr_0.05 / alpha_0.7 / dropout_0.2 / growth_18 / bottleneck_False / lr_gamma_0.3 / temp_0.3 / weight_decay_4e-05
loss: 3.354159 [0/40000]
loss: 3.073051 [6400/40000]
loss: 3.243021 [12800/40000]
loss: 2.685098 [19200/40000]
loss: 2.831759 [25600/40000]
loss: 2.454076 [32000/40000]
loss: 2.743147 [38400/40000]
Test Error: 
 Accuracy: 16.7%, Avg loss: 3.434104

loss: 2.464139 [0/40000]
loss: 2.520153 [6400/40000]
loss: 2.392344 [12800/40000]
loss: 2.465183 [19200/40000]
loss: 2.380396 [25600/40000]
loss: 2.419028 [32000/40000]
loss: 2.063451 [38400/40000]
Test Error: 
 Accuracy: 26.6%, Avg loss: 2.883815

loss: 2.087478 [0/40000]
loss: 2.206781 [6400/40000]
loss: 2.372774 [12800/40000]
loss: 1.821325 [19200/40000]
loss: 1.840449 [25600/40000]
loss: 1.933722 [32000/40000]
loss: 1.756778 [38400/40000]
Test Error: 
 Accuracy: 32.2%, Avg loss: 2.600117

loss: 2.031932 [0/40000]
loss: 2.091281 [6400/40000]
loss: 2.012183 [12800/40000]
loss: 1.842877 [19200/40000]
loss: 1.66245

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
val_accuracy,▁▄▅▇█
val_loss,█▅▃▂▁

0,1
epoch,5.0
val_accuracy,40.89
val_loss,2.15947
