In [2]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

Using device: cuda


In [3]:
def train_val(model, criterion, optimizer, train_loader, val_loader, device, scheduler = None, use_scheduler = True):
    model.train()
    train_loss = 0
    correct = 0
    count = 0
    total = 0
    for i, data in enumerate(train_loader, 0):
        image, label = data
        image = image.to(device)
        label = label.to(device)
    
        optimizer.zero_grad()
        output = model(image)
        loss = criterion(output, label)

        train_loss += loss.item()

        pred = torch.max(output.data, 1)[1]
        cur_correct = (pred == label).sum().item()
        cur_loss = loss.item()

        loss.backward()

        optimizer.step()
        
        total += label.size(0)
        correct += cur_correct
        train_loss += cur_loss

    train_accuracy = correct/total
    train_loss = train_loss/len(train_loader)
    
    model.eval()
    valid_loss = 0
    correct = 0
    count = 0
    total = 0
    for i, data in enumerate(val_loader, 0):
        image, label = data
        image = image.to(device)
        label = label.to(device)
                
        output = model(image)
        loss = criterion(output, label)

        pred = torch.max(output.data, 1)[1]
        cur_correct = (pred == label).sum().item()
        cur_loss = loss.item()
            
        total += label.size(0)
        correct += cur_correct
        valid_loss += cur_loss

    valid_accuracy = correct/total
    valid_loss = valid_loss/len(val_loader)
    
    if use_scheduler:
        scheduler.step(valid_accuracy)

    return train_loss, train_accuracy, valid_loss, valid_accuracy

def test(model, criterion, dataloader, device):
    model.eval()
    test_loss = 0
    correct = 0
    count = 0
    total = 0
    for i, data in enumerate(dataloader, 0):
        image, label = data
        image = image.to(device)
        label = label.to(device)
                
        output = model(image)
        loss = criterion(output, label)

        pred = torch.max(output.data, 1)[1]
        cur_correct = (pred == label).sum().item()
        cur_loss = loss.item()
            
        total += label.size(0)
        correct += cur_correct
        test_loss += cur_loss

    accuracy = correct/total
    test_loss = test_loss/len(dataloader)

    return test_loss, accuracy

In [4]:
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

In [5]:
class ZigZag_ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ZigZag_ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 128, num_blocks[3], stride=2)
        self.layer5 = self._make_layer(block, 64, num_blocks[4], stride=2)
        self.layer6 = self._make_layer(block, 128, num_blocks[5], stride=2)
        self.layer7 = self._make_layer(block, 256, num_blocks[6], stride=2)
        self.linear = nn.Linear(256*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = self.layer6(out)
        out = self.layer7(out)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

zz_model = ZigZag_ResNet(BasicBlock, [2, 2, 2, 2, 2, 1, 1])
num_params = sum(p.numel() for p in zz_model.parameters() if p.requires_grad)
print(f"Num Params: {num_params}\n")

Num Params: 4891338



In [43]:
class Deep_Narrow_ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(Deep_Narrow_ResNet, self).__init__()
        self.in_planes = 32

        self.conv1 = nn.Conv2d(3, 32, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layer1 = self._make_layer(block, 32, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 64, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 128, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.linear = nn.Linear(256*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
    
dn_model = Deep_Narrow_ResNet(BasicBlock, [13, 9, 3, 5])
num_params = sum(p.numel() for p in dn_model.parameters() if p.requires_grad)
print(f"Num Params: {num_params}\n")

Num Params: 4995754



In [5]:
import wandb
import os
os.environ['WANDB_API_KEY'] = '7af926a637567fde060a1a93e656d8887056249f'
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33msj3828[0m ([33mnyu-tandon[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [6]:
sweep_config = {
    "name": "hyperparameter_sweep",
    'metric': {'goal': 'maximize', 'name': 'test_accuracy'},
    "method": "grid",
    "parameters": {
        "factor": {
            "values": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]
        },
        "patience": {
            "values": [0, 1, 2, 3, 5, 10]
        }
    }
}

In [8]:
batch_size = 32
    
transform_train = torchvision.transforms.Compose([
  torchvision.transforms.RandomCrop(32, padding=4),
  torchvision.transforms.RandomHorizontalFlip(),
  torchvision.transforms.RandomResizedCrop(32, scale=(0.8, 1.0), ratio=(0.8, 1.2)),
  torchvision.transforms.ToTensor(),
  torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

transform_test = torchvision.transforms.Compose([
                  torchvision.transforms.ToTensor(), 
                  torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

trainset = torchvision.datasets.CIFAR10(root = './data', train = True, download = True, transform = transform_train)

# Split the train data into train and validation sets
train_size = int(0.8 * len(trainset))
val_size = len(trainset) - train_size
trainset, valset = torch.utils.data.random_split(trainset, [train_size, val_size])

train_size = int(0.2 * len(trainset))
val_size = int(0.2 * len(valset))
trainset, _ = torch.utils.data.random_split(trainset, [train_size, len(trainset) - train_size])
valset, _ = torch.utils.data.random_split(valset, [val_size, len(valset) - val_size])

testset = torchvision.datasets.CIFAR10(root = './data', train = False, download = True, transform = transform_test)

train_loader = torch.utils.data.DataLoader(trainset, batch_size = batch_size, shuffle = True)
val_loader   = torch.utils.data.DataLoader(valset, batch_size = batch_size, shuffle = True)
test_loader  = torch.utils.data.DataLoader(testset, batch_size = batch_size, shuffle = False)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
    
def train_sweep():
    wandb.init()
    
    config = wandb.config
    config.factor = sweep_config['parameters']['factor']
    config.patience = sweep_config['parameters']['patience']
    
    best_test_acc = 0

    torch.cuda.empty_cache()
    model = ZigZag_ResNet(BasicBlock, [2, 2, 2, 2, 2, 1, 1])
    num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Num Params: {num_params}\n")
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), 0.01, momentum = 0.8, weight_decay = 0.0005 , nesterov=True)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=config.factor, patience=config.patience, verbose = True)

    train_losses_ = []
    train_accuracies_ = []
    valid_losses_ = []
    valid_accuracies_ = []

    epochs = 50

    for epoch in range(epochs):
        print(f"\n\tEpoch: {epoch}")

        train_loss, train_accuracy, val_loss, val_accuracy = train_val(model, criterion, optimizer, 
                                                                    train_loader, val_loader, device,
                                                                    scheduler = scheduler, use_scheduler = True)
        train_losses_.append(train_loss)
        train_accuracies_.append(train_accuracy)
        valid_losses_.append(val_loss)
        valid_accuracies_.append(val_accuracy)
        print(f"\tTraining Loss: {round(train_loss, 4)}; Training Accuracy: {round(train_accuracy*100, 4)}%")
        print(f"\tValidation Loss: {round(val_loss, 4)}; Validation Accuracy: {round(val_accuracy*100, 4)}%")

    test_loss, test_accuracy = test(model, criterion, test_loader, device)
    print(f"\n\tTesting Loss: {round(test_loss, 4)}; Testing Accuracy: {round(test_accuracy*100, 4)}%")

    if test_accuracy > best_test_acc:
        best_test_acc = test_accuracy

        torch.save(model.state_dict(), 'zigzag_mini_resnet_7blocks.pth')

        metrics_dict = {'train_loss': train_losses_, 'train_accuracy': train_accuracies_, 
                      'valid_loss': valid_losses_, 'valid_accuracy': valid_accuracies_,
                      'test_loss': test_loss, 'test_accuracy': test_accuracy}
    wandb.log({'test_accuracy': test_accuracy})
    wandb.finish()

Files already downloaded and verified
Files already downloaded and verified


In [9]:
print(f"Train: {len(trainset)}; Validation: {len(valset)}; Test: {len(testset)}")

Train: 8000; Validation: 2000; Test: 10000


In [10]:
sweep_id = wandb.sweep(sweep_config, project = "ZigZag-Mini-Resnet")
wandb.agent(sweep_id, train_sweep)

Create sweep with ID: qe6f7ssq
Sweep URL: https://wandb.ai/nyu-tandon/ZigZag-Mini-Resnet/sweeps/qe6f7ssq


[34m[1mwandb[0m: Agent Starting Run: ijsoju5d with config:
[34m[1mwandb[0m: 	factor: 0.1
[34m[1mwandb[0m: 	patience: 0




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0732; Training Accuracy: 24.3875%
	Validation Loss: 1.8463; Validation Accuracy: 29.7%

	Epoch: 1
	Training Loss: 3.6255; Training Accuracy: 32.7375%
	Validation Loss: 1.7272; Validation Accuracy: 35.6%

	Epoch: 2
Epoch 00003: reducing learning rate of group 0 to 1.0000e-03.
	Training Loss: 3.4086; Training Accuracy: 36.7625%
	Validation Loss: 1.7302; Validation Accuracy: 34.75%

	Epoch: 3
	Training Loss: 3.1282; Training Accuracy: 42.55%
	Validation Loss: 1.5205; Validation Accuracy: 42.85%

	Epoch: 4
	Training Loss: 3.0019; Training Accuracy: 44.8%
	Validation Loss: 1.4858; Validation Accuracy: 45.2%

	Epoch: 5
	Training Loss: 2.9302; Training Accuracy: 46.5875%
	Validation Loss: 1.4341; Validation Accuracy: 48.15%

	Epoch: 6
Epoch 00007: reducing learning rate of group 0 to 1.0000e-04.
	Training Loss: 2.8656; Training Accuracy: 47.5875%
	Validation Loss: 1.4234; Validation Accuracy: 47.65%

	Epoch: 7
	Training Loss: 2.8217; Training 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.516


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: qvhmopbs with config:
[34m[1mwandb[0m: 	factor: 0.1
[34m[1mwandb[0m: 	patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0433; Training Accuracy: 25.1%
	Validation Loss: 1.993; Validation Accuracy: 31.45%

	Epoch: 1
	Training Loss: 3.5626; Training Accuracy: 33.775%
	Validation Loss: 1.8147; Validation Accuracy: 33.85%

	Epoch: 2
	Training Loss: 3.3776; Training Accuracy: 37.125%
	Validation Loss: 1.6208; Validation Accuracy: 39.4%

	Epoch: 3
	Training Loss: 3.2318; Training Accuracy: 39.55%
	Validation Loss: 1.5926; Validation Accuracy: 41.9%

	Epoch: 4
	Training Loss: 3.0805; Training Accuracy: 44.6875%
	Validation Loss: 1.5101; Validation Accuracy: 44.55%

	Epoch: 5
	Training Loss: 2.9462; Training Accuracy: 46.175%
	Validation Loss: 1.4324; Validation Accuracy: 48.15%

	Epoch: 6
	Training Loss: 2.832; Training Accuracy: 48.4%
	Validation Loss: 1.4248; Validation Accuracy: 48.85%

	Epoch: 7
	Training Loss: 2.7287; Training Accuracy: 49.9125%
	Validation Loss: 1.3522; Validation Accuracy: 50.55%

	Epoch: 8
	Training Loss: 2.5813; Training Accuracy: 53.6

0,1
test_accuracy,▁

0,1
test_accuracy,0.7561


[34m[1mwandb[0m: Agent Starting Run: ai740sir with config:
[34m[1mwandb[0m: 	factor: 0.1
[34m[1mwandb[0m: 	patience: 2




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1079; Training Accuracy: 23.175%
	Validation Loss: 1.8789; Validation Accuracy: 28.75%

	Epoch: 1
	Training Loss: 3.6316; Training Accuracy: 31.1%
	Validation Loss: 1.8412; Validation Accuracy: 33.15%

	Epoch: 2
	Training Loss: 3.392; Training Accuracy: 36.25%
	Validation Loss: 1.6724; Validation Accuracy: 38.85%

	Epoch: 3
	Training Loss: 3.2302; Training Accuracy: 41.1375%
	Validation Loss: 1.539; Validation Accuracy: 43.95%

	Epoch: 4
	Training Loss: 3.0714; Training Accuracy: 44.2%
	Validation Loss: 1.544; Validation Accuracy: 43.7%

	Epoch: 5
	Training Loss: 2.942; Training Accuracy: 46.6%
	Validation Loss: 1.4213; Validation Accuracy: 47.6%

	Epoch: 6
	Training Loss: 2.8223; Training Accuracy: 48.0375%
	Validation Loss: 1.4241; Validation Accuracy: 47.35%

	Epoch: 7
	Training Loss: 2.7336; Training Accuracy: 50.625%
	Validation Loss: 1.3792; Validation Accuracy: 50.65%

	Epoch: 8
	Training Loss: 2.6084; Training Accuracy: 53.1125%

0,1
test_accuracy,▁

0,1
test_accuracy,0.7665


[34m[1mwandb[0m: Agent Starting Run: jt5q4okh with config:
[34m[1mwandb[0m: 	factor: 0.1
[34m[1mwandb[0m: 	patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0255; Training Accuracy: 24.9375%
	Validation Loss: 1.921; Validation Accuracy: 29.15%

	Epoch: 1
	Training Loss: 3.6316; Training Accuracy: 32.3125%
	Validation Loss: 1.7828; Validation Accuracy: 34.8%

	Epoch: 2
	Training Loss: 3.4294; Training Accuracy: 35.7%
	Validation Loss: 1.6864; Validation Accuracy: 37.85%

	Epoch: 3
	Training Loss: 3.297; Training Accuracy: 38.9%
	Validation Loss: 1.6516; Validation Accuracy: 40.1%

	Epoch: 4
	Training Loss: 3.1341; Training Accuracy: 41.9%
	Validation Loss: 1.616; Validation Accuracy: 40.8%

	Epoch: 5
	Training Loss: 3.0521; Training Accuracy: 44.45%
	Validation Loss: 1.6012; Validation Accuracy: 41.3%

	Epoch: 6
	Training Loss: 2.9163; Training Accuracy: 47.1125%
	Validation Loss: 1.4874; Validation Accuracy: 45.25%

	Epoch: 7
	Training Loss: 2.7883; Training Accuracy: 48.9625%
	Validation Loss: 1.4438; Validation Accuracy: 48.75%

	Epoch: 8
	Training Loss: 2.6777; Training Accuracy: 51.2625

0,1
test_accuracy,▁

0,1
test_accuracy,0.7845


[34m[1mwandb[0m: Agent Starting Run: 29tsvk52 with config:
[34m[1mwandb[0m: 	factor: 0.1
[34m[1mwandb[0m: 	patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0557; Training Accuracy: 24.7375%
	Validation Loss: 1.8504; Validation Accuracy: 31.65%

	Epoch: 1
	Training Loss: 3.6128; Training Accuracy: 33.1375%
	Validation Loss: 1.7707; Validation Accuracy: 35.3%

	Epoch: 2
	Training Loss: 3.4177; Training Accuracy: 36.75%
	Validation Loss: 1.7157; Validation Accuracy: 36.6%

	Epoch: 3
	Training Loss: 3.2497; Training Accuracy: 40.5125%
	Validation Loss: 1.6507; Validation Accuracy: 40.9%

	Epoch: 4
	Training Loss: 3.173; Training Accuracy: 41.8125%
	Validation Loss: 1.5006; Validation Accuracy: 46.6%

	Epoch: 5
	Training Loss: 3.0206; Training Accuracy: 44.2875%
	Validation Loss: 1.5314; Validation Accuracy: 44.45%

	Epoch: 6
	Training Loss: 2.926; Training Accuracy: 45.95%
	Validation Loss: 1.402; Validation Accuracy: 48.45%

	Epoch: 7
	Training Loss: 2.805; Training Accuracy: 49.4125%
	Validation Loss: 1.381; Validation Accuracy: 49.8%

	Epoch: 8
	Training Loss: 2.7158; Training Accuracy: 51.

0,1
test_accuracy,▁

0,1
test_accuracy,0.8124


[34m[1mwandb[0m: Agent Starting Run: 3kx3jerp with config:
[34m[1mwandb[0m: 	factor: 0.1
[34m[1mwandb[0m: 	patience: 10




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0346; Training Accuracy: 24.3%
	Validation Loss: 1.8759; Validation Accuracy: 32.25%

	Epoch: 1
	Training Loss: 3.6494; Training Accuracy: 32.0%
	Validation Loss: 1.7552; Validation Accuracy: 33.85%

	Epoch: 2
	Training Loss: 3.4512; Training Accuracy: 35.9375%
	Validation Loss: 1.6398; Validation Accuracy: 39.75%

	Epoch: 3
	Training Loss: 3.279; Training Accuracy: 38.9%
	Validation Loss: 1.5884; Validation Accuracy: 42.45%

	Epoch: 4
	Training Loss: 3.1639; Training Accuracy: 42.3125%
	Validation Loss: 1.641; Validation Accuracy: 40.9%

	Epoch: 5
	Training Loss: 3.0304; Training Accuracy: 44.1375%
	Validation Loss: 1.5042; Validation Accuracy: 44.9%

	Epoch: 6
	Training Loss: 2.9146; Training Accuracy: 47.1875%
	Validation Loss: 1.403; Validation Accuracy: 49.35%

	Epoch: 7
	Training Loss: 2.8085; Training Accuracy: 48.7%
	Validation Loss: 1.3626; Validation Accuracy: 51.3%

	Epoch: 8
	Training Loss: 2.7207; Training Accuracy: 50.9375

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.7659


[34m[1mwandb[0m: Agent Starting Run: q1yfhd9l with config:
[34m[1mwandb[0m: 	factor: 0.2
[34m[1mwandb[0m: 	patience: 0




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0599; Training Accuracy: 24.325%
	Validation Loss: 1.881; Validation Accuracy: 31.5%

	Epoch: 1
	Training Loss: 3.6248; Training Accuracy: 32.8125%
	Validation Loss: 1.7326; Validation Accuracy: 34.5%

	Epoch: 2
	Training Loss: 3.406; Training Accuracy: 35.9875%
	Validation Loss: 1.6547; Validation Accuracy: 37.25%

	Epoch: 3
	Training Loss: 3.2748; Training Accuracy: 38.4875%
	Validation Loss: 1.6399; Validation Accuracy: 39.3%

	Epoch: 4
	Training Loss: 3.1329; Training Accuracy: 41.9125%
	Validation Loss: 1.5355; Validation Accuracy: 43.85%

	Epoch: 5
Epoch 00006: reducing learning rate of group 0 to 2.0000e-03.
	Training Loss: 3.0127; Training Accuracy: 44.675%
	Validation Loss: 1.5254; Validation Accuracy: 43.55%

	Epoch: 6
	Training Loss: 2.7032; Training Accuracy: 50.5125%
	Validation Loss: 1.3245; Validation Accuracy: 50.3%

	Epoch: 7
	Training Loss: 2.5999; Training Accuracy: 52.075%
	Validation Loss: 1.3097; Validation Accurac

0,1
test_accuracy,▁

0,1
test_accuracy,0.6083


[34m[1mwandb[0m: Agent Starting Run: s9t9qt8q with config:
[34m[1mwandb[0m: 	factor: 0.2
[34m[1mwandb[0m: 	patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1225; Training Accuracy: 24.2%
	Validation Loss: 2.0363; Validation Accuracy: 26.1%

	Epoch: 1
	Training Loss: 3.6742; Training Accuracy: 32.1%
	Validation Loss: 1.7759; Validation Accuracy: 33.95%

	Epoch: 2
	Training Loss: 3.4438; Training Accuracy: 37.0875%
	Validation Loss: 1.6266; Validation Accuracy: 38.9%

	Epoch: 3
	Training Loss: 3.2752; Training Accuracy: 39.3375%
	Validation Loss: 1.663; Validation Accuracy: 40.05%

	Epoch: 4
	Training Loss: 3.1575; Training Accuracy: 42.0875%
	Validation Loss: 1.5501; Validation Accuracy: 41.7%

	Epoch: 5
	Training Loss: 3.0203; Training Accuracy: 44.4375%
	Validation Loss: 1.5424; Validation Accuracy: 44.0%

	Epoch: 6
	Training Loss: 2.9042; Training Accuracy: 46.7%
	Validation Loss: 1.4781; Validation Accuracy: 46.6%

	Epoch: 7
	Training Loss: 2.8012; Training Accuracy: 48.9875%
	Validation Loss: 1.4163; Validation Accuracy: 48.65%

	Epoch: 8
	Training Loss: 2.6744; Training Accuracy: 51.9

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.7345


[34m[1mwandb[0m: Agent Starting Run: vtb602og with config:
[34m[1mwandb[0m: 	factor: 0.2
[34m[1mwandb[0m: 	patience: 2




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1092; Training Accuracy: 23.475%
	Validation Loss: 1.8956; Validation Accuracy: 28.65%

	Epoch: 1
	Training Loss: 3.6626; Training Accuracy: 32.225%
	Validation Loss: 1.7469; Validation Accuracy: 33.9%

	Epoch: 2
	Training Loss: 3.4262; Training Accuracy: 36.5%
	Validation Loss: 1.6227; Validation Accuracy: 39.6%

	Epoch: 3
	Training Loss: 3.2541; Training Accuracy: 39.9875%
	Validation Loss: 1.6234; Validation Accuracy: 40.45%

	Epoch: 4
	Training Loss: 3.1255; Training Accuracy: 43.075%
	Validation Loss: 1.5843; Validation Accuracy: 43.65%

	Epoch: 5
	Training Loss: 3.0022; Training Accuracy: 45.15%
	Validation Loss: 1.4936; Validation Accuracy: 44.4%

	Epoch: 6
	Training Loss: 2.8653; Training Accuracy: 47.775%
	Validation Loss: 1.5061; Validation Accuracy: 45.85%

	Epoch: 7
	Training Loss: 2.7997; Training Accuracy: 49.4625%
	Validation Loss: 1.4155; Validation Accuracy: 47.25%

	Epoch: 8
	Training Loss: 2.6628; Training Accuracy: 5

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.7801


[34m[1mwandb[0m: Agent Starting Run: e3t58cm7 with config:
[34m[1mwandb[0m: 	factor: 0.2
[34m[1mwandb[0m: 	patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1078; Training Accuracy: 23.25%
	Validation Loss: 1.8964; Validation Accuracy: 30.25%

	Epoch: 1
	Training Loss: 3.6169; Training Accuracy: 33.2%
	Validation Loss: 1.681; Validation Accuracy: 36.35%

	Epoch: 2
	Training Loss: 3.4109; Training Accuracy: 36.6375%
	Validation Loss: 1.6871; Validation Accuracy: 36.95%

	Epoch: 3
	Training Loss: 3.2497; Training Accuracy: 40.025%
	Validation Loss: 1.5965; Validation Accuracy: 40.65%

	Epoch: 4
	Training Loss: 3.1046; Training Accuracy: 42.6875%
	Validation Loss: 1.661; Validation Accuracy: 37.0%

	Epoch: 5
	Training Loss: 2.9855; Training Accuracy: 45.2125%
	Validation Loss: 1.4812; Validation Accuracy: 46.1%

	Epoch: 6
	Training Loss: 2.8576; Training Accuracy: 47.4875%
	Validation Loss: 1.516; Validation Accuracy: 46.35%

	Epoch: 7
	Training Loss: 2.742; Training Accuracy: 50.05%
	Validation Loss: 1.3684; Validation Accuracy: 50.55%

	Epoch: 8
	Training Loss: 2.6305; Training Accuracy: 52.

0,1
test_accuracy,▁

0,1
test_accuracy,0.7938


[34m[1mwandb[0m: Agent Starting Run: r0ne1cgt with config:
[34m[1mwandb[0m: 	factor: 0.2
[34m[1mwandb[0m: 	patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0409; Training Accuracy: 25.275%
	Validation Loss: 1.8269; Validation Accuracy: 33.8%

	Epoch: 1
	Training Loss: 3.5799; Training Accuracy: 33.5875%
	Validation Loss: 1.7819; Validation Accuracy: 35.05%

	Epoch: 2
	Training Loss: 3.3889; Training Accuracy: 37.9625%
	Validation Loss: 1.674; Validation Accuracy: 41.75%

	Epoch: 3
	Training Loss: 3.2286; Training Accuracy: 40.525%
	Validation Loss: 1.5585; Validation Accuracy: 42.4%

	Epoch: 4
	Training Loss: 3.112; Training Accuracy: 42.975%
	Validation Loss: 1.5759; Validation Accuracy: 43.25%

	Epoch: 5
	Training Loss: 2.9812; Training Accuracy: 45.4875%
	Validation Loss: 1.493; Validation Accuracy: 44.9%

	Epoch: 6
	Training Loss: 2.8627; Training Accuracy: 48.125%
	Validation Loss: 1.4669; Validation Accuracy: 47.4%

	Epoch: 7
	Training Loss: 2.754; Training Accuracy: 50.1375%
	Validation Loss: 1.3754; Validation Accuracy: 49.6%

	Epoch: 8
	Training Loss: 2.642; Training Accuracy: 52.

0,1
test_accuracy,▁

0,1
test_accuracy,0.7538


[34m[1mwandb[0m: Agent Starting Run: nwywxcfi with config:
[34m[1mwandb[0m: 	factor: 0.2
[34m[1mwandb[0m: 	patience: 10




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1211; Training Accuracy: 23.5125%
	Validation Loss: 1.8636; Validation Accuracy: 30.8%

	Epoch: 1
	Training Loss: 3.6341; Training Accuracy: 32.2875%
	Validation Loss: 1.7986; Validation Accuracy: 30.65%

	Epoch: 2
	Training Loss: 3.4144; Training Accuracy: 36.5625%
	Validation Loss: 1.7064; Validation Accuracy: 38.55%

	Epoch: 3
	Training Loss: 3.292; Training Accuracy: 39.3875%
	Validation Loss: 1.5772; Validation Accuracy: 40.8%

	Epoch: 4
	Training Loss: 3.1588; Training Accuracy: 41.425%
	Validation Loss: 1.5534; Validation Accuracy: 43.7%

	Epoch: 5
	Training Loss: 3.0565; Training Accuracy: 43.9%
	Validation Loss: 1.4334; Validation Accuracy: 46.35%

	Epoch: 6
	Training Loss: 2.8999; Training Accuracy: 47.3625%
	Validation Loss: 1.4261; Validation Accuracy: 47.7%

	Epoch: 7
	Training Loss: 2.8166; Training Accuracy: 48.7625%
	Validation Loss: 1.401; Validation Accuracy: 48.1%

	Epoch: 8
	Training Loss: 2.6836; Training Accuracy: 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.7703


[34m[1mwandb[0m: Agent Starting Run: bt2qod32 with config:
[34m[1mwandb[0m: 	factor: 0.3
[34m[1mwandb[0m: 	patience: 0




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1003; Training Accuracy: 24.225%
	Validation Loss: 1.8651; Validation Accuracy: 29.75%

	Epoch: 1
	Training Loss: 3.6586; Training Accuracy: 32.2125%
	Validation Loss: 1.7692; Validation Accuracy: 35.5%

	Epoch: 2
	Training Loss: 3.4375; Training Accuracy: 36.3375%
	Validation Loss: 1.729; Validation Accuracy: 36.5%

	Epoch: 3
	Training Loss: 3.2777; Training Accuracy: 39.1625%
	Validation Loss: 1.6004; Validation Accuracy: 41.85%

	Epoch: 4
	Training Loss: 3.1265; Training Accuracy: 42.725%
	Validation Loss: 1.5289; Validation Accuracy: 43.15%

	Epoch: 5
	Training Loss: 3.0041; Training Accuracy: 45.0875%
	Validation Loss: 1.5015; Validation Accuracy: 43.9%

	Epoch: 6
	Training Loss: 2.8825; Training Accuracy: 47.4875%
	Validation Loss: 1.4224; Validation Accuracy: 49.45%

	Epoch: 7
Epoch 00008: reducing learning rate of group 0 to 3.0000e-03.
	Training Loss: 2.75; Training Accuracy: 49.9%
	Validation Loss: 1.4501; Validation Accuracy:

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.6689


[34m[1mwandb[0m: Agent Starting Run: ayddz3m0 with config:
[34m[1mwandb[0m: 	factor: 0.3
[34m[1mwandb[0m: 	patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1033; Training Accuracy: 23.65%
	Validation Loss: 1.9345; Validation Accuracy: 29.15%

	Epoch: 1
	Training Loss: 3.6507; Training Accuracy: 32.575%
	Validation Loss: 1.7542; Validation Accuracy: 36.0%

	Epoch: 2
	Training Loss: 3.4252; Training Accuracy: 36.8625%
	Validation Loss: 1.6331; Validation Accuracy: 38.5%

	Epoch: 3
	Training Loss: 3.2732; Training Accuracy: 39.8125%
	Validation Loss: 1.5667; Validation Accuracy: 42.1%

	Epoch: 4
	Training Loss: 3.1411; Training Accuracy: 42.5125%
	Validation Loss: 1.6493; Validation Accuracy: 40.8%

	Epoch: 5
	Training Loss: 3.0588; Training Accuracy: 43.975%
	Validation Loss: 1.4827; Validation Accuracy: 46.4%

	Epoch: 6
	Training Loss: 2.9383; Training Accuracy: 46.025%
	Validation Loss: 1.4628; Validation Accuracy: 47.95%

	Epoch: 7
	Training Loss: 2.8194; Training Accuracy: 49.4%
	Validation Loss: 1.3808; Validation Accuracy: 48.75%

	Epoch: 8
	Training Loss: 2.7253; Training Accuracy: 50

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.7875


[34m[1mwandb[0m: Agent Starting Run: vhlbahgc with config:
[34m[1mwandb[0m: 	factor: 0.3
[34m[1mwandb[0m: 	patience: 2




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1189; Training Accuracy: 23.5875%
	Validation Loss: 1.8347; Validation Accuracy: 32.05%

	Epoch: 1
	Training Loss: 3.6065; Training Accuracy: 33.1625%
	Validation Loss: 1.7146; Validation Accuracy: 37.55%

	Epoch: 2
	Training Loss: 3.3995; Training Accuracy: 37.1875%
	Validation Loss: 1.6339; Validation Accuracy: 40.5%

	Epoch: 3
	Training Loss: 3.227; Training Accuracy: 40.0875%
	Validation Loss: 1.6013; Validation Accuracy: 41.4%

	Epoch: 4
	Training Loss: 3.0813; Training Accuracy: 43.425%
	Validation Loss: 1.5748; Validation Accuracy: 40.3%

	Epoch: 5
	Training Loss: 2.9524; Training Accuracy: 46.1375%
	Validation Loss: 1.4645; Validation Accuracy: 45.9%

	Epoch: 6
	Training Loss: 2.8518; Training Accuracy: 47.825%
	Validation Loss: 1.4549; Validation Accuracy: 47.45%

	Epoch: 7
	Training Loss: 2.717; Training Accuracy: 50.6%
	Validation Loss: 1.3949; Validation Accuracy: 48.6%

	Epoch: 8
	Training Loss: 2.6003; Training Accuracy: 5

VBox(children=(Label(value='0.001 MB of 0.013 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.087009…

0,1
test_accuracy,▁

0,1
test_accuracy,0.7781


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 103hks81 with config:
[34m[1mwandb[0m: 	factor: 0.3
[34m[1mwandb[0m: 	patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1121; Training Accuracy: 23.525%
	Validation Loss: 1.8707; Validation Accuracy: 30.3%

	Epoch: 1
	Training Loss: 3.6247; Training Accuracy: 32.8125%
	Validation Loss: 1.8073; Validation Accuracy: 35.3%

	Epoch: 2
	Training Loss: 3.4002; Training Accuracy: 37.1375%
	Validation Loss: 1.6807; Validation Accuracy: 36.75%

	Epoch: 3
	Training Loss: 3.1996; Training Accuracy: 40.85%
	Validation Loss: 1.5544; Validation Accuracy: 42.6%

	Epoch: 4
	Training Loss: 3.0742; Training Accuracy: 43.1%
	Validation Loss: 1.4942; Validation Accuracy: 44.8%

	Epoch: 5
	Training Loss: 2.9239; Training Accuracy: 46.225%
	Validation Loss: 1.4538; Validation Accuracy: 46.3%

	Epoch: 6
	Training Loss: 2.8131; Training Accuracy: 49.1%
	Validation Loss: 1.3933; Validation Accuracy: 49.5%

	Epoch: 7
	Training Loss: 2.6758; Training Accuracy: 50.8375%
	Validation Loss: 1.3618; Validation Accuracy: 50.85%

	Epoch: 8
	Training Loss: 2.6028; Training Accuracy: 52.37

0,1
test_accuracy,▁

0,1
test_accuracy,0.7915


[34m[1mwandb[0m: Agent Starting Run: sks10sgm with config:
[34m[1mwandb[0m: 	factor: 0.3
[34m[1mwandb[0m: 	patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0935; Training Accuracy: 24.65%
	Validation Loss: 2.0212; Validation Accuracy: 26.9%

	Epoch: 1
	Training Loss: 3.6469; Training Accuracy: 32.0875%
	Validation Loss: 1.7507; Validation Accuracy: 33.8%

	Epoch: 2
	Training Loss: 3.3944; Training Accuracy: 37.1%
	Validation Loss: 1.6802; Validation Accuracy: 38.8%

	Epoch: 3
	Training Loss: 3.237; Training Accuracy: 40.7375%
	Validation Loss: 1.5697; Validation Accuracy: 41.15%

	Epoch: 4
	Training Loss: 3.11; Training Accuracy: 43.35%
	Validation Loss: 1.508; Validation Accuracy: 44.35%

	Epoch: 5
	Training Loss: 2.9506; Training Accuracy: 46.675%
	Validation Loss: 1.4466; Validation Accuracy: 46.8%

	Epoch: 6
	Training Loss: 2.859; Training Accuracy: 48.05%
	Validation Loss: 1.5214; Validation Accuracy: 44.65%

	Epoch: 7
	Training Loss: 2.7332; Training Accuracy: 50.4625%
	Validation Loss: 1.3835; Validation Accuracy: 49.5%

	Epoch: 8
	Training Loss: 2.6307; Training Accuracy: 53.2625%


0,1
test_accuracy,▁

0,1
test_accuracy,0.8


[34m[1mwandb[0m: Agent Starting Run: u9myahi4 with config:
[34m[1mwandb[0m: 	factor: 0.3
[34m[1mwandb[0m: 	patience: 10




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0353; Training Accuracy: 25.6%
	Validation Loss: 1.8415; Validation Accuracy: 31.35%

	Epoch: 1
	Training Loss: 3.5456; Training Accuracy: 33.8%
	Validation Loss: 1.7351; Validation Accuracy: 35.6%

	Epoch: 2
	Training Loss: 3.3412; Training Accuracy: 39.0%
	Validation Loss: 1.6804; Validation Accuracy: 37.4%

	Epoch: 3
	Training Loss: 3.2232; Training Accuracy: 40.55%
	Validation Loss: 1.6289; Validation Accuracy: 39.75%

	Epoch: 4
	Training Loss: 3.0693; Training Accuracy: 43.975%
	Validation Loss: 1.5285; Validation Accuracy: 43.3%

	Epoch: 5
	Training Loss: 2.9287; Training Accuracy: 46.425%
	Validation Loss: 1.4185; Validation Accuracy: 48.75%

	Epoch: 6
	Training Loss: 2.8307; Training Accuracy: 48.3375%
	Validation Loss: 1.4247; Validation Accuracy: 48.1%

	Epoch: 7
	Training Loss: 2.7365; Training Accuracy: 50.3%
	Validation Loss: 1.375; Validation Accuracy: 51.9%

	Epoch: 8
	Training Loss: 2.5677; Training Accuracy: 54.45%
	Val

0,1
test_accuracy,▁

0,1
test_accuracy,0.7481


[34m[1mwandb[0m: Agent Starting Run: g7uu7973 with config:
[34m[1mwandb[0m: 	factor: 0.4
[34m[1mwandb[0m: 	patience: 0




Num Params: 4891338


	Epoch: 0
	Training Loss: 3.9973; Training Accuracy: 26.65%
	Validation Loss: 1.8307; Validation Accuracy: 32.25%

	Epoch: 1
	Training Loss: 3.5567; Training Accuracy: 33.65%
	Validation Loss: 1.813; Validation Accuracy: 33.3%

	Epoch: 2
	Training Loss: 3.3725; Training Accuracy: 37.9625%
	Validation Loss: 1.6465; Validation Accuracy: 38.8%

	Epoch: 3
	Training Loss: 3.2012; Training Accuracy: 41.175%
	Validation Loss: 1.5375; Validation Accuracy: 43.0%

	Epoch: 4
	Training Loss: 3.0762; Training Accuracy: 44.725%
	Validation Loss: 1.5037; Validation Accuracy: 46.65%

	Epoch: 5
Epoch 00006: reducing learning rate of group 0 to 4.0000e-03.
	Training Loss: 2.9185; Training Accuracy: 47.575%
	Validation Loss: 1.6204; Validation Accuracy: 41.8%

	Epoch: 6
	Training Loss: 2.6622; Training Accuracy: 52.175%
	Validation Loss: 1.3397; Validation Accuracy: 50.95%

	Epoch: 7
	Training Loss: 2.54; Training Accuracy: 53.65%
	Validation Loss: 1.2905; Validation Accuracy: 52.4%

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.6741


[34m[1mwandb[0m: Agent Starting Run: wanpnadz with config:
[34m[1mwandb[0m: 	factor: 0.4
[34m[1mwandb[0m: 	patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 3.9769; Training Accuracy: 25.6375%
	Validation Loss: 1.8124; Validation Accuracy: 31.45%

	Epoch: 1
	Training Loss: 3.5056; Training Accuracy: 35.25%
	Validation Loss: 1.7058; Validation Accuracy: 38.3%

	Epoch: 2
	Training Loss: 3.2986; Training Accuracy: 39.0875%
	Validation Loss: 1.5874; Validation Accuracy: 43.8%

	Epoch: 3
	Training Loss: 3.1398; Training Accuracy: 42.4%
	Validation Loss: 1.6388; Validation Accuracy: 40.7%

	Epoch: 4
	Training Loss: 2.9624; Training Accuracy: 45.725%
	Validation Loss: 1.5003; Validation Accuracy: 46.55%

	Epoch: 5
	Training Loss: 2.8742; Training Accuracy: 47.825%
	Validation Loss: 1.3758; Validation Accuracy: 49.9%

	Epoch: 6
	Training Loss: 2.7138; Training Accuracy: 51.425%
	Validation Loss: 1.3622; Validation Accuracy: 49.55%

	Epoch: 7
	Training Loss: 2.612; Training Accuracy: 53.325%
	Validation Loss: 1.2331; Validation Accuracy: 54.85%

	Epoch: 8
	Training Loss: 2.4881; Training Accuracy: 55.

0,1
test_accuracy,▁

0,1
test_accuracy,0.7569


[34m[1mwandb[0m: Agent Starting Run: n758vpjk with config:
[34m[1mwandb[0m: 	factor: 0.4
[34m[1mwandb[0m: 	patience: 2




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0961; Training Accuracy: 23.9125%
	Validation Loss: 1.8705; Validation Accuracy: 29.6%

	Epoch: 1
	Training Loss: 3.6095; Training Accuracy: 33.0125%
	Validation Loss: 1.7793; Validation Accuracy: 33.45%

	Epoch: 2
	Training Loss: 3.3936; Training Accuracy: 36.6375%
	Validation Loss: 1.6766; Validation Accuracy: 37.1%

	Epoch: 3
	Training Loss: 3.2679; Training Accuracy: 40.5125%
	Validation Loss: 1.5961; Validation Accuracy: 40.3%

	Epoch: 4
	Training Loss: 3.1024; Training Accuracy: 42.525%
	Validation Loss: 1.6258; Validation Accuracy: 39.2%

	Epoch: 5
	Training Loss: 2.9351; Training Accuracy: 47.3%
	Validation Loss: 1.3775; Validation Accuracy: 50.35%

	Epoch: 6
	Training Loss: 2.8474; Training Accuracy: 48.4875%
	Validation Loss: 1.4382; Validation Accuracy: 48.95%

	Epoch: 7
	Training Loss: 2.7397; Training Accuracy: 50.65%
	Validation Loss: 1.3746; Validation Accuracy: 49.65%

	Epoch: 8
Epoch 00009: reducing learning rate of gro

0,1
test_accuracy,▁

0,1
test_accuracy,0.7757


[34m[1mwandb[0m: Agent Starting Run: bib5t6f8 with config:
[34m[1mwandb[0m: 	factor: 0.4
[34m[1mwandb[0m: 	patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.111; Training Accuracy: 24.275%
	Validation Loss: 1.8835; Validation Accuracy: 31.0%

	Epoch: 1
	Training Loss: 3.7305; Training Accuracy: 31.075%
	Validation Loss: 1.8374; Validation Accuracy: 33.5%

	Epoch: 2
	Training Loss: 3.4133; Training Accuracy: 37.525%
	Validation Loss: 1.7255; Validation Accuracy: 36.35%

	Epoch: 3
	Training Loss: 3.2656; Training Accuracy: 40.125%
	Validation Loss: 1.5998; Validation Accuracy: 40.3%

	Epoch: 4
	Training Loss: 3.1139; Training Accuracy: 42.8875%
	Validation Loss: 1.5519; Validation Accuracy: 43.6%

	Epoch: 5
	Training Loss: 2.9887; Training Accuracy: 44.9875%
	Validation Loss: 1.5337; Validation Accuracy: 44.85%

	Epoch: 6
	Training Loss: 2.8664; Training Accuracy: 48.2125%
	Validation Loss: 1.3961; Validation Accuracy: 49.05%

	Epoch: 7
	Training Loss: 2.769; Training Accuracy: 50.4%
	Validation Loss: 1.4157; Validation Accuracy: 49.6%

	Epoch: 8
	Training Loss: 2.6474; Training Accuracy: 52.

0,1
test_accuracy,▁

0,1
test_accuracy,0.8024


[34m[1mwandb[0m: Agent Starting Run: kjelird2 with config:
[34m[1mwandb[0m: 	factor: 0.4
[34m[1mwandb[0m: 	patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0882; Training Accuracy: 23.8375%
	Validation Loss: 1.8502; Validation Accuracy: 30.05%

	Epoch: 1
	Training Loss: 3.6741; Training Accuracy: 31.7375%
	Validation Loss: 1.7231; Validation Accuracy: 35.35%

	Epoch: 2
	Training Loss: 3.4957; Training Accuracy: 34.8625%
	Validation Loss: 1.7303; Validation Accuracy: 35.4%

	Epoch: 3
	Training Loss: 3.326; Training Accuracy: 38.925%
	Validation Loss: 1.5922; Validation Accuracy: 41.6%

	Epoch: 4
	Training Loss: 3.1908; Training Accuracy: 40.9375%
	Validation Loss: 1.6992; Validation Accuracy: 36.35%

	Epoch: 5
	Training Loss: 3.051; Training Accuracy: 43.725%
	Validation Loss: 1.5058; Validation Accuracy: 43.9%

	Epoch: 6
	Training Loss: 2.9814; Training Accuracy: 44.9125%
	Validation Loss: 1.4579; Validation Accuracy: 45.05%

	Epoch: 7
	Training Loss: 2.863; Training Accuracy: 48.0875%
	Validation Loss: 1.478; Validation Accuracy: 46.0%

	Epoch: 8
	Training Loss: 2.7723; Training Accuracy:

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.8042


[34m[1mwandb[0m: Agent Starting Run: 4a4punm9 with config:
[34m[1mwandb[0m: 	factor: 0.4
[34m[1mwandb[0m: 	patience: 10




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0619; Training Accuracy: 24.3625%
	Validation Loss: 1.8587; Validation Accuracy: 31.1%

	Epoch: 1
	Training Loss: 3.5708; Training Accuracy: 33.475%
	Validation Loss: 1.7024; Validation Accuracy: 36.6%

	Epoch: 2
	Training Loss: 3.3826; Training Accuracy: 38.1%
	Validation Loss: 1.6792; Validation Accuracy: 39.2%

	Epoch: 3
	Training Loss: 3.2299; Training Accuracy: 40.5875%
	Validation Loss: 1.6235; Validation Accuracy: 38.9%

	Epoch: 4
	Training Loss: 3.0969; Training Accuracy: 42.75%
	Validation Loss: 1.5649; Validation Accuracy: 43.05%

	Epoch: 5
	Training Loss: 2.9757; Training Accuracy: 45.775%
	Validation Loss: 1.4316; Validation Accuracy: 47.3%

	Epoch: 6
	Training Loss: 2.8544; Training Accuracy: 48.15%
	Validation Loss: 1.4218; Validation Accuracy: 47.75%

	Epoch: 7
	Training Loss: 2.7503; Training Accuracy: 49.7%
	Validation Loss: 1.3281; Validation Accuracy: 50.95%

	Epoch: 8
	Training Loss: 2.6266; Training Accuracy: 52.625

0,1
test_accuracy,▁

0,1
test_accuracy,0.7772


[34m[1mwandb[0m: Agent Starting Run: x2vpuc7o with config:
[34m[1mwandb[0m: 	factor: 0.5
[34m[1mwandb[0m: 	patience: 0




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0973; Training Accuracy: 23.825%
	Validation Loss: 1.8698; Validation Accuracy: 31.95%

	Epoch: 1
	Training Loss: 3.6174; Training Accuracy: 32.7125%
	Validation Loss: 1.777; Validation Accuracy: 33.95%

	Epoch: 2
	Training Loss: 3.3956; Training Accuracy: 36.9375%
	Validation Loss: 1.675; Validation Accuracy: 39.2%

	Epoch: 3
	Training Loss: 3.2312; Training Accuracy: 39.95%
	Validation Loss: 1.6094; Validation Accuracy: 39.95%

	Epoch: 4
Epoch 00005: reducing learning rate of group 0 to 5.0000e-03.
	Training Loss: 3.0944; Training Accuracy: 43.3375%
	Validation Loss: 1.8415; Validation Accuracy: 37.6%

	Epoch: 5
	Training Loss: 2.8652; Training Accuracy: 47.8875%
	Validation Loss: 1.3918; Validation Accuracy: 48.5%

	Epoch: 6
	Training Loss: 2.7471; Training Accuracy: 50.0625%
	Validation Loss: 1.4311; Validation Accuracy: 49.15%

	Epoch: 7
	Training Loss: 2.6595; Training Accuracy: 51.65%
	Validation Loss: 1.3758; Validation Accuracy

0,1
test_accuracy,▁

0,1
test_accuracy,0.7193


[34m[1mwandb[0m: Agent Starting Run: 54avctur with config:
[34m[1mwandb[0m: 	factor: 0.5
[34m[1mwandb[0m: 	patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.061; Training Accuracy: 24.7%
	Validation Loss: 1.8592; Validation Accuracy: 29.95%

	Epoch: 1
	Training Loss: 3.6152; Training Accuracy: 32.45%
	Validation Loss: 1.7601; Validation Accuracy: 34.25%

	Epoch: 2
	Training Loss: 3.4256; Training Accuracy: 35.9125%
	Validation Loss: 1.667; Validation Accuracy: 39.3%

	Epoch: 3
	Training Loss: 3.2536; Training Accuracy: 40.175%
	Validation Loss: 1.6146; Validation Accuracy: 41.25%

	Epoch: 4
	Training Loss: 3.1539; Training Accuracy: 41.8%
	Validation Loss: 1.5062; Validation Accuracy: 44.5%

	Epoch: 5
	Training Loss: 2.9845; Training Accuracy: 45.325%
	Validation Loss: 1.4692; Validation Accuracy: 47.15%

	Epoch: 6
	Training Loss: 2.8367; Training Accuracy: 48.35%
	Validation Loss: 1.4212; Validation Accuracy: 49.35%

	Epoch: 7
	Training Loss: 2.7375; Training Accuracy: 50.9%
	Validation Loss: 1.4243; Validation Accuracy: 50.05%

	Epoch: 8
	Training Loss: 2.6235; Training Accuracy: 52.75%
	

0,1
test_accuracy,▁

0,1
test_accuracy,0.7853


[34m[1mwandb[0m: Agent Starting Run: ud6ndcju with config:
[34m[1mwandb[0m: 	factor: 0.5
[34m[1mwandb[0m: 	patience: 2




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0676; Training Accuracy: 24.0375%
	Validation Loss: 1.8918; Validation Accuracy: 29.1%

	Epoch: 1
	Training Loss: 3.5864; Training Accuracy: 32.85%
	Validation Loss: 1.7308; Validation Accuracy: 35.35%

	Epoch: 2
	Training Loss: 3.4051; Training Accuracy: 36.0%
	Validation Loss: 1.6605; Validation Accuracy: 36.7%

	Epoch: 3
	Training Loss: 3.2752; Training Accuracy: 39.2%
	Validation Loss: 1.5729; Validation Accuracy: 42.55%

	Epoch: 4
	Training Loss: 3.1105; Training Accuracy: 43.2375%
	Validation Loss: 1.5462; Validation Accuracy: 43.35%

	Epoch: 5
	Training Loss: 2.9927; Training Accuracy: 45.5125%
	Validation Loss: 1.4918; Validation Accuracy: 46.15%

	Epoch: 6
	Training Loss: 2.8748; Training Accuracy: 48.075%
	Validation Loss: 1.6202; Validation Accuracy: 43.5%

	Epoch: 7
	Training Loss: 2.7779; Training Accuracy: 49.8125%
	Validation Loss: 1.4217; Validation Accuracy: 47.8%

	Epoch: 8
	Training Loss: 2.686; Training Accuracy: 51.

0,1
test_accuracy,▁

0,1
test_accuracy,0.7855


[34m[1mwandb[0m: Agent Starting Run: p8ldipzq with config:
[34m[1mwandb[0m: 	factor: 0.5
[34m[1mwandb[0m: 	patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0477; Training Accuracy: 25.9125%
	Validation Loss: 1.893; Validation Accuracy: 30.6%

	Epoch: 1
	Training Loss: 3.5765; Training Accuracy: 33.775%
	Validation Loss: 1.7953; Validation Accuracy: 35.6%

	Epoch: 2
	Training Loss: 3.382; Training Accuracy: 37.9625%
	Validation Loss: 1.6453; Validation Accuracy: 40.45%

	Epoch: 3
	Training Loss: 3.2282; Training Accuracy: 41.1%
	Validation Loss: 1.5649; Validation Accuracy: 42.8%

	Epoch: 4
	Training Loss: 3.0903; Training Accuracy: 43.5%
	Validation Loss: 1.5245; Validation Accuracy: 45.45%

	Epoch: 5
	Training Loss: 2.9711; Training Accuracy: 45.7125%
	Validation Loss: 1.6679; Validation Accuracy: 44.25%

	Epoch: 6
	Training Loss: 2.8606; Training Accuracy: 48.3125%
	Validation Loss: 1.4331; Validation Accuracy: 48.05%

	Epoch: 7
	Training Loss: 2.7205; Training Accuracy: 50.8375%
	Validation Loss: 1.4131; Validation Accuracy: 49.65%

	Epoch: 8
	Training Loss: 2.6358; Training Accuracy: 5

0,1
test_accuracy,▁

0,1
test_accuracy,0.7844


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 61tnahbz with config:
[34m[1mwandb[0m: 	factor: 0.5
[34m[1mwandb[0m: 	patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0379; Training Accuracy: 25.6375%
	Validation Loss: 1.8608; Validation Accuracy: 33.2%

	Epoch: 1
	Training Loss: 3.645; Training Accuracy: 32.675%
	Validation Loss: 1.7865; Validation Accuracy: 32.85%

	Epoch: 2
	Training Loss: 3.4089; Training Accuracy: 37.5125%
	Validation Loss: 1.6404; Validation Accuracy: 39.5%

	Epoch: 3
	Training Loss: 3.258; Training Accuracy: 39.625%
	Validation Loss: 1.5662; Validation Accuracy: 43.15%

	Epoch: 4
	Training Loss: 3.1209; Training Accuracy: 42.9%
	Validation Loss: 1.595; Validation Accuracy: 41.0%

	Epoch: 5
	Training Loss: 2.9811; Training Accuracy: 45.425%
	Validation Loss: 1.5384; Validation Accuracy: 45.35%

	Epoch: 6
	Training Loss: 2.8552; Training Accuracy: 47.3875%
	Validation Loss: 1.4633; Validation Accuracy: 48.2%

	Epoch: 7
	Training Loss: 2.7737; Training Accuracy: 50.4%
	Validation Loss: 1.4679; Validation Accuracy: 46.9%

	Epoch: 8
	Training Loss: 2.6414; Training Accuracy: 51.962

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.7828


[34m[1mwandb[0m: Agent Starting Run: 66p863hi with config:
[34m[1mwandb[0m: 	factor: 0.5
[34m[1mwandb[0m: 	patience: 10




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0986; Training Accuracy: 24.95%
	Validation Loss: 1.8718; Validation Accuracy: 29.9%

	Epoch: 1
	Training Loss: 3.605; Training Accuracy: 32.25%
	Validation Loss: 1.7479; Validation Accuracy: 33.9%

	Epoch: 2
	Training Loss: 3.4006; Training Accuracy: 37.2625%
	Validation Loss: 1.678; Validation Accuracy: 36.65%

	Epoch: 3
	Training Loss: 3.241; Training Accuracy: 40.0375%
	Validation Loss: 1.6225; Validation Accuracy: 39.45%

	Epoch: 4
	Training Loss: 3.1013; Training Accuracy: 42.775%
	Validation Loss: 1.5956; Validation Accuracy: 40.8%

	Epoch: 5
	Training Loss: 2.9772; Training Accuracy: 45.625%
	Validation Loss: 1.4477; Validation Accuracy: 47.4%

	Epoch: 6
	Training Loss: 2.8496; Training Accuracy: 48.2625%
	Validation Loss: 1.469; Validation Accuracy: 46.6%

	Epoch: 7
	Training Loss: 2.7262; Training Accuracy: 50.1625%
	Validation Loss: 1.4127; Validation Accuracy: 48.55%

	Epoch: 8
	Training Loss: 2.6015; Training Accuracy: 52.9

0,1
test_accuracy,▁

0,1
test_accuracy,0.7593


[34m[1mwandb[0m: Agent Starting Run: hzg81o8n with config:
[34m[1mwandb[0m: 	factor: 0.6
[34m[1mwandb[0m: 	patience: 0




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0822; Training Accuracy: 24.75%
	Validation Loss: 1.8678; Validation Accuracy: 30.35%

	Epoch: 1
	Training Loss: 3.6351; Training Accuracy: 32.2875%
	Validation Loss: 1.7378; Validation Accuracy: 35.65%

	Epoch: 2
	Training Loss: 3.384; Training Accuracy: 37.6625%
	Validation Loss: 1.5898; Validation Accuracy: 41.3%

	Epoch: 3
Epoch 00004: reducing learning rate of group 0 to 6.0000e-03.
	Training Loss: 3.2514; Training Accuracy: 40.4375%
	Validation Loss: 1.5997; Validation Accuracy: 39.8%

	Epoch: 4
	Training Loss: 2.9832; Training Accuracy: 45.45%
	Validation Loss: 1.5242; Validation Accuracy: 42.8%

	Epoch: 5
	Training Loss: 2.8801; Training Accuracy: 47.65%
	Validation Loss: 1.4175; Validation Accuracy: 48.35%

	Epoch: 6
	Training Loss: 2.7646; Training Accuracy: 49.8375%
	Validation Loss: 1.3591; Validation Accuracy: 50.6%

	Epoch: 7
Epoch 00008: reducing learning rate of group 0 to 3.6000e-03.
	Training Loss: 2.6671; Training Acc

0,1
test_accuracy,▁

0,1
test_accuracy,0.6898


[34m[1mwandb[0m: Agent Starting Run: co29kfsr with config:
[34m[1mwandb[0m: 	factor: 0.6
[34m[1mwandb[0m: 	patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.075; Training Accuracy: 23.9375%
	Validation Loss: 1.8528; Validation Accuracy: 29.35%

	Epoch: 1
	Training Loss: 3.6313; Training Accuracy: 32.7875%
	Validation Loss: 1.8365; Validation Accuracy: 34.35%

	Epoch: 2
	Training Loss: 3.3947; Training Accuracy: 36.4375%
	Validation Loss: 1.6881; Validation Accuracy: 38.1%

	Epoch: 3
	Training Loss: 3.259; Training Accuracy: 39.7875%
	Validation Loss: 1.5895; Validation Accuracy: 42.35%

	Epoch: 4
	Training Loss: 3.1297; Training Accuracy: 42.15%
	Validation Loss: 1.5333; Validation Accuracy: 44.0%

	Epoch: 5
	Training Loss: 2.9913; Training Accuracy: 45.725%
	Validation Loss: 1.5568; Validation Accuracy: 43.6%

	Epoch: 6
	Training Loss: 2.8798; Training Accuracy: 47.25%
	Validation Loss: 1.3941; Validation Accuracy: 46.75%

	Epoch: 7
	Training Loss: 2.7602; Training Accuracy: 50.35%
	Validation Loss: 1.4411; Validation Accuracy: 48.4%

	Epoch: 8
	Training Loss: 2.6542; Training Accuracy: 51

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.8006


[34m[1mwandb[0m: Agent Starting Run: gtltms2e with config:
[34m[1mwandb[0m: 	factor: 0.6
[34m[1mwandb[0m: 	patience: 2




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.082; Training Accuracy: 24.35%
	Validation Loss: 1.8669; Validation Accuracy: 29.45%

	Epoch: 1
	Training Loss: 3.6307; Training Accuracy: 32.7125%
	Validation Loss: 1.7075; Validation Accuracy: 35.25%

	Epoch: 2
	Training Loss: 3.4046; Training Accuracy: 36.225%
	Validation Loss: 1.7465; Validation Accuracy: 39.05%

	Epoch: 3
	Training Loss: 3.2639; Training Accuracy: 39.5125%
	Validation Loss: 1.6509; Validation Accuracy: 39.6%

	Epoch: 4
	Training Loss: 3.1376; Training Accuracy: 42.4375%
	Validation Loss: 1.5516; Validation Accuracy: 43.85%

	Epoch: 5
	Training Loss: 3.0054; Training Accuracy: 45.0%
	Validation Loss: 1.4991; Validation Accuracy: 44.0%

	Epoch: 6
	Training Loss: 2.8898; Training Accuracy: 47.7875%
	Validation Loss: 1.5539; Validation Accuracy: 44.3%

	Epoch: 7
	Training Loss: 2.7687; Training Accuracy: 50.0625%
	Validation Loss: 1.3922; Validation Accuracy: 49.45%

	Epoch: 8
	Training Loss: 2.6808; Training Accuracy:

0,1
test_accuracy,▁

0,1
test_accuracy,0.7945


[34m[1mwandb[0m: Agent Starting Run: 4740eaie with config:
[34m[1mwandb[0m: 	factor: 0.6
[34m[1mwandb[0m: 	patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0896; Training Accuracy: 23.9875%
	Validation Loss: 1.8413; Validation Accuracy: 31.6%

	Epoch: 1
	Training Loss: 3.6156; Training Accuracy: 32.5625%
	Validation Loss: 1.8878; Validation Accuracy: 33.2%

	Epoch: 2
	Training Loss: 3.419; Training Accuracy: 36.5125%
	Validation Loss: 1.6409; Validation Accuracy: 39.4%

	Epoch: 3
	Training Loss: 3.2293; Training Accuracy: 40.05%
	Validation Loss: 1.6008; Validation Accuracy: 41.5%

	Epoch: 4
	Training Loss: 3.1213; Training Accuracy: 42.6875%
	Validation Loss: 1.5552; Validation Accuracy: 42.55%

	Epoch: 5
	Training Loss: 2.9927; Training Accuracy: 45.7625%
	Validation Loss: 1.4518; Validation Accuracy: 47.95%

	Epoch: 6
	Training Loss: 2.8876; Training Accuracy: 47.1375%
	Validation Loss: 1.4066; Validation Accuracy: 49.65%

	Epoch: 7
	Training Loss: 2.7737; Training Accuracy: 50.0625%
	Validation Loss: 1.4019; Validation Accuracy: 48.8%

	Epoch: 8
	Training Loss: 2.6381; Training Accurac

0,1
test_accuracy,▁

0,1
test_accuracy,0.794


[34m[1mwandb[0m: Agent Starting Run: nk7bjsns with config:
[34m[1mwandb[0m: 	factor: 0.6
[34m[1mwandb[0m: 	patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0283; Training Accuracy: 24.9875%
	Validation Loss: 1.9064; Validation Accuracy: 31.1%

	Epoch: 1
	Training Loss: 3.5584; Training Accuracy: 34.6%
	Validation Loss: 1.7581; Validation Accuracy: 37.05%

	Epoch: 2
	Training Loss: 3.3731; Training Accuracy: 37.7375%
	Validation Loss: 1.7302; Validation Accuracy: 37.35%

	Epoch: 3
	Training Loss: 3.2087; Training Accuracy: 41.0375%
	Validation Loss: 1.6865; Validation Accuracy: 40.45%

	Epoch: 4
	Training Loss: 3.0763; Training Accuracy: 43.6%
	Validation Loss: 1.5451; Validation Accuracy: 42.8%

	Epoch: 5
	Training Loss: 2.9692; Training Accuracy: 46.1%
	Validation Loss: 1.4546; Validation Accuracy: 47.5%

	Epoch: 6
	Training Loss: 2.8655; Training Accuracy: 47.2375%
	Validation Loss: 1.4937; Validation Accuracy: 48.15%

	Epoch: 7
	Training Loss: 2.7902; Training Accuracy: 49.375%
	Validation Loss: 1.4803; Validation Accuracy: 45.65%

	Epoch: 8
	Training Loss: 2.6386; Training Accuracy: 51

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.759


[34m[1mwandb[0m: Agent Starting Run: xe8742pd with config:
[34m[1mwandb[0m: 	factor: 0.6
[34m[1mwandb[0m: 	patience: 10




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1692; Training Accuracy: 21.6625%
	Validation Loss: 1.9251; Validation Accuracy: 27.05%

	Epoch: 1
	Training Loss: 3.6585; Training Accuracy: 31.675%
	Validation Loss: 1.753; Validation Accuracy: 36.4%

	Epoch: 2
	Training Loss: 3.4166; Training Accuracy: 35.9625%
	Validation Loss: 1.8182; Validation Accuracy: 37.1%

	Epoch: 3
	Training Loss: 3.2744; Training Accuracy: 39.9875%
	Validation Loss: 1.625; Validation Accuracy: 40.5%

	Epoch: 4
	Training Loss: 3.1081; Training Accuracy: 42.775%
	Validation Loss: 1.5743; Validation Accuracy: 41.7%

	Epoch: 5
	Training Loss: 2.9979; Training Accuracy: 44.925%
	Validation Loss: 1.5765; Validation Accuracy: 42.75%

	Epoch: 6
	Training Loss: 2.9191; Training Accuracy: 47.125%
	Validation Loss: 1.4574; Validation Accuracy: 47.45%

	Epoch: 7
	Training Loss: 2.8178; Training Accuracy: 49.2625%
	Validation Loss: 1.4552; Validation Accuracy: 48.05%

	Epoch: 8
	Training Loss: 2.7083; Training Accuracy:

0,1
test_accuracy,▁

0,1
test_accuracy,0.7723


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.


### Running the best hyperparameters for 100 epochs

In [12]:
batch_size = 32
    
transform_train = torchvision.transforms.Compose([
  torchvision.transforms.RandomCrop(32, padding=4),
  torchvision.transforms.RandomHorizontalFlip(),
  torchvision.transforms.RandomResizedCrop(32, scale=(0.8, 1.0), ratio=(0.8, 1.2)),
  torchvision.transforms.ToTensor(),
  torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

transform_test = torchvision.transforms.Compose([
                  torchvision.transforms.ToTensor(), 
                  torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

trainset = torchvision.datasets.CIFAR10(root = './data', train = True, download = True, transform = transform_train)

# Split the train data into train and validation sets
train_size = int(0.8 * len(trainset))
val_size = len(trainset) - train_size
trainset, valset = torch.utils.data.random_split(trainset, [train_size, val_size])

testset = torchvision.datasets.CIFAR10(root = './data', train = False, download = True, transform = transform_test)

train_loader = torch.utils.data.DataLoader(trainset, batch_size = batch_size, shuffle = True)
val_loader   = torch.utils.data.DataLoader(valset, batch_size = batch_size, shuffle = True)
test_loader  = torch.utils.data.DataLoader(testset, batch_size = batch_size, shuffle = False)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
    
best_test_acc = 0

torch.cuda.empty_cache()
model = ZigZag_ResNet(BasicBlock, [2, 2, 2, 2, 2, 1, 1])
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Num Params: {num_params}\n")
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), 0.01, momentum = 0.8, weight_decay = 0.0005 , nesterov=True)

scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=5, verbose = True)

train_losses_ = []
train_accuracies_ = []
valid_losses_ = []
valid_accuracies_ = []

Files already downloaded and verified
Files already downloaded and verified
Num Params: 4891338



In [13]:
print(f"Train: {len(trainset)}; Validation: {len(valset)}; Test: {len(testset)}")

Train: 40000; Validation: 10000; Test: 10000


In [14]:
epochs = 100

for epoch in range(epochs):
    print(f"\n\tEpoch: {epoch}")

    train_loss, train_accuracy, val_loss, val_accuracy = train_val(model, criterion, optimizer, 
                                                                train_loader, val_loader, device,
                                                                scheduler = scheduler, use_scheduler = True)
    train_losses_.append(train_loss)
    train_accuracies_.append(train_accuracy)
    valid_losses_.append(val_loss)
    valid_accuracies_.append(val_accuracy)
    print(f"\tTraining Loss: {round(train_loss, 4)}; Training Accuracy: {round(train_accuracy*100, 4)}%")
    print(f"\tValidation Loss: {round(val_loss, 4)}; Validation Accuracy: {round(val_accuracy*100, 4)}%")

test_loss, test_accuracy = test(model, criterion, test_loader, device)
print(f"\n\tTesting Loss: {round(test_loss, 4)}; Testing Accuracy: {round(test_accuracy*100, 4)}%")

if test_accuracy > best_test_acc:
    best_test_acc = test_accuracy

    torch.save(model.state_dict(), 'zigzag_resnet_tuned.pth')

    metrics_dict = {'train_loss': train_losses_, 'train_accuracy': train_accuracies_, 
                  'valid_loss': valid_losses_, 'valid_accuracy': valid_accuracies_,
                  'test_loss': test_loss, 'test_accuracy': test_accuracy}


	Epoch: 0
	Training Loss: 3.4809; Training Accuracy: 35.82%
	Validation Loss: 1.4469; Validation Accuracy: 47.46%

	Epoch: 1
	Training Loss: 2.7637; Training Accuracy: 50.04%
	Validation Loss: 1.2508; Validation Accuracy: 54.9%

	Epoch: 2
	Training Loss: 2.2853; Training Accuracy: 59.295%
	Validation Loss: 1.0214; Validation Accuracy: 63.82%

	Epoch: 3
	Training Loss: 1.9332; Training Accuracy: 66.0725%
	Validation Loss: 0.8924; Validation Accuracy: 68.26%

	Epoch: 4
	Training Loss: 1.707; Training Accuracy: 70.775%
	Validation Loss: 0.7887; Validation Accuracy: 72.32%

	Epoch: 5
	Training Loss: 1.5247; Training Accuracy: 73.725%
	Validation Loss: 0.7433; Validation Accuracy: 74.25%

	Epoch: 6
	Training Loss: 1.3991; Training Accuracy: 75.625%
	Validation Loss: 0.694; Validation Accuracy: 76.01%

	Epoch: 7
	Training Loss: 1.2909; Training Accuracy: 77.875%
	Validation Loss: 0.7053; Validation Accuracy: 76.1%

	Epoch: 8
	Training Loss: 1.2107; Training Accuracy: 79.3275%
	Validation Lo

### ZigZag Scheduler

In [6]:
import numpy as np

class ZigZagLROnPlateau(torch.optim.lr_scheduler._LRScheduler):
    def __init__(self, optimizer, mode='min', up_factor=1.1, down_factor=0.8, up_patience=10, down_patience=10, verbose=True):
        super(ZigZagLROnPlateau).__init__()
        self.optimizer = optimizer
        self.mode = mode
        self.up_factor = 1 + up_factor
        self.down_factor = 1 - down_factor
        self.up_patience = up_patience
        self.down_patience = down_patience
        self.num_bad_epochs = 0
        self.num_good_epochs = 0
        self.best_metric = np.Inf if self.mode == 'min' else -np.Inf
        self.verbose = verbose

    def step(self, metric):
        if self.mode == 'min':
            if metric < self.best_metric:
                self.best_metric = metric
                self.num_bad_epochs = 0
                self.num_good_epochs += 1
                if self.num_good_epochs > self.up_patience:
                    old_lr = self.optimizer.param_groups[0]['lr']
                    new_lr = old_lr * self.up_factor
                    self.optimizer.param_groups[0]['lr'] = new_lr
                    if self.verbose:
                        print(f"increasing learning rate of group 0 to {new_lr:.4e}.")
                    self.num_good_epochs = 0
            else:
                self.num_bad_epochs += 1
                self.num_good_epochs = 0
                if self.num_bad_epochs > self.down_patience:
                    old_lr = self.optimizer.param_groups[0]['lr']
                    new_lr = old_lr * self.down_factor
                    self.optimizer.param_groups[0]['lr'] = new_lr
                    if self.verbose:
                        print(f"reducing learning rate of group 0 to {new_lr:.4e}.")
                    self.num_bad_epochs = 0
        else:
            if metric > self.best_metric:
                self.best_metric = metric
                self.num_bad_epochs = 0
                self.num_good_epochs += 1
                if self.num_good_epochs > self.up_patience:
                    old_lr = self.optimizer.param_groups[0]['lr']
                    new_lr = old_lr * self.up_factor
                    self.optimizer.param_groups[0]['lr'] = new_lr
                    if self.verbose:
                        print(f"increasing learning rate of group 0 to {new_lr:.4e}.")
                    self.num_good_epochs = 0
            else:
                self.num_bad_epochs += 1
                self.num_good_epochs = 0
                if self.num_bad_epochs > self.down_patience:
                    old_lr = self.optimizer.param_groups[0]['lr']
                    new_lr = old_lr * self.down_factor
                    self.optimizer.param_groups[0]['lr'] = new_lr
                    if self.verbose:
                        print(f"reducing learning rate of group 0 to {new_lr:.4e}.")
                    self.num_bad_epochs = 0

In [21]:
batch_size = 32
    
transform_train = torchvision.transforms.Compose([
  torchvision.transforms.RandomCrop(32, padding=4),
  torchvision.transforms.RandomHorizontalFlip(),
  torchvision.transforms.RandomResizedCrop(32, scale=(0.8, 1.0), ratio=(0.8, 1.2)),
  torchvision.transforms.ToTensor(),
  torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

transform_test = torchvision.transforms.Compose([
                  torchvision.transforms.ToTensor(), 
                  torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

trainset = torchvision.datasets.CIFAR10(root = './data', train = True, download = True, transform = transform_train)

# Split the train data into train and validation sets
train_size = int(0.8 * len(trainset))
val_size = len(trainset) - train_size
trainset, valset = torch.utils.data.random_split(trainset, [train_size, val_size])

testset = torchvision.datasets.CIFAR10(root = './data', train = False, download = True, transform = transform_test)

train_loader = torch.utils.data.DataLoader(trainset, batch_size = batch_size, shuffle = True)
val_loader   = torch.utils.data.DataLoader(valset, batch_size = batch_size, shuffle = True)
test_loader  = torch.utils.data.DataLoader(testset, batch_size = batch_size, shuffle = False)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
    
best_test_acc = 0

torch.cuda.empty_cache()
model = ZigZag_ResNet(BasicBlock, [2, 2, 2, 2, 2, 1, 1])
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Num Params: {num_params}\n")
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), 0.01, momentum = 0.8, weight_decay = 0.0005 , nesterov=True)

scheduler = ZigZagLROnPlateau(optimizer, mode='max', up_factor=0.2, down_factor=0.2, up_patience=1, down_patience=1, verbose = True)

train_losses_ = []
train_accuracies_ = []
valid_losses_ = []
valid_accuracies_ = []

Files already downloaded and verified
Files already downloaded and verified
Num Params: 4891338



In [22]:
epochs = 20

for epoch in range(epochs):
    print(f"\n\tEpoch: {epoch}")

    train_loss, train_accuracy, val_loss, val_accuracy = train_val(model, criterion, optimizer, 
                                                                train_loader, val_loader, device,
                                                                scheduler = scheduler, use_scheduler = True)
    train_losses_.append(train_loss)
    train_accuracies_.append(train_accuracy)
    valid_losses_.append(val_loss)
    valid_accuracies_.append(val_accuracy)
    print(f"\tTraining Loss: {round(train_loss, 4)}; Training Accuracy: {round(train_accuracy*100, 4)}%")
    print(f"\tValidation Loss: {round(val_loss, 4)}; Validation Accuracy: {round(val_accuracy*100, 4)}%")

test_loss, test_accuracy = test(model, criterion, test_loader, device)
print(f"\n\tTesting Loss: {round(test_loss, 4)}; Testing Accuracy: {round(test_accuracy*100, 4)}%")

if test_accuracy > best_test_acc:
    best_test_acc = test_accuracy

    metrics_dict = {'train_loss': train_losses_, 'train_accuracy': train_accuracies_, 
                  'valid_loss': valid_losses_, 'valid_accuracy': valid_accuracies_,
                  'test_loss': test_loss, 'test_accuracy': test_accuracy}


	Epoch: 0
	Training Loss: 3.57; Training Accuracy: 33.68%
	Validation Loss: 1.5733; Validation Accuracy: 42.35%

	Epoch: 1
increasing learning rate of group 0 to 1.2000e-02.
	Training Loss: 2.8566; Training Accuracy: 48.1%
	Validation Loss: 1.286; Validation Accuracy: 53.05%

	Epoch: 2
	Training Loss: 2.398; Training Accuracy: 57.4025%
	Validation Loss: 1.0792; Validation Accuracy: 61.64%

	Epoch: 3
increasing learning rate of group 0 to 1.4400e-02.
	Training Loss: 1.978; Training Accuracy: 65.3825%
	Validation Loss: 0.9306; Validation Accuracy: 67.73%

	Epoch: 4
	Training Loss: 1.8; Training Accuracy: 68.72%
	Validation Loss: 0.9533; Validation Accuracy: 66.83%

	Epoch: 5
	Training Loss: 1.5869; Training Accuracy: 72.605%
	Validation Loss: 0.8102; Validation Accuracy: 72.1%

	Epoch: 6
increasing learning rate of group 0 to 1.7280e-02.
	Training Loss: 1.4401; Training Accuracy: 75.4325%
	Validation Loss: 0.7403; Validation Accuracy: 74.69%

	Epoch: 7
	Training Loss: 1.3941; Training A

### ZigZag Sweep

In [7]:
import wandb
import os
os.environ['WANDB_API_KEY'] = '7af926a637567fde060a1a93e656d8887056249f'
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33msj3828[0m ([33mnyu-tandon[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [8]:
sweep_config = {
    "name": "hyperparameter_sweep",
    'metric': {'goal': 'maximize', 'name': 'test_accuracy'},
    "method": "grid",
    "parameters": {
        "up_factor": {
            "values": [0.1, 0.3, 0.5]
        },
        "down_factor": {
            "values": [0.1, 0.3, 0.5]
        },
        "up_patience": {
            "values": [1, 3, 5]
        },
        "down_patience": {
            "values": [1, 3, 5]
        }
    }
}

In [9]:
batch_size = 32
    
transform_train = torchvision.transforms.Compose([
  torchvision.transforms.RandomCrop(32, padding=4),
  torchvision.transforms.RandomHorizontalFlip(),
  torchvision.transforms.RandomResizedCrop(32, scale=(0.8, 1.0), ratio=(0.8, 1.2)),
  torchvision.transforms.ToTensor(),
  torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

transform_test = torchvision.transforms.Compose([
                  torchvision.transforms.ToTensor(), 
                  torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

trainset = torchvision.datasets.CIFAR10(root = './data', train = True, download = True, transform = transform_train)

# Split the train data into train and validation sets
train_size = int(0.8 * len(trainset))
val_size = len(trainset) - train_size
trainset, valset = torch.utils.data.random_split(trainset, [train_size, val_size])

train_size = int(0.2 * len(trainset))
val_size = int(0.2 * len(valset))
trainset, _ = torch.utils.data.random_split(trainset, [train_size, len(trainset) - train_size])
valset, _ = torch.utils.data.random_split(valset, [val_size, len(valset) - val_size])

testset = torchvision.datasets.CIFAR10(root = './data', train = False, download = True, transform = transform_test)

train_loader = torch.utils.data.DataLoader(trainset, batch_size = batch_size, shuffle = True)
val_loader   = torch.utils.data.DataLoader(valset, batch_size = batch_size, shuffle = True)
test_loader  = torch.utils.data.DataLoader(testset, batch_size = batch_size, shuffle = False)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

def train_sweep():
    wandb.init()
    
    config = wandb.config
    config.up_factor = sweep_config['parameters']['up_factor']
    config.down_factor = sweep_config['parameters']['down_factor']
    config.up_patience = sweep_config['parameters']['up_patience']
    config.down_patience = sweep_config['parameters']['down_patience']
    
    torch.cuda.empty_cache()
    model = ZigZag_ResNet(BasicBlock, [2, 2, 2, 2, 2, 1, 1])
    num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Num Params: {num_params}\n")
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), 0.01, momentum = 0.8, weight_decay = 0.0005 , nesterov=True)

    scheduler = ZigZagLROnPlateau(optimizer, mode='max', up_factor=config.up_factor, down_factor=config.down_factor, 
                                                         up_patience=config.up_patience, down_patience=config.down_patience, 
                                                         verbose = True)


    train_losses_ = []
    train_accuracies_ = []
    valid_losses_ = []
    valid_accuracies_ = []

    epochs = 25

    for epoch in range(epochs):
        print(f"\n\tEpoch: {epoch}")

        train_loss, train_accuracy, val_loss, val_accuracy = train_val(model, criterion, optimizer, 
                                                                    train_loader, val_loader, device,
                                                                    scheduler = scheduler, use_scheduler = True)
        train_losses_.append(train_loss)
        train_accuracies_.append(train_accuracy)
        valid_losses_.append(val_loss)
        valid_accuracies_.append(val_accuracy)
        print(f"\tTraining Loss: {round(train_loss, 4)}; Training Accuracy: {round(train_accuracy*100, 4)}%")
        print(f"\tValidation Loss: {round(val_loss, 4)}; Validation Accuracy: {round(val_accuracy*100, 4)}%")

    test_loss, test_accuracy = test(model, criterion, test_loader, device)
    print(f"\n\tTesting Loss: {round(test_loss, 4)}; Testing Accuracy: {round(test_accuracy*100, 4)}%")

    wandb.log({'test_accuracy': test_accuracy})
    wandb.finish()

Files already downloaded and verified
Files already downloaded and verified


In [10]:
print(f"Train: {len(trainset)}; Validation: {len(valset)}; Test: {len(testset)}")

Train: 8000; Validation: 2000; Test: 10000


In [11]:
sweep_id = wandb.sweep(sweep_config, project = "Mini-ZigZag__Resnet__ZigZag-Scheduler")
wandb.agent(sweep_id, train_sweep)

500 response executing GraphQL.
{"errors":[{"message":"An internal error occurred. Please contact support.","path":["upsertSweep"]}],"data":{"upsertSweep":null}}
[34m[1mwandb[0m: [32m[41mERROR[0m Error while calling W&B API: An internal error occurred. Please contact support. (<Response [500]>)


Create sweep with ID: nun2d9tx
Sweep URL: https://wandb.ai/nyu-tandon/Mini-ZigZag__Resnet__ZigZag-Scheduler/sweeps/nun2d9tx


[34m[1mwandb[0m: Agent Starting Run: fwtmuknh with config:
[34m[1mwandb[0m: 	down_factor: 0.1
[34m[1mwandb[0m: 	down_patience: 1
[34m[1mwandb[0m: 	up_factor: 0.1
[34m[1mwandb[0m: 	up_patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0888; Training Accuracy: 24.1875%
	Validation Loss: 1.8659; Validation Accuracy: 31.65%

	Epoch: 1
increasing learning rate of group 0 to 1.1000e-02.
	Training Loss: 3.6231; Training Accuracy: 32.95%
	Validation Loss: 1.6921; Validation Accuracy: 36.5%

	Epoch: 2
	Training Loss: 3.4162; Training Accuracy: 36.375%
	Validation Loss: 1.7097; Validation Accuracy: 36.55%

	Epoch: 3
increasing learning rate of group 0 to 1.2100e-02.
	Training Loss: 3.2488; Training Accuracy: 40.8125%
	Validation Loss: 1.6039; Validation Accuracy: 40.9%

	Epoch: 4
	Training Loss: 3.1135; Training Accuracy: 42.6125%
	Validation Loss: 1.4722; Validation Accuracy: 46.4%

	Epoch: 5
	Training Loss: 2.9578; Training Accuracy: 45.325%
	Validation Loss: 1.5025; Validation Accuracy: 43.2%

	Epoch: 6
	Training Loss: 2.8272; Training Accuracy: 48.975%
	Validation Loss: 1.4111; Validation Accuracy: 48.4%

	Epoch: 7
increasing learning rate of group 0 to 1.3310e-02.
	Train

0,1
test_accuracy,▁

0,1
test_accuracy,0.7157


[34m[1mwandb[0m: Agent Starting Run: nz2lv0ah with config:
[34m[1mwandb[0m: 	down_factor: 0.1
[34m[1mwandb[0m: 	down_patience: 1
[34m[1mwandb[0m: 	up_factor: 0.1
[34m[1mwandb[0m: 	up_patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0198; Training Accuracy: 25.925%
	Validation Loss: 1.8693; Validation Accuracy: 31.55%

	Epoch: 1
	Training Loss: 3.5914; Training Accuracy: 33.7875%
	Validation Loss: 1.6956; Validation Accuracy: 37.65%

	Epoch: 2
	Training Loss: 3.3599; Training Accuracy: 37.2125%
	Validation Loss: 1.6746; Validation Accuracy: 38.35%

	Epoch: 3
increasing learning rate of group 0 to 1.1000e-02.
	Training Loss: 3.2173; Training Accuracy: 40.9875%
	Validation Loss: 1.5484; Validation Accuracy: 44.75%

	Epoch: 4
	Training Loss: 3.1086; Training Accuracy: 42.85%
	Validation Loss: 1.4684; Validation Accuracy: 45.9%

	Epoch: 5
	Training Loss: 2.9593; Training Accuracy: 45.925%
	Validation Loss: 1.6254; Validation Accuracy: 42.7%

	Epoch: 6
	Training Loss: 2.8627; Training Accuracy: 47.7%
	Validation Loss: 1.4158; Validation Accuracy: 48.55%

	Epoch: 7
	Training Loss: 2.7175; Training Accuracy: 51.0%
	Validation Loss: 1.3815; Validation Accuracy: 50.95%

	Ep

0,1
test_accuracy,▁

0,1
test_accuracy,0.7172


[34m[1mwandb[0m: Agent Starting Run: 7zig9y9q with config:
[34m[1mwandb[0m: 	down_factor: 0.1
[34m[1mwandb[0m: 	down_patience: 1
[34m[1mwandb[0m: 	up_factor: 0.1
[34m[1mwandb[0m: 	up_patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0434; Training Accuracy: 25.1375%
	Validation Loss: 1.8942; Validation Accuracy: 29.9%

	Epoch: 1
	Training Loss: 3.6352; Training Accuracy: 32.525%
	Validation Loss: 1.7548; Validation Accuracy: 33.5%

	Epoch: 2
	Training Loss: 3.3961; Training Accuracy: 36.525%
	Validation Loss: 1.7615; Validation Accuracy: 36.0%

	Epoch: 3
	Training Loss: 3.2626; Training Accuracy: 40.0625%
	Validation Loss: 1.6303; Validation Accuracy: 40.5%

	Epoch: 4
	Training Loss: 3.1075; Training Accuracy: 42.325%
	Validation Loss: 1.482; Validation Accuracy: 45.85%

	Epoch: 5
	Training Loss: 2.9493; Training Accuracy: 46.3375%
	Validation Loss: 1.6067; Validation Accuracy: 42.3%

	Epoch: 6
	Training Loss: 2.8508; Training Accuracy: 48.4375%
	Validation Loss: 1.3671; Validation Accuracy: 50.0%

	Epoch: 7
	Training Loss: 2.7259; Training Accuracy: 50.775%
	Validation Loss: 1.3288; Validation Accuracy: 52.05%

	Epoch: 8
	Training Loss: 2.5671; Training Accuracy: 

0,1
test_accuracy,▁

0,1
test_accuracy,0.7232


[34m[1mwandb[0m: Agent Starting Run: t921bnlx with config:
[34m[1mwandb[0m: 	down_factor: 0.1
[34m[1mwandb[0m: 	down_patience: 1
[34m[1mwandb[0m: 	up_factor: 0.3
[34m[1mwandb[0m: 	up_patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1448; Training Accuracy: 22.55%
	Validation Loss: 1.8961; Validation Accuracy: 27.55%

	Epoch: 1
increasing learning rate of group 0 to 1.3000e-02.
	Training Loss: 3.656; Training Accuracy: 32.275%
	Validation Loss: 1.7221; Validation Accuracy: 35.55%

	Epoch: 2
	Training Loss: 3.466; Training Accuracy: 36.1375%
	Validation Loss: 1.763; Validation Accuracy: 38.85%

	Epoch: 3
increasing learning rate of group 0 to 1.6900e-02.
	Training Loss: 3.2691; Training Accuracy: 40.1%
	Validation Loss: 1.5625; Validation Accuracy: 41.4%

	Epoch: 4
	Training Loss: 3.1907; Training Accuracy: 40.5%
	Validation Loss: 1.6586; Validation Accuracy: 38.95%

	Epoch: 5
	Training Loss: 3.0286; Training Accuracy: 44.425%
	Validation Loss: 1.5306; Validation Accuracy: 44.25%

	Epoch: 6
increasing learning rate of group 0 to 2.1970e-02.
	Training Loss: 2.8691; Training Accuracy: 48.0625%
	Validation Loss: 1.3868; Validation Accuracy: 49.0%

	Epoch: 7
	Training L

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.6836


[34m[1mwandb[0m: Agent Starting Run: q0o469u2 with config:
[34m[1mwandb[0m: 	down_factor: 0.1
[34m[1mwandb[0m: 	down_patience: 1
[34m[1mwandb[0m: 	up_factor: 0.3
[34m[1mwandb[0m: 	up_patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1228; Training Accuracy: 23.3625%
	Validation Loss: 1.8981; Validation Accuracy: 29.5%

	Epoch: 1
	Training Loss: 3.656; Training Accuracy: 32.4875%
	Validation Loss: 1.7354; Validation Accuracy: 33.85%

	Epoch: 2
	Training Loss: 3.4394; Training Accuracy: 36.5125%
	Validation Loss: 1.6397; Validation Accuracy: 38.0%

	Epoch: 3
increasing learning rate of group 0 to 1.3000e-02.
	Training Loss: 3.2771; Training Accuracy: 39.175%
	Validation Loss: 1.6399; Validation Accuracy: 40.15%

	Epoch: 4
	Training Loss: 3.212; Training Accuracy: 41.825%
	Validation Loss: 1.5693; Validation Accuracy: 41.9%

	Epoch: 5
	Training Loss: 3.051; Training Accuracy: 44.5375%
	Validation Loss: 1.5105; Validation Accuracy: 45.4%

	Epoch: 6
	Training Loss: 2.8948; Training Accuracy: 47.425%
	Validation Loss: 1.4989; Validation Accuracy: 45.3%

	Epoch: 7
	Training Loss: 2.7825; Training Accuracy: 49.2125%
	Validation Loss: 1.3774; Validation Accuracy: 49.35%

	E

0,1
test_accuracy,▁

0,1
test_accuracy,0.7279


[34m[1mwandb[0m: Agent Starting Run: 7keohc3b with config:
[34m[1mwandb[0m: 	down_factor: 0.1
[34m[1mwandb[0m: 	down_patience: 1
[34m[1mwandb[0m: 	up_factor: 0.3
[34m[1mwandb[0m: 	up_patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1043; Training Accuracy: 23.65%
	Validation Loss: 1.9348; Validation Accuracy: 29.35%

	Epoch: 1
	Training Loss: 3.5468; Training Accuracy: 33.8875%
	Validation Loss: 1.9449; Validation Accuracy: 34.05%

	Epoch: 2
	Training Loss: 3.3194; Training Accuracy: 38.725%
	Validation Loss: 1.7316; Validation Accuracy: 39.45%

	Epoch: 3
	Training Loss: 3.1544; Training Accuracy: 41.7125%
	Validation Loss: 1.5311; Validation Accuracy: 44.05%

	Epoch: 4
	Training Loss: 2.9959; Training Accuracy: 45.2375%
	Validation Loss: 1.4872; Validation Accuracy: 44.7%

	Epoch: 5
increasing learning rate of group 0 to 1.3000e-02.
	Training Loss: 2.8494; Training Accuracy: 48.0%
	Validation Loss: 1.4917; Validation Accuracy: 45.55%

	Epoch: 6
	Training Loss: 2.8188; Training Accuracy: 49.3125%
	Validation Loss: 1.3548; Validation Accuracy: 50.1%

	Epoch: 7
	Training Loss: 2.6703; Training Accuracy: 52.1125%
	Validation Loss: 1.4011; Validation Accuracy: 50.25%


0,1
test_accuracy,▁

0,1
test_accuracy,0.7234


[34m[1mwandb[0m: Agent Starting Run: gbp9i8r6 with config:
[34m[1mwandb[0m: 	down_factor: 0.1
[34m[1mwandb[0m: 	down_patience: 1
[34m[1mwandb[0m: 	up_factor: 0.5
[34m[1mwandb[0m: 	up_patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0717; Training Accuracy: 24.6875%
	Validation Loss: 1.7761; Validation Accuracy: 32.9%

	Epoch: 1
increasing learning rate of group 0 to 1.5000e-02.
	Training Loss: 3.5515; Training Accuracy: 34.4%
	Validation Loss: 1.6817; Validation Accuracy: 36.8%

	Epoch: 2
	Training Loss: 3.4171; Training Accuracy: 37.5875%
	Validation Loss: 1.6945; Validation Accuracy: 37.15%

	Epoch: 3
increasing learning rate of group 0 to 2.2500e-02.
	Training Loss: 3.2197; Training Accuracy: 41.0625%
	Validation Loss: 1.5531; Validation Accuracy: 42.8%

	Epoch: 4
	Training Loss: 3.1671; Training Accuracy: 43.1375%
	Validation Loss: 1.5428; Validation Accuracy: 43.0%

	Epoch: 5
increasing learning rate of group 0 to 3.3750e-02.
	Training Loss: 2.9714; Training Accuracy: 46.0125%
	Validation Loss: 1.4849; Validation Accuracy: 48.0%

	Epoch: 6
	Training Loss: 2.94; Training Accuracy: 47.1375%
	Validation Loss: 1.4136; Validation Accuracy: 49.1%

	Epoch: 7
increas

0,1
test_accuracy,▁

0,1
test_accuracy,0.6877


[34m[1mwandb[0m: Agent Starting Run: uogbnu89 with config:
[34m[1mwandb[0m: 	down_factor: 0.1
[34m[1mwandb[0m: 	down_patience: 1
[34m[1mwandb[0m: 	up_factor: 0.5
[34m[1mwandb[0m: 	up_patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0695; Training Accuracy: 25.1625%
	Validation Loss: 1.8881; Validation Accuracy: 28.5%

	Epoch: 1
	Training Loss: 3.5784; Training Accuracy: 33.3875%
	Validation Loss: 1.7132; Validation Accuracy: 36.2%

	Epoch: 2
	Training Loss: 3.3544; Training Accuracy: 37.7%
	Validation Loss: 1.6425; Validation Accuracy: 39.6%

	Epoch: 3
increasing learning rate of group 0 to 1.5000e-02.
	Training Loss: 3.228; Training Accuracy: 41.05%
	Validation Loss: 1.5543; Validation Accuracy: 44.0%

	Epoch: 4
	Training Loss: 3.1928; Training Accuracy: 41.6125%
	Validation Loss: 1.5193; Validation Accuracy: 44.55%

	Epoch: 5
	Training Loss: 3.0184; Training Accuracy: 45.3%
	Validation Loss: 1.4886; Validation Accuracy: 46.7%

	Epoch: 6
	Training Loss: 2.8285; Training Accuracy: 49.025%
	Validation Loss: 1.5274; Validation Accuracy: 45.85%

	Epoch: 7
	Training Loss: 2.7218; Training Accuracy: 51.1375%
	Validation Loss: 1.4347; Validation Accuracy: 48.5%

	Epoch:

0,1
test_accuracy,▁

0,1
test_accuracy,0.6913


[34m[1mwandb[0m: Agent Starting Run: m5oqeala with config:
[34m[1mwandb[0m: 	down_factor: 0.1
[34m[1mwandb[0m: 	down_patience: 1
[34m[1mwandb[0m: 	up_factor: 0.5
[34m[1mwandb[0m: 	up_patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0018; Training Accuracy: 25.9375%
	Validation Loss: 2.0873; Validation Accuracy: 25.5%

	Epoch: 1
	Training Loss: 3.569; Training Accuracy: 33.7%
	Validation Loss: 1.7379; Validation Accuracy: 34.45%

	Epoch: 2
	Training Loss: 3.3295; Training Accuracy: 38.5875%
	Validation Loss: 1.6088; Validation Accuracy: 41.05%

	Epoch: 3
	Training Loss: 3.1585; Training Accuracy: 41.9625%
	Validation Loss: 1.5022; Validation Accuracy: 44.5%

	Epoch: 4
	Training Loss: 3.0037; Training Accuracy: 44.375%
	Validation Loss: 1.4782; Validation Accuracy: 45.8%

	Epoch: 5
increasing learning rate of group 0 to 1.5000e-02.
	Training Loss: 2.8851; Training Accuracy: 47.5125%
	Validation Loss: 1.4441; Validation Accuracy: 48.15%

	Epoch: 6
	Training Loss: 2.8547; Training Accuracy: 48.8625%
	Validation Loss: 1.4999; Validation Accuracy: 46.35%

	Epoch: 7
	Training Loss: 2.7714; Training Accuracy: 50.4125%
	Validation Loss: 1.2952; Validation Accuracy: 53.8%



0,1
test_accuracy,▁

0,1
test_accuracy,0.7466


[34m[1mwandb[0m: Agent Starting Run: sa4gvpr3 with config:
[34m[1mwandb[0m: 	down_factor: 0.1
[34m[1mwandb[0m: 	down_patience: 3
[34m[1mwandb[0m: 	up_factor: 0.1
[34m[1mwandb[0m: 	up_patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0393; Training Accuracy: 24.4875%
	Validation Loss: 1.8112; Validation Accuracy: 31.25%

	Epoch: 1
increasing learning rate of group 0 to 1.1000e-02.
	Training Loss: 3.5945; Training Accuracy: 32.7125%
	Validation Loss: 1.6713; Validation Accuracy: 38.55%

	Epoch: 2
	Training Loss: 3.3841; Training Accuracy: 36.95%
	Validation Loss: 1.6584; Validation Accuracy: 38.55%

	Epoch: 3
	Training Loss: 3.2252; Training Accuracy: 40.75%
	Validation Loss: 1.5621; Validation Accuracy: 42.75%

	Epoch: 4
increasing learning rate of group 0 to 1.2100e-02.
	Training Loss: 3.0737; Training Accuracy: 43.875%
	Validation Loss: 1.5122; Validation Accuracy: 46.5%

	Epoch: 5
	Training Loss: 2.9917; Training Accuracy: 45.35%
	Validation Loss: 1.5285; Validation Accuracy: 45.6%

	Epoch: 6
	Training Loss: 2.8577; Training Accuracy: 48.675%
	Validation Loss: 1.4375; Validation Accuracy: 48.15%

	Epoch: 7
increasing learning rate of group 0 to 1.3310e-02.
	Train

0,1
test_accuracy,▁

0,1
test_accuracy,0.7323


[34m[1mwandb[0m: Agent Starting Run: tgveimhv with config:
[34m[1mwandb[0m: 	down_factor: 0.1
[34m[1mwandb[0m: 	down_patience: 3
[34m[1mwandb[0m: 	up_factor: 0.1
[34m[1mwandb[0m: 	up_patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1085; Training Accuracy: 23.4875%
	Validation Loss: 1.8731; Validation Accuracy: 30.35%

	Epoch: 1
	Training Loss: 3.6171; Training Accuracy: 32.325%
	Validation Loss: 1.7448; Validation Accuracy: 35.4%

	Epoch: 2
	Training Loss: 3.3932; Training Accuracy: 37.7%
	Validation Loss: 1.6395; Validation Accuracy: 41.5%

	Epoch: 3
increasing learning rate of group 0 to 1.1000e-02.
	Training Loss: 3.2265; Training Accuracy: 40.7625%
	Validation Loss: 1.5791; Validation Accuracy: 42.15%

	Epoch: 4
	Training Loss: 3.1177; Training Accuracy: 43.225%
	Validation Loss: 1.5625; Validation Accuracy: 44.0%

	Epoch: 5
	Training Loss: 2.9788; Training Accuracy: 45.4125%
	Validation Loss: 1.4734; Validation Accuracy: 45.2%

	Epoch: 6
	Training Loss: 2.8703; Training Accuracy: 47.7375%
	Validation Loss: 1.4579; Validation Accuracy: 47.7%

	Epoch: 7
increasing learning rate of group 0 to 1.2100e-02.
	Training Loss: 2.7529; Training Accuracy: 50.6125%
	Vali

0,1
test_accuracy,▁

0,1
test_accuracy,0.7107


[34m[1mwandb[0m: Agent Starting Run: l6vueoex with config:
[34m[1mwandb[0m: 	down_factor: 0.1
[34m[1mwandb[0m: 	down_patience: 3
[34m[1mwandb[0m: 	up_factor: 0.1
[34m[1mwandb[0m: 	up_patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.2749; Training Accuracy: 21.1625%
	Validation Loss: 1.9042; Validation Accuracy: 27.55%

	Epoch: 1
	Training Loss: 3.7086; Training Accuracy: 30.35%
	Validation Loss: 1.7359; Validation Accuracy: 35.45%

	Epoch: 2
	Training Loss: 3.4509; Training Accuracy: 35.4375%
	Validation Loss: 1.674; Validation Accuracy: 38.8%

	Epoch: 3
	Training Loss: 3.2778; Training Accuracy: 39.325%
	Validation Loss: 1.6957; Validation Accuracy: 38.65%

	Epoch: 4
	Training Loss: 3.1422; Training Accuracy: 42.3875%
	Validation Loss: 1.5356; Validation Accuracy: 43.7%

	Epoch: 5
	Training Loss: 2.985; Training Accuracy: 45.225%
	Validation Loss: 1.4736; Validation Accuracy: 45.8%

	Epoch: 6
	Training Loss: 2.8664; Training Accuracy: 47.8%
	Validation Loss: 1.4559; Validation Accuracy: 48.15%

	Epoch: 7
	Training Loss: 2.7554; Training Accuracy: 50.3625%
	Validation Loss: 1.417; Validation Accuracy: 48.55%

	Epoch: 8
	Training Loss: 2.6529; Training Accuracy: 51

0,1
test_accuracy,▁

0,1
test_accuracy,0.6952


[34m[1mwandb[0m: Agent Starting Run: ylo8bthq with config:
[34m[1mwandb[0m: 	down_factor: 0.1
[34m[1mwandb[0m: 	down_patience: 3
[34m[1mwandb[0m: 	up_factor: 0.3
[34m[1mwandb[0m: 	up_patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1212; Training Accuracy: 23.5625%
	Validation Loss: 1.8005; Validation Accuracy: 32.0%

	Epoch: 1
increasing learning rate of group 0 to 1.3000e-02.
	Training Loss: 3.6012; Training Accuracy: 33.0625%
	Validation Loss: 1.7371; Validation Accuracy: 36.25%

	Epoch: 2
	Training Loss: 3.4021; Training Accuracy: 37.3%
	Validation Loss: 1.6492; Validation Accuracy: 38.3%

	Epoch: 3
increasing learning rate of group 0 to 1.6900e-02.
	Training Loss: 3.2172; Training Accuracy: 41.35%
	Validation Loss: 1.5801; Validation Accuracy: 41.3%

	Epoch: 4
	Training Loss: 3.1478; Training Accuracy: 42.8875%
	Validation Loss: 1.5591; Validation Accuracy: 43.25%

	Epoch: 5
increasing learning rate of group 0 to 2.1970e-02.
	Training Loss: 2.942; Training Accuracy: 46.625%
	Validation Loss: 1.473; Validation Accuracy: 47.45%

	Epoch: 6
	Training Loss: 2.9138; Training Accuracy: 47.1%
	Validation Loss: 1.4486; Validation Accuracy: 46.9%

	Epoch: 7
	Training L

0,1
test_accuracy,▁

0,1
test_accuracy,0.6883


[34m[1mwandb[0m: Agent Starting Run: 4imkbvur with config:
[34m[1mwandb[0m: 	down_factor: 0.1
[34m[1mwandb[0m: 	down_patience: 3
[34m[1mwandb[0m: 	up_factor: 0.3
[34m[1mwandb[0m: 	up_patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0712; Training Accuracy: 24.3875%
	Validation Loss: 1.889; Validation Accuracy: 28.95%

	Epoch: 1
	Training Loss: 3.6349; Training Accuracy: 32.7625%
	Validation Loss: 1.8615; Validation Accuracy: 32.65%

	Epoch: 2
	Training Loss: 3.4038; Training Accuracy: 36.9%
	Validation Loss: 1.6695; Validation Accuracy: 38.45%

	Epoch: 3
increasing learning rate of group 0 to 1.3000e-02.
	Training Loss: 3.2792; Training Accuracy: 39.9375%
	Validation Loss: 1.6675; Validation Accuracy: 39.55%

	Epoch: 4
	Training Loss: 3.1799; Training Accuracy: 41.9625%
	Validation Loss: 1.6993; Validation Accuracy: 39.35%

	Epoch: 5
	Training Loss: 3.0192; Training Accuracy: 45.05%
	Validation Loss: 1.4804; Validation Accuracy: 44.55%

	Epoch: 6
	Training Loss: 2.9123; Training Accuracy: 47.4875%
	Validation Loss: 1.5185; Validation Accuracy: 45.85%

	Epoch: 7
	Training Loss: 2.7993; Training Accuracy: 49.0625%
	Validation Loss: 1.362; Validation Accuracy: 50.35%

0,1
test_accuracy,▁

0,1
test_accuracy,0.6879


[34m[1mwandb[0m: Agent Starting Run: 4soj4pyk with config:
[34m[1mwandb[0m: 	down_factor: 0.1
[34m[1mwandb[0m: 	down_patience: 3
[34m[1mwandb[0m: 	up_factor: 0.3
[34m[1mwandb[0m: 	up_patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1405; Training Accuracy: 23.675%
	Validation Loss: 1.9255; Validation Accuracy: 30.0%

	Epoch: 1
	Training Loss: 3.6541; Training Accuracy: 32.7875%
	Validation Loss: 1.7339; Validation Accuracy: 34.95%

	Epoch: 2
	Training Loss: 3.4292; Training Accuracy: 36.9125%
	Validation Loss: 1.6376; Validation Accuracy: 40.55%

	Epoch: 3
	Training Loss: 3.2608; Training Accuracy: 40.2875%
	Validation Loss: 1.5939; Validation Accuracy: 41.95%

	Epoch: 4
	Training Loss: 3.1087; Training Accuracy: 42.6875%
	Validation Loss: 1.5325; Validation Accuracy: 45.5%

	Epoch: 5
	Training Loss: 2.9959; Training Accuracy: 45.1%
	Validation Loss: 1.4853; Validation Accuracy: 44.8%

	Epoch: 6
	Training Loss: 2.8488; Training Accuracy: 48.1%
	Validation Loss: 1.5729; Validation Accuracy: 44.85%

	Epoch: 7
	Training Loss: 2.7343; Training Accuracy: 50.3625%
	Validation Loss: 1.4446; Validation Accuracy: 46.5%

	Epoch: 8
	Training Loss: 2.662; Training Accuracy: 5

VBox(children=(Label(value='0.001 MB of 0.010 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.110491…

0,1
test_accuracy,▁

0,1
test_accuracy,0.6922


[34m[1mwandb[0m: Agent Starting Run: gx8mm8zj with config:
[34m[1mwandb[0m: 	down_factor: 0.1
[34m[1mwandb[0m: 	down_patience: 3
[34m[1mwandb[0m: 	up_factor: 0.5
[34m[1mwandb[0m: 	up_patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0963; Training Accuracy: 24.7%
	Validation Loss: 1.8974; Validation Accuracy: 29.3%

	Epoch: 1
increasing learning rate of group 0 to 1.5000e-02.
	Training Loss: 3.5572; Training Accuracy: 34.175%
	Validation Loss: 1.6987; Validation Accuracy: 37.6%

	Epoch: 2
	Training Loss: 3.4591; Training Accuracy: 36.4%
	Validation Loss: 1.6977; Validation Accuracy: 37.55%

	Epoch: 3
	Training Loss: 3.2764; Training Accuracy: 40.125%
	Validation Loss: 1.6382; Validation Accuracy: 40.8%

	Epoch: 4
increasing learning rate of group 0 to 2.2500e-02.
	Training Loss: 3.1022; Training Accuracy: 43.25%
	Validation Loss: 1.515; Validation Accuracy: 43.7%

	Epoch: 5
	Training Loss: 3.0872; Training Accuracy: 43.5875%
	Validation Loss: 1.5942; Validation Accuracy: 44.8%

	Epoch: 6
increasing learning rate of group 0 to 3.3750e-02.
	Training Loss: 2.9308; Training Accuracy: 47.5875%
	Validation Loss: 1.4809; Validation Accuracy: 45.8%

	Epoch: 7
	Training Los

0,1
test_accuracy,▁

0,1
test_accuracy,0.6889


[34m[1mwandb[0m: Agent Starting Run: 0lofh8x3 with config:
[34m[1mwandb[0m: 	down_factor: 0.1
[34m[1mwandb[0m: 	down_patience: 3
[34m[1mwandb[0m: 	up_factor: 0.5
[34m[1mwandb[0m: 	up_patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0356; Training Accuracy: 24.6875%
	Validation Loss: 1.8698; Validation Accuracy: 31.05%

	Epoch: 1
	Training Loss: 3.6261; Training Accuracy: 32.4125%
	Validation Loss: 1.8782; Validation Accuracy: 31.95%

	Epoch: 2
	Training Loss: 3.4435; Training Accuracy: 37.1875%
	Validation Loss: 1.6654; Validation Accuracy: 40.25%

	Epoch: 3
increasing learning rate of group 0 to 1.5000e-02.
	Training Loss: 3.2851; Training Accuracy: 38.9125%
	Validation Loss: 1.5815; Validation Accuracy: 42.3%

	Epoch: 4
	Training Loss: 3.2711; Training Accuracy: 40.2625%
	Validation Loss: 1.6383; Validation Accuracy: 41.3%

	Epoch: 5
	Training Loss: 3.1599; Training Accuracy: 42.2875%
	Validation Loss: 1.6024; Validation Accuracy: 42.65%

	Epoch: 6
	Training Loss: 2.9722; Training Accuracy: 46.025%
	Validation Loss: 1.5143; Validation Accuracy: 44.55%

	Epoch: 7
	Training Loss: 2.8483; Training Accuracy: 47.75%
	Validation Loss: 1.4098; Validation Accuracy: 49.3

0,1
test_accuracy,▁

0,1
test_accuracy,0.7136


[34m[1mwandb[0m: Agent Starting Run: qo4k0shq with config:
[34m[1mwandb[0m: 	down_factor: 0.1
[34m[1mwandb[0m: 	down_patience: 3
[34m[1mwandb[0m: 	up_factor: 0.5
[34m[1mwandb[0m: 	up_patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0938; Training Accuracy: 23.8375%
	Validation Loss: 1.8186; Validation Accuracy: 31.05%

	Epoch: 1
	Training Loss: 3.5636; Training Accuracy: 33.7875%
	Validation Loss: 1.7043; Validation Accuracy: 36.95%

	Epoch: 2
	Training Loss: 3.3653; Training Accuracy: 37.4875%
	Validation Loss: 1.6789; Validation Accuracy: 38.15%

	Epoch: 3
	Training Loss: 3.1372; Training Accuracy: 43.0875%
	Validation Loss: 1.5587; Validation Accuracy: 42.4%

	Epoch: 4
	Training Loss: 2.9779; Training Accuracy: 45.125%
	Validation Loss: 1.5249; Validation Accuracy: 42.45%

	Epoch: 5
increasing learning rate of group 0 to 1.5000e-02.
	Training Loss: 2.8548; Training Accuracy: 48.35%
	Validation Loss: 1.4133; Validation Accuracy: 49.25%

	Epoch: 6
	Training Loss: 2.8347; Training Accuracy: 48.875%
	Validation Loss: 1.5427; Validation Accuracy: 47.15%

	Epoch: 7
	Training Loss: 2.6576; Training Accuracy: 52.325%
	Validation Loss: 1.3004; Validation Accuracy: 54.85

0,1
test_accuracy,▁

0,1
test_accuracy,0.7204


[34m[1mwandb[0m: Agent Starting Run: wxa3mza1 with config:
[34m[1mwandb[0m: 	down_factor: 0.1
[34m[1mwandb[0m: 	down_patience: 5
[34m[1mwandb[0m: 	up_factor: 0.1
[34m[1mwandb[0m: 	up_patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1324; Training Accuracy: 23.825%
	Validation Loss: 1.9091; Validation Accuracy: 29.3%

	Epoch: 1
increasing learning rate of group 0 to 1.1000e-02.
	Training Loss: 3.6219; Training Accuracy: 32.65%
	Validation Loss: 1.7639; Validation Accuracy: 34.1%

	Epoch: 2
	Training Loss: 3.4461; Training Accuracy: 36.1375%
	Validation Loss: 1.63; Validation Accuracy: 39.8%

	Epoch: 3
	Training Loss: 3.2394; Training Accuracy: 39.95%
	Validation Loss: 1.6427; Validation Accuracy: 39.75%

	Epoch: 4
	Training Loss: 3.0711; Training Accuracy: 43.725%
	Validation Loss: 1.5127; Validation Accuracy: 44.1%

	Epoch: 5
increasing learning rate of group 0 to 1.2100e-02.
	Training Loss: 2.9024; Training Accuracy: 46.525%
	Validation Loss: 1.5696; Validation Accuracy: 44.6%

	Epoch: 6
	Training Loss: 2.8367; Training Accuracy: 48.65%
	Validation Loss: 1.4584; Validation Accuracy: 49.05%

	Epoch: 7
increasing learning rate of group 0 to 1.3310e-02.
	Training Lo

0,1
test_accuracy,▁

0,1
test_accuracy,0.7117


[34m[1mwandb[0m: Agent Starting Run: pmrusgwa with config:
[34m[1mwandb[0m: 	down_factor: 0.1
[34m[1mwandb[0m: 	down_patience: 5
[34m[1mwandb[0m: 	up_factor: 0.1
[34m[1mwandb[0m: 	up_patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1096; Training Accuracy: 23.675%
	Validation Loss: 1.902; Validation Accuracy: 29.85%

	Epoch: 1
	Training Loss: 3.6902; Training Accuracy: 31.2%
	Validation Loss: 1.7825; Validation Accuracy: 34.3%

	Epoch: 2
	Training Loss: 3.4603; Training Accuracy: 36.275%
	Validation Loss: 1.6259; Validation Accuracy: 40.45%

	Epoch: 3
	Training Loss: 3.2777; Training Accuracy: 40.2%
	Validation Loss: 1.6074; Validation Accuracy: 39.6%

	Epoch: 4
	Training Loss: 3.1403; Training Accuracy: 42.325%
	Validation Loss: 1.5014; Validation Accuracy: 45.1%

	Epoch: 5
	Training Loss: 2.9812; Training Accuracy: 45.725%
	Validation Loss: 1.5348; Validation Accuracy: 44.35%

	Epoch: 6
	Training Loss: 2.8411; Training Accuracy: 48.75%
	Validation Loss: 1.5368; Validation Accuracy: 46.8%

	Epoch: 7
	Training Loss: 2.7633; Training Accuracy: 49.8%
	Validation Loss: 1.4148; Validation Accuracy: 51.35%

	Epoch: 8
	Training Loss: 2.6678; Training Accuracy: 52.0875%


0,1
test_accuracy,▁

0,1
test_accuracy,0.7038


[34m[1mwandb[0m: Agent Starting Run: jq0c9snq with config:
[34m[1mwandb[0m: 	down_factor: 0.1
[34m[1mwandb[0m: 	down_patience: 5
[34m[1mwandb[0m: 	up_factor: 0.1
[34m[1mwandb[0m: 	up_patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0593; Training Accuracy: 24.5125%
	Validation Loss: 1.8379; Validation Accuracy: 30.9%

	Epoch: 1
	Training Loss: 3.5671; Training Accuracy: 33.075%
	Validation Loss: 1.8526; Validation Accuracy: 33.55%

	Epoch: 2
	Training Loss: 3.352; Training Accuracy: 38.0625%
	Validation Loss: 1.6325; Validation Accuracy: 40.1%

	Epoch: 3
	Training Loss: 3.1916; Training Accuracy: 40.9625%
	Validation Loss: 1.5356; Validation Accuracy: 42.65%

	Epoch: 4
	Training Loss: 3.0318; Training Accuracy: 45.1%
	Validation Loss: 1.6031; Validation Accuracy: 41.6%

	Epoch: 5
	Training Loss: 2.9003; Training Accuracy: 47.0%
	Validation Loss: 1.4742; Validation Accuracy: 45.85%

	Epoch: 6
	Training Loss: 2.7646; Training Accuracy: 49.925%
	Validation Loss: 1.3558; Validation Accuracy: 51.05%

	Epoch: 7
	Training Loss: 2.6643; Training Accuracy: 52.5%
	Validation Loss: 1.3157; Validation Accuracy: 52.1%

	Epoch: 8
	Training Loss: 2.5119; Training Accuracy: 54.98

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.7357


[34m[1mwandb[0m: Agent Starting Run: rjs18qwo with config:
[34m[1mwandb[0m: 	down_factor: 0.1
[34m[1mwandb[0m: 	down_patience: 5
[34m[1mwandb[0m: 	up_factor: 0.3
[34m[1mwandb[0m: 	up_patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0901; Training Accuracy: 24.1%
	Validation Loss: 2.1099; Validation Accuracy: 24.55%

	Epoch: 1
increasing learning rate of group 0 to 1.3000e-02.
	Training Loss: 3.6675; Training Accuracy: 31.5625%
	Validation Loss: 1.7701; Validation Accuracy: 33.6%

	Epoch: 2
	Training Loss: 3.4701; Training Accuracy: 35.8%
	Validation Loss: 1.8011; Validation Accuracy: 33.1%

	Epoch: 3
	Training Loss: 3.3114; Training Accuracy: 39.3%
	Validation Loss: 1.5634; Validation Accuracy: 41.55%

	Epoch: 4
increasing learning rate of group 0 to 1.6900e-02.
	Training Loss: 3.1526; Training Accuracy: 42.3625%
	Validation Loss: 1.5217; Validation Accuracy: 44.6%

	Epoch: 5
	Training Loss: 3.0761; Training Accuracy: 43.6%
	Validation Loss: 1.5386; Validation Accuracy: 44.0%

	Epoch: 6
	Training Loss: 2.9357; Training Accuracy: 46.275%
	Validation Loss: 1.4513; Validation Accuracy: 47.3%

	Epoch: 7
	Training Loss: 2.7947; Training Accuracy: 49.7375%
	Validation L

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.6829


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ldqcsdsm with config:
[34m[1mwandb[0m: 	down_factor: 0.1
[34m[1mwandb[0m: 	down_patience: 5
[34m[1mwandb[0m: 	up_factor: 0.3
[34m[1mwandb[0m: 	up_patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1007; Training Accuracy: 23.825%
	Validation Loss: 1.8438; Validation Accuracy: 29.55%

	Epoch: 1
	Training Loss: 3.6911; Training Accuracy: 31.8375%
	Validation Loss: 1.7706; Validation Accuracy: 35.2%

	Epoch: 2
	Training Loss: 3.4236; Training Accuracy: 36.8125%
	Validation Loss: 1.6369; Validation Accuracy: 39.6%

	Epoch: 3
increasing learning rate of group 0 to 1.3000e-02.
	Training Loss: 3.2643; Training Accuracy: 40.3125%
	Validation Loss: 1.6209; Validation Accuracy: 41.5%

	Epoch: 4
	Training Loss: 3.1741; Training Accuracy: 42.125%
	Validation Loss: 1.5418; Validation Accuracy: 44.6%

	Epoch: 5
	Training Loss: 2.9699; Training Accuracy: 45.8625%
	Validation Loss: 1.534; Validation Accuracy: 43.5%

	Epoch: 6
	Training Loss: 2.8659; Training Accuracy: 48.3875%
	Validation Loss: 1.5602; Validation Accuracy: 43.75%

	Epoch: 7
	Training Loss: 2.756; Training Accuracy: 50.95%
	Validation Loss: 1.3597; Validation Accuracy: 52.6%

	Ep

0,1
test_accuracy,▁

0,1
test_accuracy,0.7192


[34m[1mwandb[0m: Agent Starting Run: b77497tq with config:
[34m[1mwandb[0m: 	down_factor: 0.1
[34m[1mwandb[0m: 	down_patience: 5
[34m[1mwandb[0m: 	up_factor: 0.3
[34m[1mwandb[0m: 	up_patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1534; Training Accuracy: 22.725%
	Validation Loss: 1.8725; Validation Accuracy: 29.05%

	Epoch: 1
	Training Loss: 3.6507; Training Accuracy: 30.95%
	Validation Loss: 1.7358; Validation Accuracy: 33.3%

	Epoch: 2
	Training Loss: 3.4288; Training Accuracy: 36.2125%
	Validation Loss: 1.6624; Validation Accuracy: 37.8%

	Epoch: 3
	Training Loss: 3.2379; Training Accuracy: 40.1%
	Validation Loss: 1.5977; Validation Accuracy: 39.55%

	Epoch: 4
	Training Loss: 3.1209; Training Accuracy: 42.5125%
	Validation Loss: 1.4486; Validation Accuracy: 46.55%

	Epoch: 5
	Training Loss: 2.9663; Training Accuracy: 46.425%
	Validation Loss: 1.4633; Validation Accuracy: 46.5%

	Epoch: 6
	Training Loss: 2.8523; Training Accuracy: 47.8125%
	Validation Loss: 1.4257; Validation Accuracy: 47.8%

	Epoch: 7
	Training Loss: 2.7404; Training Accuracy: 49.775%
	Validation Loss: 1.4915; Validation Accuracy: 46.5%

	Epoch: 8
	Training Loss: 2.6271; Training Accuracy: 52

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.7038


[34m[1mwandb[0m: Agent Starting Run: qgj0stgf with config:
[34m[1mwandb[0m: 	down_factor: 0.1
[34m[1mwandb[0m: 	down_patience: 5
[34m[1mwandb[0m: 	up_factor: 0.5
[34m[1mwandb[0m: 	up_patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0466; Training Accuracy: 24.5%
	Validation Loss: 1.8905; Validation Accuracy: 31.3%

	Epoch: 1
increasing learning rate of group 0 to 1.5000e-02.
	Training Loss: 3.5883; Training Accuracy: 32.9875%
	Validation Loss: 1.7194; Validation Accuracy: 36.3%

	Epoch: 2
	Training Loss: 3.4443; Training Accuracy: 36.1%
	Validation Loss: 1.844; Validation Accuracy: 32.15%

	Epoch: 3
	Training Loss: 3.2936; Training Accuracy: 40.2125%
	Validation Loss: 1.5567; Validation Accuracy: 42.55%

	Epoch: 4
increasing learning rate of group 0 to 2.2500e-02.
	Training Loss: 3.1228; Training Accuracy: 42.625%
	Validation Loss: 1.518; Validation Accuracy: 44.25%

	Epoch: 5
	Training Loss: 3.1005; Training Accuracy: 43.4375%
	Validation Loss: 1.5738; Validation Accuracy: 43.7%

	Epoch: 6
	Training Loss: 2.9541; Training Accuracy: 47.0%
	Validation Loss: 1.461; Validation Accuracy: 48.6%

	Epoch: 7
increasing learning rate of group 0 to 3.3750e-02.
	Training Los

0,1
test_accuracy,▁

0,1
test_accuracy,0.5843


[34m[1mwandb[0m: Agent Starting Run: 0eopk6qc with config:
[34m[1mwandb[0m: 	down_factor: 0.1
[34m[1mwandb[0m: 	down_patience: 5
[34m[1mwandb[0m: 	up_factor: 0.5
[34m[1mwandb[0m: 	up_patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0796; Training Accuracy: 24.2375%
	Validation Loss: 1.905; Validation Accuracy: 29.4%

	Epoch: 1
	Training Loss: 3.6065; Training Accuracy: 33.0875%
	Validation Loss: 1.7288; Validation Accuracy: 37.55%

	Epoch: 2
	Training Loss: 3.3611; Training Accuracy: 38.2375%
	Validation Loss: 1.6156; Validation Accuracy: 43.2%

	Epoch: 3
	Training Loss: 3.2036; Training Accuracy: 41.1125%
	Validation Loss: 1.5657; Validation Accuracy: 43.2%

	Epoch: 4
	Training Loss: 3.0292; Training Accuracy: 44.3625%
	Validation Loss: 1.5065; Validation Accuracy: 44.4%

	Epoch: 5
	Training Loss: 2.8884; Training Accuracy: 47.15%
	Validation Loss: 1.4634; Validation Accuracy: 46.25%

	Epoch: 6
	Training Loss: 2.7483; Training Accuracy: 49.8625%
	Validation Loss: 1.4324; Validation Accuracy: 48.1%

	Epoch: 7
increasing learning rate of group 0 to 1.5000e-02.
	Training Loss: 2.6531; Training Accuracy: 52.6125%
	Validation Loss: 1.305; Validation Accuracy: 51.9%

	

0,1
test_accuracy,▁

0,1
test_accuracy,0.7151


[34m[1mwandb[0m: Agent Starting Run: x9vkf2k8 with config:
[34m[1mwandb[0m: 	down_factor: 0.1
[34m[1mwandb[0m: 	down_patience: 5
[34m[1mwandb[0m: 	up_factor: 0.5
[34m[1mwandb[0m: 	up_patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0794; Training Accuracy: 24.1375%
	Validation Loss: 1.8412; Validation Accuracy: 30.75%

	Epoch: 1
	Training Loss: 3.5478; Training Accuracy: 34.125%
	Validation Loss: 1.7342; Validation Accuracy: 35.1%

	Epoch: 2
	Training Loss: 3.3198; Training Accuracy: 38.775%
	Validation Loss: 1.6194; Validation Accuracy: 40.55%

	Epoch: 3
	Training Loss: 3.1938; Training Accuracy: 41.7625%
	Validation Loss: 1.517; Validation Accuracy: 45.75%

	Epoch: 4
	Training Loss: 3.0182; Training Accuracy: 45.425%
	Validation Loss: 1.5478; Validation Accuracy: 43.5%

	Epoch: 5
	Training Loss: 2.8748; Training Accuracy: 48.0875%
	Validation Loss: 1.4038; Validation Accuracy: 47.85%

	Epoch: 6
	Training Loss: 2.7531; Training Accuracy: 50.4375%
	Validation Loss: 1.3783; Validation Accuracy: 49.75%

	Epoch: 7
	Training Loss: 2.6226; Training Accuracy: 52.9875%
	Validation Loss: 1.4025; Validation Accuracy: 49.75%

	Epoch: 8
	Training Loss: 2.487; Training Accura

0,1
test_accuracy,▁

0,1
test_accuracy,0.7211


[34m[1mwandb[0m: Agent Starting Run: j33bqzlj with config:
[34m[1mwandb[0m: 	down_factor: 0.3
[34m[1mwandb[0m: 	down_patience: 1
[34m[1mwandb[0m: 	up_factor: 0.1
[34m[1mwandb[0m: 	up_patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1698; Training Accuracy: 22.75%
	Validation Loss: 1.8578; Validation Accuracy: 32.05%

	Epoch: 1
increasing learning rate of group 0 to 1.1000e-02.
	Training Loss: 3.6168; Training Accuracy: 32.9%
	Validation Loss: 1.7214; Validation Accuracy: 37.85%

	Epoch: 2
	Training Loss: 3.4066; Training Accuracy: 36.5875%
	Validation Loss: 1.6806; Validation Accuracy: 37.05%

	Epoch: 3
	Training Loss: 3.2406; Training Accuracy: 40.25%
	Validation Loss: 1.5863; Validation Accuracy: 41.35%

	Epoch: 4
increasing learning rate of group 0 to 1.2100e-02.
	Training Loss: 3.0861; Training Accuracy: 43.675%
	Validation Loss: 1.5134; Validation Accuracy: 45.7%

	Epoch: 5
	Training Loss: 2.9322; Training Accuracy: 47.2625%
	Validation Loss: 1.4001; Validation Accuracy: 49.1%

	Epoch: 6
	Training Loss: 2.8041; Training Accuracy: 49.3125%
	Validation Loss: 1.4764; Validation Accuracy: 46.55%

	Epoch: 7
	Training Loss: 2.658; Training Accuracy: 51.9875%
	Valid

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.7422


[34m[1mwandb[0m: Agent Starting Run: mg3nrd5s with config:
[34m[1mwandb[0m: 	down_factor: 0.3
[34m[1mwandb[0m: 	down_patience: 1
[34m[1mwandb[0m: 	up_factor: 0.1
[34m[1mwandb[0m: 	up_patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1181; Training Accuracy: 23.6375%
	Validation Loss: 1.8919; Validation Accuracy: 28.1%

	Epoch: 1
	Training Loss: 3.6433; Training Accuracy: 33.05%
	Validation Loss: 1.8081; Validation Accuracy: 34.9%

	Epoch: 2
	Training Loss: 3.4143; Training Accuracy: 36.475%
	Validation Loss: 1.5986; Validation Accuracy: 41.9%

	Epoch: 3
increasing learning rate of group 0 to 1.1000e-02.
	Training Loss: 3.2096; Training Accuracy: 40.4%
	Validation Loss: 1.5496; Validation Accuracy: 44.3%

	Epoch: 4
	Training Loss: 3.076; Training Accuracy: 43.9625%
	Validation Loss: 1.5511; Validation Accuracy: 44.75%

	Epoch: 5
	Training Loss: 2.9447; Training Accuracy: 46.7%
	Validation Loss: 1.4773; Validation Accuracy: 44.9%

	Epoch: 6
	Training Loss: 2.8252; Training Accuracy: 49.55%
	Validation Loss: 1.5038; Validation Accuracy: 46.25%

	Epoch: 7
increasing learning rate of group 0 to 1.2100e-02.
	Training Loss: 2.7018; Training Accuracy: 50.9125%
	Validation 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.736


[34m[1mwandb[0m: Agent Starting Run: wci4nrux with config:
[34m[1mwandb[0m: 	down_factor: 0.3
[34m[1mwandb[0m: 	down_patience: 1
[34m[1mwandb[0m: 	up_factor: 0.1
[34m[1mwandb[0m: 	up_patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1003; Training Accuracy: 23.075%
	Validation Loss: 1.8798; Validation Accuracy: 28.6%

	Epoch: 1
	Training Loss: 3.5983; Training Accuracy: 33.2875%
	Validation Loss: 1.713; Validation Accuracy: 35.2%

	Epoch: 2
	Training Loss: 3.3736; Training Accuracy: 37.3375%
	Validation Loss: 1.5875; Validation Accuracy: 42.25%

	Epoch: 3
	Training Loss: 3.1825; Training Accuracy: 41.9%
	Validation Loss: 1.5713; Validation Accuracy: 43.75%

	Epoch: 4
	Training Loss: 3.0366; Training Accuracy: 43.9375%
	Validation Loss: 1.427; Validation Accuracy: 48.25%

	Epoch: 5
	Training Loss: 2.8896; Training Accuracy: 47.9125%
	Validation Loss: 1.489; Validation Accuracy: 47.15%

	Epoch: 6
	Training Loss: 2.7529; Training Accuracy: 49.8375%
	Validation Loss: 1.4141; Validation Accuracy: 50.95%

	Epoch: 7
	Training Loss: 2.6121; Training Accuracy: 53.0625%
	Validation Loss: 1.3578; Validation Accuracy: 51.1%

	Epoch: 8
	Training Loss: 2.5359; Training Accuracy:

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.7427


[34m[1mwandb[0m: Agent Starting Run: 6xp95s2d with config:
[34m[1mwandb[0m: 	down_factor: 0.3
[34m[1mwandb[0m: 	down_patience: 1
[34m[1mwandb[0m: 	up_factor: 0.3
[34m[1mwandb[0m: 	up_patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 3.9914; Training Accuracy: 25.8125%
	Validation Loss: 2.0061; Validation Accuracy: 30.1%

	Epoch: 1
increasing learning rate of group 0 to 1.3000e-02.
	Training Loss: 3.5725; Training Accuracy: 34.2875%
	Validation Loss: 1.7382; Validation Accuracy: 36.75%

	Epoch: 2
	Training Loss: 3.4187; Training Accuracy: 36.6875%
	Validation Loss: 1.6719; Validation Accuracy: 40.0%

	Epoch: 3
	Training Loss: 3.2487; Training Accuracy: 39.575%
	Validation Loss: 1.6208; Validation Accuracy: 39.55%

	Epoch: 4
	Training Loss: 3.0814; Training Accuracy: 43.6125%
	Validation Loss: 1.5691; Validation Accuracy: 42.6%

	Epoch: 5
increasing learning rate of group 0 to 1.6900e-02.
	Training Loss: 2.9583; Training Accuracy: 46.4875%
	Validation Loss: 1.4217; Validation Accuracy: 47.5%

	Epoch: 6
	Training Loss: 2.8969; Training Accuracy: 47.6125%
	Validation Loss: 1.4914; Validation Accuracy: 45.75%

	Epoch: 7
	Training Loss: 2.7582; Training Accuracy: 50.2875%


0,1
test_accuracy,▁

0,1
test_accuracy,0.7504


[34m[1mwandb[0m: Agent Starting Run: ehafs5u8 with config:
[34m[1mwandb[0m: 	down_factor: 0.3
[34m[1mwandb[0m: 	down_patience: 1
[34m[1mwandb[0m: 	up_factor: 0.3
[34m[1mwandb[0m: 	up_patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0198; Training Accuracy: 26.15%
	Validation Loss: 1.7882; Validation Accuracy: 33.25%

	Epoch: 1
	Training Loss: 3.5578; Training Accuracy: 34.55%
	Validation Loss: 1.722; Validation Accuracy: 34.55%

	Epoch: 2
	Training Loss: 3.3601; Training Accuracy: 37.1625%
	Validation Loss: 1.5924; Validation Accuracy: 40.1%

	Epoch: 3
increasing learning rate of group 0 to 1.3000e-02.
	Training Loss: 3.2023; Training Accuracy: 41.0625%
	Validation Loss: 1.5624; Validation Accuracy: 41.65%

	Epoch: 4
	Training Loss: 3.1169; Training Accuracy: 43.3125%
	Validation Loss: 1.5322; Validation Accuracy: 44.25%

	Epoch: 5
	Training Loss: 2.97; Training Accuracy: 46.0625%
	Validation Loss: 1.6896; Validation Accuracy: 42.6%

	Epoch: 6
	Training Loss: 2.8377; Training Accuracy: 48.0625%
	Validation Loss: 1.4415; Validation Accuracy: 45.3%

	Epoch: 7
	Training Loss: 2.7101; Training Accuracy: 51.175%
	Validation Loss: 1.4015; Validation Accuracy: 49.55%

	E

0,1
test_accuracy,▁

0,1
test_accuracy,0.7145


[34m[1mwandb[0m: Agent Starting Run: qq2agkiu with config:
[34m[1mwandb[0m: 	down_factor: 0.3
[34m[1mwandb[0m: 	down_patience: 1
[34m[1mwandb[0m: 	up_factor: 0.3
[34m[1mwandb[0m: 	up_patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0852; Training Accuracy: 24.5125%
	Validation Loss: 2.1377; Validation Accuracy: 26.65%

	Epoch: 1
	Training Loss: 3.5921; Training Accuracy: 33.7375%
	Validation Loss: 1.8099; Validation Accuracy: 31.95%

	Epoch: 2
	Training Loss: 3.3869; Training Accuracy: 37.5375%
	Validation Loss: 1.635; Validation Accuracy: 39.85%

	Epoch: 3
	Training Loss: 3.2183; Training Accuracy: 41.025%
	Validation Loss: 1.6183; Validation Accuracy: 40.95%

	Epoch: 4
	Training Loss: 3.0828; Training Accuracy: 43.3875%
	Validation Loss: 1.4927; Validation Accuracy: 46.25%

	Epoch: 5
	Training Loss: 2.9687; Training Accuracy: 46.0%
	Validation Loss: 1.5902; Validation Accuracy: 43.25%

	Epoch: 6
	Training Loss: 2.8538; Training Accuracy: 48.975%
	Validation Loss: 1.4377; Validation Accuracy: 48.8%

	Epoch: 7
	Training Loss: 2.7183; Training Accuracy: 51.0625%
	Validation Loss: 1.3527; Validation Accuracy: 51.0%

	Epoch: 8
	Training Loss: 2.6147; Training Accurac

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.7324


[34m[1mwandb[0m: Agent Starting Run: b4y8o61o with config:
[34m[1mwandb[0m: 	down_factor: 0.3
[34m[1mwandb[0m: 	down_patience: 1
[34m[1mwandb[0m: 	up_factor: 0.5
[34m[1mwandb[0m: 	up_patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0249; Training Accuracy: 24.9375%
	Validation Loss: 1.8643; Validation Accuracy: 30.45%

	Epoch: 1
increasing learning rate of group 0 to 1.5000e-02.
	Training Loss: 3.568; Training Accuracy: 33.85%
	Validation Loss: 1.6947; Validation Accuracy: 36.9%

	Epoch: 2
	Training Loss: 3.4501; Training Accuracy: 35.675%
	Validation Loss: 1.7627; Validation Accuracy: 35.0%

	Epoch: 3
	Training Loss: 3.2827; Training Accuracy: 39.875%
	Validation Loss: 1.5857; Validation Accuracy: 42.95%

	Epoch: 4
increasing learning rate of group 0 to 2.2500e-02.
	Training Loss: 3.0937; Training Accuracy: 43.5875%
	Validation Loss: 1.5078; Validation Accuracy: 44.75%

	Epoch: 5
	Training Loss: 3.091; Training Accuracy: 43.3875%
	Validation Loss: 1.5231; Validation Accuracy: 45.2%

	Epoch: 6
increasing learning rate of group 0 to 3.3750e-02.
	Training Loss: 2.897; Training Accuracy: 47.6%
	Validation Loss: 1.4623; Validation Accuracy: 48.5%

	Epoch: 7
	Training 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.7148


[34m[1mwandb[0m: Agent Starting Run: dmbujjts with config:
[34m[1mwandb[0m: 	down_factor: 0.3
[34m[1mwandb[0m: 	down_patience: 1
[34m[1mwandb[0m: 	up_factor: 0.5
[34m[1mwandb[0m: 	up_patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1666; Training Accuracy: 22.7125%
	Validation Loss: 1.8915; Validation Accuracy: 28.85%

	Epoch: 1
	Training Loss: 3.671; Training Accuracy: 31.125%
	Validation Loss: 1.6737; Validation Accuracy: 36.55%

	Epoch: 2
	Training Loss: 3.4129; Training Accuracy: 36.2%
	Validation Loss: 1.6654; Validation Accuracy: 38.3%

	Epoch: 3
increasing learning rate of group 0 to 1.5000e-02.
	Training Loss: 3.2496; Training Accuracy: 39.925%
	Validation Loss: 1.5822; Validation Accuracy: 41.2%

	Epoch: 4
	Training Loss: 3.2052; Training Accuracy: 41.525%
	Validation Loss: 1.5815; Validation Accuracy: 40.95%

	Epoch: 5
	Training Loss: 3.0285; Training Accuracy: 44.7%
	Validation Loss: 1.4466; Validation Accuracy: 47.25%

	Epoch: 6
	Training Loss: 2.8903; Training Accuracy: 47.225%
	Validation Loss: 1.3549; Validation Accuracy: 51.35%

	Epoch: 7
	Training Loss: 2.7375; Training Accuracy: 50.425%
	Validation Loss: 1.3314; Validation Accuracy: 51.45%

	Epoc

0,1
test_accuracy,▁

0,1
test_accuracy,0.7414


[34m[1mwandb[0m: Agent Starting Run: ezjn12mu with config:
[34m[1mwandb[0m: 	down_factor: 0.3
[34m[1mwandb[0m: 	down_patience: 1
[34m[1mwandb[0m: 	up_factor: 0.5
[34m[1mwandb[0m: 	up_patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0398; Training Accuracy: 24.4875%
	Validation Loss: 1.9177; Validation Accuracy: 31.0%

	Epoch: 1
	Training Loss: 3.5672; Training Accuracy: 33.5625%
	Validation Loss: 1.7871; Validation Accuracy: 35.0%

	Epoch: 2
	Training Loss: 3.3803; Training Accuracy: 36.4625%
	Validation Loss: 1.6061; Validation Accuracy: 41.65%

	Epoch: 3
	Training Loss: 3.2036; Training Accuracy: 41.4625%
	Validation Loss: 1.5466; Validation Accuracy: 42.6%

	Epoch: 4
	Training Loss: 3.0579; Training Accuracy: 44.2875%
	Validation Loss: 1.6855; Validation Accuracy: 41.75%

	Epoch: 5
	Training Loss: 2.9369; Training Accuracy: 46.225%
	Validation Loss: 1.443; Validation Accuracy: 46.25%

	Epoch: 6
	Training Loss: 2.8229; Training Accuracy: 48.9375%
	Validation Loss: 1.4267; Validation Accuracy: 48.2%

	Epoch: 7
	Training Loss: 2.7126; Training Accuracy: 51.375%
	Validation Loss: 1.3184; Validation Accuracy: 51.4%

	Epoch: 8
	Training Loss: 2.608; Training Accuracy

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.6948


[34m[1mwandb[0m: Agent Starting Run: k5a30gmp with config:
[34m[1mwandb[0m: 	down_factor: 0.3
[34m[1mwandb[0m: 	down_patience: 3
[34m[1mwandb[0m: 	up_factor: 0.1
[34m[1mwandb[0m: 	up_patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1382; Training Accuracy: 22.35%
	Validation Loss: 1.996; Validation Accuracy: 25.6%

	Epoch: 1
increasing learning rate of group 0 to 1.1000e-02.
	Training Loss: 3.6216; Training Accuracy: 32.4375%
	Validation Loss: 1.7658; Validation Accuracy: 35.15%

	Epoch: 2
	Training Loss: 3.3927; Training Accuracy: 37.9%
	Validation Loss: 1.659; Validation Accuracy: 38.9%

	Epoch: 3
increasing learning rate of group 0 to 1.2100e-02.
	Training Loss: 3.1701; Training Accuracy: 41.7625%
	Validation Loss: 1.5794; Validation Accuracy: 41.45%

	Epoch: 4
	Training Loss: 3.0767; Training Accuracy: 43.4125%
	Validation Loss: 1.5652; Validation Accuracy: 40.9%

	Epoch: 5
	Training Loss: 2.9428; Training Accuracy: 46.6625%
	Validation Loss: 1.397; Validation Accuracy: 50.05%

	Epoch: 6
	Training Loss: 2.8171; Training Accuracy: 48.2375%
	Validation Loss: 1.3658; Validation Accuracy: 49.6%

	Epoch: 7
	Training Loss: 2.7021; Training Accuracy: 51.15%
	Validati

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.7211


[34m[1mwandb[0m: Agent Starting Run: upbck8yg with config:
[34m[1mwandb[0m: 	down_factor: 0.3
[34m[1mwandb[0m: 	down_patience: 3
[34m[1mwandb[0m: 	up_factor: 0.1
[34m[1mwandb[0m: 	up_patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0356; Training Accuracy: 25.55%
	Validation Loss: 1.8457; Validation Accuracy: 30.35%

	Epoch: 1
	Training Loss: 3.5371; Training Accuracy: 34.3875%
	Validation Loss: 1.7991; Validation Accuracy: 34.95%

	Epoch: 2
	Training Loss: 3.3139; Training Accuracy: 39.175%
	Validation Loss: 1.6076; Validation Accuracy: 41.05%

	Epoch: 3
increasing learning rate of group 0 to 1.1000e-02.
	Training Loss: 3.1931; Training Accuracy: 41.8375%
	Validation Loss: 1.5359; Validation Accuracy: 44.05%

	Epoch: 4
	Training Loss: 3.0505; Training Accuracy: 44.125%
	Validation Loss: 1.4951; Validation Accuracy: 44.5%

	Epoch: 5
	Training Loss: 2.8755; Training Accuracy: 47.8375%
	Validation Loss: 1.4144; Validation Accuracy: 48.75%

	Epoch: 6
	Training Loss: 2.7532; Training Accuracy: 50.2125%
	Validation Loss: 1.4283; Validation Accuracy: 49.4%

	Epoch: 7
increasing learning rate of group 0 to 1.2100e-02.
	Training Loss: 2.6511; Training Accuracy: 52.5125%
	

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.108333…

0,1
test_accuracy,▁

0,1
test_accuracy,0.6185


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: zotybwls with config:
[34m[1mwandb[0m: 	down_factor: 0.3
[34m[1mwandb[0m: 	down_patience: 3
[34m[1mwandb[0m: 	up_factor: 0.1
[34m[1mwandb[0m: 	up_patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.128; Training Accuracy: 22.65%
	Validation Loss: 1.9171; Validation Accuracy: 28.25%

	Epoch: 1
	Training Loss: 3.6324; Training Accuracy: 32.25%
	Validation Loss: 1.7528; Validation Accuracy: 34.8%

	Epoch: 2
	Training Loss: 3.4091; Training Accuracy: 36.975%
	Validation Loss: 1.6433; Validation Accuracy: 40.0%

	Epoch: 3
	Training Loss: 3.1777; Training Accuracy: 41.3%
	Validation Loss: 1.662; Validation Accuracy: 39.65%

	Epoch: 4
	Training Loss: 3.0593; Training Accuracy: 44.0125%
	Validation Loss: 1.5396; Validation Accuracy: 42.2%

	Epoch: 5
	Training Loss: 2.9168; Training Accuracy: 46.7375%
	Validation Loss: 1.421; Validation Accuracy: 49.6%

	Epoch: 6
	Training Loss: 2.7718; Training Accuracy: 49.8625%
	Validation Loss: 1.3762; Validation Accuracy: 51.2%

	Epoch: 7
	Training Loss: 2.6516; Training Accuracy: 51.65%
	Validation Loss: 1.3073; Validation Accuracy: 51.7%

	Epoch: 8
	Training Loss: 2.5195; Training Accuracy: 54.8125%

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.7297


[34m[1mwandb[0m: Agent Starting Run: qoetsibm with config:
[34m[1mwandb[0m: 	down_factor: 0.3
[34m[1mwandb[0m: 	down_patience: 3
[34m[1mwandb[0m: 	up_factor: 0.3
[34m[1mwandb[0m: 	up_patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0699; Training Accuracy: 24.7%
	Validation Loss: 1.907; Validation Accuracy: 29.4%

	Epoch: 1
increasing learning rate of group 0 to 1.3000e-02.
	Training Loss: 3.6325; Training Accuracy: 32.825%
	Validation Loss: 1.7704; Validation Accuracy: 35.65%

	Epoch: 2
	Training Loss: 3.4762; Training Accuracy: 36.0125%
	Validation Loss: 1.718; Validation Accuracy: 38.65%

	Epoch: 3
increasing learning rate of group 0 to 1.6900e-02.
	Training Loss: 3.2942; Training Accuracy: 39.6%
	Validation Loss: 1.5837; Validation Accuracy: 42.45%

	Epoch: 4
	Training Loss: 3.2502; Training Accuracy: 40.25%
	Validation Loss: 1.535; Validation Accuracy: 43.6%

	Epoch: 5
	Training Loss: 3.087; Training Accuracy: 43.2%
	Validation Loss: 1.5399; Validation Accuracy: 42.85%

	Epoch: 6
	Training Loss: 2.9285; Training Accuracy: 46.675%
	Validation Loss: 1.5111; Validation Accuracy: 45.3%

	Epoch: 7
increasing learning rate of group 0 to 2.1970e-02.
	Training Loss: 

0,1
test_accuracy,▁

0,1
test_accuracy,0.6925


[34m[1mwandb[0m: Agent Starting Run: jpaq2a14 with config:
[34m[1mwandb[0m: 	down_factor: 0.3
[34m[1mwandb[0m: 	down_patience: 3
[34m[1mwandb[0m: 	up_factor: 0.3
[34m[1mwandb[0m: 	up_patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1566; Training Accuracy: 22.55%
	Validation Loss: 1.8721; Validation Accuracy: 31.4%

	Epoch: 1
	Training Loss: 3.6649; Training Accuracy: 31.7%
	Validation Loss: 1.7679; Validation Accuracy: 33.9%

	Epoch: 2
	Training Loss: 3.4357; Training Accuracy: 37.05%
	Validation Loss: 1.742; Validation Accuracy: 37.25%

	Epoch: 3
increasing learning rate of group 0 to 1.3000e-02.
	Training Loss: 3.2502; Training Accuracy: 40.525%
	Validation Loss: 1.6147; Validation Accuracy: 41.4%

	Epoch: 4
	Training Loss: 3.223; Training Accuracy: 40.9125%
	Validation Loss: 1.5692; Validation Accuracy: 40.9%

	Epoch: 5
	Training Loss: 3.0668; Training Accuracy: 44.35%
	Validation Loss: 1.6327; Validation Accuracy: 40.25%

	Epoch: 6
	Training Loss: 2.9452; Training Accuracy: 46.6625%
	Validation Loss: 1.4421; Validation Accuracy: 47.1%

	Epoch: 7
	Training Loss: 2.7924; Training Accuracy: 49.8375%
	Validation Loss: 1.4525; Validation Accuracy: 47.15%

	Epoch: 

0,1
test_accuracy,▁

0,1
test_accuracy,0.6836


[34m[1mwandb[0m: Agent Starting Run: zf5it1im with config:
[34m[1mwandb[0m: 	down_factor: 0.3
[34m[1mwandb[0m: 	down_patience: 3
[34m[1mwandb[0m: 	up_factor: 0.3
[34m[1mwandb[0m: 	up_patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1229; Training Accuracy: 23.4875%
	Validation Loss: 1.8585; Validation Accuracy: 30.9%

	Epoch: 1
	Training Loss: 3.6362; Training Accuracy: 32.4375%
	Validation Loss: 1.7479; Validation Accuracy: 36.75%

	Epoch: 2
	Training Loss: 3.3755; Training Accuracy: 37.575%
	Validation Loss: 1.6328; Validation Accuracy: 40.65%

	Epoch: 3
	Training Loss: 3.2251; Training Accuracy: 41.075%
	Validation Loss: 1.6039; Validation Accuracy: 42.15%

	Epoch: 4
	Training Loss: 3.0831; Training Accuracy: 43.675%
	Validation Loss: 1.4637; Validation Accuracy: 46.65%

	Epoch: 5
increasing learning rate of group 0 to 1.3000e-02.
	Training Loss: 2.9344; Training Accuracy: 47.3125%
	Validation Loss: 1.4805; Validation Accuracy: 47.2%

	Epoch: 6
	Training Loss: 2.8965; Training Accuracy: 47.2875%
	Validation Loss: 1.4314; Validation Accuracy: 46.8%

	Epoch: 7
	Training Loss: 2.7412; Training Accuracy: 50.9125%
	Validation Loss: 1.3495; Validation Accuracy: 52.6%

0,1
test_accuracy,▁

0,1
test_accuracy,0.6788


[34m[1mwandb[0m: Agent Starting Run: sby2tekz with config:
[34m[1mwandb[0m: 	down_factor: 0.3
[34m[1mwandb[0m: 	down_patience: 3
[34m[1mwandb[0m: 	up_factor: 0.5
[34m[1mwandb[0m: 	up_patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1449; Training Accuracy: 23.275%
	Validation Loss: 1.8899; Validation Accuracy: 27.2%

	Epoch: 1
increasing learning rate of group 0 to 1.5000e-02.
	Training Loss: 3.6886; Training Accuracy: 31.7%
	Validation Loss: 1.7141; Validation Accuracy: 36.3%

	Epoch: 2
	Training Loss: 3.531; Training Accuracy: 34.425%
	Validation Loss: 1.7727; Validation Accuracy: 35.4%

	Epoch: 3
	Training Loss: 3.3167; Training Accuracy: 38.2625%
	Validation Loss: 1.6034; Validation Accuracy: 40.4%

	Epoch: 4
increasing learning rate of group 0 to 2.2500e-02.
	Training Loss: 3.1691; Training Accuracy: 41.6375%
	Validation Loss: 1.5873; Validation Accuracy: 40.7%

	Epoch: 5
	Training Loss: 3.1577; Training Accuracy: 42.1%
	Validation Loss: 1.5779; Validation Accuracy: 42.35%

	Epoch: 6
increasing learning rate of group 0 to 3.3750e-02.
	Training Loss: 2.9951; Training Accuracy: 45.6375%
	Validation Loss: 1.4296; Validation Accuracy: 48.35%

	Epoch: 7
	Training 

0,1
test_accuracy,▁

0,1
test_accuracy,0.6687


[34m[1mwandb[0m: Agent Starting Run: nc30sx2p with config:
[34m[1mwandb[0m: 	down_factor: 0.3
[34m[1mwandb[0m: 	down_patience: 3
[34m[1mwandb[0m: 	up_factor: 0.5
[34m[1mwandb[0m: 	up_patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0128; Training Accuracy: 25.5125%
	Validation Loss: 1.9227; Validation Accuracy: 31.2%

	Epoch: 1
	Training Loss: 3.592; Training Accuracy: 33.225%
	Validation Loss: 1.9064; Validation Accuracy: 33.25%

	Epoch: 2
	Training Loss: 3.3571; Training Accuracy: 38.1625%
	Validation Loss: 1.8217; Validation Accuracy: 34.9%

	Epoch: 3
increasing learning rate of group 0 to 1.5000e-02.
	Training Loss: 3.1842; Training Accuracy: 41.35%
	Validation Loss: 1.6183; Validation Accuracy: 40.55%

	Epoch: 4
	Training Loss: 3.1527; Training Accuracy: 42.125%
	Validation Loss: 1.5429; Validation Accuracy: 43.25%

	Epoch: 5
	Training Loss: 3.0184; Training Accuracy: 44.8%
	Validation Loss: 1.478; Validation Accuracy: 46.3%

	Epoch: 6
	Training Loss: 2.8817; Training Accuracy: 47.45%
	Validation Loss: 1.5303; Validation Accuracy: 46.2%

	Epoch: 7
	Training Loss: 2.7305; Training Accuracy: 50.4375%
	Validation Loss: 1.3605; Validation Accuracy: 49.5%

	Epoch:

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.108508…

0,1
test_accuracy,▁

0,1
test_accuracy,0.6887


[34m[1mwandb[0m: Agent Starting Run: s0c1jrvc with config:
[34m[1mwandb[0m: 	down_factor: 0.3
[34m[1mwandb[0m: 	down_patience: 3
[34m[1mwandb[0m: 	up_factor: 0.5
[34m[1mwandb[0m: 	up_patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0658; Training Accuracy: 24.1875%
	Validation Loss: 1.8609; Validation Accuracy: 29.9%

	Epoch: 1
	Training Loss: 3.5938; Training Accuracy: 33.2875%
	Validation Loss: 1.8166; Validation Accuracy: 33.25%

	Epoch: 2
	Training Loss: 3.3832; Training Accuracy: 37.125%
	Validation Loss: 1.6557; Validation Accuracy: 37.25%

	Epoch: 3
	Training Loss: 3.2225; Training Accuracy: 40.6875%
	Validation Loss: 1.6373; Validation Accuracy: 40.0%

	Epoch: 4
	Training Loss: 3.1091; Training Accuracy: 43.425%
	Validation Loss: 1.5082; Validation Accuracy: 44.75%

	Epoch: 5
increasing learning rate of group 0 to 1.5000e-02.
	Training Loss: 2.9699; Training Accuracy: 45.8%
	Validation Loss: 1.426; Validation Accuracy: 47.9%

	Epoch: 6
	Training Loss: 2.9754; Training Accuracy: 45.7625%
	Validation Loss: 1.4137; Validation Accuracy: 49.0%

	Epoch: 7
	Training Loss: 2.8127; Training Accuracy: 48.7625%
	Validation Loss: 1.4574; Validation Accuracy: 47.85%

	

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.7066


[34m[1mwandb[0m: Agent Starting Run: 9aeylvx3 with config:
[34m[1mwandb[0m: 	down_factor: 0.3
[34m[1mwandb[0m: 	down_patience: 5
[34m[1mwandb[0m: 	up_factor: 0.1
[34m[1mwandb[0m: 	up_patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1769; Training Accuracy: 22.2375%
	Validation Loss: 1.9937; Validation Accuracy: 24.25%

	Epoch: 1
increasing learning rate of group 0 to 1.1000e-02.
	Training Loss: 3.6694; Training Accuracy: 31.8875%
	Validation Loss: 1.8578; Validation Accuracy: 32.95%

	Epoch: 2
	Training Loss: 3.4909; Training Accuracy: 35.2%
	Validation Loss: 1.6873; Validation Accuracy: 37.55%

	Epoch: 3
increasing learning rate of group 0 to 1.2100e-02.
	Training Loss: 3.2994; Training Accuracy: 39.8%
	Validation Loss: 1.5923; Validation Accuracy: 42.65%

	Epoch: 4
	Training Loss: 3.1628; Training Accuracy: 42.9125%
	Validation Loss: 1.5647; Validation Accuracy: 42.65%

	Epoch: 5
	Training Loss: 3.0014; Training Accuracy: 44.9125%
	Validation Loss: 1.4968; Validation Accuracy: 46.5%

	Epoch: 6
	Training Loss: 2.9112; Training Accuracy: 47.35%
	Validation Loss: 1.432; Validation Accuracy: 46.35%

	Epoch: 7
	Training Loss: 2.7925; Training Accuracy: 49.4%
	Validat

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.104697…

0,1
test_accuracy,▁

0,1
test_accuracy,0.6945


[34m[1mwandb[0m: Agent Starting Run: 8r05f74p with config:
[34m[1mwandb[0m: 	down_factor: 0.3
[34m[1mwandb[0m: 	down_patience: 5
[34m[1mwandb[0m: 	up_factor: 0.1
[34m[1mwandb[0m: 	up_patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0415; Training Accuracy: 24.7875%
	Validation Loss: 2.0041; Validation Accuracy: 27.7%

	Epoch: 1
	Training Loss: 3.5589; Training Accuracy: 34.5875%
	Validation Loss: 1.7309; Validation Accuracy: 36.25%

	Epoch: 2
	Training Loss: 3.3458; Training Accuracy: 38.3625%
	Validation Loss: 1.6347; Validation Accuracy: 39.75%

	Epoch: 3
increasing learning rate of group 0 to 1.1000e-02.
	Training Loss: 3.166; Training Accuracy: 41.8375%
	Validation Loss: 1.5749; Validation Accuracy: 41.95%

	Epoch: 4
	Training Loss: 3.032; Training Accuracy: 44.9875%
	Validation Loss: 1.5797; Validation Accuracy: 42.95%

	Epoch: 5
	Training Loss: 2.8978; Training Accuracy: 47.95%
	Validation Loss: 1.5425; Validation Accuracy: 43.7%

	Epoch: 6
	Training Loss: 2.7672; Training Accuracy: 50.075%
	Validation Loss: 1.3654; Validation Accuracy: 50.1%

	Epoch: 7
increasing learning rate of group 0 to 1.2100e-02.
	Training Loss: 2.6546; Training Accuracy: 52.3625%
	Va

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.695


[34m[1mwandb[0m: Agent Starting Run: nsdasm90 with config:
[34m[1mwandb[0m: 	down_factor: 0.3
[34m[1mwandb[0m: 	down_patience: 5
[34m[1mwandb[0m: 	up_factor: 0.1
[34m[1mwandb[0m: 	up_patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0694; Training Accuracy: 23.4125%
	Validation Loss: 1.8409; Validation Accuracy: 31.4%

	Epoch: 1
	Training Loss: 3.5816; Training Accuracy: 33.1625%
	Validation Loss: 1.7119; Validation Accuracy: 34.35%

	Epoch: 2
	Training Loss: 3.3816; Training Accuracy: 36.725%
	Validation Loss: 1.5992; Validation Accuracy: 40.45%

	Epoch: 3
	Training Loss: 3.1968; Training Accuracy: 40.8375%
	Validation Loss: 1.5484; Validation Accuracy: 43.15%

	Epoch: 4
	Training Loss: 3.0267; Training Accuracy: 44.175%
	Validation Loss: 1.4658; Validation Accuracy: 45.7%

	Epoch: 5
increasing learning rate of group 0 to 1.1000e-02.
	Training Loss: 2.8545; Training Accuracy: 47.5125%
	Validation Loss: 1.4444; Validation Accuracy: 48.25%

	Epoch: 6
	Training Loss: 2.7973; Training Accuracy: 49.3125%
	Validation Loss: 1.4323; Validation Accuracy: 47.15%

	Epoch: 7
	Training Loss: 2.6613; Training Accuracy: 52.2375%
	Validation Loss: 1.4352; Validation Accuracy: 50.

VBox(children=(Label(value='0.001 MB of 0.010 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.109788…

0,1
test_accuracy,▁

0,1
test_accuracy,0.716


[34m[1mwandb[0m: Agent Starting Run: s4b62f11 with config:
[34m[1mwandb[0m: 	down_factor: 0.3
[34m[1mwandb[0m: 	down_patience: 5
[34m[1mwandb[0m: 	up_factor: 0.3
[34m[1mwandb[0m: 	up_patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0518; Training Accuracy: 24.5125%
	Validation Loss: 1.798; Validation Accuracy: 32.0%

	Epoch: 1
increasing learning rate of group 0 to 1.3000e-02.
	Training Loss: 3.5626; Training Accuracy: 33.6875%
	Validation Loss: 1.6992; Validation Accuracy: 38.35%

	Epoch: 2
	Training Loss: 3.3587; Training Accuracy: 38.3%
	Validation Loss: 1.6122; Validation Accuracy: 40.1%

	Epoch: 3
increasing learning rate of group 0 to 1.6900e-02.
	Training Loss: 3.1575; Training Accuracy: 41.3875%
	Validation Loss: 1.6369; Validation Accuracy: 40.7%

	Epoch: 4
	Training Loss: 3.0541; Training Accuracy: 44.0625%
	Validation Loss: 1.4811; Validation Accuracy: 46.3%

	Epoch: 5
increasing learning rate of group 0 to 2.1970e-02.
	Training Loss: 2.9236; Training Accuracy: 46.375%
	Validation Loss: 1.4399; Validation Accuracy: 48.8%

	Epoch: 6
	Training Loss: 2.8559; Training Accuracy: 48.175%
	Validation Loss: 1.6312; Validation Accuracy: 43.0%

	Epoch: 7
	Trainin

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.7221


[34m[1mwandb[0m: Agent Starting Run: raniyg5i with config:
[34m[1mwandb[0m: 	down_factor: 0.3
[34m[1mwandb[0m: 	down_patience: 5
[34m[1mwandb[0m: 	up_factor: 0.3
[34m[1mwandb[0m: 	up_patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1337; Training Accuracy: 22.4125%
	Validation Loss: 1.9326; Validation Accuracy: 30.45%

	Epoch: 1
	Training Loss: 3.6336; Training Accuracy: 32.3625%
	Validation Loss: 1.7193; Validation Accuracy: 35.75%

	Epoch: 2
	Training Loss: 3.3836; Training Accuracy: 37.3375%
	Validation Loss: 1.6068; Validation Accuracy: 39.95%

	Epoch: 3
increasing learning rate of group 0 to 1.3000e-02.
	Training Loss: 3.2262; Training Accuracy: 40.9%
	Validation Loss: 1.5192; Validation Accuracy: 43.7%

	Epoch: 4
	Training Loss: 3.1529; Training Accuracy: 42.4%
	Validation Loss: 1.5223; Validation Accuracy: 44.85%

	Epoch: 5
	Training Loss: 2.9851; Training Accuracy: 45.625%
	Validation Loss: 1.4559; Validation Accuracy: 47.45%

	Epoch: 6
	Training Loss: 2.8391; Training Accuracy: 47.9%
	Validation Loss: 1.3925; Validation Accuracy: 51.15%

	Epoch: 7
	Training Loss: 2.7163; Training Accuracy: 50.85%
	Validation Loss: 1.4633; Validation Accuracy: 48.15%

	Epo

0,1
test_accuracy,▁

0,1
test_accuracy,0.7189


[34m[1mwandb[0m: Agent Starting Run: 33ylkhoj with config:
[34m[1mwandb[0m: 	down_factor: 0.3
[34m[1mwandb[0m: 	down_patience: 5
[34m[1mwandb[0m: 	up_factor: 0.3
[34m[1mwandb[0m: 	up_patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0436; Training Accuracy: 24.8625%
	Validation Loss: 1.8529; Validation Accuracy: 30.8%

	Epoch: 1
	Training Loss: 3.6059; Training Accuracy: 33.4625%
	Validation Loss: 1.6805; Validation Accuracy: 39.45%

	Epoch: 2
	Training Loss: 3.3979; Training Accuracy: 37.3125%
	Validation Loss: 1.6234; Validation Accuracy: 39.75%

	Epoch: 3
	Training Loss: 3.2318; Training Accuracy: 39.925%
	Validation Loss: 1.6721; Validation Accuracy: 40.25%

	Epoch: 4
	Training Loss: 3.0952; Training Accuracy: 43.6%
	Validation Loss: 1.6061; Validation Accuracy: 40.65%

	Epoch: 5
increasing learning rate of group 0 to 1.3000e-02.
	Training Loss: 2.95; Training Accuracy: 45.65%
	Validation Loss: 1.4518; Validation Accuracy: 46.85%

	Epoch: 6
	Training Loss: 2.9498; Training Accuracy: 45.8875%
	Validation Loss: 1.4565; Validation Accuracy: 47.85%

	Epoch: 7
	Training Loss: 2.7666; Training Accuracy: 49.2625%
	Validation Loss: 1.3588; Validation Accuracy: 50.75%



0,1
test_accuracy,▁

0,1
test_accuracy,0.6912


[34m[1mwandb[0m: Agent Starting Run: w2ht1od5 with config:
[34m[1mwandb[0m: 	down_factor: 0.3
[34m[1mwandb[0m: 	down_patience: 5
[34m[1mwandb[0m: 	up_factor: 0.5
[34m[1mwandb[0m: 	up_patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1358; Training Accuracy: 22.5125%
	Validation Loss: 1.9095; Validation Accuracy: 27.45%

	Epoch: 1
increasing learning rate of group 0 to 1.5000e-02.
	Training Loss: 3.6378; Training Accuracy: 32.6875%
	Validation Loss: 1.7823; Validation Accuracy: 33.75%

	Epoch: 2
	Training Loss: 3.4801; Training Accuracy: 35.4%
	Validation Loss: 1.7091; Validation Accuracy: 36.4%

	Epoch: 3
increasing learning rate of group 0 to 2.2500e-02.
	Training Loss: 3.2994; Training Accuracy: 38.5625%
	Validation Loss: 1.6042; Validation Accuracy: 41.6%

	Epoch: 4
	Training Loss: 3.2666; Training Accuracy: 40.15%
	Validation Loss: 1.6066; Validation Accuracy: 42.65%

	Epoch: 5
	Training Loss: 3.1133; Training Accuracy: 43.3875%
	Validation Loss: 1.5782; Validation Accuracy: 41.2%

	Epoch: 6
	Training Loss: 2.9604; Training Accuracy: 46.7375%
	Validation Loss: 1.5; Validation Accuracy: 44.6%

	Epoch: 7
increasing learning rate of group 0 to 3.3750e-02.
	Trainin

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.6566


[34m[1mwandb[0m: Agent Starting Run: 9henu1xe with config:
[34m[1mwandb[0m: 	down_factor: 0.3
[34m[1mwandb[0m: 	down_patience: 5
[34m[1mwandb[0m: 	up_factor: 0.5
[34m[1mwandb[0m: 	up_patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.144; Training Accuracy: 23.5125%
	Validation Loss: 2.2795; Validation Accuracy: 24.8%

	Epoch: 1
	Training Loss: 3.6462; Training Accuracy: 32.8375%
	Validation Loss: 1.7684; Validation Accuracy: 34.25%

	Epoch: 2
	Training Loss: 3.4114; Training Accuracy: 36.375%
	Validation Loss: 1.7446; Validation Accuracy: 35.35%

	Epoch: 3
increasing learning rate of group 0 to 1.5000e-02.
	Training Loss: 3.2517; Training Accuracy: 39.55%
	Validation Loss: 1.6707; Validation Accuracy: 39.75%

	Epoch: 4
	Training Loss: 3.222; Training Accuracy: 40.5875%
	Validation Loss: 1.5347; Validation Accuracy: 44.1%

	Epoch: 5
	Training Loss: 3.0807; Training Accuracy: 43.625%
	Validation Loss: 1.5624; Validation Accuracy: 43.6%

	Epoch: 6
	Training Loss: 2.9203; Training Accuracy: 47.1%
	Validation Loss: 1.5261; Validation Accuracy: 46.75%

	Epoch: 7
	Training Loss: 2.7977; Training Accuracy: 49.55%
	Validation Loss: 1.405; Validation Accuracy: 50.5%

	Epoch:

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.6672


[34m[1mwandb[0m: Agent Starting Run: 56nvbzo3 with config:
[34m[1mwandb[0m: 	down_factor: 0.3
[34m[1mwandb[0m: 	down_patience: 5
[34m[1mwandb[0m: 	up_factor: 0.5
[34m[1mwandb[0m: 	up_patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0707; Training Accuracy: 24.45%
	Validation Loss: 1.8471; Validation Accuracy: 32.6%

	Epoch: 1
	Training Loss: 3.5735; Training Accuracy: 33.8%
	Validation Loss: 1.7947; Validation Accuracy: 35.2%

	Epoch: 2
	Training Loss: 3.3702; Training Accuracy: 37.775%
	Validation Loss: 1.6014; Validation Accuracy: 41.25%

	Epoch: 3
	Training Loss: 3.1754; Training Accuracy: 41.525%
	Validation Loss: 1.5953; Validation Accuracy: 42.35%

	Epoch: 4
	Training Loss: 3.042; Training Accuracy: 44.95%
	Validation Loss: 1.4768; Validation Accuracy: 46.85%

	Epoch: 5
	Training Loss: 2.9073; Training Accuracy: 46.7125%
	Validation Loss: 1.4904; Validation Accuracy: 46.75%

	Epoch: 6
	Training Loss: 2.7539; Training Accuracy: 50.475%
	Validation Loss: 1.3775; Validation Accuracy: 49.9%

	Epoch: 7
	Training Loss: 2.6534; Training Accuracy: 52.0125%
	Validation Loss: 1.3689; Validation Accuracy: 50.75%

	Epoch: 8
	Training Loss: 2.5074; Training Accuracy: 54.

VBox(children=(Label(value='0.001 MB of 0.010 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.110451…

0,1
test_accuracy,▁

0,1
test_accuracy,0.7098


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: vljvmuyy with config:
[34m[1mwandb[0m: 	down_factor: 0.5
[34m[1mwandb[0m: 	down_patience: 1
[34m[1mwandb[0m: 	up_factor: 0.1
[34m[1mwandb[0m: 	up_patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0893; Training Accuracy: 24.425%
	Validation Loss: 1.8177; Validation Accuracy: 32.5%

	Epoch: 1
increasing learning rate of group 0 to 1.1000e-02.
	Training Loss: 3.5433; Training Accuracy: 34.025%
	Validation Loss: 1.6587; Validation Accuracy: 38.0%

	Epoch: 2
	Training Loss: 3.3269; Training Accuracy: 39.275%
	Validation Loss: 1.7303; Validation Accuracy: 37.45%

	Epoch: 3
	Training Loss: 3.1486; Training Accuracy: 41.6875%
	Validation Loss: 1.5794; Validation Accuracy: 42.95%

	Epoch: 4
increasing learning rate of group 0 to 1.2100e-02.
	Training Loss: 2.9659; Training Accuracy: 45.975%
	Validation Loss: 1.5121; Validation Accuracy: 43.9%

	Epoch: 5
	Training Loss: 2.8743; Training Accuracy: 47.75%
	Validation Loss: 1.3837; Validation Accuracy: 49.45%

	Epoch: 6
increasing learning rate of group 0 to 1.3310e-02.
	Training Loss: 2.7694; Training Accuracy: 49.6625%
	Validation Loss: 1.3832; Validation Accuracy: 50.5%

	Epoch: 7
	Train

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.6716


[34m[1mwandb[0m: Agent Starting Run: 2q2s9eik with config:
[34m[1mwandb[0m: 	down_factor: 0.5
[34m[1mwandb[0m: 	down_patience: 1
[34m[1mwandb[0m: 	up_factor: 0.1
[34m[1mwandb[0m: 	up_patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0666; Training Accuracy: 23.8125%
	Validation Loss: 1.8263; Validation Accuracy: 33.0%

	Epoch: 1
	Training Loss: 3.5766; Training Accuracy: 33.8625%
	Validation Loss: 1.8382; Validation Accuracy: 32.5%

	Epoch: 2
	Training Loss: 3.3409; Training Accuracy: 37.9875%
	Validation Loss: 1.6101; Validation Accuracy: 39.7%

	Epoch: 3
	Training Loss: 3.1881; Training Accuracy: 41.6375%
	Validation Loss: 1.7561; Validation Accuracy: 37.5%

	Epoch: 4
	Training Loss: 3.0297; Training Accuracy: 44.5%
	Validation Loss: 1.5503; Validation Accuracy: 44.85%

	Epoch: 5
	Training Loss: 2.9123; Training Accuracy: 46.9625%
	Validation Loss: 1.4035; Validation Accuracy: 48.85%

	Epoch: 6
	Training Loss: 2.7844; Training Accuracy: 49.175%
	Validation Loss: 1.6629; Validation Accuracy: 42.9%

	Epoch: 7
reducing learning rate of group 0 to 5.0000e-03.
	Training Loss: 2.6762; Training Accuracy: 51.8125%
	Validation Loss: 1.4835; Validation Accuracy: 47.1%

	Ep

0,1
test_accuracy,▁

0,1
test_accuracy,0.673


[34m[1mwandb[0m: Agent Starting Run: oditsc92 with config:
[34m[1mwandb[0m: 	down_factor: 0.5
[34m[1mwandb[0m: 	down_patience: 1
[34m[1mwandb[0m: 	up_factor: 0.1
[34m[1mwandb[0m: 	up_patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0581; Training Accuracy: 25.4%
	Validation Loss: 1.8371; Validation Accuracy: 32.5%

	Epoch: 1
	Training Loss: 3.6256; Training Accuracy: 32.85%
	Validation Loss: 1.7443; Validation Accuracy: 34.55%

	Epoch: 2
	Training Loss: 3.3959; Training Accuracy: 37.3375%
	Validation Loss: 1.6406; Validation Accuracy: 39.6%

	Epoch: 3
	Training Loss: 3.1763; Training Accuracy: 40.85%
	Validation Loss: 1.6534; Validation Accuracy: 39.5%

	Epoch: 4
	Training Loss: 3.0655; Training Accuracy: 44.175%
	Validation Loss: 1.74; Validation Accuracy: 42.4%

	Epoch: 5
	Training Loss: 2.9398; Training Accuracy: 46.3%
	Validation Loss: 1.4976; Validation Accuracy: 46.45%

	Epoch: 6
	Training Loss: 2.7977; Training Accuracy: 49.15%
	Validation Loss: 1.4522; Validation Accuracy: 48.35%

	Epoch: 7
	Training Loss: 2.7178; Training Accuracy: 50.4625%
	Validation Loss: 1.3983; Validation Accuracy: 50.55%

	Epoch: 8
	Training Loss: 2.6264; Training Accuracy: 52.825%


0,1
test_accuracy,▁

0,1
test_accuracy,0.741


[34m[1mwandb[0m: Agent Starting Run: wmmrmf6a with config:
[34m[1mwandb[0m: 	down_factor: 0.5
[34m[1mwandb[0m: 	down_patience: 1
[34m[1mwandb[0m: 	up_factor: 0.3
[34m[1mwandb[0m: 	up_patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0701; Training Accuracy: 24.8%
	Validation Loss: 1.8755; Validation Accuracy: 29.5%

	Epoch: 1
increasing learning rate of group 0 to 1.3000e-02.
	Training Loss: 3.6008; Training Accuracy: 33.225%
	Validation Loss: 1.6911; Validation Accuracy: 35.35%

	Epoch: 2
	Training Loss: 3.4014; Training Accuracy: 36.4375%
	Validation Loss: 1.6555; Validation Accuracy: 38.0%

	Epoch: 3
increasing learning rate of group 0 to 1.6900e-02.
	Training Loss: 3.2331; Training Accuracy: 40.7375%
	Validation Loss: 1.5857; Validation Accuracy: 43.05%

	Epoch: 4
	Training Loss: 3.1368; Training Accuracy: 42.8625%
	Validation Loss: 1.6696; Validation Accuracy: 38.05%

	Epoch: 5
	Training Loss: 2.9514; Training Accuracy: 45.525%
	Validation Loss: 1.4673; Validation Accuracy: 46.85%

	Epoch: 6
	Training Loss: 2.8222; Training Accuracy: 48.6%
	Validation Loss: 1.7039; Validation Accuracy: 42.45%

	Epoch: 7
	Training Loss: 2.7077; Training Accuracy: 51.025%
	Valid

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.105479…

0,1
test_accuracy,▁

0,1
test_accuracy,0.7659


[34m[1mwandb[0m: Agent Starting Run: 5kd7hrky with config:
[34m[1mwandb[0m: 	down_factor: 0.5
[34m[1mwandb[0m: 	down_patience: 1
[34m[1mwandb[0m: 	up_factor: 0.3
[34m[1mwandb[0m: 	up_patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1315; Training Accuracy: 24.275%
	Validation Loss: 1.9196; Validation Accuracy: 31.55%

	Epoch: 1
	Training Loss: 3.6789; Training Accuracy: 32.1625%
	Validation Loss: 1.7937; Validation Accuracy: 34.85%

	Epoch: 2
	Training Loss: 3.4354; Training Accuracy: 37.4875%
	Validation Loss: 1.6652; Validation Accuracy: 38.45%

	Epoch: 3
increasing learning rate of group 0 to 1.3000e-02.
	Training Loss: 3.2804; Training Accuracy: 39.1625%
	Validation Loss: 1.6095; Validation Accuracy: 39.55%

	Epoch: 4
	Training Loss: 3.1721; Training Accuracy: 42.0625%
	Validation Loss: 1.5527; Validation Accuracy: 43.15%

	Epoch: 5
	Training Loss: 3.0256; Training Accuracy: 44.55%
	Validation Loss: 1.5085; Validation Accuracy: 43.9%

	Epoch: 6
	Training Loss: 2.8757; Training Accuracy: 47.775%
	Validation Loss: 1.396; Validation Accuracy: 48.75%

	Epoch: 7
increasing learning rate of group 0 to 1.6900e-02.
	Training Loss: 2.7723; Training Accuracy: 49.3875%
	

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.7522


[34m[1mwandb[0m: Agent Starting Run: t2d8ljxa with config:
[34m[1mwandb[0m: 	down_factor: 0.5
[34m[1mwandb[0m: 	down_patience: 1
[34m[1mwandb[0m: 	up_factor: 0.3
[34m[1mwandb[0m: 	up_patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1542; Training Accuracy: 23.2%
	Validation Loss: 2.0866; Validation Accuracy: 28.65%

	Epoch: 1
	Training Loss: 3.6299; Training Accuracy: 31.8%
	Validation Loss: 1.6903; Validation Accuracy: 35.15%

	Epoch: 2
	Training Loss: 3.4064; Training Accuracy: 36.35%
	Validation Loss: 1.7398; Validation Accuracy: 39.2%

	Epoch: 3
	Training Loss: 3.256; Training Accuracy: 39.8%
	Validation Loss: 1.6586; Validation Accuracy: 38.95%

	Epoch: 4
	Training Loss: 3.1166; Training Accuracy: 43.15%
	Validation Loss: 1.6625; Validation Accuracy: 43.3%

	Epoch: 5
	Training Loss: 2.9688; Training Accuracy: 45.575%
	Validation Loss: 1.4463; Validation Accuracy: 48.8%

	Epoch: 6
	Training Loss: 2.8345; Training Accuracy: 48.2875%
	Validation Loss: 1.389; Validation Accuracy: 50.2%

	Epoch: 7
	Training Loss: 2.7197; Training Accuracy: 50.3375%
	Validation Loss: 1.4059; Validation Accuracy: 49.8%

	Epoch: 8
reducing learning rate of group 0 to 5.0000e-03.
	Tra

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.107869…

0,1
test_accuracy,▁

0,1
test_accuracy,0.7254


[34m[1mwandb[0m: Agent Starting Run: pqrmitpt with config:
[34m[1mwandb[0m: 	down_factor: 0.5
[34m[1mwandb[0m: 	down_patience: 1
[34m[1mwandb[0m: 	up_factor: 0.5
[34m[1mwandb[0m: 	up_patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0198; Training Accuracy: 24.9625%
	Validation Loss: 1.8423; Validation Accuracy: 31.1%

	Epoch: 1
increasing learning rate of group 0 to 1.5000e-02.
	Training Loss: 3.5587; Training Accuracy: 33.775%
	Validation Loss: 1.6592; Validation Accuracy: 38.35%

	Epoch: 2
	Training Loss: 3.4686; Training Accuracy: 35.7625%
	Validation Loss: 1.6472; Validation Accuracy: 40.45%

	Epoch: 3
	Training Loss: 3.2457; Training Accuracy: 39.4125%
	Validation Loss: 1.6909; Validation Accuracy: 39.75%

	Epoch: 4
	Training Loss: 3.1297; Training Accuracy: 43.2%
	Validation Loss: 1.5176; Validation Accuracy: 44.95%

	Epoch: 5
	Training Loss: 2.9728; Training Accuracy: 45.95%
	Validation Loss: 1.5149; Validation Accuracy: 44.2%

	Epoch: 6
	Training Loss: 2.8328; Training Accuracy: 48.1%
	Validation Loss: 1.4132; Validation Accuracy: 48.85%

	Epoch: 7
	Training Loss: 2.7278; Training Accuracy: 50.475%
	Validation Loss: 1.4854; Validation Accuracy: 46.4%

	Epo

0,1
test_accuracy,▁

0,1
test_accuracy,0.7451


[34m[1mwandb[0m: Agent Starting Run: 307ybh49 with config:
[34m[1mwandb[0m: 	down_factor: 0.5
[34m[1mwandb[0m: 	down_patience: 1
[34m[1mwandb[0m: 	up_factor: 0.5
[34m[1mwandb[0m: 	up_patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 3.9791; Training Accuracy: 25.8625%
	Validation Loss: 1.799; Validation Accuracy: 34.35%

	Epoch: 1
	Training Loss: 3.5818; Training Accuracy: 33.175%
	Validation Loss: 1.8019; Validation Accuracy: 33.85%

	Epoch: 2
	Training Loss: 3.3838; Training Accuracy: 37.025%
	Validation Loss: 1.638; Validation Accuracy: 40.7%

	Epoch: 3
	Training Loss: 3.1976; Training Accuracy: 41.1625%
	Validation Loss: 1.5761; Validation Accuracy: 42.7%

	Epoch: 4
	Training Loss: 3.0844; Training Accuracy: 43.725%
	Validation Loss: 1.5143; Validation Accuracy: 43.75%

	Epoch: 5
increasing learning rate of group 0 to 1.5000e-02.
	Training Loss: 2.9382; Training Accuracy: 46.4875%
	Validation Loss: 1.4211; Validation Accuracy: 48.85%

	Epoch: 6
	Training Loss: 2.9383; Training Accuracy: 46.1625%
	Validation Loss: 1.5213; Validation Accuracy: 46.0%

	Epoch: 7
	Training Loss: 2.8391; Training Accuracy: 48.5625%
	Validation Loss: 1.415; Validation Accuracy: 49.4%

	

0,1
test_accuracy,▁

0,1
test_accuracy,0.7615


[34m[1mwandb[0m: Agent Starting Run: bi17zzsz with config:
[34m[1mwandb[0m: 	down_factor: 0.5
[34m[1mwandb[0m: 	down_patience: 1
[34m[1mwandb[0m: 	up_factor: 0.5
[34m[1mwandb[0m: 	up_patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0228; Training Accuracy: 24.8625%
	Validation Loss: 1.7929; Validation Accuracy: 32.7%

	Epoch: 1
	Training Loss: 3.5664; Training Accuracy: 33.4375%
	Validation Loss: 1.7132; Validation Accuracy: 36.7%

	Epoch: 2
	Training Loss: 3.3396; Training Accuracy: 38.7125%
	Validation Loss: 1.6138; Validation Accuracy: 39.6%

	Epoch: 3
	Training Loss: 3.1914; Training Accuracy: 41.3625%
	Validation Loss: 1.6673; Validation Accuracy: 40.25%

	Epoch: 4
	Training Loss: 3.0364; Training Accuracy: 44.1875%
	Validation Loss: 1.4618; Validation Accuracy: 48.0%

	Epoch: 5
	Training Loss: 2.8628; Training Accuracy: 47.9375%
	Validation Loss: 1.4296; Validation Accuracy: 47.8%

	Epoch: 6
	Training Loss: 2.7339; Training Accuracy: 50.55%
	Validation Loss: 1.4008; Validation Accuracy: 50.2%

	Epoch: 7
	Training Loss: 2.6232; Training Accuracy: 52.8625%
	Validation Loss: 1.5221; Validation Accuracy: 45.1%

	Epoch: 8
	Training Loss: 2.5101; Training Accuracy

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.7332


[34m[1mwandb[0m: Agent Starting Run: 3nhbkotb with config:
[34m[1mwandb[0m: 	down_factor: 0.5
[34m[1mwandb[0m: 	down_patience: 3
[34m[1mwandb[0m: 	up_factor: 0.1
[34m[1mwandb[0m: 	up_patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0313; Training Accuracy: 24.575%
	Validation Loss: 1.8291; Validation Accuracy: 28.85%

	Epoch: 1
increasing learning rate of group 0 to 1.1000e-02.
	Training Loss: 3.5566; Training Accuracy: 34.0125%
	Validation Loss: 1.7543; Validation Accuracy: 34.8%

	Epoch: 2
	Training Loss: 3.3655; Training Accuracy: 37.475%
	Validation Loss: 1.6225; Validation Accuracy: 39.55%

	Epoch: 3
increasing learning rate of group 0 to 1.2100e-02.
	Training Loss: 3.1946; Training Accuracy: 40.6625%
	Validation Loss: 1.5658; Validation Accuracy: 42.4%

	Epoch: 4
	Training Loss: 3.1005; Training Accuracy: 43.125%
	Validation Loss: 1.5025; Validation Accuracy: 45.75%

	Epoch: 5
	Training Loss: 2.9415; Training Accuracy: 47.025%
	Validation Loss: 1.7333; Validation Accuracy: 41.55%

	Epoch: 6
	Training Loss: 2.8162; Training Accuracy: 49.45%
	Validation Loss: 1.3803; Validation Accuracy: 50.0%

	Epoch: 7
increasing learning rate of group 0 to 1.3310e-02.
	Trai

0,1
test_accuracy,▁

0,1
test_accuracy,0.6812


[34m[1mwandb[0m: Agent Starting Run: zzzlepeb with config:
[34m[1mwandb[0m: 	down_factor: 0.5
[34m[1mwandb[0m: 	down_patience: 3
[34m[1mwandb[0m: 	up_factor: 0.1
[34m[1mwandb[0m: 	up_patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0501; Training Accuracy: 24.4875%
	Validation Loss: 1.8482; Validation Accuracy: 30.55%

	Epoch: 1
	Training Loss: 3.568; Training Accuracy: 34.0625%
	Validation Loss: 1.8811; Validation Accuracy: 36.5%

	Epoch: 2
	Training Loss: 3.3311; Training Accuracy: 38.175%
	Validation Loss: 1.5911; Validation Accuracy: 41.65%

	Epoch: 3
increasing learning rate of group 0 to 1.1000e-02.
	Training Loss: 3.1521; Training Accuracy: 42.6125%
	Validation Loss: 1.6183; Validation Accuracy: 42.55%

	Epoch: 4
	Training Loss: 3.0685; Training Accuracy: 43.85%
	Validation Loss: 1.5499; Validation Accuracy: 44.1%

	Epoch: 5
	Training Loss: 2.9035; Training Accuracy: 47.175%
	Validation Loss: 1.3784; Validation Accuracy: 51.65%

	Epoch: 6
	Training Loss: 2.7657; Training Accuracy: 50.7875%
	Validation Loss: 1.4469; Validation Accuracy: 48.85%

	Epoch: 7
	Training Loss: 2.6298; Training Accuracy: 52.7125%
	Validation Loss: 1.3159; Validation Accuracy: 52.9%


0,1
test_accuracy,▁

0,1
test_accuracy,0.7502


[34m[1mwandb[0m: Agent Starting Run: i2gqen08 with config:
[34m[1mwandb[0m: 	down_factor: 0.5
[34m[1mwandb[0m: 	down_patience: 3
[34m[1mwandb[0m: 	up_factor: 0.1
[34m[1mwandb[0m: 	up_patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1236; Training Accuracy: 23.95%
	Validation Loss: 1.8241; Validation Accuracy: 32.25%

	Epoch: 1
	Training Loss: 3.6017; Training Accuracy: 33.3125%
	Validation Loss: 1.6869; Validation Accuracy: 36.65%

	Epoch: 2
	Training Loss: 3.3629; Training Accuracy: 37.5375%
	Validation Loss: 1.6333; Validation Accuracy: 41.15%

	Epoch: 3
	Training Loss: 3.204; Training Accuracy: 41.2375%
	Validation Loss: 1.5411; Validation Accuracy: 42.9%

	Epoch: 4
	Training Loss: 3.05; Training Accuracy: 44.2375%
	Validation Loss: 1.5051; Validation Accuracy: 44.15%

	Epoch: 5
increasing learning rate of group 0 to 1.1000e-02.
	Training Loss: 2.9184; Training Accuracy: 47.4%
	Validation Loss: 1.3721; Validation Accuracy: 50.0%

	Epoch: 6
	Training Loss: 2.8085; Training Accuracy: 48.7375%
	Validation Loss: 1.4751; Validation Accuracy: 46.4%

	Epoch: 7
	Training Loss: 2.6942; Training Accuracy: 50.9625%
	Validation Loss: 1.4161; Validation Accuracy: 47.65%

	E

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.7134


[34m[1mwandb[0m: Agent Starting Run: z7065n7f with config:
[34m[1mwandb[0m: 	down_factor: 0.5
[34m[1mwandb[0m: 	down_patience: 3
[34m[1mwandb[0m: 	up_factor: 0.3
[34m[1mwandb[0m: 	up_patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1064; Training Accuracy: 23.925%
	Validation Loss: 1.8967; Validation Accuracy: 30.1%

	Epoch: 1
increasing learning rate of group 0 to 1.3000e-02.
	Training Loss: 3.5678; Training Accuracy: 33.725%
	Validation Loss: 1.7192; Validation Accuracy: 35.7%

	Epoch: 2
	Training Loss: 3.4114; Training Accuracy: 36.45%
	Validation Loss: 1.7031; Validation Accuracy: 36.35%

	Epoch: 3
increasing learning rate of group 0 to 1.6900e-02.
	Training Loss: 3.2414; Training Accuracy: 40.325%
	Validation Loss: 1.6723; Validation Accuracy: 37.95%

	Epoch: 4
	Training Loss: 3.1199; Training Accuracy: 42.8875%
	Validation Loss: 1.6038; Validation Accuracy: 42.0%

	Epoch: 5
increasing learning rate of group 0 to 2.1970e-02.
	Training Loss: 2.9599; Training Accuracy: 46.3125%
	Validation Loss: 1.4089; Validation Accuracy: 48.65%

	Epoch: 6
	Training Loss: 2.9058; Training Accuracy: 48.0125%
	Validation Loss: 1.6155; Validation Accuracy: 44.0%

	Epoch: 7
	Trai

0,1
test_accuracy,▁

0,1
test_accuracy,0.6691


[34m[1mwandb[0m: Agent Starting Run: 0r6mjer2 with config:
[34m[1mwandb[0m: 	down_factor: 0.5
[34m[1mwandb[0m: 	down_patience: 3
[34m[1mwandb[0m: 	up_factor: 0.3
[34m[1mwandb[0m: 	up_patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0854; Training Accuracy: 24.2125%
	Validation Loss: 1.8839; Validation Accuracy: 31.9%

	Epoch: 1
	Training Loss: 3.6592; Training Accuracy: 32.625%
	Validation Loss: 1.786; Validation Accuracy: 34.55%

	Epoch: 2
	Training Loss: 3.4072; Training Accuracy: 36.775%
	Validation Loss: 1.6339; Validation Accuracy: 40.85%

	Epoch: 3
	Training Loss: 3.2463; Training Accuracy: 40.8125%
	Validation Loss: 1.6075; Validation Accuracy: 39.85%

	Epoch: 4
	Training Loss: 3.0975; Training Accuracy: 43.2875%
	Validation Loss: 1.5152; Validation Accuracy: 43.6%

	Epoch: 5
	Training Loss: 2.9789; Training Accuracy: 46.15%
	Validation Loss: 1.4793; Validation Accuracy: 45.5%

	Epoch: 6
	Training Loss: 2.8368; Training Accuracy: 48.4875%
	Validation Loss: 1.426; Validation Accuracy: 49.4%

	Epoch: 7
increasing learning rate of group 0 to 1.3000e-02.
	Training Loss: 2.7348; Training Accuracy: 50.7%
	Validation Loss: 1.3831; Validation Accuracy: 51.2%

	Epoc

0,1
test_accuracy,▁

0,1
test_accuracy,0.717


[34m[1mwandb[0m: Agent Starting Run: ncv9xgwo with config:
[34m[1mwandb[0m: 	down_factor: 0.5
[34m[1mwandb[0m: 	down_patience: 3
[34m[1mwandb[0m: 	up_factor: 0.3
[34m[1mwandb[0m: 	up_patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0911; Training Accuracy: 24.15%
	Validation Loss: 1.9603; Validation Accuracy: 29.85%

	Epoch: 1
	Training Loss: 3.59; Training Accuracy: 33.5125%
	Validation Loss: 1.7592; Validation Accuracy: 33.4%

	Epoch: 2
	Training Loss: 3.366; Training Accuracy: 37.8375%
	Validation Loss: 1.6535; Validation Accuracy: 40.45%

	Epoch: 3
	Training Loss: 3.206; Training Accuracy: 40.7%
	Validation Loss: 1.6805; Validation Accuracy: 39.55%

	Epoch: 4
	Training Loss: 3.0695; Training Accuracy: 44.2875%
	Validation Loss: 1.4602; Validation Accuracy: 46.9%

	Epoch: 5
	Training Loss: 2.9421; Training Accuracy: 46.5625%
	Validation Loss: 1.4606; Validation Accuracy: 47.95%

	Epoch: 6
	Training Loss: 2.8157; Training Accuracy: 49.425%
	Validation Loss: 1.3919; Validation Accuracy: 50.85%

	Epoch: 7
	Training Loss: 2.6408; Training Accuracy: 52.6%
	Validation Loss: 1.3604; Validation Accuracy: 51.45%

	Epoch: 8
	Training Loss: 2.5304; Training Accuracy: 55.0

0,1
test_accuracy,▁

0,1
test_accuracy,0.7068


[34m[1mwandb[0m: Agent Starting Run: nepox9lv with config:
[34m[1mwandb[0m: 	down_factor: 0.5
[34m[1mwandb[0m: 	down_patience: 3
[34m[1mwandb[0m: 	up_factor: 0.5
[34m[1mwandb[0m: 	up_patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.069; Training Accuracy: 24.325%
	Validation Loss: 1.8272; Validation Accuracy: 31.65%

	Epoch: 1
increasing learning rate of group 0 to 1.5000e-02.
	Training Loss: 3.5932; Training Accuracy: 33.4375%
	Validation Loss: 1.7031; Validation Accuracy: 36.85%

	Epoch: 2
	Training Loss: 3.4564; Training Accuracy: 36.625%
	Validation Loss: 1.6856; Validation Accuracy: 37.25%

	Epoch: 3
increasing learning rate of group 0 to 2.2500e-02.
	Training Loss: 3.2528; Training Accuracy: 40.925%
	Validation Loss: 1.5537; Validation Accuracy: 42.8%

	Epoch: 4
	Training Loss: 3.1584; Training Accuracy: 42.4375%
	Validation Loss: 1.5907; Validation Accuracy: 44.9%

	Epoch: 5
increasing learning rate of group 0 to 3.3750e-02.
	Training Loss: 2.9819; Training Accuracy: 45.5125%
	Validation Loss: 1.4647; Validation Accuracy: 45.75%

	Epoch: 6
	Training Loss: 2.9612; Training Accuracy: 46.85%
	Validation Loss: 1.4461; Validation Accuracy: 47.7%

	Epoch: 7
incre

0,1
test_accuracy,▁

0,1
test_accuracy,0.5448


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ahesvw1t with config:
[34m[1mwandb[0m: 	down_factor: 0.5
[34m[1mwandb[0m: 	down_patience: 3
[34m[1mwandb[0m: 	up_factor: 0.5
[34m[1mwandb[0m: 	up_patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 3.9731; Training Accuracy: 26.5375%
	Validation Loss: 1.8111; Validation Accuracy: 32.9%

	Epoch: 1
	Training Loss: 3.5111; Training Accuracy: 34.8625%
	Validation Loss: 1.6808; Validation Accuracy: 37.65%

	Epoch: 2
	Training Loss: 3.2993; Training Accuracy: 39.0375%
	Validation Loss: 1.6486; Validation Accuracy: 40.0%

	Epoch: 3
increasing learning rate of group 0 to 1.5000e-02.
	Training Loss: 3.1835; Training Accuracy: 41.35%
	Validation Loss: 1.5964; Validation Accuracy: 41.25%

	Epoch: 4
	Training Loss: 3.163; Training Accuracy: 42.75%
	Validation Loss: 1.6832; Validation Accuracy: 39.55%

	Epoch: 5
	Training Loss: 2.9744; Training Accuracy: 45.95%
	Validation Loss: 1.5186; Validation Accuracy: 44.4%

	Epoch: 6
	Training Loss: 2.8455; Training Accuracy: 48.0625%
	Validation Loss: 1.3909; Validation Accuracy: 49.45%

	Epoch: 7
	Training Loss: 2.6965; Training Accuracy: 51.6375%
	Validation Loss: 1.4098; Validation Accuracy: 50.1%

	E

0,1
test_accuracy,▁

0,1
test_accuracy,0.7085


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: hpr146mr with config:
[34m[1mwandb[0m: 	down_factor: 0.5
[34m[1mwandb[0m: 	down_patience: 3
[34m[1mwandb[0m: 	up_factor: 0.5
[34m[1mwandb[0m: 	up_patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1198; Training Accuracy: 23.0875%
	Validation Loss: 1.8231; Validation Accuracy: 30.2%

	Epoch: 1
	Training Loss: 3.608; Training Accuracy: 32.85%
	Validation Loss: 1.6836; Validation Accuracy: 37.85%

	Epoch: 2
	Training Loss: 3.3719; Training Accuracy: 37.6875%
	Validation Loss: 1.6682; Validation Accuracy: 37.0%

	Epoch: 3
	Training Loss: 3.2311; Training Accuracy: 40.775%
	Validation Loss: 1.5756; Validation Accuracy: 41.15%

	Epoch: 4
	Training Loss: 3.095; Training Accuracy: 43.3625%
	Validation Loss: 1.4951; Validation Accuracy: 45.25%

	Epoch: 5
	Training Loss: 2.9433; Training Accuracy: 45.825%
	Validation Loss: 1.5355; Validation Accuracy: 44.15%

	Epoch: 6
	Training Loss: 2.8128; Training Accuracy: 49.175%
	Validation Loss: 1.3997; Validation Accuracy: 49.2%

	Epoch: 7
	Training Loss: 2.7291; Training Accuracy: 50.875%
	Validation Loss: 1.4247; Validation Accuracy: 46.95%

	Epoch: 8
	Training Loss: 2.6035; Training Accuracy: 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.678


[34m[1mwandb[0m: Agent Starting Run: sek4cw2c with config:
[34m[1mwandb[0m: 	down_factor: 0.5
[34m[1mwandb[0m: 	down_patience: 5
[34m[1mwandb[0m: 	up_factor: 0.1
[34m[1mwandb[0m: 	up_patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0058; Training Accuracy: 25.55%
	Validation Loss: 1.7995; Validation Accuracy: 30.8%

	Epoch: 1
increasing learning rate of group 0 to 1.1000e-02.
	Training Loss: 3.5763; Training Accuracy: 33.8125%
	Validation Loss: 1.7987; Validation Accuracy: 38.0%

	Epoch: 2
	Training Loss: 3.3765; Training Accuracy: 37.925%
	Validation Loss: 1.637; Validation Accuracy: 40.3%

	Epoch: 3
increasing learning rate of group 0 to 1.2100e-02.
	Training Loss: 3.1768; Training Accuracy: 41.975%
	Validation Loss: 1.6001; Validation Accuracy: 41.1%

	Epoch: 4
	Training Loss: 3.0815; Training Accuracy: 44.3625%
	Validation Loss: 1.4438; Validation Accuracy: 48.0%

	Epoch: 5
	Training Loss: 2.9204; Training Accuracy: 46.875%
	Validation Loss: 1.6469; Validation Accuracy: 41.4%

	Epoch: 6
	Training Loss: 2.8101; Training Accuracy: 49.2875%
	Validation Loss: 1.4354; Validation Accuracy: 49.85%

	Epoch: 7
increasing learning rate of group 0 to 1.3310e-02.
	Trainin

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.6688


[34m[1mwandb[0m: Agent Starting Run: r6gks750 with config:
[34m[1mwandb[0m: 	down_factor: 0.5
[34m[1mwandb[0m: 	down_patience: 5
[34m[1mwandb[0m: 	up_factor: 0.1
[34m[1mwandb[0m: 	up_patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0621; Training Accuracy: 23.9375%
	Validation Loss: 1.9769; Validation Accuracy: 29.9%

	Epoch: 1
	Training Loss: 3.5946; Training Accuracy: 33.275%
	Validation Loss: 1.7265; Validation Accuracy: 36.7%

	Epoch: 2
	Training Loss: 3.3906; Training Accuracy: 37.4125%
	Validation Loss: 1.732; Validation Accuracy: 39.2%

	Epoch: 3
increasing learning rate of group 0 to 1.1000e-02.
	Training Loss: 3.2407; Training Accuracy: 40.35%
	Validation Loss: 1.5348; Validation Accuracy: 44.45%

	Epoch: 4
	Training Loss: 3.1302; Training Accuracy: 43.0125%
	Validation Loss: 1.5365; Validation Accuracy: 45.25%

	Epoch: 5
	Training Loss: 3.0025; Training Accuracy: 44.8%
	Validation Loss: 1.579; Validation Accuracy: 44.3%

	Epoch: 6
	Training Loss: 2.8962; Training Accuracy: 46.625%
	Validation Loss: 1.3959; Validation Accuracy: 49.15%

	Epoch: 7
	Training Loss: 2.755; Training Accuracy: 50.0875%
	Validation Loss: 1.3412; Validation Accuracy: 50.75%

	Epoc

0,1
test_accuracy,▁

0,1
test_accuracy,0.6964


[34m[1mwandb[0m: Agent Starting Run: 3ul36ndb with config:
[34m[1mwandb[0m: 	down_factor: 0.5
[34m[1mwandb[0m: 	down_patience: 5
[34m[1mwandb[0m: 	up_factor: 0.1
[34m[1mwandb[0m: 	up_patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1786; Training Accuracy: 22.825%
	Validation Loss: 1.8898; Validation Accuracy: 30.05%

	Epoch: 1
	Training Loss: 3.6324; Training Accuracy: 32.325%
	Validation Loss: 1.789; Validation Accuracy: 34.0%

	Epoch: 2
	Training Loss: 3.4023; Training Accuracy: 37.4625%
	Validation Loss: 1.7697; Validation Accuracy: 35.4%

	Epoch: 3
	Training Loss: 3.237; Training Accuracy: 40.575%
	Validation Loss: 1.6128; Validation Accuracy: 39.4%

	Epoch: 4
	Training Loss: 3.087; Training Accuracy: 43.225%
	Validation Loss: 1.5389; Validation Accuracy: 43.15%

	Epoch: 5
increasing learning rate of group 0 to 1.1000e-02.
	Training Loss: 2.9635; Training Accuracy: 45.5875%
	Validation Loss: 1.4518; Validation Accuracy: 47.0%

	Epoch: 6
	Training Loss: 2.8686; Training Accuracy: 47.4875%
	Validation Loss: 1.4143; Validation Accuracy: 49.2%

	Epoch: 7
	Training Loss: 2.7711; Training Accuracy: 50.075%
	Validation Loss: 1.4505; Validation Accuracy: 48.1%

	Epoc

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.7124


[34m[1mwandb[0m: Agent Starting Run: 5y51jeoq with config:
[34m[1mwandb[0m: 	down_factor: 0.5
[34m[1mwandb[0m: 	down_patience: 5
[34m[1mwandb[0m: 	up_factor: 0.3
[34m[1mwandb[0m: 	up_patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1358; Training Accuracy: 23.7625%
	Validation Loss: 1.8481; Validation Accuracy: 29.25%

	Epoch: 1
increasing learning rate of group 0 to 1.3000e-02.
	Training Loss: 3.6049; Training Accuracy: 32.9125%
	Validation Loss: 1.6992; Validation Accuracy: 36.55%

	Epoch: 2
	Training Loss: 3.4245; Training Accuracy: 37.025%
	Validation Loss: 1.6081; Validation Accuracy: 41.3%

	Epoch: 3
increasing learning rate of group 0 to 1.6900e-02.
	Training Loss: 3.2644; Training Accuracy: 40.3%
	Validation Loss: 1.5773; Validation Accuracy: 42.45%

	Epoch: 4
	Training Loss: 3.1819; Training Accuracy: 41.6125%
	Validation Loss: 1.6798; Validation Accuracy: 39.95%

	Epoch: 5
	Training Loss: 3.0554; Training Accuracy: 44.225%
	Validation Loss: 1.4819; Validation Accuracy: 47.5%

	Epoch: 6
increasing learning rate of group 0 to 2.1970e-02.
	Training Loss: 2.8806; Training Accuracy: 47.55%
	Validation Loss: 1.4398; Validation Accuracy: 49.5%

	Epoch: 7
	Train

0,1
test_accuracy,▁

0,1
test_accuracy,0.6697


[34m[1mwandb[0m: Agent Starting Run: svsru4hz with config:
[34m[1mwandb[0m: 	down_factor: 0.5
[34m[1mwandb[0m: 	down_patience: 5
[34m[1mwandb[0m: 	up_factor: 0.3
[34m[1mwandb[0m: 	up_patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1169; Training Accuracy: 23.9625%
	Validation Loss: 1.8889; Validation Accuracy: 29.7%

	Epoch: 1
	Training Loss: 3.6755; Training Accuracy: 32.25%
	Validation Loss: 1.8005; Validation Accuracy: 32.6%

	Epoch: 2
	Training Loss: 3.4348; Training Accuracy: 37.0%
	Validation Loss: 1.6578; Validation Accuracy: 37.15%

	Epoch: 3
increasing learning rate of group 0 to 1.3000e-02.
	Training Loss: 3.2708; Training Accuracy: 39.95%
	Validation Loss: 1.5209; Validation Accuracy: 42.55%

	Epoch: 4
	Training Loss: 3.1658; Training Accuracy: 41.975%
	Validation Loss: 1.5829; Validation Accuracy: 42.65%

	Epoch: 5
	Training Loss: 3.0144; Training Accuracy: 45.075%
	Validation Loss: 1.4656; Validation Accuracy: 46.0%

	Epoch: 6
	Training Loss: 2.884; Training Accuracy: 47.2%
	Validation Loss: 1.4255; Validation Accuracy: 48.75%

	Epoch: 7
increasing learning rate of group 0 to 1.6900e-02.
	Training Loss: 2.7671; Training Accuracy: 50.05%
	Validation L

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.108197…

0,1
test_accuracy,▁

0,1
test_accuracy,0.7226


[34m[1mwandb[0m: Agent Starting Run: 6rsntt4y with config:
[34m[1mwandb[0m: 	down_factor: 0.5
[34m[1mwandb[0m: 	down_patience: 5
[34m[1mwandb[0m: 	up_factor: 0.3
[34m[1mwandb[0m: 	up_patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.1837; Training Accuracy: 22.7125%
	Validation Loss: 2.1448; Validation Accuracy: 29.65%

	Epoch: 1
	Training Loss: 3.6443; Training Accuracy: 32.375%
	Validation Loss: 1.7359; Validation Accuracy: 35.1%

	Epoch: 2
	Training Loss: 3.4008; Training Accuracy: 37.025%
	Validation Loss: 2.0242; Validation Accuracy: 35.0%

	Epoch: 3
	Training Loss: 3.2729; Training Accuracy: 39.875%
	Validation Loss: 1.6151; Validation Accuracy: 40.75%

	Epoch: 4
	Training Loss: 3.0983; Training Accuracy: 42.975%
	Validation Loss: 1.5634; Validation Accuracy: 43.15%

	Epoch: 5
	Training Loss: 2.9985; Training Accuracy: 45.5375%
	Validation Loss: 1.4776; Validation Accuracy: 47.6%

	Epoch: 6
	Training Loss: 2.8746; Training Accuracy: 48.3875%
	Validation Loss: 1.4286; Validation Accuracy: 47.05%

	Epoch: 7
	Training Loss: 2.8075; Training Accuracy: 49.0875%
	Validation Loss: 1.4103; Validation Accuracy: 50.25%

	Epoch: 8
	Training Loss: 2.7149; Training Accura

0,1
test_accuracy,▁

0,1
test_accuracy,0.7073


[34m[1mwandb[0m: Agent Starting Run: uhfubnad with config:
[34m[1mwandb[0m: 	down_factor: 0.5
[34m[1mwandb[0m: 	down_patience: 5
[34m[1mwandb[0m: 	up_factor: 0.5
[34m[1mwandb[0m: 	up_patience: 1




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0887; Training Accuracy: 24.0875%
	Validation Loss: 1.8843; Validation Accuracy: 29.4%

	Epoch: 1
increasing learning rate of group 0 to 1.5000e-02.
	Training Loss: 3.5876; Training Accuracy: 33.3875%
	Validation Loss: 1.7136; Validation Accuracy: 35.95%

	Epoch: 2
	Training Loss: 3.45; Training Accuracy: 35.125%
	Validation Loss: 1.6496; Validation Accuracy: 38.4%

	Epoch: 3
increasing learning rate of group 0 to 2.2500e-02.
	Training Loss: 3.2117; Training Accuracy: 41.1125%
	Validation Loss: 1.524; Validation Accuracy: 44.45%

	Epoch: 4
	Training Loss: 3.1774; Training Accuracy: 41.9625%
	Validation Loss: 1.5492; Validation Accuracy: 45.1%

	Epoch: 5
	Training Loss: 2.9691; Training Accuracy: 45.7875%
	Validation Loss: 1.6056; Validation Accuracy: 43.75%

	Epoch: 6
	Training Loss: 2.8001; Training Accuracy: 49.5%
	Validation Loss: 1.3625; Validation Accuracy: 50.4%

	Epoch: 7
increasing learning rate of group 0 to 3.3750e-02.
	Traini

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.107888…

0,1
test_accuracy,▁

0,1
test_accuracy,0.6772


[34m[1mwandb[0m: Agent Starting Run: mbb3z8gu with config:
[34m[1mwandb[0m: 	down_factor: 0.5
[34m[1mwandb[0m: 	down_patience: 5
[34m[1mwandb[0m: 	up_factor: 0.5
[34m[1mwandb[0m: 	up_patience: 3




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0719; Training Accuracy: 23.5375%
	Validation Loss: 1.8974; Validation Accuracy: 29.1%

	Epoch: 1
	Training Loss: 3.6271; Training Accuracy: 32.4125%
	Validation Loss: 1.6748; Validation Accuracy: 37.35%

	Epoch: 2
	Training Loss: 3.3901; Training Accuracy: 36.6875%
	Validation Loss: 1.7004; Validation Accuracy: 37.8%

	Epoch: 3
increasing learning rate of group 0 to 1.5000e-02.
	Training Loss: 3.2182; Training Accuracy: 41.0375%
	Validation Loss: 1.5785; Validation Accuracy: 40.9%

	Epoch: 4
	Training Loss: 3.1933; Training Accuracy: 41.8875%
	Validation Loss: 1.6172; Validation Accuracy: 39.85%

	Epoch: 5
	Training Loss: 3.0323; Training Accuracy: 44.3875%
	Validation Loss: 1.4466; Validation Accuracy: 48.0%

	Epoch: 6
	Training Loss: 2.8648; Training Accuracy: 47.7625%
	Validation Loss: 1.4539; Validation Accuracy: 46.1%

	Epoch: 7
	Training Loss: 2.7771; Training Accuracy: 50.55%
	Validation Loss: 1.3783; Validation Accuracy: 50.05%

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.7219


[34m[1mwandb[0m: Agent Starting Run: kezulrdl with config:
[34m[1mwandb[0m: 	down_factor: 0.5
[34m[1mwandb[0m: 	down_patience: 5
[34m[1mwandb[0m: 	up_factor: 0.5
[34m[1mwandb[0m: 	up_patience: 5




Num Params: 4891338


	Epoch: 0
	Training Loss: 4.0661; Training Accuracy: 24.9%
	Validation Loss: 1.8491; Validation Accuracy: 31.05%

	Epoch: 1
	Training Loss: 3.6166; Training Accuracy: 32.8125%
	Validation Loss: 1.7679; Validation Accuracy: 34.45%

	Epoch: 2
	Training Loss: 3.3855; Training Accuracy: 37.9875%
	Validation Loss: 1.6479; Validation Accuracy: 39.9%

	Epoch: 3
	Training Loss: 3.2053; Training Accuracy: 41.225%
	Validation Loss: 1.7593; Validation Accuracy: 37.95%

	Epoch: 4
	Training Loss: 3.0631; Training Accuracy: 43.9875%
	Validation Loss: 1.6377; Validation Accuracy: 41.35%

	Epoch: 5
	Training Loss: 2.989; Training Accuracy: 45.5%
	Validation Loss: 1.4896; Validation Accuracy: 45.25%

	Epoch: 6
	Training Loss: 2.8438; Training Accuracy: 48.075%
	Validation Loss: 1.3917; Validation Accuracy: 48.55%

	Epoch: 7
	Training Loss: 2.7045; Training Accuracy: 50.85%
	Validation Loss: 1.3295; Validation Accuracy: 51.75%

	Epoch: 8
	Training Loss: 2.5884; Training Accuracy: 5

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test_accuracy,▁

0,1
test_accuracy,0.6811


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.


### Best Parameters for 100 epochs

In [12]:
batch_size = 32
    
transform_train = torchvision.transforms.Compose([
  torchvision.transforms.RandomCrop(32, padding=4),
  torchvision.transforms.RandomHorizontalFlip(),
  torchvision.transforms.RandomResizedCrop(32, scale=(0.8, 1.0), ratio=(0.8, 1.2)),
  torchvision.transforms.ToTensor(),
  torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

transform_test = torchvision.transforms.Compose([
                  torchvision.transforms.ToTensor(), 
                  torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

trainset = torchvision.datasets.CIFAR10(root = './data', train = True, download = True, transform = transform_train)

# Split the train data into train and validation sets
train_size = int(0.8 * len(trainset))
val_size = len(trainset) - train_size
trainset, valset = torch.utils.data.random_split(trainset, [train_size, val_size])

testset = torchvision.datasets.CIFAR10(root = './data', train = False, download = True, transform = transform_test)

train_loader = torch.utils.data.DataLoader(trainset, batch_size = batch_size, shuffle = True)
val_loader   = torch.utils.data.DataLoader(valset, batch_size = batch_size, shuffle = True)
test_loader  = torch.utils.data.DataLoader(testset, batch_size = batch_size, shuffle = False)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
    
best_test_acc = 0

torch.cuda.empty_cache()
model = ZigZag_ResNet(BasicBlock, [2, 2, 2, 2, 2, 1, 1])
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Num Params: {num_params}\n")
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), 0.01, momentum = 0.8, weight_decay = 0.0005 , nesterov=True)

scheduler = ZigZagLROnPlateau(optimizer, mode='max', up_factor=0.3, down_factor=0.5, 
                                                     up_patience=1, down_patience=1, 
                                                     verbose = True)
train_losses_ = []
train_accuracies_ = []
valid_losses_ = []
valid_accuracies_ = []

Files already downloaded and verified
Files already downloaded and verified
Num Params: 4891338



In [13]:
epochs = 100

for epoch in range(epochs):
    print(f"\n\tEpoch: {epoch}")

    train_loss, train_accuracy, val_loss, val_accuracy = train_val(model, criterion, optimizer, 
                                                                train_loader, val_loader, device,
                                                                scheduler = scheduler, use_scheduler = True)
    train_losses_.append(train_loss)
    train_accuracies_.append(train_accuracy)
    valid_losses_.append(val_loss)
    valid_accuracies_.append(val_accuracy)
    print(f"\tTraining Loss: {round(train_loss, 4)}; Training Accuracy: {round(train_accuracy*100, 4)}%")
    print(f"\tValidation Loss: {round(val_loss, 4)}; Validation Accuracy: {round(val_accuracy*100, 4)}%")

test_loss, test_accuracy = test(model, criterion, test_loader, device)
print(f"\n\tTesting Loss: {round(test_loss, 4)}; Testing Accuracy: {round(test_accuracy*100, 4)}%")

if test_accuracy > best_test_acc:
    best_test_acc = test_accuracy

    metrics_dict = {'train_loss': train_losses_, 'train_accuracy': train_accuracies_, 
                  'valid_loss': valid_losses_, 'valid_accuracy': valid_accuracies_,
                  'test_loss': test_loss, 'test_accuracy': test_accuracy}


	Epoch: 0
	Training Loss: 3.4926; Training Accuracy: 35.61%
	Validation Loss: 1.5039; Validation Accuracy: 44.86%

	Epoch: 1
increasing learning rate of group 0 to 1.3000e-02.
	Training Loss: 2.817; Training Accuracy: 49.03%
	Validation Loss: 1.2185; Validation Accuracy: 56.55%

	Epoch: 2
	Training Loss: 2.3721; Training Accuracy: 57.6475%
	Validation Loss: 1.0677; Validation Accuracy: 62.27%

	Epoch: 3
increasing learning rate of group 0 to 1.6900e-02.
	Training Loss: 1.9618; Training Accuracy: 65.5025%
	Validation Loss: 0.9125; Validation Accuracy: 68.24%

	Epoch: 4
	Training Loss: 1.7833; Training Accuracy: 69.14%
	Validation Loss: 0.9361; Validation Accuracy: 67.61%

	Epoch: 5
	Training Loss: 1.5898; Training Accuracy: 72.58%
	Validation Loss: 0.8057; Validation Accuracy: 71.9%

	Epoch: 6
increasing learning rate of group 0 to 2.1970e-02.
	Training Loss: 1.4362; Training Accuracy: 75.5475%
	Validation Loss: 0.7226; Validation Accuracy: 75.22%

	Epoch: 7
	Training Loss: 1.4233; Tra

### Restarts

In [30]:
import numpy as np

class ZigZagLROnPlateauRestarts(torch.optim.lr_scheduler._LRScheduler):
    def __init__(self, optimizer, mode='min', lr=0.01, up_factor=1.1, down_factor=0.8, up_patience=10, down_patience=10, restart_after=30, verbose=True):
        super(ZigZagLROnPlateauRestarts).__init__()
        self.optimizer = optimizer
        self.mode = mode
        self.up_factor = 1 + up_factor
        self.down_factor = 1 - down_factor
        self.up_patience = up_patience
        self.down_patience = down_patience
        self.num_bad_epochs = 0
        self.num_good_epochs = 0
        self.best_metric = np.Inf if self.mode == 'min' else -np.Inf
        self.best_lr = lr
        self.restart_after = restart_after
        self.verbose = verbose
        self.num_epochs = 0

    def step(self, metric):
        self.num_epochs += 1
        if self.mode == 'min':
            if metric < self.best_metric:
                self.best_metric = metric
                self.best_lr = self.optimizer.param_groups[0]['lr']
                self.num_bad_epochs = 0
                self.num_good_epochs += 1
                if self.num_good_epochs > self.up_patience:
                    old_lr = self.optimizer.param_groups[0]['lr']
                    new_lr = old_lr * self.up_factor
                    self.optimizer.param_groups[0]['lr'] = new_lr
                    if self.verbose:
                        print(f"increasing learning rate of group 0 to {new_lr:.4e}.")
                    self.num_good_epochs = 0
            else:
                self.num_bad_epochs += 1
                self.num_good_epochs = 0
                if self.num_bad_epochs > self.down_patience:
                    old_lr = self.optimizer.param_groups[0]['lr']
                    new_lr = old_lr * self.down_factor
                    self.optimizer.param_groups[0]['lr'] = new_lr
                    if self.verbose:
                        print(f"reducing learning rate of group 0 to {new_lr:.4e}.")
                    self.num_bad_epochs = 0
        else:
            if metric > self.best_metric:
                self.best_metric = metric
                self.best_lr = self.optimizer.param_groups[0]['lr']
                self.num_bad_epochs = 0
                self.num_good_epochs += 1
                if self.num_good_epochs > self.up_patience:
                    old_lr = self.optimizer.param_groups[0]['lr']
                    new_lr = old_lr * self.up_factor
                    self.optimizer.param_groups[0]['lr'] = new_lr
                    if self.verbose:
                        print(f"increasing learning rate of group 0 to {new_lr:.4e}.")
                    self.num_good_epochs = 0
            else:
                self.num_bad_epochs += 1
                self.num_good_epochs = 0
                if self.num_bad_epochs > self.down_patience:
                    old_lr = self.optimizer.param_groups[0]['lr']
                    new_lr = old_lr * self.down_factor
                    self.optimizer.param_groups[0]['lr'] = new_lr
                    if self.verbose:
                        print(f"reducing learning rate of group 0 to {new_lr:.4e}.")
                    self.num_bad_epochs = 0
                    
        if self.num_epochs % self.restart_after == 0:
            self.best_metric = np.Inf if self.mode == 'min' else -np.Inf
            self.optimizer.param_groups[0]['lr'] = self.best_lr
            if self.verbose:
                print(f"restart: setting learning rate of group 0 to best learning rate value: {self.best_lr:.4e}.")

In [None]:
# transform_train = torchvision.transforms.Compose([
#     torchvision.transforms.RandomCrop(32, padding=4),
#     torchvision.transforms.RandomHorizontalFlip(),
#     torchvision.transforms.RandomRotation(15),
#     torchvision.transforms.RandomResizedCrop(32, scale=(0.8, 1.0), ratio=(0.8, 1.2)),
#     torchvision.transforms.RandomAffine(degrees=15, translate=(0.1, 0.1), scale=(0.8, 1.2)),
#     torchvision.transforms.ToTensor(),
#     torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
#     torchvision.transforms.RandomErasing(p=0.5, scale=(0.02, 0.25), ratio=(0.3, 3.3))])

In [None]:
batch_size = 32
    
transform_train = torchvision.transforms.Compose([
  torchvision.transforms.RandomCrop(32, padding=4),
  torchvision.transforms.RandomHorizontalFlip(),
  torchvision.transforms.RandomResizedCrop(32, scale=(0.8, 1.0), ratio=(0.8, 1.2)),
  torchvision.transforms.ToTensor(),
  torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

transform_test = torchvision.transforms.Compose([
                  torchvision.transforms.ToTensor(), 
                  torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

trainset = torchvision.datasets.CIFAR10(root = './data', train = True, download = True, transform = transform_train)

# Split the train data into train and validation sets
train_size = int(0.8 * len(trainset))
val_size = len(trainset) - train_size
trainset, valset = torch.utils.data.random_split(trainset, [train_size, val_size])

# train_size = int(0.2 * len(trainset))
# val_size = int(0.2 * len(valset))
# trainset, _ = torch.utils.data.random_split(trainset, [train_size, len(trainset) - train_size])
# valset, _ = torch.utils.data.random_split(valset, [val_size, len(valset) - val_size])

testset = torchvision.datasets.CIFAR10(root = './data', train = False, download = True, transform = transform_test)

train_loader = torch.utils.data.DataLoader(trainset, batch_size = batch_size, shuffle = True)
val_loader   = torch.utils.data.DataLoader(valset, batch_size = batch_size, shuffle = True)
test_loader  = torch.utils.data.DataLoader(testset, batch_size = batch_size, shuffle = False)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
    
best_test_acc = 0

torch.cuda.empty_cache()
model = ZigZag_ResNet(BasicBlock, [2, 2, 2, 2, 2, 1, 1])
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Num Params: {num_params}\n")
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), 0.01, momentum = 0.8, weight_decay = 0.0005 , nesterov=True)

scheduler = ZigZagLROnPlateauRestarts(optimizer, mode='max', lr=0.01,
                                      up_factor=0.3, down_factor=0.5, 
                                      up_patience=1, down_patience=1, 
                                      restart_after=30, verbose = True)
train_losses_ = []
train_accuracies_ = []
valid_losses_ = []
valid_accuracies_ = []

epochs = 250
for epoch in range(epochs):
    print(f"\n\tEpoch: {epoch}")

    train_loss, train_accuracy, val_loss, val_accuracy = train_val(model, criterion, optimizer, 
                                                                train_loader, val_loader, device,
                                                                scheduler = scheduler, use_scheduler = True)
    train_losses_.append(train_loss)
    train_accuracies_.append(train_accuracy)
    valid_losses_.append(val_loss)
    valid_accuracies_.append(val_accuracy)
    print(f"\tTraining Loss: {round(train_loss, 4)}; Training Accuracy: {round(train_accuracy*100, 4)}%")
    print(f"\tValidation Loss: {round(val_loss, 4)}; Validation Accuracy: {round(val_accuracy*100, 4)}%")

test_loss, test_accuracy = test(model, criterion, test_loader, device)
print(f"\n\tTesting Loss: {round(test_loss, 4)}; Testing Accuracy: {round(test_accuracy*100, 4)}%")

if test_accuracy > best_test_acc:
    best_test_acc = test_accuracy

    metrics_dict = {'train_loss': train_losses_, 'train_accuracy': train_accuracies_, 
                  'valid_loss': valid_losses_, 'valid_accuracy': valid_accuracies_,
                  'test_loss': test_loss, 'test_accuracy': test_accuracy}

Files already downloaded and verified
Files already downloaded and verified
Num Params: 4891338


	Epoch: 0
	Training Loss: 3.4899; Training Accuracy: 35.27%
	Validation Loss: 1.5092; Validation Accuracy: 44.51%

	Epoch: 1
increasing learning rate of group 0 to 1.3000e-02.
	Training Loss: 2.7668; Training Accuracy: 50.0025%
	Validation Loss: 1.2261; Validation Accuracy: 56.45%

	Epoch: 2
	Training Loss: 2.3352; Training Accuracy: 58.475%
	Validation Loss: 0.9845; Validation Accuracy: 64.66%

	Epoch: 3
increasing learning rate of group 0 to 1.6900e-02.
	Training Loss: 1.9294; Training Accuracy: 66.245%
	Validation Loss: 0.9166; Validation Accuracy: 67.94%

	Epoch: 4
	Training Loss: 1.7819; Training Accuracy: 69.1175%
	Validation Loss: 0.9183; Validation Accuracy: 68.11%

	Epoch: 5
increasing learning rate of group 0 to 2.1970e-02.
	Training Loss: 1.5816; Training Accuracy: 73.2125%
	Validation Loss: 0.773; Validation Accuracy: 73.28%

	Epoch: 6
	Training Loss: 1.5324; Training Accuracy:

In [39]:
import numpy as np

class ZigZagLROnPlateauRestarts(torch.optim.lr_scheduler._LRScheduler):
    def __init__(self, optimizer, mode='min', lr=0.01, up_factor=1.1, down_factor=0.8, up_patience=10, down_patience=10, restart_after=30, verbose=True):
        super(ZigZagLROnPlateauRestarts).__init__()
        self.optimizer = optimizer
        self.mode = mode
        self.up_factor = 1 + up_factor
        self.down_factor = 1 - down_factor
        self.up_patience = up_patience
        self.down_patience = down_patience
        self.num_bad_epochs = 0
        self.num_good_epochs = 0
        self.prev_metric = np.Inf if self.mode == 'min' else -np.Inf
        self.best_lr = lr
        self.restart_after = restart_after
        self.verbose = verbose
        self.num_epochs = 0

    def step(self, metric):
        self.num_epochs += 1
        if self.mode == 'min':
            if metric < self.prev_metric:
                self.best_lr = self.optimizer.param_groups[0]['lr']
                self.num_bad_epochs = 0
                self.num_good_epochs += 1
                if self.num_good_epochs > self.up_patience:
                    old_lr = self.optimizer.param_groups[0]['lr']
                    new_lr = old_lr * self.up_factor
                    self.optimizer.param_groups[0]['lr'] = new_lr
                    if self.verbose:
                        print(f"increasing learning rate of group 0 to {new_lr:.4e}.")
                    self.num_good_epochs = 0
            else:
                self.num_bad_epochs += 1
                self.num_good_epochs = 0
                if self.num_bad_epochs > self.down_patience:
                    old_lr = self.optimizer.param_groups[0]['lr']
                    new_lr = old_lr * self.down_factor
                    self.optimizer.param_groups[0]['lr'] = new_lr
                    if self.verbose:
                        print(f"reducing learning rate of group 0 to {new_lr:.4e}.")
                    self.num_bad_epochs = 0
        else:
            if metric > self.prev_metric:
                self.best_lr = self.optimizer.param_groups[0]['lr']
                self.num_bad_epochs = 0
                self.num_good_epochs += 1
                if self.num_good_epochs > self.up_patience:
                    old_lr = self.optimizer.param_groups[0]['lr']
                    new_lr = old_lr * self.up_factor
                    self.optimizer.param_groups[0]['lr'] = new_lr
                    if self.verbose:
                        print(f"increasing learning rate of group 0 to {new_lr:.4e}.")
                    self.num_good_epochs = 0
            else:
                self.num_bad_epochs += 1
                self.num_good_epochs = 0
                if self.num_bad_epochs > self.down_patience:
                    old_lr = self.optimizer.param_groups[0]['lr']
                    new_lr = old_lr * self.down_factor
                    self.optimizer.param_groups[0]['lr'] = new_lr
                    if self.verbose:
                        print(f"reducing learning rate of group 0 to {new_lr:.4e}.")
                    self.num_bad_epochs = 0
        self.prev_metric = metric
                    
        if self.num_epochs % self.restart_after == 0:
            self.optimizer.param_groups[0]['lr'] = self.best_lr
            if self.verbose:
                print(f"restart: setting learning rate of group 0 to best learning rate value: {self.best_lr:.4e}.")

In [40]:
def train_val(model, criterion, optimizer, train_loader, val_loader, device, scheduler = None, use_scheduler = True):
    model.train()
    train_loss = 0
    correct = 0
    count = 0
    total = 0
    for i, data in enumerate(train_loader, 0):
        image, label = data
        image = image.to(device)
        label = label.to(device)
    
        optimizer.zero_grad()
        output = model(image)
        loss = criterion(output, label)

        train_loss += loss.item()

        pred = torch.max(output.data, 1)[1]
        cur_correct = (pred == label).sum().item()
        cur_loss = loss.item()

        loss.backward()

        optimizer.step()
        
        total += label.size(0)
        correct += cur_correct
        train_loss += cur_loss

    train_accuracy = correct/total
    train_loss = train_loss/len(train_loader)
    
    model.eval()
    valid_loss = 0
    correct = 0
    count = 0
    total = 0
    for i, data in enumerate(val_loader, 0):
        image, label = data
        image = image.to(device)
        label = label.to(device)

        output = model(image)
        loss = criterion(output, label)

        pred = torch.max(output.data, 1)[1]
        cur_correct = (pred == label).sum().item()
        cur_loss = loss.item()

        total += label.size(0)
        correct += cur_correct
        valid_loss += cur_loss

    valid_accuracy = correct/total
    valid_loss = valid_loss/len(val_loader)

    val_metric = 2*valid_accuracy * (1 - valid_loss) / (valid_accuracy + (1 - valid_loss))
    train_metric = 2*train_accuracy * (1 - train_loss) / (train_accuracy + (1 - train_loss))
    val_importance = 0.7
    metric = val_importance*val_metric + (1 - val_importance)*train_metric
    
    if use_scheduler:
        scheduler.step(metric)

    return train_loss, train_accuracy, valid_loss, valid_accuracy

def test(model, criterion, dataloader, device):
    model.eval()
    test_loss = 0
    correct = 0
    count = 0
    total = 0
    for i, data in enumerate(dataloader, 0):
        image, label = data
        image = image.to(device)
        label = label.to(device)
                
        output = model(image)
        loss = criterion(output, label)

        pred = torch.max(output.data, 1)[1]
        cur_correct = (pred == label).sum().item()
        cur_loss = loss.item()
            
        total += label.size(0)
        correct += cur_correct
        test_loss += cur_loss

    accuracy = correct/total
    test_loss = test_loss/len(dataloader)

    return test_loss, accuracy

In [41]:
batch_size = 32
    
transform_train = torchvision.transforms.Compose([
  torchvision.transforms.RandomCrop(32, padding=4),
  torchvision.transforms.RandomHorizontalFlip(),
  torchvision.transforms.RandomResizedCrop(32, scale=(0.8, 1.0), ratio=(0.8, 1.2)),
  torchvision.transforms.ToTensor(),
  torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

transform_test = torchvision.transforms.Compose([
                  torchvision.transforms.ToTensor(), 
                  torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

trainset = torchvision.datasets.CIFAR10(root = './data', train = True, download = True)

# Split the train data into train and validation sets
train_size = int(0.9 * len(trainset))
val_size = len(trainset) - train_size
trainset, valset = torch.utils.data.random_split(trainset, [train_size, val_size])

trainset.dataset.transform = transform_train
valset.dataset.transform = transform_test

testset = torchvision.datasets.CIFAR10(root = './data', train = False, download = True, transform = transform_test)

train_loader = torch.utils.data.DataLoader(trainset, batch_size = batch_size, shuffle = True)
val_loader   = torch.utils.data.DataLoader(valset, batch_size = batch_size, shuffle = True)
test_loader  = torch.utils.data.DataLoader(testset, batch_size = batch_size, shuffle = False)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
    
best_test_acc = 0

torch.cuda.empty_cache()
model = ZigZag_ResNet(BasicBlock, [2, 2, 2, 2, 2, 1, 1])
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Num Params: {num_params}\n")
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), 0.01, momentum = 0.8, weight_decay = 0.0005 , nesterov=True)

scheduler = ZigZagLROnPlateauRestarts(optimizer, mode='max', lr=0.01,
                                      up_factor=0.3, down_factor=0.5, 
                                      up_patience=1, down_patience=1, 
                                      restart_after=30, verbose = True)
train_losses_ = []
train_accuracies_ = []
valid_losses_ = []
valid_accuracies_ = []

epochs = 100
for epoch in range(epochs):
    print(f"\n\tEpoch: {epoch}")

    train_loss, train_accuracy, val_loss, val_accuracy = train_val(model, criterion, optimizer, 
                                                                train_loader, val_loader, device,
                                                                scheduler = scheduler, use_scheduler = True)
    train_losses_.append(train_loss)
    train_accuracies_.append(train_accuracy)
    valid_losses_.append(val_loss)
    valid_accuracies_.append(val_accuracy)
    print(f"\tTraining Loss: {round(train_loss, 4)}; Training Accuracy: {round(train_accuracy*100, 4)}%")
    print(f"\tValidation Loss: {round(val_loss, 4)}; Validation Accuracy: {round(val_accuracy*100, 4)}%")

test_loss, test_accuracy = test(model, criterion, test_loader, device)
print(f"\n\tTesting Loss: {round(test_loss, 4)}; Testing Accuracy: {round(test_accuracy*100, 4)}%")

if test_accuracy > best_test_acc:
    best_test_acc = test_accuracy

    metrics_dict = {'train_loss': train_losses_, 'train_accuracy': train_accuracies_, 
                  'valid_loss': valid_losses_, 'valid_accuracy': valid_accuracies_,
                  'test_loss': test_loss, 'test_accuracy': test_accuracy}

Files already downloaded and verified
Files already downloaded and verified
Num Params: 4891338


	Epoch: 0
	Training Loss: 3.0757; Training Accuracy: 44.0067%
	Validation Loss: 1.4212; Validation Accuracy: 49.7%

	Epoch: 1
increasing learning rate of group 0 to 1.3000e-02.
	Training Loss: 2.1627; Training Accuracy: 61.86%
	Validation Loss: 0.9919; Validation Accuracy: 66.06%

	Epoch: 2
	Training Loss: 1.6972; Training Accuracy: 70.7533%
	Validation Loss: 0.7266; Validation Accuracy: 74.96%

	Epoch: 3
	Training Loss: 1.3047; Training Accuracy: 77.5489%
	Validation Loss: 0.6589; Validation Accuracy: 77.36%

	Epoch: 4
increasing learning rate of group 0 to 1.6900e-02.
	Training Loss: 1.0545; Training Accuracy: 82.0311%
	Validation Loss: 0.6461; Validation Accuracy: 77.68%

	Epoch: 5
	Training Loss: 0.9977; Training Accuracy: 83.0089%
	Validation Loss: 0.6354; Validation Accuracy: 78.7%

	Epoch: 6
increasing learning rate of group 0 to 2.1970e-02.
	Training Loss: 0.8067; Training Accuracy

In [42]:
import numpy as np

class ZigZagLROnPlateau(torch.optim.lr_scheduler._LRScheduler):
    def __init__(self, optimizer, mode='min', up_factor=1.1, down_factor=0.8, up_patience=10, down_patience=10, verbose=True):
        super(ZigZagLROnPlateau).__init__()
        self.optimizer = optimizer
        self.mode = mode
        self.up_factor = 1 + up_factor
        self.down_factor = 1 - down_factor
        self.up_patience = up_patience
        self.down_patience = down_patience
        self.num_bad_epochs = 0
        self.num_good_epochs = 0
        self.best_metric = np.Inf if self.mode == 'min' else -np.Inf
        self.verbose = verbose

    def step(self, metric):
        if self.mode == 'min':
            if metric < self.best_metric:
                self.best_metric = metric
                self.num_bad_epochs = 0
                self.num_good_epochs += 1
                if self.num_good_epochs > self.up_patience:
                    old_lr = self.optimizer.param_groups[0]['lr']
                    new_lr = old_lr * self.up_factor
                    self.optimizer.param_groups[0]['lr'] = new_lr
                    if self.verbose:
                        print(f"increasing learning rate of group 0 to {new_lr:.4e}.")
                    self.num_good_epochs = 0
            else:
                self.num_bad_epochs += 1
                self.num_good_epochs = 0
                if self.num_bad_epochs > self.down_patience:
                    old_lr = self.optimizer.param_groups[0]['lr']
                    new_lr = old_lr * self.down_factor
                    self.optimizer.param_groups[0]['lr'] = new_lr
                    if self.verbose:
                        print(f"reducing learning rate of group 0 to {new_lr:.4e}.")
                    self.num_bad_epochs = 0
        else:
            if metric > self.best_metric:
                self.best_metric = metric
                self.num_bad_epochs = 0
                self.num_good_epochs += 1
                if self.num_good_epochs > self.up_patience:
                    old_lr = self.optimizer.param_groups[0]['lr']
                    new_lr = old_lr * self.up_factor
                    self.optimizer.param_groups[0]['lr'] = new_lr
                    if self.verbose:
                        print(f"increasing learning rate of group 0 to {new_lr:.4e}.")
                    self.num_good_epochs = 0
            else:
                self.num_bad_epochs += 1
                self.num_good_epochs = 0
                if self.num_bad_epochs > self.down_patience:
                    old_lr = self.optimizer.param_groups[0]['lr']
                    new_lr = old_lr * self.down_factor
                    self.optimizer.param_groups[0]['lr'] = new_lr
                    if self.verbose:
                        print(f"reducing learning rate of group 0 to {new_lr:.4e}.")
                    self.num_bad_epochs = 0

In [9]:
batch_size = 32
    
transform_train = torchvision.transforms.Compose([
  torchvision.transforms.RandomCrop(32, padding=4),
  torchvision.transforms.RandomHorizontalFlip(),
  torchvision.transforms.RandomResizedCrop(32, scale=(0.8, 1.0), ratio=(0.8, 1.2)),
  torchvision.transforms.ToTensor(),
  torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

transform_test = torchvision.transforms.Compose([
                  torchvision.transforms.ToTensor(), 
                  torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

trainset = torchvision.datasets.CIFAR10(root = './data', train = True, download = True, transform = transform_train)

# Split the train data into train and validation sets
train_size = int(0.9 * len(trainset))
val_size = len(trainset) - train_size
trainset, valset = torch.utils.data.random_split(trainset, [train_size, val_size])

testset = torchvision.datasets.CIFAR10(root = './data', train = False, download = True, transform = transform_test)

train_loader = torch.utils.data.DataLoader(trainset, batch_size = batch_size, shuffle = True)
val_loader   = torch.utils.data.DataLoader(valset, batch_size = batch_size, shuffle = True)
test_loader  = torch.utils.data.DataLoader(testset, batch_size = batch_size, shuffle = False)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
    
best_test_acc = 0

torch.cuda.empty_cache()
model = ZigZag_ResNet(BasicBlock, [2, 2, 2, 2, 2, 1, 1])
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Num Params: {num_params}\n")
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), 0.01, momentum = 0.8, weight_decay = 0.0005 , nesterov=True)

scheduler = ZigZagLROnPlateau(optimizer, mode='max',
                              up_factor=0.3, down_factor=0.5, 
                              up_patience=1, down_patience=1, 
                              verbose = True)
train_losses_ = []
train_accuracies_ = []
valid_losses_ = []
valid_accuracies_ = []

epochs = 100
for epoch in range(epochs):
    print(f"\n\tEpoch: {epoch}")

    train_loss, train_accuracy, val_loss, val_accuracy = train_val(model, criterion, optimizer, 
                                                                train_loader, val_loader, device,
                                                                scheduler = scheduler, use_scheduler = True)
    train_losses_.append(train_loss)
    train_accuracies_.append(train_accuracy)
    valid_losses_.append(val_loss)
    valid_accuracies_.append(val_accuracy)
    print(f"\tTraining Loss: {round(train_loss, 4)}; Training Accuracy: {round(train_accuracy*100, 4)}%")
    print(f"\tValidation Loss: {round(val_loss, 4)}; Validation Accuracy: {round(val_accuracy*100, 4)}%")

test_loss, test_accuracy = test(model, criterion, test_loader, device)
print(f"\n\tTesting Loss: {round(test_loss, 4)}; Testing Accuracy: {round(test_accuracy*100, 4)}%")

if test_accuracy > best_test_acc:
    best_test_acc = test_accuracy

    metrics_dict = {'train_loss': train_losses_, 'train_accuracy': train_accuracies_, 
                  'valid_loss': valid_losses_, 'valid_accuracy': valid_accuracies_,
                  'test_loss': test_loss, 'test_accuracy': test_accuracy}

Files already downloaded and verified
Files already downloaded and verified
Num Params: 4891338


	Epoch: 0
	Training Loss: 3.4786; Training Accuracy: 35.8022%
	Validation Loss: 1.4736; Validation Accuracy: 45.24%

	Epoch: 1
increasing learning rate of group 0 to 1.3000e-02.
	Training Loss: 2.7384; Training Accuracy: 50.3756%
	Validation Loss: 1.1993; Validation Accuracy: 56.5%

	Epoch: 2
	Training Loss: 2.2896; Training Accuracy: 59.54%
	Validation Loss: 1.0006; Validation Accuracy: 63.72%

	Epoch: 3
increasing learning rate of group 0 to 1.6900e-02.
	Training Loss: 1.8775; Training Accuracy: 67.2556%
	Validation Loss: 1.0133; Validation Accuracy: 65.4%

	Epoch: 4
	Training Loss: 1.7355; Training Accuracy: 70.0556%
	Validation Loss: 0.8403; Validation Accuracy: 70.66%

	Epoch: 5
increasing learning rate of group 0 to 2.1970e-02.
	Training Loss: 1.5167; Training Accuracy: 74.0933%
	Validation Loss: 0.7653; Validation Accuracy: 73.84%

	Epoch: 6
	Training Loss: 1.4665; Training Accuracy

In [36]:
import numpy as np

class ZigZagLROnPlateauRestarts(torch.optim.lr_scheduler._LRScheduler):
    def __init__(self, optimizer, mode='min', up_factor=1.1, down_factor=0.8, 
                                              up_patience=10, down_patience=10, 
                                              max_lr=0.1, min_lr=0.00001,
                                              verbose=True):
        super(ZigZagLROnPlateauRestarts).__init__()
        self.optimizer = optimizer
        self.mode = mode
        self.up_factor = 1 + up_factor
        self.down_factor = 1 - down_factor
        self.up_patience = up_patience
        self.down_patience = down_patience
        self.num_bad_epochs = 0
        self.num_good_epochs = 0
        self.prev_metric = np.Inf if self.mode == 'min' else -np.Inf
        self.best_lr = self.optimizer.param_groups[0]['lr']
        self.max_lr = max_lr
        self.min_lr = min_lr
        self.verbose = verbose

    def step(self, metric):
        if self.mode == 'min':
            if metric < self.prev_metric:
                #self.best_metric = metric
                self.best_lr = self.optimizer.param_groups[0]['lr']
                self.num_bad_epochs = 0
                self.num_good_epochs += 1
                if self.num_good_epochs > self.up_patience:
                    old_lr = self.optimizer.param_groups[0]['lr']
                    new_lr = old_lr * self.up_factor
                    if new_lr <= self.min_lr or new_lr >= self.max_lr:
                        new_lr = self.best_lr
                    self.optimizer.param_groups[0]['lr'] = new_lr
                    if self.verbose:
                        print(f"increasing learning rate of group 0 to {new_lr:.4e}.")
                    self.num_good_epochs = 0
            else:
                self.num_bad_epochs += 1
                self.num_good_epochs = 0
                if self.num_bad_epochs > self.down_patience:
                    old_lr = self.optimizer.param_groups[0]['lr']
                    new_lr = old_lr * self.down_factor
                    if new_lr <= self.min_lr or new_lr >= self.max_lr:
                        new_lr = self.best_lr
                    self.optimizer.param_groups[0]['lr'] = new_lr
                    if self.verbose:
                        print(f"reducing learning rate of group 0 to {new_lr:.4e}.")
                    self.num_bad_epochs = 0
        else:
            if metric > self.prev_metric:
                #self.best_metric = metric
                self.best_lr = self.optimizer.param_groups[0]['lr']
                self.num_bad_epochs = 0
                self.num_good_epochs += 1
                if self.num_good_epochs > self.up_patience:
                    old_lr = self.optimizer.param_groups[0]['lr']
                    new_lr = old_lr * self.up_factor
                    if new_lr <= self.min_lr or new_lr >= self.max_lr:
                        new_lr = self.best_lr
                    self.optimizer.param_groups[0]['lr'] = new_lr
                    if self.verbose:
                        print(f"increasing learning rate of group 0 to {new_lr:.4e}.")
                    self.num_good_epochs = 0
            else:
                self.num_bad_epochs += 1
                self.num_good_epochs = 0
                if self.num_bad_epochs > self.down_patience:
                    old_lr = self.optimizer.param_groups[0]['lr']
                    new_lr = old_lr * self.down_factor
                    if new_lr <= self.min_lr or new_lr >= self.max_lr:
                        new_lr = self.best_lr
                    self.optimizer.param_groups[0]['lr'] = new_lr
                    if self.verbose:
                        print(f"reducing learning rate of group 0 to {new_lr:.4e}.")
                    self.num_bad_epochs = 0
        self.prev_metric = metric

In [37]:
def train_val(model, criterion, optimizer, train_loader, val_loader, device, scheduler = None, use_scheduler = True):
    model.train()
    train_loss = 0
    correct = 0
    count = 0
    total = 0
    for i, data in enumerate(train_loader, 0):
        image, label = data
        image = image.to(device)
        label = label.to(device)
    
        optimizer.zero_grad()
        output = model(image)
        loss = criterion(output, label)

        train_loss += loss.item()

        pred = torch.max(output.data, 1)[1]
        cur_correct = (pred == label).sum().item()
        cur_loss = loss.item()

        loss.backward()

        optimizer.step()
        
        total += label.size(0)
        correct += cur_correct
        train_loss += cur_loss

    train_accuracy = correct/total
    train_loss = train_loss/len(train_loader)
    
    model.eval()
    valid_loss = 0
    correct = 0
    count = 0
    total = 0
    for i, data in enumerate(val_loader, 0):
        image, label = data
        image = image.to(device)
        label = label.to(device)

        output = model(image)
        loss = criterion(output, label)

        pred = torch.max(output.data, 1)[1]
        cur_correct = (pred == label).sum().item()
        cur_loss = loss.item()

        total += label.size(0)
        correct += cur_correct
        valid_loss += cur_loss

    valid_accuracy = correct/total
    valid_loss = valid_loss/len(val_loader)

    val_loss_sigmoid = torch.sigmoid(torch.tensor(valid_loss))
    train_loss_sigmoid = torch.sigmoid(torch.tensor(train_loss))

    val_metric = valid_accuracy * (1 - val_loss_sigmoid)
    train_metric = train_accuracy * (1 - train_loss_sigmoid)
    val_importance = 0.5
    metric = val_importance * val_metric + (1 - val_importance) * (1- train_metric)

    print(f"\tmetric: {round(metric.item(), 4)}")
    if use_scheduler:
        scheduler.step(metric)

    return train_loss, train_accuracy, valid_loss, valid_accuracy

def test(model, criterion, dataloader, device):
    model.eval()
    test_loss = 0
    correct = 0
    count = 0
    total = 0
    for i, data in enumerate(dataloader, 0):
        image, label = data
        image = image.to(device)
        label = label.to(device)
                
        output = model(image)
        loss = criterion(output, label)

        pred = torch.max(output.data, 1)[1]
        cur_correct = (pred == label).sum().item()
        cur_loss = loss.item()
            
        total += label.size(0)
        correct += cur_correct
        test_loss += cur_loss

    accuracy = correct/total
    test_loss = test_loss/len(dataloader)

    return test_loss, accuracy

In [38]:
batch_size = 32
    
transform_train = torchvision.transforms.Compose([
  torchvision.transforms.RandomCrop(32, padding=4),
  torchvision.transforms.RandomHorizontalFlip(),
  torchvision.transforms.RandomResizedCrop(32, scale=(0.8, 1.0), ratio=(0.8, 1.2)),
  torchvision.transforms.ToTensor(),
  torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

transform_test = torchvision.transforms.Compose([
                  torchvision.transforms.ToTensor(), 
                  torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

trainset = torchvision.datasets.CIFAR10(root = './data', train = True, download = True, transform = transform_train)

# Split the train data into train and validation sets
train_size = int(0.9 * len(trainset))
val_size = len(trainset) - train_size
trainset, valset = torch.utils.data.random_split(trainset, [train_size, val_size])

testset = torchvision.datasets.CIFAR10(root = './data', train = False, download = True, transform = transform_test)

train_loader = torch.utils.data.DataLoader(trainset, batch_size = batch_size, shuffle = True)
val_loader   = torch.utils.data.DataLoader(valset, batch_size = batch_size, shuffle = True)
test_loader  = torch.utils.data.DataLoader(testset, batch_size = batch_size, shuffle = False)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
    
best_test_acc = 0

torch.cuda.empty_cache()
model = ZigZag_ResNet(BasicBlock, [2, 2, 2, 2, 2, 1, 1])
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Num Params: {num_params}\n")
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), 0.01, momentum = 0.8, weight_decay = 0.0005 , nesterov=True)

scheduler = ZigZagLROnPlateauRestarts(optimizer, mode='max',
                                      up_factor=0.3, down_factor=0.5, 
                                      up_patience=1, down_patience=1, 
                                      max_lr=0.1, min_lr=0.00001,
                                      verbose = True)
train_losses_ = []
train_accuracies_ = []
valid_losses_ = []
valid_accuracies_ = []

epochs = 100
for epoch in range(epochs):
    print(f"\n\tEpoch: {epoch}")

    train_loss, train_accuracy, val_loss, val_accuracy = train_val(model, criterion, optimizer, 
                                                                train_loader, val_loader, device,
                                                                scheduler = scheduler, use_scheduler = True)
    train_losses_.append(train_loss)
    train_accuracies_.append(train_accuracy)
    valid_losses_.append(val_loss)
    valid_accuracies_.append(val_accuracy)
    print(f"\tTraining Loss: {round(train_loss, 4)}; Training Accuracy: {round(train_accuracy*100, 4)}%")
    print(f"\tValidation Loss: {round(val_loss, 4)}; Validation Accuracy: {round(val_accuracy*100, 4)}%")

test_loss, test_accuracy = test(model, criterion, test_loader, device)
print(f"\n\tTesting Loss: {round(test_loss, 4)}; Testing Accuracy: {round(test_accuracy*100, 4)}%")

if test_accuracy > best_test_acc:
    best_test_acc = test_accuracy

    metrics_dict = {'train_loss': train_losses_, 'train_accuracy': train_accuracies_, 
                  'valid_loss': valid_losses_, 'valid_accuracy': valid_accuracies_,
                  'test_loss': test_loss, 'test_accuracy': test_accuracy}

Files already downloaded and verified
Files already downloaded and verified
Num Params: 4891338


	Epoch: 0
	metric: 0.516
	Training Loss: 3.4402; Training Accuracy: 36.6133%
	Validation Loss: 1.94; Validation Accuracy: 34.54%

	Epoch: 1
	metric: 0.5387
increasing learning rate of group 0 to 1.3000e-02.
	Training Loss: 2.6793; Training Accuracy: 51.6444%
	Validation Loss: 1.3224; Validation Accuracy: 52.58%

	Epoch: 2
	metric: 0.5332
	Training Loss: 2.2076; Training Accuracy: 60.8933%
	Validation Loss: 1.2521; Validation Accuracy: 56.96%

	Epoch: 3
	metric: 0.5583
	Training Loss: 1.8518; Training Accuracy: 67.6711%
	Validation Loss: 0.8671; Validation Accuracy: 70.44%

	Epoch: 4
	metric: 0.5582
	Training Loss: 1.6209; Training Accuracy: 72.0022%
	Validation Loss: 0.7524; Validation Accuracy: 73.44%

	Epoch: 5
	metric: 0.5508
reducing learning rate of group 0 to 6.5000e-03.
	Training Loss: 1.4461; Training Accuracy: 75.2311%
	Validation Loss: 0.7249; Validation Accuracy: 75.06%

	Epoch:

In [17]:
epochs = 100
for epoch in range(epochs):
    print(f"\n\tEpoch: {epoch + 100}")

    train_loss, train_accuracy, val_loss, val_accuracy = train_val(model, criterion, optimizer, 
                                                                train_loader, val_loader, device,
                                                                scheduler = scheduler, use_scheduler = True)
    train_losses_.append(train_loss)
    train_accuracies_.append(train_accuracy)
    valid_losses_.append(val_loss)
    valid_accuracies_.append(val_accuracy)
    print(f"\tTraining Loss: {round(train_loss, 4)}; Training Accuracy: {round(train_accuracy*100, 4)}%")
    print(f"\tValidation Loss: {round(val_loss, 4)}; Validation Accuracy: {round(val_accuracy*100, 4)}%")

test_loss, test_accuracy = test(model, criterion, test_loader, device)
print(f"\n\tTesting Loss: {round(test_loss, 4)}; Testing Accuracy: {round(test_accuracy*100, 4)}%")

if test_accuracy > best_test_acc:
    best_test_acc = test_accuracy

    metrics_dict = {'train_loss': train_losses_, 'train_accuracy': train_accuracies_, 
                  'valid_loss': valid_losses_, 'valid_accuracy': valid_accuracies_,
                  'test_loss': test_loss, 'test_accuracy': test_accuracy}


	Epoch: 100
	Training Loss: 0.157; Training Accuracy: 97.3556%
	Validation Loss: 0.2582; Validation Accuracy: 91.84%

	Epoch: 101
reducing learning rate of group 0 to 4.0625e-04.
	Training Loss: 0.1477; Training Accuracy: 97.5133%
	Validation Loss: 0.2515; Validation Accuracy: 92.36%

	Epoch: 102
	Training Loss: 0.1343; Training Accuracy: 97.7467%
	Validation Loss: 0.2398; Validation Accuracy: 92.44%

	Epoch: 103
reducing learning rate of group 0 to 2.0313e-04.
	Training Loss: 0.1302; Training Accuracy: 97.8378%
	Validation Loss: 0.2473; Validation Accuracy: 92.26%

	Epoch: 104
	Training Loss: 0.1235; Training Accuracy: 97.9489%
	Validation Loss: 0.2448; Validation Accuracy: 92.42%

	Epoch: 105
reducing learning rate of group 0 to 1.0156e-04.
	Training Loss: 0.1192; Training Accuracy: 97.98%
	Validation Loss: 0.2453; Validation Accuracy: 92.1%

	Epoch: 106
	Training Loss: 0.1171; Training Accuracy: 98.1067%
	Validation Loss: 0.2531; Validation Accuracy: 92.34%

	Epoch: 107
reducing le

KeyboardInterrupt: 