In [None]:
# Edit this path for your setup: switch the working directory to the repository's
# location inside your Google Drive so that the local `models` module and the
# relative `./data` and `./checkpoint` paths used later resolve there.
# NOTE(review): the path lives under /content/drive, which is mounted by the
# `drive.mount` cell that follows — on a fresh runtime run that cell first.
%cd /content/drive/MyDrive/fall22_dl_mini_project-master

/content/drive/MyDrive/fall22_dl_mini_project-master


In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the repository path used by `%cd` lives under it.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn

import torchvision
import torchvision.transforms as transforms
from torchsummary import summary
from tqdm.notebook import tqdm
import os

In [None]:
from models import * 

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
# `device` is read as a global by `train` and `test`.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# Print a layer-by-layer summary of the baseline ResNet18 for a 3x32x32 input.
# `ResNet18` is not defined in this notebook; presumably it comes from `from models import *`.
summary(ResNet18().to(device), (3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           1,728
       BatchNorm2d-2           [-1, 64, 32, 32]             128
            Conv2d-3           [-1, 64, 32, 32]          36,864
       BatchNorm2d-4           [-1, 64, 32, 32]             128
            Conv2d-5           [-1, 64, 32, 32]          36,864
       BatchNorm2d-6           [-1, 64, 32, 32]             128
        BasicBlock-7           [-1, 64, 32, 32]               0
            Conv2d-8           [-1, 64, 32, 32]          36,864
       BatchNorm2d-9           [-1, 64, 32, 32]             128
           Conv2d-10           [-1, 64, 32, 32]          36,864
      BatchNorm2d-11           [-1, 64, 32, 32]             128
       BasicBlock-12           [-1, 64, 32, 32]               0
           Conv2d-13          [-1, 128, 16, 16]          73,728
      BatchNorm2d-14          [-1, 128,

In [None]:
class BasicBlock(nn.Module):
    """Residual block: two 3x3 conv + batch-norm stages plus a skip connection.

    The skip path is an empty ``nn.Sequential`` (identity) unless the block
    changes the stride or the channel count; in that case a 1x1 conv followed
    by batch-norm projects the input so both branches can be summed.

    Args:
        in_planes: number of input channels.
        planes: number of channels produced by both 3x3 convolutions.
        stride: stride of the first convolution (and of the projection, if any).
    """

    # Multiplier applied to `planes` to get the channel count of the shortcut projection.
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        # Main branch; only the first convolution applies `stride`.
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # Skip branch: projection only when the shapes of the two branches would differ.
        needs_projection = stride != 1 or in_planes != out_planes
        if needs_projection:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        hidden = F.relu(self.bn1(self.conv1(x)))
        hidden = self.bn2(self.conv2(hidden))
        # The shortcut is evaluated after the main branch, then the two are summed.
        return F.relu(hidden + self.shortcut(x))

In [None]:
# Modified ResNet-18: the number of channels in every layer is halved (the stem and the four stages use 32/64/128/256 channels).

class ModifiedResNet(nn.Module):
    """ResNet-style classifier whose stem and four stages use 32/64/128/256 channels.

    Args:
        block: residual block class, called as ``block(in_planes, planes, stride)``
            and exposing an integer ``expansion`` class attribute.
        num_blocks: sequence of four ints, the number of blocks in each stage.
        num_classes: size of the output logit vector.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super(ModifiedResNet, self).__init__()
        # Running input-channel count; `_make_layer` advances it after every block.
        self.in_planes = 32

        self.conv1 = nn.Conv2d(3, 32, kernel_size=3,stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        # Stages 2-4 halve the spatial resolution (stride 2) while doubling the width.
        self.layer1 = self._make_layer(block, 32, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 64, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 128, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 256, num_blocks[3], stride=2)
        self.linear = nn.Linear(256*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage: `num_blocks` blocks, only the first one using `stride`."""
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        # Distinct loop name so the `stride` argument is not shadowed.
        for block_stride in strides:
            layers.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        """Map a batch of 3-channel images to ``(batch, num_classes)`` logits."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # Global average pooling. For 32x32 inputs the final map is 4x4, so this
        # equals the previous fixed `avg_pool2d(out, 4)`; unlike the fixed window
        # it also yields a 1x1 map for other input resolutions, keeping the
        # flattened feature size equal to what `self.linear` expects.
        out = F.adaptive_avg_pool2d(out, 1)
        out = torch.flatten(out, 1)
        out = self.linear(out)
        return out

In [None]:
# Instantiate the half-width network with two BasicBlocks in each of the four stages.
modified_model = ModifiedResNet(BasicBlock, [2, 2, 2, 2])

In [None]:
# Print the layer-by-layer summary of the modified model for a 3x32x32 input,
# for comparison with the ResNet18 summary above.
summary(modified_model.to(device), (3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 32, 32]             864
       BatchNorm2d-2           [-1, 32, 32, 32]              64
            Conv2d-3           [-1, 32, 32, 32]           9,216
       BatchNorm2d-4           [-1, 32, 32, 32]              64
            Conv2d-5           [-1, 32, 32, 32]           9,216
       BatchNorm2d-6           [-1, 32, 32, 32]              64
        BasicBlock-7           [-1, 32, 32, 32]               0
            Conv2d-8           [-1, 32, 32, 32]           9,216
       BatchNorm2d-9           [-1, 32, 32, 32]              64
           Conv2d-10           [-1, 32, 32, 32]           9,216
      BatchNorm2d-11           [-1, 32, 32, 32]              64
       BasicBlock-12           [-1, 32, 32, 32]               0
           Conv2d-13           [-1, 64, 16, 16]          18,432
      BatchNorm2d-14           [-1, 64,

# Load data

In [None]:
import sklearn

In [None]:
# Per-channel mean / std used to normalise the RGB inputs. Defined once so the
# training and evaluation pipelines cannot drift apart.
# NOTE(review): values kept exactly as in the original notebook; presumably
# CIFAR-10 training-set statistics — confirm if they are ever recomputed.
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2023, 0.1994, 0.2010)

# Training pipeline: random 32x32 crop after 4-pixel padding and random
# horizontal flip as augmentation, then tensor conversion and normalisation.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
])

# Evaluation pipeline: no augmentation, same normalisation as training.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
])

In [None]:
# Training split with the augmenting transform.
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform_train,
)
# The same training split again, but with the test-time transform: used as the
# validation view of the held-out fold indices in `train_from_scratch`.
validset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform_test,
)
# Official test split with the test-time transform.
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform_test,
)

# Human-readable class names (presumably in label-index order — TODO confirm);
# not referenced elsewhere in the visible notebook.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Build the half-width network and move it to the selected device.
# NOTE(review): `train_from_scratch` and `train_all` below construct their own
# model instances, so this global is only used by the optimizer cell that follows.
model = ModifiedResNet(BasicBlock, [2, 2, 2, 2]).to(device)
if device == 'cuda':
    # Let cuDNN benchmark convolution algorithms, and wrap the model in DataParallel.
    cudnn.benchmark = True
    model = torch.nn.DataParallel(model)

In [None]:
# Loss function; `train` and `test` read `criterion` as a global.
criterion = nn.CrossEntropyLoss()

# NOTE(review): `train_from_scratch` and `train_all` create their own optimizer
# and scheduler (with lr passed as an argument and T_max=N_EPOCHS), so the two
# objects below are not the ones used by those training runs.
optimizer = optim.SGD(
    model.parameters(),
    lr=0.01,
    momentum=0.9,
    weight_decay=5e-4,
)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

# training

In [None]:
import time

def epoch_time(start_time, end_time):
    """Split the interval between two timestamps into whole minutes and seconds.

    Args:
        start_time: start timestamp in seconds (e.g. from ``time.time()``).
        end_time: end timestamp in seconds.

    Returns:
        ``(minutes, seconds)`` as ints, both truncated toward zero.
    """
    duration = end_time - start_time
    whole_minutes = int(duration / 60)
    # Seconds left over once the whole minutes are removed.
    leftover_seconds = int(duration - 60 * whole_minutes)
    return whole_minutes, leftover_seconds

In [None]:
# Training
def train(epoch, model, trainloader, optimizer):
    """Run one training epoch and report the mean batch loss and accuracy.

    Reads the module-level `device` and `criterion`.

    Args:
        epoch: epoch index; accepted for symmetry with `test`, not used here.
        model: network to optimise; switched to training mode.
        trainloader: iterable of ``(inputs, targets)`` batches supporting ``len()``.
        optimizer: optimizer stepping `model`'s parameters.

    Returns:
        ``(mean loss per batch, accuracy in percent)`` over the epoch.
    """
    model.train()
    running_loss, n_correct, n_seen = 0, 0, 0

    progress = tqdm(enumerate(trainloader), total=len(trainloader))
    for batch_idx, (inputs, targets) in progress:
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Standard step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(inputs)
        batch_loss = criterion(logits, targets)
        batch_loss.backward()
        optimizer.step()

        # Accumulate statistics for the epoch-level report.
        running_loss += batch_loss.item()
        predicted = logits.max(1)[1]
        n_seen += targets.size(0)
        n_correct += predicted.eq(targets).sum().item()

    n_batches = batch_idx + 1
    return running_loss / n_batches, 100. * n_correct / n_seen

In [None]:
def test(epoch, model, testloader, fold = None):
    """Evaluate `model`, checkpoint it on a new best accuracy, and track patience.

    Reads the module-level `device` and `criterion`, and reads/updates the
    module-level `best_acc` and `patience` (both must be initialised by the
    caller before the first call).

    Args:
        epoch: epoch index stored in the checkpoint.
        model: network to evaluate; switched to eval mode.
        testloader: iterable of ``(inputs, targets)`` batches.
        fold: fold identifier used in the checkpoint file name; ``None`` means
            the run is not part of cross-validation.

    Returns:
        ``(mean loss per batch, accuracy in percent)`` on `testloader`.

    Side effects:
        When accuracy exceeds `best_acc`, saves ``{'model', 'acc', 'epoch'}`` to
        ``./checkpoint/trainAll_ckpt.pth`` (no fold) or
        ``./checkpoint/fold{fold}_ckpt.pth``, sets `best_acc` and resets
        `patience` to 0; otherwise increments `patience`.
    """
    global best_acc
    global patience
    model.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    # Save checkpoint.
    acc = 100.*correct/total
    if acc > best_acc:
        state = {'model': model.state_dict(),'acc': acc,'epoch': epoch}
        # Compare against None explicitly: a falsy fold id such as 0 must not be
        # mistaken for "no fold" and overwrite the train-all checkpoint.
        if fold is None:
            ckpt_path = './checkpoint/trainAll_ckpt.pth'
        else:
            ckpt_path = f'./checkpoint/fold{fold}_ckpt.pth'
        # Do not rely on the caller having created the directory.
        os.makedirs('./checkpoint', exist_ok=True)
        torch.save(state, ckpt_path)
        best_acc = acc
        patience = 0
    else:
        patience += 1

    return test_loss/(batch_idx+1), acc

In [None]:
from sklearn.model_selection import KFold
from torch.utils.data import SubsetRandomSampler
import numpy as np

def train_from_scratch(N_EPOCHS, N_FOLD=5, lr=0.1, isTest=False, N_patience=5):
  """Train a fresh ModifiedResNet on every fold of a K-fold split of the training set.

  For each fold a new model, SGD optimizer and cosine-annealing scheduler are
  created. The fold's training indices are sampled from `trainset` (augmenting
  transform) and its held-out indices from `validset` (test-time transform).
  `test` saves each fold's best checkpoint to ./checkpoint/fold{k}_ckpt.pth,
  with k starting at 1.

  Args:
    N_EPOCHS: maximum number of epochs per fold; also the scheduler's T_max.
    N_FOLD: number of cross-validation folds.
    lr: initial SGD learning rate.
    isTest: when True, split only the first 1000 sample indices (quick smoke run).
    N_patience: stop a fold early once this many consecutive epochs have passed
      without a new best validation accuracy (same meaning as in `train_all`).

  Side effects:
    Resets and updates the module-level `best_acc` and `patience`; prints
    per-epoch progress; writes checkpoints through `test`.
  """
  global best_acc
  global patience # to record how many epochs are not improving

  # make the directory for storing checkpoint
  os.makedirs('checkpoint', exist_ok=True)

  # Do k-fold cross validation
  splits = KFold(n_splits = N_FOLD, shuffle = True)

  if isTest:
    # Never index past the end of the dataset.
    dataset_len = min(1000, len(trainset))
  else:
    dataset_len = len(trainset)

  for fold, (train_idx,val_idx) in enumerate(splits.split(np.arange(dataset_len))):

    print('Fold {}'.format(fold + 1))
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(val_idx)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, sampler=train_sampler)
    validloader = torch.utils.data.DataLoader(validset, batch_size=100, sampler=valid_sampler)

    # Fresh model / optimizer / scheduler so folds do not share any state.
    model = ModifiedResNet(BasicBlock, [2, 2, 2, 2])
    # model = ResNet18()
    model.to(device)
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=N_EPOCHS)
    patience = 0
    best_acc = 0
    for epoch in range(N_EPOCHS):
      start_time = time.time()
      if patience == N_patience:
        print(f"early stop at epoch {epoch}")
        break
      train_loss, train_acc = train(epoch, model, trainloader, optimizer)
      valid_loss, valid_acc = test(epoch, model, validloader, fold+1)
      scheduler.step()
      end_time = time.time()
      epoch_mins, epoch_secs = epoch_time(start_time, end_time)
      # Printed after scheduler.step(), so this is the rate for the next epoch.
      print(f'lr={scheduler.get_last_lr()}')
      print(f'Epoch: {epoch+1} | Epoch Time: {epoch_mins}m {epoch_secs}s')
      print(f"epoch{epoch+1} train loss: {train_loss} train acc: {train_acc} valid acc: {valid_acc}")

In [None]:
# Run 5-fold cross-validation, up to 10 epochs per fold, with the default lr=0.1.
train_from_scratch(10, N_FOLD=5)

Fold 1


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.09755282581475769]
Epoch: 1 | Epoch Time: 0m 33s
epoch1 train loss: 1.7628140308605595 train acc: 34.325 valid acc: 45.41


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.09045084971874738]
Epoch: 2 | Epoch Time: 0m 29s
epoch2 train loss: 1.3506524882758388 train acc: 50.145 valid acc: 51.95


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.07938926261462367]
Epoch: 3 | Epoch Time: 0m 30s
epoch3 train loss: 1.0757486155619636 train acc: 61.41 valid acc: 55.05


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.06545084971874739]
Epoch: 4 | Epoch Time: 0m 29s
epoch4 train loss: 0.8772433028815273 train acc: 68.77 valid acc: 67.64


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.05000000000000001]
Epoch: 5 | Epoch Time: 0m 29s
epoch5 train loss: 0.723375819552059 train acc: 74.5625 valid acc: 72.1


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.03454915028125264]
Epoch: 6 | Epoch Time: 0m 30s
epoch6 train loss: 0.6102735917217815 train acc: 78.5925 valid acc: 75.44


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.020610737385376353]
Epoch: 7 | Epoch Time: 0m 30s
epoch7 train loss: 0.5299826905178947 train acc: 81.385 valid acc: 81.92


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.009549150281252635]
Epoch: 8 | Epoch Time: 0m 29s
epoch8 train loss: 0.4501736573517894 train acc: 84.2675 valid acc: 84.62


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.002447174185242324]
Epoch: 9 | Epoch Time: 0m 30s
epoch9 train loss: 0.38498420132615696 train acc: 86.6775 valid acc: 85.09


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.0]
Epoch: 10 | Epoch Time: 0m 30s
epoch10 train loss: 0.34395693156856316 train acc: 88.375 valid acc: 86.36
Fold 2


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.09755282581475769]
Epoch: 1 | Epoch Time: 0m 29s
epoch1 train loss: 1.7617464107446397 train acc: 36.04 valid acc: 43.18


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.09045084971874738]
Epoch: 2 | Epoch Time: 0m 29s
epoch2 train loss: 1.3091220992822616 train acc: 52.7 valid acc: 52.36


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.07938926261462367]
Epoch: 3 | Epoch Time: 0m 29s
epoch3 train loss: 1.05671590414291 train acc: 62.4 valid acc: 60.9


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.06545084971874739]
Epoch: 4 | Epoch Time: 0m 28s
epoch4 train loss: 0.8962872392072464 train acc: 68.1925 valid acc: 66.46


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.05000000000000001]
Epoch: 5 | Epoch Time: 0m 29s
epoch5 train loss: 0.7775554837891088 train acc: 72.4925 valid acc: 71.78


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.03454915028125264]
Epoch: 6 | Epoch Time: 0m 29s
epoch6 train loss: 0.6652085453557511 train acc: 76.7125 valid acc: 74.4


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.020610737385376353]
Epoch: 7 | Epoch Time: 0m 29s
epoch7 train loss: 0.569139801275235 train acc: 80.2025 valid acc: 78.77


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.009549150281252635]
Epoch: 8 | Epoch Time: 0m 29s
epoch8 train loss: 0.49377154437497783 train acc: 82.8 valid acc: 81.97


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.002447174185242324]
Epoch: 9 | Epoch Time: 0m 28s
epoch9 train loss: 0.41682610934534775 train acc: 85.5 valid acc: 83.79


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.0]
Epoch: 10 | Epoch Time: 0m 29s
epoch10 train loss: 0.3690868544216735 train acc: 87.1525 valid acc: 85.42
Fold 3


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.09755282581475769]
Epoch: 1 | Epoch Time: 0m 29s
epoch1 train loss: 1.8239854696078803 train acc: 32.88 valid acc: 43.62


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.09045084971874738]
Epoch: 2 | Epoch Time: 0m 28s
epoch2 train loss: 1.4151252306307467 train acc: 47.71 valid acc: 49.37


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.07938926261462367]
Epoch: 3 | Epoch Time: 0m 29s
epoch3 train loss: 1.148846036329056 train acc: 58.355 valid acc: 60.03


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.06545084971874739]
Epoch: 4 | Epoch Time: 0m 29s
epoch4 train loss: 0.9449186770679852 train acc: 66.3925 valid acc: 68.72


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.05000000000000001]
Epoch: 5 | Epoch Time: 0m 28s
epoch5 train loss: 0.8039142729375309 train acc: 71.435 valid acc: 68.74


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.03454915028125264]
Epoch: 6 | Epoch Time: 0m 29s
epoch6 train loss: 0.6765224644170401 train acc: 76.2825 valid acc: 72.8


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.020610737385376353]
Epoch: 7 | Epoch Time: 0m 29s
epoch7 train loss: 0.5782908522092496 train acc: 79.7525 valid acc: 71.7


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.009549150281252635]
Epoch: 8 | Epoch Time: 0m 28s
epoch8 train loss: 0.49697604232702774 train acc: 82.7175 valid acc: 81.37


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.002447174185242324]
Epoch: 9 | Epoch Time: 0m 29s
epoch9 train loss: 0.4240366367105478 train acc: 85.335 valid acc: 84.2


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.0]
Epoch: 10 | Epoch Time: 0m 29s
epoch10 train loss: 0.38119386341244266 train acc: 86.86 valid acc: 85.33
Fold 4


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.09755282581475769]
Epoch: 1 | Epoch Time: 0m 28s
epoch1 train loss: 1.7494914135613 train acc: 35.075 valid acc: 43.72


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.09045084971874738]
Epoch: 2 | Epoch Time: 0m 29s
epoch2 train loss: 1.2908345190480874 train acc: 53.1525 valid acc: 59.42


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.07938926261462367]
Epoch: 3 | Epoch Time: 0m 28s
epoch3 train loss: 1.0021615058850175 train acc: 64.3725 valid acc: 62.68


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.06545084971874739]
Epoch: 4 | Epoch Time: 0m 29s
epoch4 train loss: 0.8147693596328028 train acc: 71.15 valid acc: 69.17


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.05000000000000001]
Epoch: 5 | Epoch Time: 0m 29s
epoch5 train loss: 0.688376859354135 train acc: 75.94 valid acc: 76.19


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.03454915028125264]
Epoch: 6 | Epoch Time: 0m 28s
epoch6 train loss: 0.587529600237886 train acc: 79.72 valid acc: 74.55


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.020610737385376353]
Epoch: 7 | Epoch Time: 0m 29s
epoch7 train loss: 0.5041637711060314 train acc: 82.59 valid acc: 81.26


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.009549150281252635]
Epoch: 8 | Epoch Time: 0m 29s
epoch8 train loss: 0.4340601046435749 train acc: 85.0325 valid acc: 83.85


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.002447174185242324]
Epoch: 9 | Epoch Time: 0m 28s
epoch9 train loss: 0.37157255206435635 train acc: 87.3225 valid acc: 86.24


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.0]
Epoch: 10 | Epoch Time: 0m 29s
epoch10 train loss: 0.3239064333728327 train acc: 88.7875 valid acc: 86.92
Fold 5


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.09755282581475769]
Epoch: 1 | Epoch Time: 0m 28s
epoch1 train loss: 1.750738659224952 train acc: 34.835 valid acc: 42.46


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.09045084971874738]
Epoch: 2 | Epoch Time: 0m 29s
epoch2 train loss: 1.3060240016196862 train acc: 52.58 valid acc: 55.57


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.07938926261462367]
Epoch: 3 | Epoch Time: 0m 29s
epoch3 train loss: 1.0091057671144747 train acc: 64.1625 valid acc: 65.44


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.06545084971874739]
Epoch: 4 | Epoch Time: 0m 28s
epoch4 train loss: 0.8180359255391568 train acc: 71.135 valid acc: 71.71


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.05000000000000001]
Epoch: 5 | Epoch Time: 0m 29s
epoch5 train loss: 0.688487681337058 train acc: 76.0375 valid acc: 74.41


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.03454915028125264]
Epoch: 6 | Epoch Time: 0m 29s
epoch6 train loss: 0.5879492909192278 train acc: 79.4825 valid acc: 78.62


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.020610737385376353]
Epoch: 7 | Epoch Time: 0m 28s
epoch7 train loss: 0.5035790384006196 train acc: 82.375 valid acc: 80.11


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.009549150281252635]
Epoch: 8 | Epoch Time: 0m 29s
epoch8 train loss: 0.4345669173204099 train acc: 84.9875 valid acc: 83.68


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.002447174185242324]
Epoch: 9 | Epoch Time: 0m 29s
epoch9 train loss: 0.3716268232360054 train acc: 87.33 valid acc: 85.76


  0%|          | 0/313 [00:00<?, ?it/s]

lr=[0.0]
Epoch: 10 | Epoch Time: 0m 28s
epoch10 train loss: 0.32826473085453717 train acc: 88.7425 valid acc: 86.87


In [None]:
def train_all(N_EPOCHS, lr=0.1, N_patience=5):
  """Train a fresh ModifiedResNet on the full training set, evaluating on `testset`.

  Args:
    N_EPOCHS: maximum number of epochs; also the cosine scheduler's T_max.
    lr: initial SGD learning rate.
    N_patience: stop early once this many consecutive epochs have passed
      without a new best accuracy as tracked by `test`.

  Side effects:
    Resets and updates the module-level `best_acc` and `patience`; prints
    per-epoch progress; `test` writes the best checkpoint to
    ./checkpoint/trainAll_ckpt.pth.

  NOTE(review): `best_acc` / `patience` are driven by accuracy on `testset`,
  so checkpoint selection and early stopping use the same data that the
  reported test accuracy comes from; consider a held-out validation split.
  """
  global best_acc
  global patience # to record how many epochs are not improving

  # make the directory for storing checkpoint
  os.makedirs('checkpoint', exist_ok=True)

  # Shuffle the training data every epoch; DataLoader's default keeps a fixed
  # order, which would feed SGD identical batches in identical order each epoch.
  trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True)
  testloader = torch.utils.data.DataLoader(testset, batch_size=100)

  model = ModifiedResNet(BasicBlock, [2, 2, 2, 2])
  # model = ResNet18()
  model.to(device)
  optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)
  scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=N_EPOCHS)
  patience = 0
  best_acc = 0
  for epoch in range(N_EPOCHS):
    start_time = time.time()
    if patience == N_patience:
      print(f"early stop at epoch {epoch}")
      break
    train_loss, train_acc = train(epoch, model, trainloader, optimizer)
    test_loss, test_acc = test(epoch, model, testloader)
    scheduler.step()
    end_time = time.time()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    # Printed after scheduler.step(), so this is the rate for the next epoch.
    print(f'lr={scheduler.get_last_lr()}')
    print(f'Epoch: {epoch+1} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f"epoch{epoch+1} train loss: {train_loss} train acc: {train_acc} test acc: {test_acc}")

In [None]:
# Train on the full training set for up to 20 epochs (lr=0.1), stopping early
# after 5 epochs without a new best accuracy.
train_all(20, lr=0.1, N_patience=5)

  0%|          | 0/391 [00:00<?, ?it/s]

lr=[0.0993844170297569]
Epoch: 1 | Epoch Time: 0m 35s
epoch1 train loss: 1.7290500518306138 train acc: 35.852 test acc: 46.2


  0%|          | 0/391 [00:00<?, ?it/s]

lr=[0.09755282581475769]
Epoch: 2 | Epoch Time: 0m 35s
epoch2 train loss: 1.2343519273621346 train acc: 55.192 test acc: 58.06


  0%|          | 0/391 [00:00<?, ?it/s]

lr=[0.0945503262094184]
Epoch: 3 | Epoch Time: 0m 35s
epoch3 train loss: 0.955575891925246 train acc: 65.788 test acc: 66.76


  0%|          | 0/391 [00:00<?, ?it/s]

lr=[0.0904508497187474]
Epoch: 4 | Epoch Time: 0m 34s
epoch4 train loss: 0.7835969999623116 train acc: 72.546 test acc: 73.5


  0%|          | 0/391 [00:00<?, ?it/s]

lr=[0.08535533905932739]
Epoch: 5 | Epoch Time: 0m 35s
epoch5 train loss: 0.6750704660592481 train acc: 76.436 test acc: 74.43


  0%|          | 0/391 [00:00<?, ?it/s]

lr=[0.07938926261462367]
Epoch: 6 | Epoch Time: 0m 35s
epoch6 train loss: 0.6050801133103383 train acc: 78.984 test acc: 70.92


  0%|          | 0/391 [00:00<?, ?it/s]

lr=[0.07269952498697735]
Epoch: 7 | Epoch Time: 0m 34s
epoch7 train loss: 0.5538519781721217 train acc: 80.856 test acc: 77.48


  0%|          | 0/391 [00:00<?, ?it/s]

lr=[0.06545084971874739]
Epoch: 8 | Epoch Time: 0m 34s
epoch8 train loss: 0.5108839621019485 train acc: 82.332 test acc: 75.34


  0%|          | 0/391 [00:00<?, ?it/s]

lr=[0.05782172325201156]
Epoch: 9 | Epoch Time: 0m 35s
epoch9 train loss: 0.4757981723379296 train acc: 83.556 test acc: 80.14


  0%|          | 0/391 [00:00<?, ?it/s]

lr=[0.05000000000000001]
Epoch: 10 | Epoch Time: 0m 35s
epoch10 train loss: 0.4370624296119451 train acc: 84.982 test acc: 81.2


  0%|          | 0/391 [00:00<?, ?it/s]

lr=[0.04217827674798848]
Epoch: 11 | Epoch Time: 0m 35s
epoch11 train loss: 0.3960897905747299 train acc: 86.32 test acc: 82.35


  0%|          | 0/391 [00:00<?, ?it/s]

lr=[0.03454915028125264]
Epoch: 12 | Epoch Time: 0m 35s
epoch12 train loss: 0.3630920758332743 train acc: 87.492 test acc: 81.29


  0%|          | 0/391 [00:00<?, ?it/s]

lr=[0.027300475013022667]
Epoch: 13 | Epoch Time: 0m 34s
epoch13 train loss: 0.32999657986261655 train acc: 88.66 test acc: 85.26


  0%|          | 0/391 [00:00<?, ?it/s]

lr=[0.020610737385376353]
Epoch: 14 | Epoch Time: 0m 35s
epoch14 train loss: 0.2876490096149542 train acc: 90.018 test acc: 85.87


  0%|          | 0/391 [00:00<?, ?it/s]

lr=[0.014644660940672629]
Epoch: 15 | Epoch Time: 0m 35s
epoch15 train loss: 0.25104796238567517 train acc: 91.36 test acc: 86.59


  0%|          | 0/391 [00:00<?, ?it/s]

lr=[0.009549150281252633]
Epoch: 16 | Epoch Time: 0m 35s
epoch16 train loss: 0.21269338351228964 train acc: 92.648 test acc: 88.45


  0%|          | 0/391 [00:00<?, ?it/s]

lr=[0.005449673790581611]
Epoch: 17 | Epoch Time: 0m 34s
epoch17 train loss: 0.1792018314075592 train acc: 93.908 test acc: 90.11


  0%|          | 0/391 [00:00<?, ?it/s]

lr=[0.0024471741852423235]
Epoch: 18 | Epoch Time: 0m 35s
epoch18 train loss: 0.1440093694135661 train acc: 95.134 test acc: 90.02


  0%|          | 0/391 [00:00<?, ?it/s]

lr=[0.0006155829702431171]
Epoch: 19 | Epoch Time: 0m 35s
epoch19 train loss: 0.12514921009083232 train acc: 95.872 test acc: 90.67


  0%|          | 0/391 [00:00<?, ?it/s]

lr=[0.0]
Epoch: 20 | Epoch Time: 0m 34s
epoch20 train loss: 0.11368617058143286 train acc: 96.294 test acc: 90.93
