In [1]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
import torchvision
import torchvision.transforms as transforms
import os
import argparse
from tqdm import tqdm
import sys
! pip install gdown
# https://drive.google.com/file/d/1TFF8h3j6KxIec7giLIarC1q0VBPBekh6/view?usp=sharing
# https://drive.google.com/file/d/1zFvAA_s-Yvt3_ogZ4RLmoWX7bsaOBW8B/view?usp=sharing
# 'https://drive.google.com/uc?export=download&id=1TFF8h3j6KxIec7giLIarC1q0VBPBekh6' ----
! gdown https://drive.google.com/uc?id=1zFvAA_s-Yvt3_ogZ4RLmoWX7bsaOBW8B
! gdown https://drive.google.com/uc?id=10sHvaXhTNZGz618QmD5gSOAjO3rMzV33
# ! gdown https://drive.google.com/file/d/1zFvAA_s-Yvt3_ogZ4RLmoWX7bsaOBW8B/view?usp=drive_link

Downloading...
From: https://drive.google.com/uc?id=1zFvAA_s-Yvt3_ogZ4RLmoWX7bsaOBW8B
To: /content/at_cifar10_resnet50_10class.pth
100% 94.4M/94.4M [00:00<00:00, 177MB/s]
Downloading...
From: https://drive.google.com/uc?id=10sHvaXhTNZGz618QmD5gSOAjO3rMzV33
To: /content/model_cifar_wrn.pt
100% 193M/193M [00:01<00:00, 100MB/s]


In [2]:

class attributes:
    def __init__(self):
        self.lr = 0.1
        self.lr_schedule = [100, 150]
        self.lr_factor = 0.1
        self.epochs = 200
        self.output = ''
        # self.model = 'MobileNetV2'
        self.model = 'WideResNet'
        # self.teacher_model = 'WideResNet' # 'ResNet50'
        self.teacher_model = 'ResNet50'
        self.teacher_path = "/content/at_cifar10_resnet50_10class.pth"
        # self.teacher_path = "/content/model_cifar_wrn.pt"
        self.temp = 30.0
        self.val_period = 1
        self.save_period = 1
        self.alpha = 1.0
        self.dataset = 'CIFAR10'

args = attributes()

device = 'cuda' if torch.cuda.is_available() else 'cpu'


In [3]:


def adjust_learning_rate(optimizer, epoch, lr):
    if epoch in args.lr_schedule:
        lr *= args.lr_factor
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr


print('==> Preparing data..')
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])
transform_test = transforms.Compose([
    transforms.ToTensor(),
])
if args.dataset == 'CIFAR10':
    trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)
    testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
    testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)
    num_classes = 10
elif args.dataset == 'CIFAR100':
    trainset = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=256, shuffle=True, num_workers=2)
    testset = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=transform_test)
    testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)
    num_classes = 100

# num_classes = 1000

==> Preparing data..
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:02<00:00, 79970765.97it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [9]:

import math
import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicBlock(nn.Module):
    def __init__(self, in_planes, out_planes, stride, dropRate=0.0):
        super(BasicBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.droprate = dropRate
        self.equalInOut = (in_planes == out_planes)
        self.convShortcut = (not self.equalInOut) and nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride,
                               padding=0, bias=False) or None
    def forward(self, x):
        if not self.equalInOut:
            x = self.relu1(self.bn1(x))
        else:
            out = self.relu1(self.bn1(x))
        out = self.relu2(self.bn2(self.conv1(out if self.equalInOut else x)))
        if self.droprate > 0:
            out = F.dropout(out, p=self.droprate, training=self.training)
        out = self.conv2(out)
        return torch.add(x if self.equalInOut else self.convShortcut(x), out)

class NetworkBlock(nn.Module):
    def __init__(self, nb_layers, in_planes, out_planes, block, stride, dropRate=0.0):
        super(NetworkBlock, self).__init__()
        self.layer = self._make_layer(block, in_planes, out_planes, nb_layers, stride, dropRate)
    def _make_layer(self, block, in_planes, out_planes, nb_layers, stride, dropRate):
        layers = []
        for i in range(int(nb_layers)):
            layers.append(block(i == 0 and in_planes or out_planes, out_planes, i == 0 and stride or 1, dropRate))
        return nn.Sequential(*layers)
    def forward(self, x):
        return self.layer(x)


class WideResNet(nn.Module):
    def __init__(self, depth, num_classes, widen_factor=1, dropRate=0.0):
        super(WideResNet, self).__init__()
        nChannels = [16, 16*widen_factor, 32*widen_factor, 64*widen_factor]
        assert((depth - 4) % 6 == 0)
        n = (depth - 4) / 6
        block = BasicBlock
        # 1st conv before any network block
        self.conv1 = nn.Conv2d(3, nChannels[0], kernel_size=3, stride=1,
                               padding=1, bias=False)
        # 1st block
        self.block1 = NetworkBlock(n, nChannels[0], nChannels[1], block, 1, dropRate)
        # 2nd block
        self.block2 = NetworkBlock(n, nChannels[1], nChannels[2], block, 2, dropRate)
        # 3rd block
        self.block3 = NetworkBlock(n, nChannels[2], nChannels[3], block, 2, dropRate)
        # global average pooling and classifier
        self.bn1 = nn.BatchNorm2d(nChannels[3])
        self.relu = nn.ReLU(inplace=True)
        self.fc = nn.Linear(nChannels[3], num_classes)
        self.nChannels = nChannels[3]

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.bias.data.zero_()
    def forward(self, x):
        out = self.conv1(x)
        out = self.block1(out)
        out = self.block2(out)
        out = self.block3(out)
        out = self.relu(self.bn1(out))
        out = F.avg_pool2d(out, 8)
        out = out.view(-1, self.nChannels)
        return self.fc(out)

In [5]:


class Block(nn.Module):
    def __init__(self, in_planes, out_planes, expansion, stride):
        super(Block, self).__init__()
        self.stride = stride

        planes = expansion * in_planes
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, groups=planes, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        self.shortcut = nn.Sequential()
        if stride == 1 and in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out = out + self.shortcut(x) if self.stride==1 else out
        return out

class MobileNetV2(nn.Module):
    #(expansion, out_planes, num_blocks, stride)
    cfg = [(1,  16, 1, 1),
           (6,  24, 2, 1),
           (6,  32, 3, 2),
           (6,  64, 4, 2),
           (6,  96, 3, 1),
           (6, 160, 3, 2),
           (6, 320, 1, 1)]

    def __init__(self, num_classes=10):
        super(MobileNetV2, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_planes=32)
        self.conv2 = nn.Conv2d(320, 1280, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(1280)
        self.linear = nn.Linear(1280, num_classes)

    def _make_layers(self, in_planes):
        layers = []
        for expansion, out_planes, num_blocks, stride in self.cfg:
            strides = [stride] + [1]*(num_blocks-1)
            for stride in strides:
                layers.append(Block(in_planes, out_planes, expansion, stride))
                in_planes = out_planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layers(out)
        out = F.relu(self.bn2(self.conv2(out)))
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


In [6]:

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

def ResNet18(num_classes=10):
    return ResNet(BasicBlock, [2,2,2,2], num_classes)

def ResNet34(num_classes=10):
    return ResNet(BasicBlock, [3,4,6,3], num_classes)

def ResNet50(num_classes=10):
    return ResNet(Bottleneck, [3,4,6,3], num_classes)

def ResNet101(num_classes=10):
    return ResNet(Bottleneck, [3,4,23,3], num_classes)

def ResNet152(num_classes=10):
    return ResNet(Bottleneck, [3,8,36,3], num_classes)


In [7]:
# !wget -O teacher.pth 'https://drive.google.com/uc?export=download&id=1TFF8h3j6KxIec7giLIarC1q0VBPBekh6'

In [10]:

class AttackPGD(nn.Module):
    def __init__(self, basic_net, config):
        super(AttackPGD, self).__init__()
        self.basic_net = basic_net
        self.step_size = config['step_size']
        self.epsilon = config['epsilon']
        self.num_steps = config['num_steps']

    def forward(self, inputs, targets):
        x = inputs.detach()
        x = x + torch.zeros_like(x).uniform_(-self.epsilon, self.epsilon)
        for i in range(self.num_steps):
            x.requires_grad_()
            with torch.enable_grad():
                loss = F.cross_entropy(self.basic_net(x), targets, size_average=False)
            grad = torch.autograd.grad(loss, [x])[0]
            x = x.detach() + self.step_size * torch.sign(grad.detach())
            x = torch.min(torch.max(x, inputs - self.epsilon), inputs + self.epsilon)
            x = torch.clamp(x, 0.0, 1.0)
        return self.basic_net(x), x


print('==> Building model..' + args.model)
class Net(nn.Module):
  def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(3, 6, 5)
    self.pool = nn.MaxPool2d(2, 2)
    self.conv2 = nn.Conv2d(6, 16, 5)
    self.fc1 = nn.Linear(16*5*5, 120)
    self.fc2 = nn.Linear(120, 84)
    self.fc3 = nn.Linear(84, 10)

  def forward(self, x):
    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))
    x = torch.flatten(x, 1)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = self.fc3(x)
    return x



if args.model == 'MobileNetV2':
    basic_net = Net()
    # basic_net = MobileNetV2(num_classes=num_classes)
elif args.model == 'WideResNet':
    basic_net = WideResNet(depth = 40, num_classes=num_classes)
elif args.model == 'ResNet18':
    basic_net = ResNet18(num_classes=num_classes)
elif args.model == 'ResNet50':
    basic_net = ResNet50(num_classes=num_classes)
basic_net = basic_net.to(device)

if args.teacher_path != '':
    if args.teacher_model == 'MobileNetV2':
        teacher_net = MobileNetV2(num_classes=num_classes)
    elif args.teacher_model == 'WideResNet':
        teacher_net = WideResNet(depth = 16, num_classes=num_classes)

        # import torch
        # import torchvision.models as models
        # teacher_net = models.wide_resnet50_2(pretrained=False)

        # # Modify the output layer to match the number of classes in your pre-trained model
        # # num_classes = 10  # Change this to the number of classes in your pre-trained model
        # teacher_net.fc = nn.Linear(teacher_net.fc.in_features, 10)
        # # teacher_net = models.wide_resnet50_2(pretrained=True)
    elif args.teacher_model == 'ResNet18':
        teacher_net = ResNet18(num_classes=num_classes)
    elif args.teacher_model == 'ResNet50':
        teacher_net = torchvision.models.resnet50(num_classes=num_classes)
        # teacher_net = ResNet50(num_classes=num_classes)
    teacher_net = teacher_net.to(device)
    for param in teacher_net.parameters():
        param.requires_grad = False

config = {
    'epsilon': 8.0 / 255,
    'num_steps': 10,
    'step_size': 2.0 / 255,
}
net = AttackPGD(basic_net, config)
if device == 'cuda':
    cudnn.benchmark = True

print('==> Loading teacher..')

def filter_state_dict(state_dict):
    from collections import OrderedDict

    if 'state_dict' in state_dict.keys():
        state_dict = state_dict['state_dict']
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        if 'sub_block' in k:
            continue
        if 'module' in k:
            new_state_dict[k[7:]] = v
        else:
            new_state_dict[k] = v
    return new_state_dict


ckpt = filter_state_dict(torch.load(args.teacher_path, map_location=device))
# ckpt = torch.load(args.teacher_path, map_location=device)


# ckpt['fc.weight'] = torch.randn(10, 2048) # replace the weights with shape (10, 2048)

# ckpt['fc.bias'] = torch.randn(10) # replace the bias with shape (10)

teacher_net.load_state_dict(ckpt)
teacher_net.eval()


# def load_state_dict_with_module_prefix(model, state_dict):
#     new_state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()}
#     model.load_state_dict(new_state_dict)
# load_state_dict_with_module_prefix(teacher_net, torch.load(args.teacher_path))

# teacher_net.load_state_dict(torch.load(args.teacher_path))
# teacher_net.eval() --> Needed?



KL_loss = nn.KLDivLoss()
XENT_loss = nn.CrossEntropyLoss()
lr = args.lr



==> Building model..WideResNet
==> Loading teacher..


In [None]:

def train(epoch, optimizer):
    net.train()
    train_loss = 0
    iterator = tqdm(trainloader, ncols=0, leave=False)
    for batch_idx, (inputs, targets) in enumerate(iterator):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs, pert_inputs = net(inputs, targets)
        teacher_outputs = teacher_net(inputs)
        basic_outputs = basic_net(inputs)
        loss = args.alpha * args.temp * args.temp * KL_loss(F.log_softmax(outputs / args.temp, dim=1),
                                                            F.softmax(teacher_outputs / args.temp, dim=1)) + (
                           1.0 - args.alpha) * XENT_loss(basic_outputs, targets)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        iterator.set_description(str(loss.item()))
        # break -> For very slow mode
    if (epoch + 1) % args.save_period == 0:
        state = {
            'net': basic_net.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        if not os.path.isdir('checkpoint/' + args.dataset + '/' + args.output + '/'):
            os.makedirs('checkpoint/' + args.dataset + '/' + args.output + '/', )
        torch.save(state, './checkpoint/' + args.dataset + '/' + args.output + '/epoch=' + str(epoch) + '.t7')
    print('Mean Training Loss:', train_loss / len(iterator))
    return train_loss


def test(epoch, optimizer):
    net.eval()
    adv_correct = 0
    natural_correct = 0
    total = 0
    with torch.no_grad():
        iterator = tqdm(testloader, ncols=0, leave=False)
        for batch_idx, (inputs, targets) in enumerate(iterator):
            inputs, targets = inputs.to(device), targets.to(device)
            adv_outputs, pert_inputs = net(inputs, targets)
            natural_outputs = basic_net(inputs)
            _, adv_predicted = adv_outputs.max(1)
            _, natural_predicted = natural_outputs.max(1)
            natural_correct += natural_predicted.eq(targets).sum().item()
            total += targets.size(0)
            adv_correct += adv_predicted.eq(targets).sum().item()
            iterator.set_description(str(adv_predicted.eq(targets).sum().item() / targets.size(0)))
            # break -> For very slow mode
    robust_acc = 100. * adv_correct / total
    natural_acc = 100. * natural_correct / total
    print('Natural acc:', natural_acc)
    print('Robust acc:', robust_acc)
    return natural_acc, robust_acc


def main():
    lr = args.lr
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=2e-4)
    for epoch in range(args.epochs):
        print('-------------\nEpoch ' + str(epoch+1))
        adjust_learning_rate(optimizer, epoch, lr)
        train_loss = train(epoch, optimizer)
        if (epoch + 1) % args.val_period == 0:
            natural_val, robust_val = test(epoch, optimizer)


if __name__ == '__main__':
    main()

-------------
Epoch 1




Mean Training Loss: 0.014017142070805096




Natural acc: 12.6
Robust acc: 8.75
-------------
Epoch 2




Mean Training Loss: 0.004876106089014379




Natural acc: 15.24
Robust acc: 12.86
-------------
Epoch 3




Mean Training Loss: 0.003456072202976555




Natural acc: 15.75
Robust acc: 13.27
-------------
Epoch 4




Mean Training Loss: 0.0027455794791717207




Natural acc: 16.42
Robust acc: 13.89
-------------
Epoch 5




Mean Training Loss: 0.002424976905472005




Natural acc: 16.1
Robust acc: 13.63
-------------
Epoch 6




Mean Training Loss: 0.002228153694142371




Natural acc: 16.72
Robust acc: 13.61
-------------
Epoch 7




Mean Training Loss: 0.0020738773034823595




Natural acc: 17.16
Robust acc: 13.97
-------------
Epoch 8




Mean Training Loss: 0.001958879514161469




Natural acc: 16.9
Robust acc: 13.78
-------------
Epoch 9




Mean Training Loss: 0.001864460875432643




Natural acc: 17.92
Robust acc: 14.92
-------------
Epoch 10




Mean Training Loss: 0.001783291704755019




Natural acc: 17.42
Robust acc: 14.74
-------------
Epoch 11




Mean Training Loss: 0.0017050072601984452




Natural acc: 17.48
Robust acc: 14.67
-------------
Epoch 12




Mean Training Loss: 0.0016381235961752283




Natural acc: 17.73
Robust acc: 15.03
-------------
Epoch 13




Mean Training Loss: 0.0015844966024708223




Natural acc: 17.6
Robust acc: 14.85
-------------
Epoch 14




Mean Training Loss: 0.0015244766464814201




Natural acc: 17.9
Robust acc: 14.66
-------------
Epoch 15




Mean Training Loss: 0.0014894049864171831




Natural acc: 17.32
Robust acc: 14.6
-------------
Epoch 16




Mean Training Loss: 0.0014386716969561337




Natural acc: 18.39
Robust acc: 15.34
-------------
Epoch 17




Mean Training Loss: 0.001387752674888019




Natural acc: 18.26
Robust acc: 15.03
-------------
Epoch 18




Mean Training Loss: 0.0013532878707765655




Natural acc: 17.49
Robust acc: 14.78
-------------
Epoch 19




Mean Training Loss: 0.0013235905829964735




Natural acc: 18.1
Robust acc: 15.22
-------------
Epoch 20




Mean Training Loss: 0.0012775033700775326




Natural acc: 18.14
Robust acc: 15.37
-------------
Epoch 21




Mean Training Loss: 0.0012556984800311838




Natural acc: 17.9
Robust acc: 15.07
-------------
Epoch 22




Mean Training Loss: 0.0012433380659674401




Natural acc: 17.67
Robust acc: 15.34
-------------
Epoch 23




Mean Training Loss: 0.0011877083681676242




Natural acc: 18.8
Robust acc: 15.62
-------------
Epoch 24




Mean Training Loss: 0.0011766491324731322




Natural acc: 18.2
Robust acc: 15.27
-------------
Epoch 25




Mean Training Loss: 0.001165459053846948




Natural acc: 17.63
Robust acc: 14.83
-------------
Epoch 26




Mean Training Loss: 0.0011505247744293336




Natural acc: 17.95
Robust acc: 15.38
-------------
Epoch 27




Mean Training Loss: 0.0011270416388704974




Natural acc: 17.24
Robust acc: 15.16
-------------
Epoch 28




Mean Training Loss: 0.0011097753539269367




Natural acc: 18.26
Robust acc: 15.77
-------------
Epoch 29




Mean Training Loss: 0.0010914889556091383




Natural acc: 17.81
Robust acc: 15.4
-------------
Epoch 30




Mean Training Loss: 0.001098081876542848




Natural acc: 18.03
Robust acc: 15.58
-------------
Epoch 31




Mean Training Loss: 0.0010566991406118931




Natural acc: 17.67
Robust acc: 15.27
-------------
Epoch 32




Mean Training Loss: 0.0010620461381755082




Natural acc: 17.55
Robust acc: 15.59
-------------
Epoch 33




Mean Training Loss: 0.0010501975945406653




Natural acc: 18.18
Robust acc: 15.62
-------------
Epoch 34




Mean Training Loss: 0.0010332915956652283




Natural acc: 18.55
Robust acc: 15.88
-------------
Epoch 35




Mean Training Loss: 0.0010423168170508807




Natural acc: 17.82
Robust acc: 15.48
-------------
Epoch 36




Mean Training Loss: 0.0010239117699341795




Natural acc: 17.97
Robust acc: 15.71
-------------
Epoch 37




Mean Training Loss: 0.001001212786099471




Natural acc: 18.08
Robust acc: 15.51
-------------
Epoch 38




Mean Training Loss: 0.000999620466259644




Natural acc: 17.59
Robust acc: 15.58
-------------
Epoch 39




Mean Training Loss: 0.0009991629510674902




Natural acc: 18.39
Robust acc: 16.0
-------------
Epoch 40




Mean Training Loss: 0.0009925421150198774




Natural acc: 18.57
Robust acc: 16.11
-------------
Epoch 41




Mean Training Loss: 0.00098410914393018




Natural acc: 18.35
Robust acc: 15.85
-------------
Epoch 42




Mean Training Loss: 0.000988635162780028




Natural acc: 17.78
Robust acc: 15.72
-------------
Epoch 43




Mean Training Loss: 0.0009630066394755889




Natural acc: 18.35
Robust acc: 16.02
-------------
Epoch 44




Mean Training Loss: 0.0009782344534280984




Natural acc: 17.81
Robust acc: 15.48
-------------
Epoch 45




Mean Training Loss: 0.000949969607205285




Natural acc: 17.24
Robust acc: 14.5
-------------
Epoch 46




Mean Training Loss: 0.0009528140363562138




Natural acc: 18.3
Robust acc: 16.0
-------------
Epoch 47




Mean Training Loss: 0.0009506311913109039




Natural acc: 19.51
Robust acc: 16.31
-------------
Epoch 48




Mean Training Loss: 0.0009298083300063925




Natural acc: 17.42
Robust acc: 15.2
-------------
Epoch 49




Mean Training Loss: 0.0009547449958622646




Natural acc: 17.83
Robust acc: 15.53
-------------
Epoch 50




Mean Training Loss: 0.0009168230760556734




Natural acc: 18.44
Robust acc: 15.71
-------------
Epoch 51




Mean Training Loss: 0.0009204868533615204




Natural acc: 18.34
Robust acc: 15.91
-------------
Epoch 52




Mean Training Loss: 0.0009188859720828721




Natural acc: 17.62
Robust acc: 15.17
-------------
Epoch 53




Mean Training Loss: 0.0008953879652854503




Natural acc: 17.82
Robust acc: 15.72
-------------
Epoch 54




Mean Training Loss: 0.0009314484450349208




Natural acc: 18.17
Robust acc: 15.76
-------------
Epoch 55




Mean Training Loss: 0.0009008385992818572




Natural acc: 18.84
Robust acc: 16.14
-------------
Epoch 56




Mean Training Loss: 0.0008947571416211593




Natural acc: 18.96
Robust acc: 16.17
-------------
Epoch 57




Mean Training Loss: 0.0008834242218416994




Natural acc: 19.31
Robust acc: 16.35
-------------
Epoch 58




Mean Training Loss: 0.0008825830322668871




Natural acc: 18.15
Robust acc: 15.87
-------------
Epoch 59




Mean Training Loss: 0.0008883547138177392




Natural acc: 19.15
Robust acc: 16.42
-------------
Epoch 60




Mean Training Loss: 0.0008802293514585137




Natural acc: 19.18
Robust acc: 16.39
-------------
Epoch 61




Mean Training Loss: 0.0009004744231257864




Natural acc: 18.22
Robust acc: 15.77
-------------
Epoch 62




Mean Training Loss: 0.0008865455000261154




Natural acc: 18.85
Robust acc: 16.21
-------------
Epoch 63




Mean Training Loss: 0.000889559755312598




Natural acc: 18.57
Robust acc: 15.93
-------------
Epoch 64




Mean Training Loss: 0.0008587002841448483




Natural acc: 18.62
Robust acc: 15.62
-------------
Epoch 65




Mean Training Loss: 0.0008890550002119864


0.11:  86% 86/100 [00:18<00:03,  4.63it/s]