<a href="https://colab.research.google.com/github/mjmaher987/Robustness---CISPA/blob/main/Results/6_RN50_WRN15_0_0_122.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
import torchvision
import torchvision.transforms as transforms
import os
import argparse
from tqdm import tqdm
import sys
! pip install gdown
# https://drive.google.com/file/d/1TFF8h3j6KxIec7giLIarC1q0VBPBekh6/view?usp=sharing
# https://drive.google.com/file/d/1zFvAA_s-Yvt3_ogZ4RLmoWX7bsaOBW8B/view?usp=sharing
# 'https://drive.google.com/uc?export=download&id=1TFF8h3j6KxIec7giLIarC1q0VBPBekh6' ----
! gdown https://drive.google.com/uc?id=1zFvAA_s-Yvt3_ogZ4RLmoWX7bsaOBW8B
! gdown https://drive.google.com/uc?id=10sHvaXhTNZGz618QmD5gSOAjO3rMzV33
# ! gdown https://drive.google.com/file/d/1zFvAA_s-Yvt3_ogZ4RLmoWX7bsaOBW8B/view?usp=drive_link

Downloading...
From: https://drive.google.com/uc?id=1zFvAA_s-Yvt3_ogZ4RLmoWX7bsaOBW8B
To: /content/at_cifar10_resnet50_10class.pth
100% 94.4M/94.4M [00:02<00:00, 34.8MB/s]
Downloading...
From: https://drive.google.com/uc?id=10sHvaXhTNZGz618QmD5gSOAjO3rMzV33
To: /content/model_cifar_wrn.pt
100% 193M/193M [00:04<00:00, 44.1MB/s]


In [2]:

class attributes:
    """Plain settings container for this notebook's training run.

    Every hyper-parameter is stored as an instance attribute so the rest of
    the notebook can read it as ``args.<name>``.
    """

    def __init__(self):
        settings = {
            # Optimiser schedule: base rate, milestone epochs, decay factor.
            'lr': 0.1,
            'lr_schedule': [100, 150],
            'lr_factor': 0.1,
            'epochs': 200,
            # Sub-directory (under checkpoint/<dataset>/) for saved weights.
            'output': '',
            # Student architecture; 'MobileNetV2' is the other value tried.
            'model': 'WideResNet',
            # Teacher architecture; 'WideResNet' is the other value tried.
            'teacher_model': 'ResNet50',
            # Teacher weights; "/content/model_cifar_wrn.pt" is the alternative file.
            'teacher_path': "/content/at_cifar10_resnet50_10class.pth",
            # Softmax temperature used in the distillation term.
            'temp': 30.0,
            # Validate / checkpoint every N epochs.
            'val_period': 1,
            'save_period': 1,
            # Weight of the distillation term; (1 - alpha) weights cross-entropy.
            'alpha': 0.0,
            'dataset': 'CIFAR10',
        }
        for name, value in settings.items():
            setattr(self, name, value)

# Single shared settings object; later cells read hyper-parameters from it.
args = attributes()

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'


In [3]:


def adjust_learning_rate(optimizer, epoch, lr):
    """Apply the step-decay schedule from ``args`` at milestone epochs.

    Args:
        optimizer: object exposing ``param_groups`` (a list of dicts with an
            ``'lr'`` entry), e.g. a ``torch.optim`` optimizer.
        epoch: zero-based index of the epoch about to be trained.
        lr: the *base* (undecayed) learning rate.

    The learning rate is only touched when ``epoch`` is listed in
    ``args.lr_schedule``. It is then set to
    ``lr * args.lr_factor ** k`` where ``k`` is the number of milestones
    reached so far. Deriving the value from the milestone count (instead of
    multiplying a by-value copy of ``lr`` once) makes every milestone decay
    further, even though the caller always passes the same base rate.
    """
    if epoch not in args.lr_schedule:
        return
    milestones_reached = sum(1 for milestone in args.lr_schedule if milestone <= epoch)
    decayed_lr = lr * args.lr_factor ** milestones_reached
    for param_group in optimizer.param_groups:
        param_group['lr'] = decayed_lr


print('==> Preparing data..')
# Training images get random-crop (with 4-pixel padding) and horizontal-flip
# augmentation; test images are only converted to tensors.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])
transform_test = transforms.Compose([
    transforms.ToTensor(),
])

# Pick the dataset class, training batch size and label count for args.dataset.
if args.dataset == 'CIFAR10':
    _dataset_cls, _train_batch_size, num_classes = torchvision.datasets.CIFAR10, 128, 10
elif args.dataset == 'CIFAR100':
    _dataset_cls, _train_batch_size, num_classes = torchvision.datasets.CIFAR100, 256, 100
else:
    # Fail here with a clear message instead of a NameError in a later cell.
    raise ValueError("Unsupported dataset %r; expected 'CIFAR10' or 'CIFAR100'" % (args.dataset,))

trainset = _dataset_cls(root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=_train_batch_size, shuffle=True, num_workers=2)
testset = _dataset_cls(root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)

# num_classes = 1000

==> Preparing data..
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:20<00:00, 8493966.91it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [15]:

import math
import torch
import torch.nn as nn
import torch.nn.functional as F


class WideBasicBlock(nn.Module):
    """Pre-activation residual block (BN -> ReLU -> conv, twice) for WideResNet.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: stride of the first 3x3 convolution (and of the 1x1 shortcut).
        dropRate: dropout probability applied between the two convolutions;
            0 disables dropout.

    When ``in_planes != out_planes`` the shortcut is a 1x1 convolution applied
    to the pre-activated input; otherwise the raw input is added back.
    """

    def __init__(self, in_planes, out_planes, stride, dropRate=0.0):
        super().__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.droprate = dropRate
        self.equalInOut = (in_planes == out_planes)
        # Projection shortcut only when the channel count changes.
        self.convShortcut = None if self.equalInOut else nn.Conv2d(
            in_planes, out_planes, kernel_size=1, stride=stride, padding=0, bias=False)

    def forward(self, x):
        preact = self.relu1(self.bn1(x))
        out = self.relu2(self.bn2(self.conv1(preact)))
        if self.droprate > 0:
            out = F.dropout(out, p=self.droprate, training=self.training)
        out = self.conv2(out)
        # Identity shortcut uses the raw input; projection uses the pre-activation.
        shortcut = x if self.equalInOut else self.convShortcut(preact)
        return torch.add(shortcut, out)


# Backward-compatible alias. A later cell defines a different ``BasicBlock``
# (the ResNet one), so WideResNet below refers to ``WideBasicBlock`` directly
# and keeps working regardless of which cell ran last.
BasicBlock = WideBasicBlock


class NetworkBlock(nn.Module):
    """A stage of ``nb_layers`` residual blocks applied sequentially.

    Only the first block sees ``in_planes`` and ``stride``; the remaining
    blocks map ``out_planes -> out_planes`` with stride 1.
    """

    def __init__(self, nb_layers, in_planes, out_planes, block, stride, dropRate=0.0):
        super().__init__()
        self.layer = self._make_layer(block, in_planes, out_planes, nb_layers, stride, dropRate)

    def _make_layer(self, block, in_planes, out_planes, nb_layers, stride, dropRate):
        layers = []
        for i in range(int(nb_layers)):
            is_first = (i == 0)
            layers.append(block(in_planes if is_first else out_planes,
                                out_planes,
                                stride if is_first else 1,
                                dropRate))
        return nn.Sequential(*layers)

    def forward(self, x):
        return self.layer(x)


class WideResNet(nn.Module):
    """Wide residual network with three stages (strides 1, 2, 2).

    Args:
        depth: total depth; must satisfy ``(depth - 4) % 6 == 0``.
        num_classes: size of the output layer.
        widen_factor: multiplier on the stage widths 16/32/64.
        dropRate: dropout probability forwarded to every block.

    ``forward`` average-pools with a fixed 8x8 window, which matches the
    feature-map size produced by 32x32 inputs.
    """

    def __init__(self, depth, num_classes, widen_factor=1, dropRate=0.0):
        super().__init__()
        nChannels = [16, 16*widen_factor, 32*widen_factor, 64*widen_factor]
        assert (depth - 4) % 6 == 0, 'depth must be of the form 6n + 4'
        n = (depth - 4) // 6  # blocks per stage (integer division)
        block = WideBasicBlock
        # 1st conv before any network block
        self.conv1 = nn.Conv2d(3, nChannels[0], kernel_size=3, stride=1,
                               padding=1, bias=False)
        # 1st block
        self.block1 = NetworkBlock(n, nChannels[0], nChannels[1], block, 1, dropRate)
        # 2nd block
        self.block2 = NetworkBlock(n, nChannels[1], nChannels[2], block, 2, dropRate)
        # 3rd block
        self.block3 = NetworkBlock(n, nChannels[2], nChannels[3], block, 2, dropRate)
        # global average pooling and classifier
        self.bn1 = nn.BatchNorm2d(nChannels[3])
        self.relu = nn.ReLU(inplace=True)
        self.fc = nn.Linear(nChannels[3], num_classes)
        self.nChannels = nChannels[3]

        # Explicit initialisation: Kaiming-normal convs, unit/zero batch-norm,
        # zero classifier bias (the classifier weight keeps its default init).
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.bias.data.zero_()

    def forward(self, x):
        out = self.conv1(x)
        out = self.block1(out)
        out = self.block2(out)
        out = self.block3(out)
        out = self.relu(self.bn1(out))
        out = F.avg_pool2d(out, 8)
        out = out.view(-1, self.nChannels)
        return self.fc(out)

In [12]:


class Block(nn.Module):
    """Inverted-residual unit: 1x1 expand -> 3x3 depthwise -> 1x1 project.

    Args:
        in_planes: input channels.
        out_planes: output channels.
        expansion: width multiplier for the hidden (depthwise) stage.
        stride: stride of the depthwise convolution.

    A residual connection is added only when ``stride == 1``; it is a 1x1
    conv + batch-norm projection when the channel count changes and the
    identity otherwise.
    """

    def __init__(self, in_planes, out_planes, expansion, stride):
        super().__init__()
        self.stride = stride
        hidden = expansion * in_planes

        # Expand.
        self.conv1 = nn.Conv2d(in_planes, hidden, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(hidden)
        # Depthwise (groups == channels).
        self.conv2 = nn.Conv2d(hidden, hidden, kernel_size=3, stride=stride, padding=1,
                               groups=hidden, bias=False)
        self.bn2 = nn.BatchNorm2d(hidden)
        # Project.
        self.conv3 = nn.Conv2d(hidden, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        if stride == 1 and in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        hidden = F.relu(self.bn1(self.conv1(x)))
        hidden = F.relu(self.bn2(self.conv2(hidden)))
        projected = self.bn3(self.conv3(hidden))
        if self.stride == 1:
            return projected + self.shortcut(x)
        return projected

class MobileNetV2(nn.Module):
    """MobileNetV2-style classifier assembled from ``Block`` units.

    ``cfg`` rows are ``(expansion, out_planes, num_blocks, stride)``; the
    stride applies to the first block of each row and later blocks use 1.
    The stem and first two rows use stride 1, and ``forward`` pools with a
    fixed 4x4 window, which matches 32x32 inputs.
    """

    #(expansion, out_planes, num_blocks, stride)
    cfg = [(1,  16, 1, 1),
           (6,  24, 2, 1),
           (6,  32, 3, 2),
           (6,  64, 4, 2),
           (6,  96, 3, 1),
           (6, 160, 3, 2),
           (6, 320, 1, 1)]

    def __init__(self, num_classes=10):
        super().__init__()
        # Stem.
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        # Inverted-residual trunk.
        self.layers = self._make_layers(in_planes=32)
        # Head.
        self.conv2 = nn.Conv2d(320, 1280, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(1280)
        self.linear = nn.Linear(1280, num_classes)

    def _make_layers(self, in_planes):
        """Instantiate every block listed in ``cfg`` and chain them."""
        blocks = []
        channels = in_planes
        for expansion, out_planes, num_blocks, first_stride in self.cfg:
            for position in range(num_blocks):
                block_stride = first_stride if position == 0 else 1
                blocks.append(Block(channels, out_planes, expansion, block_stride))
                channels = out_planes
        return nn.Sequential(*blocks)

    def forward(self, x):
        features = F.relu(self.bn1(self.conv1(x)))
        features = self.layers(features)
        features = F.relu(self.bn2(self.conv2(features)))
        pooled = F.avg_pool2d(features, 4)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)


In [13]:

class BasicBlock(nn.Module):
    """Two-convolution residual block used by the ResNet-18/34 factories.

    Args:
        in_planes: input channels.
        planes: channels of both 3x3 convolutions (output is
            ``expansion * planes``).
        stride: stride of the first convolution.

    The shortcut is the identity unless the stride or channel count changes,
    in which case it is a strided 1x1 convolution followed by batch-norm.
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        needs_projection = stride != 1 or in_planes != out_planes
        if needs_projection:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes)
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        residual = F.relu(self.bn1(self.conv1(x)))
        residual = self.bn2(self.conv2(residual))
        return F.relu(residual + self.shortcut(x))

class Bottleneck(nn.Module):
    """1x1 -> 3x3 -> 1x1 residual block used by the ResNet-50/101/152 factories.

    Args:
        in_planes: input channels.
        planes: width of the inner convolutions; the block outputs
            ``expansion * planes`` channels.
        stride: stride of the 3x3 convolution.

    The shortcut is the identity unless the stride or channel count changes,
    in which case it is a strided 1x1 convolution followed by batch-norm.
    """

    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        needs_projection = stride != 1 or in_planes != out_planes
        if needs_projection:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes)
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        residual = F.relu(self.bn1(self.conv1(x)))
        residual = F.relu(self.bn2(self.conv2(residual)))
        residual = self.bn3(self.conv3(residual))
        return F.relu(residual + self.shortcut(x))

class ResNet(nn.Module):
    """Four-stage residual network with a stride-1 3x3 stem.

    Args:
        block: block class; must accept ``(in_planes, planes, stride)`` and
            expose a class attribute ``expansion``.
        num_blocks: four integers, the number of blocks per stage.
        num_classes: size of the output layer.

    Stages use widths 64/128/256/512 with strides 1/2/2/2, and ``forward``
    pools with a fixed 4x4 window, which matches 32x32 inputs.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        # Running channel count; updated by _make_layer as stages are built.
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage; only its first block uses ``stride``."""
        stage = []
        for position in range(num_blocks):
            block_stride = stride if position == 0 else 1
            stage.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage)

    def forward(self, x):
        features = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            features = stage(features)
        pooled = F.avg_pool2d(features, 4)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)

def ResNet18(num_classes=10):
    """ResNet built from BasicBlock with stage depths [2, 2, 2, 2]."""
    return ResNet(block=BasicBlock, num_blocks=[2, 2, 2, 2], num_classes=num_classes)

def ResNet34(num_classes=10):
    """ResNet built from BasicBlock with stage depths [3, 4, 6, 3]."""
    return ResNet(block=BasicBlock, num_blocks=[3, 4, 6, 3], num_classes=num_classes)

def ResNet50(num_classes=10):
    """ResNet built from Bottleneck with stage depths [3, 4, 6, 3]."""
    return ResNet(block=Bottleneck, num_blocks=[3, 4, 6, 3], num_classes=num_classes)

def ResNet101(num_classes=10):
    """ResNet built from Bottleneck with stage depths [3, 4, 23, 3]."""
    return ResNet(block=Bottleneck, num_blocks=[3, 4, 23, 3], num_classes=num_classes)

def ResNet152(num_classes=10):
    """ResNet built from Bottleneck with stage depths [3, 8, 36, 3]."""
    return ResNet(block=Bottleneck, num_blocks=[3, 8, 36, 3], num_classes=num_classes)


In [7]:
# !wget -O teacher.pth 'https://drive.google.com/uc?export=download&id=1TFF8h3j6KxIec7giLIarC1q0VBPBekh6'

In [16]:

class AttackPGD(nn.Module):
    """Wrap a classifier so that ``forward`` evaluates it on PGD adversarial inputs.

    Args:
        basic_net: the classifier under attack; called as ``basic_net(x)``
            and expected to return logits.
        config: dict with keys ``'step_size'`` (per-step perturbation size),
            ``'epsilon'`` (maximum per-element perturbation) and
            ``'num_steps'`` (number of gradient-sign steps).
    """

    def __init__(self, basic_net, config):
        super().__init__()
        self.basic_net = basic_net
        self.step_size = config['step_size']
        self.epsilon = config['epsilon']
        self.num_steps = config['num_steps']

    def forward(self, inputs, targets):
        """Craft adversarial examples and classify them.

        Starts from ``inputs`` plus uniform noise in ``[-epsilon, epsilon]``,
        then repeatedly steps along the sign of the cross-entropy gradient,
        projecting back into the epsilon-box around ``inputs`` and into
        ``[0, 1]`` after every step.

        Returns:
            ``(logits, x_adv)``: the wrapped network's output on the
            adversarial batch, and the adversarial batch itself.
        """
        x = inputs.detach()
        x = x + torch.zeros_like(x).uniform_(-self.epsilon, self.epsilon)
        for _ in range(self.num_steps):
            x.requires_grad_()
            # Gradients are needed even when the caller runs under no_grad().
            with torch.enable_grad():
                # reduction='sum' replaces the deprecated size_average=False.
                loss = F.cross_entropy(self.basic_net(x), targets, reduction='sum')
            grad = torch.autograd.grad(loss, [x])[0]
            x = x.detach() + self.step_size * torch.sign(grad.detach())
            # Project onto the epsilon-box around the clean input, then onto [0, 1].
            x = torch.min(torch.max(x, inputs - self.epsilon), inputs + self.epsilon)
            x = torch.clamp(x, 0.0, 1.0)
        return self.basic_net(x), x


# Progress banner naming the student architecture selected in args.model.
print('==> Building model..' + args.model)
class Net(nn.Module):
    """Small CNN: two 5x5 conv + 2x2 max-pool stages, then three linear layers.

    The first linear layer expects 16*5*5 features, which is what the two
    conv/pool stages produce for 3x32x32 inputs. The output has 10 logits.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        # Convolutional feature extractor (the pooling layer is shared).
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))
        x = torch.flatten(x, 1)
        # Fully connected head; no activation after the final layer.
        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))
        return self.fc3(x)



# --- Student network -------------------------------------------------------
# Select the student from args.model.
# NOTE(review): the 'MobileNetV2' choice currently builds the small `Net` CNN,
# not MobileNetV2 (the real constructor is commented out) -- confirm intent.
# NOTE(review): there is no `else`; an unrecognised args.model leaves
# `basic_net` undefined and the `.to(device)` line below raises NameError.
if args.model == 'MobileNetV2':
    basic_net = Net()
    # basic_net = MobileNetV2(num_classes=num_classes)
elif args.model == 'WideResNet':
    basic_net = WideResNet(depth = 16, num_classes=num_classes)
elif args.model == 'ResNet18':
    basic_net = ResNet18(num_classes=num_classes)
elif args.model == 'ResNet50':
    basic_net = ResNet50(num_classes=num_classes)
basic_net = basic_net.to(device)

# --- Teacher network -------------------------------------------------------
# Only built when a checkpoint path is configured. Code further down this cell
# uses `teacher_net` unconditionally, so an empty args.teacher_path fails there.
if args.teacher_path != '':
    if args.teacher_model == 'MobileNetV2':
        teacher_net = MobileNetV2(num_classes=num_classes)
    elif args.teacher_model == 'WideResNet':
        teacher_net = WideResNet(depth = 16, num_classes=num_classes)

        # import torch
        # import torchvision.models as models
        # teacher_net = models.wide_resnet50_2(pretrained=False)

        # # Modify the output layer to match the number of classes in your pre-trained model
        # # num_classes = 10  # Change this to the number of classes in your pre-trained model
        # teacher_net.fc = nn.Linear(teacher_net.fc.in_features, 10)
        # # teacher_net = models.wide_resnet50_2(pretrained=True)
    elif args.teacher_model == 'ResNet18':
        teacher_net = ResNet18(num_classes=num_classes)
    elif args.teacher_model == 'ResNet50':
        # Uses torchvision's resnet50, not the ResNet50 factory defined in this notebook.
        teacher_net = torchvision.models.resnet50(num_classes=num_classes)
        # teacher_net = ResNet50(num_classes=num_classes)
    teacher_net = teacher_net.to(device)
    # Freeze the teacher: none of its parameters receive gradients.
    for param in teacher_net.parameters():
        param.requires_grad = False

# PGD attack settings handed to AttackPGD: perturbation bound, number of
# gradient-sign steps, and per-step size (pixel values are in [0, 1]).
config = {
    'epsilon': 8.0 / 255,
    'num_steps': 10,
    'step_size': 2.0 / 255,
}
# `net` wraps the student so that calling it runs the attack first.
net = AttackPGD(basic_net, config)
if device == 'cuda':
    # Let cuDNN auto-tune convolution algorithms.
    cudnn.benchmark = True

print('==> Loading teacher..')

def filter_state_dict(state_dict):
    """Normalise a checkpoint's parameter names so it can be loaded directly.

    Args:
        state_dict: either a mapping of parameter name -> value, or a
            checkpoint mapping that stores that mapping under ``'state_dict'``.

    Returns:
        An ``OrderedDict`` (original order preserved) in which

        * entries whose name contains ``'sub_block'`` are dropped, and
        * a leading ``'module.'`` prefix is removed from each name.

        Only a *leading* prefix is stripped; names that merely contain
        ``'module'`` somewhere else are left untouched instead of losing
        their first seven characters.
    """
    from collections import OrderedDict

    if 'state_dict' in state_dict:
        state_dict = state_dict['state_dict']

    prefix = 'module.'
    cleaned = OrderedDict()
    for key, value in state_dict.items():
        if 'sub_block' in key:
            continue
        if key.startswith(prefix):
            key = key[len(prefix):]
        cleaned[key] = value
    return cleaned


# Read the teacher checkpoint from args.teacher_path and normalise its keys.
# NOTE(review): torch.load unpickles the file, and this checkpoint is downloaded
# earlier in the notebook -- only load checkpoints from a trusted source.
ckpt = filter_state_dict(torch.load(args.teacher_path, map_location=device))
# ckpt = torch.load(args.teacher_path, map_location=device)


# ckpt['fc.weight'] = torch.randn(10, 2048) # replace the weights with shape (10, 2048)

# ckpt['fc.bias'] = torch.randn(10) # replace the bias with shape (10)

# Copy the weights into the teacher and switch it to evaluation mode.
# NOTE: teacher_net only exists when args.teacher_path is non-empty.
teacher_net.load_state_dict(ckpt)
teacher_net.eval()


# def load_state_dict_with_module_prefix(model, state_dict):
#     new_state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()}
#     model.load_state_dict(new_state_dict)
# load_state_dict_with_module_prefix(teacher_net, torch.load(args.teacher_path))

# teacher_net.load_state_dict(torch.load(args.teacher_path))
# teacher_net.eval() --> Needed?



# Loss modules used by train(): KL divergence for the distillation term and
# cross-entropy for the label term.
# NOTE(review): KLDivLoss() uses the default reduction, which averages over every
# element; PyTorch recommends reduction='batchmean' for the mathematical KL
# value -- confirm the intended loss scale before changing it.
KL_loss = nn.KLDivLoss()
XENT_loss = nn.CrossEntropyLoss()
# NOTE(review): main() defines its own local `lr` from args.lr, so this
# module-level copy appears to be unused -- verify before removing.
lr = args.lr



==> Building model..WideResNet
==> Loading teacher..


In [None]:

def train(epoch, optimizer):
    """Run one training epoch over ``trainloader`` and optionally checkpoint.

    For every batch the loss is
    ``alpha * temp^2 * KL(student(adv) / temp, teacher(clean) / temp)
    + (1 - alpha) * CE(student(clean), targets)``, where the adversarial batch
    comes from ``net`` (the PGD wrapper around ``basic_net``).

    Args:
        epoch: zero-based epoch index; used for the checkpoint schedule and
            file name.
        optimizer: optimizer whose ``zero_grad``/``step`` drive the update.

    Returns:
        The sum of the per-batch loss values (the mean is printed).
    """
    net.train()
    progress = tqdm(trainloader, ncols=0, leave=False)
    train_loss = 0
    for inputs, targets in progress:
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        adv_logits, _ = net(inputs, targets)    # student on adversarial inputs
        teacher_logits = teacher_net(inputs)    # teacher on clean inputs
        clean_logits = basic_net(inputs)        # student on clean inputs

        distill_term = KL_loss(F.log_softmax(adv_logits / args.temp, dim=1),
                               F.softmax(teacher_logits / args.temp, dim=1))
        label_term = XENT_loss(clean_logits, targets)
        loss = args.alpha * args.temp * args.temp * distill_term + (1.0 - args.alpha) * label_term

        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        train_loss += batch_loss
        progress.set_description(str(batch_loss))
        # break -> For very slow mode

    # Periodically save student weights together with the optimizer state.
    if (epoch + 1) % args.save_period == 0:
        state = {
            'net': basic_net.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        checkpoint_dir = 'checkpoint/' + args.dataset + '/' + args.output + '/'
        if not os.path.isdir(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        torch.save(state, './' + checkpoint_dir + 'epoch=' + str(epoch) + '.t7')

    print('Mean Training Loss:', train_loss / len(progress))
    return train_loss


def test(epoch, optimizer):
    """Measure clean and PGD-adversarial accuracy over ``testloader``.

    Args:
        epoch: accepted for signature symmetry with ``train``; not used.
        optimizer: accepted for signature symmetry with ``train``; not used.

    Returns:
        ``(natural_acc, robust_acc)`` as percentages.
    """
    net.eval()
    seen = 0
    natural_correct = 0
    adv_correct = 0
    with torch.no_grad():
        progress = tqdm(testloader, ncols=0, leave=False)
        for inputs, targets in progress:
            inputs = inputs.to(device)
            targets = targets.to(device)

            # The PGD wrapper re-enables gradients internally to craft the attack.
            adv_outputs, _ = net(inputs, targets)
            natural_outputs = basic_net(inputs)

            adv_hits = adv_outputs.max(1)[1].eq(targets).sum().item()
            natural_hits = natural_outputs.max(1)[1].eq(targets).sum().item()

            batch_size = targets.size(0)
            natural_correct += natural_hits
            seen += batch_size
            adv_correct += adv_hits
            progress.set_description(str(adv_hits / batch_size))
            # break -> For very slow mode

    robust_acc = 100. * adv_correct / seen
    natural_acc = 100. * natural_correct / seen
    print('Natural acc:', natural_acc)
    print('Robust acc:', robust_acc)
    return natural_acc, robust_acc


def main():
    """Train for ``args.epochs`` epochs, validating every ``args.val_period``.

    Builds an SGD optimizer (momentum 0.9, weight decay 2e-4) over the
    parameters of ``net`` and, for each epoch, applies the learning-rate
    schedule, trains, and runs the evaluation when it is due.
    """
    base_lr = args.lr
    optimizer = optim.SGD(net.parameters(), lr=base_lr, momentum=0.9, weight_decay=2e-4)
    for epoch in range(args.epochs):
        print('-------------\nEpoch ' + str(epoch + 1))
        adjust_learning_rate(optimizer, epoch, base_lr)
        train(epoch, optimizer)
        validation_due = (epoch + 1) % args.val_period == 0
        if validation_due:
            test(epoch, optimizer)


if __name__ == '__main__':
    main()

-------------
Epoch 1




Mean Training Loss: 1.5871289523361285




Natural acc: 52.98
Robust acc: 0.26
-------------
Epoch 2




Mean Training Loss: 1.1127746109767338




Natural acc: 64.04
Robust acc: 0.01
-------------
Epoch 3




Mean Training Loss: 0.9134805237545687




Natural acc: 66.19
Robust acc: 0.0
-------------
Epoch 4




Mean Training Loss: 0.7714042584304615




Natural acc: 67.69
Robust acc: 0.0
-------------
Epoch 5




Mean Training Loss: 0.6900339075518996




Natural acc: 73.03
Robust acc: 0.0
-------------
Epoch 6




Mean Training Loss: 0.6353313650011712




Natural acc: 76.91
Robust acc: 0.0
-------------
Epoch 7




Mean Training Loss: 0.6031362163593702




Natural acc: 76.05
Robust acc: 0.0
-------------
Epoch 8




Mean Training Loss: 0.5730319107737383




Natural acc: 77.14
Robust acc: 0.0
-------------
Epoch 9




Mean Training Loss: 0.5498811883084914




Natural acc: 77.07
Robust acc: 0.0
-------------
Epoch 10




Mean Training Loss: 0.5271365840721618




Natural acc: 76.02
Robust acc: 0.0
-------------
Epoch 11




Mean Training Loss: 0.518496197965139




Natural acc: 77.35
Robust acc: 0.0
-------------
Epoch 12




Mean Training Loss: 0.49922709178436747




Natural acc: 79.32
Robust acc: 0.0
-------------
Epoch 13




Mean Training Loss: 0.49065080826239815




Natural acc: 77.33
Robust acc: 0.0
-------------
Epoch 14




Mean Training Loss: 0.48364503540651266




Natural acc: 80.28
Robust acc: 0.0
-------------
Epoch 15




Mean Training Loss: 0.4726551050877632




Natural acc: 77.21
Robust acc: 0.0
-------------
Epoch 16




Mean Training Loss: 0.4635038610614474




Natural acc: 80.08
Robust acc: 0.0
-------------
Epoch 17




Mean Training Loss: 0.45601487380769246




Natural acc: 78.75
Robust acc: 0.0
-------------
Epoch 18




Mean Training Loss: 0.44855231618332436




Natural acc: 80.33
Robust acc: 0.0
-------------
Epoch 19




Mean Training Loss: 0.441673560901676




Natural acc: 79.45
Robust acc: 0.0
-------------
Epoch 20




Mean Training Loss: 0.43502370144247704




Natural acc: 82.08
Robust acc: 0.0
-------------
Epoch 21




Mean Training Loss: 0.4286865146873552




Natural acc: 83.86
Robust acc: 0.0
-------------
Epoch 22




Mean Training Loss: 0.42303850633256573




Natural acc: 80.72
Robust acc: 0.0
-------------
Epoch 23




Mean Training Loss: 0.42569258427985796




Natural acc: 78.49
Robust acc: 0.0
-------------
Epoch 24




Mean Training Loss: 0.41802169100555314




Natural acc: 81.02
Robust acc: 0.0
-------------
Epoch 25




Mean Training Loss: 0.41229550738621246




Natural acc: 80.15
Robust acc: 0.0
-------------
Epoch 26




Mean Training Loss: 0.4125982751626798




Natural acc: 80.97
Robust acc: 0.0
-------------
Epoch 27




Mean Training Loss: 0.41105984658231515




Natural acc: 79.62
Robust acc: 0.0
-------------
Epoch 28




Mean Training Loss: 0.40506708923050816




Natural acc: 83.04
Robust acc: 0.0
-------------
Epoch 29




Mean Training Loss: 0.39811688299526643




Natural acc: 81.84
Robust acc: 0.0
-------------
Epoch 30




Mean Training Loss: 0.39391012798489816




Natural acc: 81.04
Robust acc: 0.0
-------------
Epoch 31




Mean Training Loss: 0.39420065977384366




Natural acc: 82.5
Robust acc: 0.0
-------------
Epoch 32




Mean Training Loss: 0.39196638233216524




Natural acc: 82.52
Robust acc: 0.0
-------------
Epoch 33




Mean Training Loss: 0.3913151188503446




Natural acc: 78.26
Robust acc: 0.0
-------------
Epoch 34




Mean Training Loss: 0.3863184584299927




Natural acc: 81.12
Robust acc: 0.0
-------------
Epoch 35




Mean Training Loss: 0.3847869981051711




Natural acc: 81.26
Robust acc: 0.0
-------------
Epoch 36




Mean Training Loss: 0.38362829135659404




Natural acc: 81.9
Robust acc: 0.0
-------------
Epoch 37




Mean Training Loss: 0.3870020403581507




Natural acc: 80.99
Robust acc: 0.0
-------------
Epoch 38




Mean Training Loss: 0.3795069264024115




Natural acc: 80.09
Robust acc: 0.0
-------------
Epoch 39




Mean Training Loss: 0.3736890699628674




Natural acc: 81.72
Robust acc: 0.0
-------------
Epoch 40




Mean Training Loss: 0.3745156636704569




Natural acc: 79.05
Robust acc: 0.0
-------------
Epoch 41




Mean Training Loss: 0.3741631463665487




Natural acc: 80.09
Robust acc: 0.0
-------------
Epoch 42




Mean Training Loss: 0.3693707105525009




Natural acc: 82.24
Robust acc: 0.0
-------------
Epoch 43




Mean Training Loss: 0.36819381756550823




Natural acc: 82.17
Robust acc: 0.0
-------------
Epoch 44




Mean Training Loss: 0.3678544362259033




Natural acc: 80.57
Robust acc: 0.0
-------------
Epoch 45




Mean Training Loss: 0.36734872080785846




Natural acc: 81.05
Robust acc: 0.0
-------------
Epoch 46




Mean Training Loss: 0.3748761158236457




Natural acc: 80.11
Robust acc: 0.0
-------------
Epoch 47




Mean Training Loss: 0.3646485611148503




Natural acc: 81.66
Robust acc: 0.0
-------------
Epoch 48




Mean Training Loss: 0.3634407435308027




Natural acc: 84.11
Robust acc: 0.0
-------------
Epoch 49




Mean Training Loss: 0.36699231334811894




Natural acc: 83.57
Robust acc: 0.0
-------------
Epoch 50




Mean Training Loss: 0.3611353646839976




Natural acc: 81.69
Robust acc: 0.0
-------------
Epoch 51




Mean Training Loss: 0.35896983956132095




Natural acc: 80.37
Robust acc: 0.01
-------------
Epoch 52




Mean Training Loss: 0.3572814995446778




Natural acc: 80.68
Robust acc: 0.0
-------------
Epoch 53




Mean Training Loss: 0.3599410337560317




Natural acc: 82.96
Robust acc: 0.0
-------------
Epoch 54




Mean Training Loss: 0.3555088382014228




Natural acc: 83.72
Robust acc: 0.0
-------------
Epoch 55




Mean Training Loss: 0.3581762403783286




Natural acc: 84.15
Robust acc: 0.0
-------------
Epoch 56




Mean Training Loss: 0.3521904473185844




Natural acc: 82.13
Robust acc: 0.0
-------------
Epoch 57




Mean Training Loss: 0.35903042051798245




Natural acc: 84.71
Robust acc: 0.0
-------------
Epoch 58




Mean Training Loss: 0.35215436234651015




Natural acc: 83.79
Robust acc: 0.0
-------------
Epoch 59




Mean Training Loss: 0.3547048617506881




Natural acc: 78.28
Robust acc: 0.0
-------------
Epoch 60




Mean Training Loss: 0.3541179869485938




Natural acc: 81.87
Robust acc: 0.0
-------------
Epoch 61




Mean Training Loss: 0.3508726568783031




Natural acc: 84.18
Robust acc: 0.0
-------------
Epoch 62




Mean Training Loss: 0.3468696886240064




Natural acc: 82.13
Robust acc: 0.0
-------------
Epoch 63




Mean Training Loss: 0.35217969470164356




Natural acc: 82.86
Robust acc: 0.0
-------------
Epoch 64




Mean Training Loss: 0.3485375282252231




Natural acc: 82.07
Robust acc: 0.0
-------------
Epoch 65




Mean Training Loss: 0.34166733074523603




Natural acc: 83.59
Robust acc: 0.0
-------------
Epoch 66




Mean Training Loss: 0.34619860423495397




Natural acc: 81.05
Robust acc: 0.0
-------------
Epoch 67




Mean Training Loss: 0.3455742358246728




Natural acc: 83.29
Robust acc: 0.0
-------------
Epoch 68




Mean Training Loss: 0.34512941604075226




Natural acc: 80.07
Robust acc: 0.01
-------------
Epoch 69




Mean Training Loss: 0.34429999545712




Natural acc: 83.34
Robust acc: 0.0
-------------
Epoch 70




Mean Training Loss: 0.3435027025392293




Natural acc: 85.08
Robust acc: 0.0
-------------
Epoch 71




Mean Training Loss: 0.3406984365123617




Natural acc: 81.56
Robust acc: 0.0
-------------
Epoch 72




Mean Training Loss: 0.3431444259936852




Natural acc: 81.41
Robust acc: 0.0
-------------
Epoch 73




Mean Training Loss: 0.33708706273294775




Natural acc: 84.13
Robust acc: 0.0
-------------
Epoch 74




Mean Training Loss: 0.3408983852857214




Natural acc: 82.93
Robust acc: 0.0
-------------
Epoch 75




Mean Training Loss: 0.3363275784818108




Natural acc: 84.9
Robust acc: 0.0
-------------
Epoch 76




Mean Training Loss: 0.3380407506928724




Natural acc: 83.23
Robust acc: 0.0
-------------
Epoch 77




Mean Training Loss: 0.34301735190174465




Natural acc: 83.56
Robust acc: 0.0
-------------
Epoch 78




Mean Training Loss: 0.3380873731868651




Natural acc: 83.06
Robust acc: 0.0
-------------
Epoch 79




Mean Training Loss: 0.3346279309609967




Natural acc: 82.41
Robust acc: 0.0
-------------
Epoch 80




Mean Training Loss: 0.3384955292543792




Natural acc: 82.96
Robust acc: 0.0
-------------
Epoch 81




Mean Training Loss: 0.33617280819989226




Natural acc: 80.83
Robust acc: 0.0
-------------
Epoch 82




Mean Training Loss: 0.3365940722967962




Natural acc: 82.92
Robust acc: 0.0
-------------
Epoch 83




Mean Training Loss: 0.33359003493852935




Natural acc: 80.54
Robust acc: 0.0
-------------
Epoch 84




Mean Training Loss: 0.33569882284192476




Natural acc: 85.25
Robust acc: 0.0
-------------
Epoch 85




Mean Training Loss: 0.33040888603690943




Natural acc: 85.47
Robust acc: 0.0
-------------
Epoch 86




Mean Training Loss: 0.33738842408370484




Natural acc: 82.72
Robust acc: 0.0
-------------
Epoch 87




Mean Training Loss: 0.33198075655781095




Natural acc: 82.72
Robust acc: 0.0
-------------
Epoch 88




Mean Training Loss: 0.3292308912024169




Natural acc: 85.85
Robust acc: 0.0
-------------
Epoch 89




Mean Training Loss: 0.33435611480184835




Natural acc: 84.42
Robust acc: 0.0
-------------
Epoch 90




Mean Training Loss: 0.3349230867212691




Natural acc: 84.41
Robust acc: 0.0
-------------
Epoch 91




Mean Training Loss: 0.33117391573041294




Natural acc: 84.46
Robust acc: 0.0
-------------
Epoch 92




Mean Training Loss: 0.3342707613697442




Natural acc: 83.5
Robust acc: 0.0
-------------
Epoch 93




Mean Training Loss: 0.3317137854483426




Natural acc: 79.48
Robust acc: 0.0
-------------
Epoch 94




Mean Training Loss: 0.33228771293254766




Natural acc: 84.0
Robust acc: 0.0
-------------
Epoch 95




Mean Training Loss: 0.3300990406280893




Natural acc: 83.99
Robust acc: 0.0
-------------
Epoch 96




Mean Training Loss: 0.3288556438730196




Natural acc: 84.86
Robust acc: 0.0
-------------
Epoch 97




Mean Training Loss: 0.33191928686693195




Natural acc: 85.68
Robust acc: 0.0
-------------
Epoch 98




Mean Training Loss: 0.32539288661516536




Natural acc: 84.21
Robust acc: 0.0
-------------
Epoch 99




Mean Training Loss: 0.33009151664688763




Natural acc: 84.02
Robust acc: 0.0
-------------
Epoch 100




Mean Training Loss: 0.32785630332844334




Natural acc: 85.92
Robust acc: 0.0
-------------
Epoch 101




Mean Training Loss: 0.22236077131136603




Natural acc: 86.69
Robust acc: 0.0
-------------
Epoch 102




Mean Training Loss: 0.18968363864647458




Natural acc: 88.24
Robust acc: 0.0
-------------
Epoch 103




Mean Training Loss: 0.17584804700844733




Natural acc: 88.8
Robust acc: 0.0
-------------
Epoch 104




Mean Training Loss: 0.17300908052174332




Natural acc: 89.38
Robust acc: 0.0
-------------
Epoch 105




Mean Training Loss: 0.16271137364227753




Natural acc: 88.55
Robust acc: 0.0
-------------
Epoch 106




Mean Training Loss: 0.15618601354682232




Natural acc: 87.76
Robust acc: 0.0
-------------
Epoch 107




Mean Training Loss: 0.1556776395386747




Natural acc: 88.49
Robust acc: 0.0
-------------
Epoch 108




Mean Training Loss: 0.14898837875107976




Natural acc: 88.93
Robust acc: 0.0
-------------
Epoch 109




Mean Training Loss: 0.1465577545678219




Natural acc: 87.38
Robust acc: 0.0
-------------
Epoch 110




Mean Training Loss: 0.14131921552636129




Natural acc: 86.22
Robust acc: 0.0
-------------
Epoch 111




Mean Training Loss: 0.13883402458656474




Natural acc: 87.45
Robust acc: 0.0
-------------
Epoch 112




Mean Training Loss: 0.13738007635792807




Natural acc: 87.81
Robust acc: 0.0
-------------
Epoch 113




Mean Training Loss: 0.1327359032486101




Natural acc: 88.74
Robust acc: 0.0
-------------
Epoch 114




Mean Training Loss: 0.13407987571509597




Natural acc: 87.99
Robust acc: 0.0
-------------
Epoch 115




Mean Training Loss: 0.12673861994539076




Natural acc: 87.15
Robust acc: 0.0
-------------
Epoch 116




Mean Training Loss: 0.129294604796659




Natural acc: 88.97
Robust acc: 0.0
-------------
Epoch 117




Mean Training Loss: 0.12859445348229553




Natural acc: 87.97
Robust acc: 0.0
-------------
Epoch 118




Mean Training Loss: 0.1263803664852134




Natural acc: 88.04
Robust acc: 0.0
-------------
Epoch 119




Mean Training Loss: 0.1231783668860755




Natural acc: 86.96
Robust acc: 0.0
-------------
Epoch 120




Mean Training Loss: 0.122706909480569




Natural acc: 87.95
Robust acc: 0.0
-------------
Epoch 121




Mean Training Loss: 0.11892824854387347




Natural acc: 87.24
Robust acc: 0.0
-------------
Epoch 122


0.0965048149228096:  67% 261/391 [00:42<00:20,  6.34it/s]