<a href="https://colab.research.google.com/github/mjmaher987/Robustness---CISPA/blob/main/Results/6-RN50_WRN15_1_0_137.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
import torchvision
import torchvision.transforms as transforms
import os
import argparse
from tqdm import tqdm
import sys
! pip install gdown
# https://drive.google.com/file/d/1TFF8h3j6KxIec7giLIarC1q0VBPBekh6/view?usp=sharing
# https://drive.google.com/file/d/1zFvAA_s-Yvt3_ogZ4RLmoWX7bsaOBW8B/view?usp=sharing
# 'https://drive.google.com/uc?export=download&id=1TFF8h3j6KxIec7giLIarC1q0VBPBekh6' ----
! gdown https://drive.google.com/uc?id=1zFvAA_s-Yvt3_ogZ4RLmoWX7bsaOBW8B
! gdown https://drive.google.com/uc?id=10sHvaXhTNZGz618QmD5gSOAjO3rMzV33
# ! gdown https://drive.google.com/file/d/1zFvAA_s-Yvt3_ogZ4RLmoWX7bsaOBW8B/view?usp=drive_link

Downloading...
From: https://drive.google.com/uc?id=1zFvAA_s-Yvt3_ogZ4RLmoWX7bsaOBW8B
To: /content/at_cifar10_resnet50_10class.pth
100% 94.4M/94.4M [00:00<00:00, 163MB/s]
Downloading...
From: https://drive.google.com/uc?id=10sHvaXhTNZGz618QmD5gSOAjO3rMzV33
To: /content/model_cifar_wrn.pt
100% 193M/193M [00:01<00:00, 155MB/s]


In [27]:

class attributes:
    """Container for the experiment's hyper-parameters and file paths.

    Every setting is a plain instance attribute; the rest of the notebook reads
    them through the module-level ``args`` instance created right below.
    """

    def __init__(self):
        # --- data ---------------------------------------------------------
        self.dataset = 'CIFAR10'        # 'CIFAR10' or 'CIFAR100'

        # --- networks -----------------------------------------------------
        # self.model = 'MobileNetV2'
        self.model = 'WideResNet'       # name of the trained (student) network
        # self.teacher_model = 'WideResNet' # 'ResNet50'
        self.teacher_model = 'ResNet50'  # name of the frozen teacher network
        self.teacher_path = "/content/at_cifar10_resnet50_10class.pth"
        # self.teacher_path = "/content/model_cifar_wrn.pt"

        # --- distillation loss ---------------------------------------------
        self.temp = 30.0                # softmax temperature used in the KL term
        self.alpha = 1.0                # weight of the KL term; (1 - alpha) weights cross-entropy

        # --- optimisation schedule -----------------------------------------
        self.lr = 0.1                   # initial SGD learning rate
        self.lr_factor = 0.1            # multiplier applied at each schedule epoch
        self.lr_schedule = [100, 150]   # epochs at which the learning rate is rescaled
        self.epochs = 200

        # --- bookkeeping ----------------------------------------------------
        self.val_period = 1             # evaluate every `val_period` epochs
        self.save_period = 1            # checkpoint every `save_period` epochs
        self.output = ''                # sub-directory under checkpoint/<dataset>/


args = attributes()

# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'


In [11]:


def adjust_learning_rate(optimizer, epoch, lr):
    """Apply the step learning-rate schedule from ``args`` at milestone epochs.

    Args:
        optimizer: object exposing ``param_groups`` (a list of dicts with an
            ``'lr'`` entry), e.g. a ``torch.optim`` optimizer.
        epoch: zero-based index of the epoch about to be trained.
        lr: the *base* (initial) learning rate. ``main`` passes ``args.lr`` on
            every call, so the decay must be derived from the epoch rather than
            accumulated in this by-value argument.

    Returns:
        None. ``optimizer.param_groups`` is updated in place, and only when
        ``epoch`` is one of ``args.lr_schedule``.
    """
    if epoch not in args.lr_schedule:
        return
    # One decay per milestone already reached. The previous code multiplied the
    # base rate by the factor exactly once, so the second milestone re-applied
    # the same value instead of decaying further.
    milestones_reached = sum(1 for milestone in args.lr_schedule if milestone <= epoch)
    decayed_lr = lr * args.lr_factor ** milestones_reached
    for param_group in optimizer.param_groups:
        param_group['lr'] = decayed_lr


print('==> Preparing data..')

# Training-time augmentation: random 32x32 crop after 4-pixel padding and a
# random horizontal flip, then conversion to a tensor.
_train_steps = [
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
]
transform_train = transforms.Compose(_train_steps)
# Evaluation images are only converted to tensors.
transform_test = transforms.Compose([transforms.ToTensor()])

# dataset name -> (torchvision dataset class, training batch size, number of classes)
_dataset_table = {
    'CIFAR10': (torchvision.datasets.CIFAR10, 128, 10),
    'CIFAR100': (torchvision.datasets.CIFAR100, 256, 100),
}

# Any other dataset name leaves the loaders and `num_classes` undefined.
if args.dataset in _dataset_table:
    _dataset_cls, _train_batch_size, num_classes = _dataset_table[args.dataset]
    trainset = _dataset_cls(root='./data', train=True, download=True, transform=transform_train)
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=_train_batch_size, shuffle=True, num_workers=2)
    testset = _dataset_cls(root='./data', train=False, download=True, transform=transform_test)
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=100, shuffle=False, num_workers=2)

==> Preparing data..
Files already downloaded and verified
Files already downloaded and verified


In [22]:

import math
import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicBlock(nn.Module):
    """Pre-activation residual block used by ``WideResNet``.

    Layout: BN -> ReLU -> 3x3 conv -> BN -> ReLU -> (dropout) -> 3x3 conv, added
    to a shortcut. When the channel count changes, the shortcut is a 1x1
    convolution applied to the *activated* input; otherwise it is the raw input.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: stride of the first 3x3 convolution (and of the 1x1 shortcut).
        dropRate: dropout probability applied between the two convolutions.
    """

    def __init__(self, in_planes, out_planes, stride, dropRate=0.0):
        # Zero-argument super(): this notebook later defines another class named
        # `BasicBlock`, and `super(BasicBlock, self)` would then resolve to that
        # unrelated class and raise TypeError.
        super().__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.droprate = dropRate
        self.equalInOut = (in_planes == out_planes)
        # 1x1 projection shortcut, only needed when the channel count changes.
        self.convShortcut = None if self.equalInOut else nn.Conv2d(
            in_planes, out_planes, kernel_size=1, stride=stride, padding=0, bias=False)

    def forward(self, x):
        """Return ``shortcut(x) + residual(x)`` for a batch of feature maps."""
        activated = self.relu1(self.bn1(x))
        out = self.relu2(self.bn2(self.conv1(activated)))
        if self.droprate > 0:
            out = F.dropout(out, p=self.droprate, training=self.training)
        out = self.conv2(out)
        shortcut = x if self.equalInOut else self.convShortcut(activated)
        return torch.add(shortcut, out)


class NetworkBlock(nn.Module):
    """A stage of ``nb_layers`` consecutive blocks.

    Only the first block receives ``in_planes`` and ``stride``; the remaining
    blocks map ``out_planes -> out_planes`` with stride 1.
    """

    def __init__(self, nb_layers, in_planes, out_planes, block, stride, dropRate=0.0):
        super().__init__()
        self.layer = self._make_layer(block, in_planes, out_planes, nb_layers, stride, dropRate)

    def _make_layer(self, block, in_planes, out_planes, nb_layers, stride, dropRate):
        """Build the ``nn.Sequential`` holding the stage's blocks."""
        layers = []
        for i in range(int(nb_layers)):
            is_first = (i == 0)
            layers.append(block(in_planes if is_first else out_planes,
                                out_planes,
                                stride if is_first else 1,
                                dropRate))
        return nn.Sequential(*layers)

    def forward(self, x):
        return self.layer(x)


class WideResNet(nn.Module):
    """Wide residual network with three stages of ``(depth - 4) // 6`` blocks.

    Args:
        depth: total depth; ``depth - 4`` must be divisible by 6.
        num_classes: size of the final linear layer's output.
        widen_factor: multiplier for the per-stage channel counts (16, 32, 64).
        dropRate: dropout probability forwarded to every block.

    The forward pass ends with ``F.avg_pool2d(out, 8)`` followed by a reshape to
    ``(-1, nChannels)``, so it expects an 8x8 feature map after the third stage
    (e.g. 32x32 inputs).
    """

    # Bound when the class statement executes, i.e. to the BasicBlock defined
    # just above. Looking the name up inside __init__ would pick whichever
    # `BasicBlock` is the latest global; a later cell redefines that name with a
    # different constructor signature.
    block_cls = BasicBlock

    def __init__(self, depth, num_classes, widen_factor=1, dropRate=0.0):
        super().__init__()
        nChannels = [16, 16*widen_factor, 32*widen_factor, 64*widen_factor]
        assert (depth - 4) % 6 == 0, 'depth must satisfy (depth - 4) % 6 == 0'
        n = (depth - 4) // 6
        block = self.block_cls
        # 1st conv before any network block
        self.conv1 = nn.Conv2d(3, nChannels[0], kernel_size=3, stride=1,
                               padding=1, bias=False)
        # 1st block
        self.block1 = NetworkBlock(n, nChannels[0], nChannels[1], block, 1, dropRate)
        # 2nd block
        self.block2 = NetworkBlock(n, nChannels[1], nChannels[2], block, 2, dropRate)
        # 3rd block
        self.block3 = NetworkBlock(n, nChannels[2], nChannels[3], block, 2, dropRate)
        # global average pooling and classifier
        self.bn1 = nn.BatchNorm2d(nChannels[3])
        self.relu = nn.ReLU(inplace=True)
        self.fc = nn.Linear(nChannels[3], num_classes)
        self.nChannels = nChannels[3]

        # Weight initialisation: Kaiming-normal convolutions, identity-like
        # batch norm (weight 1, bias 0) and zero bias on the classifier.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.bias.data.zero_()

    def forward(self, x):
        """Return class logits for a batch of images."""
        out = self.conv1(x)
        out = self.block1(out)
        out = self.block2(out)
        out = self.block3(out)
        out = self.relu(self.bn1(out))
        out = F.avg_pool2d(out, 8)
        out = out.view(-1, self.nChannels)
        return self.fc(out)

In [5]:


class Block(nn.Module):
    """MobileNetV2 building block: 1x1 expand -> 3x3 depthwise -> 1x1 project.

    A residual connection is added only when ``stride == 1``; if the channel
    count also changes, the shortcut is a 1x1 convolution followed by batch norm.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        expansion: width multiplier for the hidden (expanded) representation.
        stride: stride of the depthwise 3x3 convolution.
    """

    def __init__(self, in_planes, out_planes, expansion, stride):
        super().__init__()
        self.stride = stride
        hidden = expansion * in_planes

        # Pointwise expansion.
        self.conv1 = nn.Conv2d(in_planes, hidden, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(hidden)
        # Depthwise 3x3 convolution (one group per channel).
        self.conv2 = nn.Conv2d(hidden, hidden, kernel_size=3, stride=stride, padding=1,
                               groups=hidden, bias=False)
        self.bn2 = nn.BatchNorm2d(hidden)
        # Pointwise projection; no activation follows it.
        self.conv3 = nn.Conv2d(hidden, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        if stride == 1 and in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            # Empty Sequential acts as the identity.
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Apply the block; the shortcut is added only for stride-1 blocks."""
        hidden = F.relu(self.bn1(self.conv1(x)))
        hidden = F.relu(self.bn2(self.conv2(hidden)))
        projected = self.bn3(self.conv3(hidden))
        if self.stride == 1:
            return projected + self.shortcut(x)
        return projected

class MobileNetV2(nn.Module):
    """MobileNetV2 variant built from ``Block`` units according to ``cfg``.

    The stem convolution uses stride 1 and the head pools with a 4x4 window, so
    after the three stride-2 stages a 32x32 input is reduced to a single 1280-d
    vector per image.
    """

    # One row per stage: (expansion, out_planes, num_blocks, stride)
    cfg = [(1,  16, 1, 1),
           (6,  24, 2, 1),
           (6,  32, 3, 2),
           (6,  64, 4, 2),
           (6,  96, 3, 1),
           (6, 160, 3, 2),
           (6, 320, 1, 1)]

    def __init__(self, num_classes=10):
        super().__init__()
        # Stem.
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        # Stack of blocks described by `cfg`.
        self.layers = self._make_layers(in_planes=32)
        # Head.
        self.conv2 = nn.Conv2d(320, 1280, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(1280)
        self.linear = nn.Linear(1280, num_classes)

    def _make_layers(self, in_planes):
        """Instantiate every block in ``cfg``; only a stage's first block is strided."""
        blocks = []
        channels = in_planes
        for expansion, out_planes, num_blocks, first_stride in self.cfg:
            stage_strides = [first_stride]
            stage_strides.extend([1] * (num_blocks - 1))
            for block_stride in stage_strides:
                blocks.append(Block(channels, out_planes, expansion, block_stride))
                channels = out_planes
        return nn.Sequential(*blocks)

    def forward(self, x):
        """Return class logits for a batch of images."""
        feats = F.relu(self.bn1(self.conv1(x)))
        feats = self.layers(feats)
        feats = F.relu(self.bn2(self.conv2(feats)))
        pooled = F.avg_pool2d(feats, 4)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)


In [6]:

class BasicBlock(nn.Module):
    """Two-convolution residual block (3x3 -> 3x3) for the ``ResNet`` class.

    The shortcut is the identity unless the stride or the channel count changes,
    in which case it is a strided 1x1 convolution followed by batch norm.
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        if stride != 1 or in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            # Empty Sequential acts as the identity.
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``relu(residual(x) + shortcut(x))``."""
        residual = F.relu(self.bn1(self.conv1(x)))
        residual = self.bn2(self.conv2(residual))
        return F.relu(residual + self.shortcut(x))

class Bottleneck(nn.Module):
    """Three-convolution residual block (1x1 -> 3x3 -> 1x1) for ``ResNet``.

    The block outputs ``expansion * planes`` channels. The shortcut is the
    identity unless the stride or the channel count changes, in which case it is
    a strided 1x1 convolution followed by batch norm.
    """

    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes
        # 1x1 reduce.
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        # 3x3, carries the stride.
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        # 1x1 expand.
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        if stride != 1 or in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            # Empty Sequential acts as the identity.
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``relu(residual(x) + shortcut(x))``."""
        residual = F.relu(self.bn1(self.conv1(x)))
        residual = F.relu(self.bn2(self.conv2(residual)))
        residual = self.bn3(self.conv3(residual))
        return F.relu(residual + self.shortcut(x))

class ResNet(nn.Module):
    """ResNet with a 3x3 stride-1 stem and four residual stages.

    Args:
        block: block class; must expose an ``expansion`` class attribute and
            accept ``(in_planes, planes, stride)``.
        num_blocks: sequence giving the number of blocks in each of the 4 stages.
        num_classes: size of the final linear layer's output.

    The head pools with a 4x4 window, so after the three stride-2 stages a 32x32
    input is reduced to one feature vector per image.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        # Running channel count; updated by _make_layer as stages are built.
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage; only its first block uses ``stride``."""
        stage_strides = [stride]
        stage_strides.extend([1] * (num_blocks - 1))
        stage = []
        for block_stride in stage_strides:
            stage.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return class logits for a batch of images."""
        feats = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            feats = stage(feats)
        pooled = F.avg_pool2d(feats, 4)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)

def ResNet18(num_classes=10):
    """ResNet built from ``BasicBlock`` with stage depths [2, 2, 2, 2]."""
    return ResNet(block=BasicBlock, num_blocks=[2, 2, 2, 2], num_classes=num_classes)


def ResNet34(num_classes=10):
    """ResNet built from ``BasicBlock`` with stage depths [3, 4, 6, 3]."""
    return ResNet(block=BasicBlock, num_blocks=[3, 4, 6, 3], num_classes=num_classes)


def ResNet50(num_classes=10):
    """ResNet built from ``Bottleneck`` with stage depths [3, 4, 6, 3]."""
    return ResNet(block=Bottleneck, num_blocks=[3, 4, 6, 3], num_classes=num_classes)


def ResNet101(num_classes=10):
    """ResNet built from ``Bottleneck`` with stage depths [3, 4, 23, 3]."""
    return ResNet(block=Bottleneck, num_blocks=[3, 4, 23, 3], num_classes=num_classes)


def ResNet152(num_classes=10):
    """ResNet built from ``Bottleneck`` with stage depths [3, 8, 36, 3]."""
    return ResNet(block=Bottleneck, num_blocks=[3, 8, 36, 3], num_classes=num_classes)


In [None]:
# !wget -O teacher.pth 'https://drive.google.com/uc?export=download&id=1TFF8h3j6KxIec7giLIarC1q0VBPBekh6'

In [30]:

class AttackPGD(nn.Module):
    """Wrap a classifier so that its forward pass runs on PGD-perturbed inputs.

    Args:
        basic_net: the classifier being attacked; registered as a sub-module, so
            ``AttackPGD.parameters()`` yields the classifier's parameters.
        config: mapping with the keys ``'step_size'`` (per-iteration step),
            ``'epsilon'`` (maximum absolute per-element perturbation) and
            ``'num_steps'`` (number of gradient-sign iterations).
    """

    def __init__(self, basic_net, config):
        super().__init__()
        self.basic_net = basic_net
        self.step_size = config['step_size']
        self.epsilon = config['epsilon']
        self.num_steps = config['num_steps']

    def forward(self, inputs, targets):
        """Craft perturbed inputs and classify them.

        Starts from a uniform random point within ``epsilon`` of ``inputs`` and
        repeats: step along the sign of the loss gradient, project back to within
        ``epsilon`` of ``inputs``, clamp to ``[0, 1]``.

        Returns:
            ``(logits, x_adv)``: the classifier's output on the perturbed inputs
            and the perturbed inputs themselves.
        """
        x = inputs.detach()
        x = x + torch.zeros_like(x).uniform_(-self.epsilon, self.epsilon)
        for _ in range(self.num_steps):
            x.requires_grad_()
            # Gradients are re-enabled explicitly so the attack also works when
            # the caller wraps evaluation in torch.no_grad().
            with torch.enable_grad():
                # reduction='sum' replaces the deprecated size_average=False.
                loss = F.cross_entropy(self.basic_net(x), targets, reduction='sum')
            grad = torch.autograd.grad(loss, [x])[0]
            x = x.detach() + self.step_size * torch.sign(grad.detach())
            # Keep every element within epsilon of the clean input ...
            x = torch.min(torch.max(x, inputs - self.epsilon), inputs + self.epsilon)
            # ... and inside the valid [0, 1] pixel range.
            x = torch.clamp(x, 0.0, 1.0)
        return self.basic_net(x), x


# Report which architecture name (args.model) the selection further down will build.
print('==> Building model..' + args.model)
class Net(nn.Module):
    """Small CNN: two 5x5 conv + 2x2 max-pool stages, then three linear layers.

    ``fc1`` takes ``16 * 5 * 5`` features, which is what the two conv/pool
    stages produce for 3x32x32 inputs. The network emits 10 logits.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=16*5*5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return class logits for a batch of images."""
        feats = x
        # conv -> ReLU -> pool, twice (the same pooling layer is reused).
        for conv in (self.conv1, self.conv2):
            feats = self.pool(F.relu(conv(feats)))
        feats = torch.flatten(feats, 1)
        for hidden_fc in (self.fc1, self.fc2):
            feats = F.relu(hidden_fc(feats))
        return self.fc3(feats)



# ---- Student network ---------------------------------------------------------
# NOTE: choosing 'MobileNetV2' currently builds the small `Net` CNN; the real
# MobileNetV2 constructor is left commented out below.
if args.model == 'MobileNetV2':
    basic_net = Net()
    # basic_net = MobileNetV2(num_classes=num_classes)
elif args.model == 'WideResNet':
    basic_net = WideResNet(depth = 16, num_classes=num_classes)
elif args.model == 'ResNet18':
    basic_net = ResNet18(num_classes=num_classes)
elif args.model == 'ResNet50':
    basic_net = ResNet50(num_classes=num_classes)
else:
    # Fail here with a clear message instead of a NameError on `basic_net` below.
    raise ValueError('Unsupported student model: %r' % (args.model,))
basic_net = basic_net.to(device)

# ---- Teacher network (frozen) --------------------------------------------------
if args.teacher_path != '':
    if args.teacher_model == 'MobileNetV2':
        teacher_net = MobileNetV2(num_classes=num_classes)
    elif args.teacher_model == 'WideResNet':
        teacher_net = WideResNet(depth = 16, num_classes=num_classes)
        # An earlier experiment used torchvision's wide_resnet50_2 here, with its
        # `fc` layer replaced by a 10-way nn.Linear.
    elif args.teacher_model == 'ResNet18':
        teacher_net = ResNet18(num_classes=num_classes)
    elif args.teacher_model == 'ResNet50':
        # torchvision's ResNet-50, not the ResNet50 factory defined in this notebook.
        teacher_net = torchvision.models.resnet50(num_classes=num_classes)
        # teacher_net = ResNet50(num_classes=num_classes)
    else:
        raise ValueError('Unsupported teacher model: %r' % (args.teacher_model,))
    teacher_net = teacher_net.to(device)
    # The teacher is never optimised, so exclude its weights from autograd.
    for param in teacher_net.parameters():
        param.requires_grad = False

# ---- PGD attack wrapped around the student ------------------------------------
# These three keys are the ones AttackPGD reads from its config.
config = {
    'epsilon': 8.0 / 255,
    'num_steps': 10,
    'step_size': 2.0 / 255,
}
net = AttackPGD(basic_net, config)
if device == 'cuda':
    cudnn.benchmark = True

print('==> Loading teacher..')

def filter_state_dict(state_dict):
    """Normalise a loaded checkpoint into a plain parameter-name -> tensor mapping.

    Steps, in order:
      1. If the mapping has a ``'state_dict'`` entry, use that entry instead.
      2. Drop every key that contains ``'sub_block'``.
      3. Strip a leading ``'module.'`` from the remaining keys.

    Args:
        state_dict: mapping as returned by ``torch.load`` on a checkpoint.

    Returns:
        A new ``OrderedDict`` with the filtered and renamed entries, in their
        original order. The input mapping is not modified.
    """
    from collections import OrderedDict

    if 'state_dict' in state_dict:
        state_dict = state_dict['state_dict']

    prefix = 'module.'
    cleaned = OrderedDict()
    for key, value in state_dict.items():
        if 'sub_block' in key:
            continue
        # Only strip a genuine leading prefix. The previous `'module' in key`
        # test chopped 7 characters off any key that merely contained the word.
        if key.startswith(prefix):
            key = key[len(prefix):]
        cleaned[key] = value
    return cleaned


# Load the teacher checkpoint and normalise its keys (unwrap 'state_dict', drop
# 'sub_block' entries, strip the 'module' prefix -- see filter_state_dict).
# NOTE(review): torch.load unpickles the file, and this checkpoint was fetched
# from Google Drive by gdown; only load files you trust, and consider
# torch.load(..., weights_only=True) where the installed PyTorch supports it.
ckpt = filter_state_dict(torch.load(args.teacher_path, map_location=device))
# ckpt = torch.load(args.teacher_path, map_location=device)

# Earlier experiment: overwrite the classifier head with random weights.
# ckpt['fc.weight'] = torch.randn(10, 2048) # replace the weights with shape (10, 2048)
# ckpt['fc.bias'] = torch.randn(10) # replace the bias with shape (10)

teacher_net.load_state_dict(ckpt)
# The teacher is only used for inference, so switch it to evaluation mode.
teacher_net.eval()

# Alternative loader kept for reference:
# def load_state_dict_with_module_prefix(model, state_dict):
#     new_state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()}
#     model.load_state_dict(new_state_dict)
# load_state_dict_with_module_prefix(teacher_net, torch.load(args.teacher_path))

# NOTE(review): nn.KLDivLoss() uses the default reduction, which averages over
# every element (batch x classes), not per sample. PyTorch documents
# reduction='batchmean' as the form matching the mathematical KL divergence.
# Changing it rescales the loss, so confirm before switching.
KL_loss = nn.KLDivLoss()
XENT_loss = nn.CrossEntropyLoss()
# Module-level copy of the initial learning rate; main() creates its own local
# `lr`, so this global appears unused by the functions below.
lr = args.lr



==> Building model..WideResNet
==> Loading teacher..


In [None]:

def train(epoch, optimizer):
    """Train for one epoch and return the summed per-batch loss.

    For each batch the loss is
    ``alpha * temp**2 * KL(student(adv) / temp, teacher(clean) / temp)
    + (1 - alpha) * CE(student(clean), targets)``,
    where the perturbed inputs come from the global ``net`` wrapper. A
    checkpoint holding the student and optimizer state dicts is written every
    ``args.save_period`` epochs.

    Args:
        epoch: zero-based epoch index (used for the checkpoint file name).
        optimizer: optimizer stepping the student's parameters.

    Returns:
        The sum of the per-batch loss values; the mean is only printed.
    """
    net.train()
    running_loss = 0
    progress = tqdm(trainloader, ncols=0, leave=False)
    for _, (inputs, targets) in enumerate(progress):
        inputs = inputs.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()

        # Student logits on perturbed inputs (the perturbed batch itself is unused).
        adv_logits, _perturbed = net(inputs, targets)
        # Teacher and student logits on the clean inputs.
        teacher_logits = teacher_net(inputs)
        clean_logits = basic_net(inputs)

        soft_term = KL_loss(F.log_softmax(adv_logits / args.temp, dim=1),
                            F.softmax(teacher_logits / args.temp, dim=1))
        hard_term = XENT_loss(clean_logits, targets)
        loss = args.alpha * args.temp * args.temp * soft_term + (1.0 - args.alpha) * hard_term

        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        progress.set_description(str(batch_loss))
        # break -> For very slow mode

    if (epoch + 1) % args.save_period == 0:
        snapshot = {
            'net': basic_net.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        checkpoint_dir = 'checkpoint/' + args.dataset + '/' + args.output + '/'
        os.makedirs(checkpoint_dir, exist_ok=True)
        torch.save(snapshot, './checkpoint/' + args.dataset + '/' + args.output + '/epoch=' + str(epoch) + '.t7')

    print('Mean Training Loss:', running_loss / len(progress))
    return running_loss


def test(epoch, optimizer):
    net.eval()
    adv_correct = 0
    natural_correct = 0
    total = 0
    with torch.no_grad():
        iterator = tqdm(testloader, ncols=0, leave=False)
        for batch_idx, (inputs, targets) in enumerate(iterator):
            inputs, targets = inputs.to(device), targets.to(device)
            adv_outputs, pert_inputs = net(inputs, targets)
            natural_outputs = basic_net(inputs)
            _, adv_predicted = adv_outputs.max(1)
            _, natural_predicted = natural_outputs.max(1)
            natural_correct += natural_predicted.eq(targets).sum().item()
            total += targets.size(0)
            adv_correct += adv_predicted.eq(targets).sum().item()
            iterator.set_description(str(adv_predicted.eq(targets).sum().item() / targets.size(0)))
            # break -> For very slow mode
    robust_acc = 100. * adv_correct / total
    natural_acc = 100. * natural_correct / total
    print('Natural acc:', natural_acc)
    print('Robust acc:', robust_acc)
    return natural_acc, robust_acc


def main():
    """Run the full training loop described by ``args``.

    Creates an SGD optimizer over ``net.parameters()`` and, for every epoch,
    calls ``adjust_learning_rate`` with the initial learning rate, trains for one
    epoch, and evaluates every ``args.val_period`` epochs.
    """
    base_lr = args.lr
    optimizer = optim.SGD(net.parameters(), lr=base_lr, momentum=0.9, weight_decay=2e-4)
    for epoch in range(args.epochs):
        epoch_number = epoch + 1
        print('-------------\nEpoch ' + str(epoch_number))
        adjust_learning_rate(optimizer, epoch, base_lr)
        train(epoch, optimizer)
        if epoch_number % args.val_period == 0:
            natural_acc, robust_acc = test(epoch, optimizer)


# Entry point. In a notebook kernel __name__ is '__main__', so executing this
# cell starts the full training loop immediately.
if __name__ == '__main__':
    main()

-------------
Epoch 1




Mean Training Loss: 0.012314749682736595




Natural acc: 14.53
Robust acc: 11.62
-------------
Epoch 2




Mean Training Loss: 0.004500479947732728




Natural acc: 16.16
Robust acc: 13.51
-------------
Epoch 3




Mean Training Loss: 0.0035320859839496634




Natural acc: 15.85
Robust acc: 13.32
-------------
Epoch 4




Mean Training Loss: 0.0029683608086565343




Natural acc: 16.18
Robust acc: 13.95
-------------
Epoch 5




Mean Training Loss: 0.0026407942284241586




Natural acc: 16.34
Robust acc: 13.94
-------------
Epoch 6




Mean Training Loss: 0.002427580075331337




Natural acc: 15.6
Robust acc: 12.92
-------------
Epoch 7




Mean Training Loss: 0.002256157323884804




Natural acc: 17.35
Robust acc: 14.51
-------------
Epoch 8




Mean Training Loss: 0.0021319116517553665




Natural acc: 16.08
Robust acc: 13.59
-------------
Epoch 9




Mean Training Loss: 0.002037923971233923




Natural acc: 17.13
Robust acc: 14.18
-------------
Epoch 10




Mean Training Loss: 0.001961964759570749




Natural acc: 16.81
Robust acc: 14.0
-------------
Epoch 11




Mean Training Loss: 0.0018942180426691272




Natural acc: 17.33
Robust acc: 14.5
-------------
Epoch 12




Mean Training Loss: 0.001811395203063498




Natural acc: 17.22
Robust acc: 14.81
-------------
Epoch 13




Mean Training Loss: 0.0017539690846524885




Natural acc: 17.78
Robust acc: 14.79
-------------
Epoch 14




Mean Training Loss: 0.0017185667586272291




Natural acc: 17.41
Robust acc: 14.62
-------------
Epoch 15




Mean Training Loss: 0.0016534082401577202




Natural acc: 17.8
Robust acc: 14.53
-------------
Epoch 16




Mean Training Loss: 0.0016069516689633317




Natural acc: 16.9
Robust acc: 14.02
-------------
Epoch 17




Mean Training Loss: 0.0015509148373070848




Natural acc: 18.06
Robust acc: 15.09
-------------
Epoch 18




Mean Training Loss: 0.0015426244788572115




Natural acc: 18.37
Robust acc: 15.25
-------------
Epoch 19




Mean Training Loss: 0.0015060949732389902




Natural acc: 18.47
Robust acc: 14.85
-------------
Epoch 20




Mean Training Loss: 0.0014537389229690118




Natural acc: 18.02
Robust acc: 15.38
-------------
Epoch 21




Mean Training Loss: 0.0014024229063426175




Natural acc: 18.08
Robust acc: 15.15
-------------
Epoch 22




Mean Training Loss: 0.0013744747135168908




Natural acc: 18.42
Robust acc: 15.63
-------------
Epoch 23




Mean Training Loss: 0.0013606176066124226




Natural acc: 18.13
Robust acc: 15.18
-------------
Epoch 24




Mean Training Loss: 0.0013162601032637327




Natural acc: 17.76
Robust acc: 14.61
-------------
Epoch 25




Mean Training Loss: 0.0012904203716008103




Natural acc: 17.45
Robust acc: 14.39
-------------
Epoch 26




Mean Training Loss: 0.001283359247893381




Natural acc: 18.01
Robust acc: 14.69
-------------
Epoch 27




Mean Training Loss: 0.001245969584595193




Natural acc: 17.79
Robust acc: 14.82
-------------
Epoch 28




Mean Training Loss: 0.0012122314644243824




Natural acc: 18.36
Robust acc: 15.29
-------------
Epoch 29




Mean Training Loss: 0.001197346146730587




Natural acc: 18.31
Robust acc: 15.46
-------------
Epoch 30




Mean Training Loss: 0.0011770865742100492




Natural acc: 18.73
Robust acc: 15.86
-------------
Epoch 31




Mean Training Loss: 0.0011514847034640857




Natural acc: 18.14
Robust acc: 15.37
-------------
Epoch 32




Mean Training Loss: 0.0011473862326863552




Natural acc: 18.03
Robust acc: 15.38
-------------
Epoch 33




Mean Training Loss: 0.0011513507287815937




Natural acc: 17.89
Robust acc: 15.37
-------------
Epoch 34




Mean Training Loss: 0.0011335855858731548




Natural acc: 17.71
Robust acc: 15.29
-------------
Epoch 35




Mean Training Loss: 0.0010980906654530398




Natural acc: 18.01
Robust acc: 15.21
-------------
Epoch 36




Mean Training Loss: 0.0010960470147245108




Natural acc: 18.18
Robust acc: 15.4
-------------
Epoch 37




Mean Training Loss: 0.0011029725096693447




Natural acc: 17.6
Robust acc: 15.26
-------------
Epoch 38




Mean Training Loss: 0.0010764831220052297




Natural acc: 18.5
Robust acc: 15.91
-------------
Epoch 39




Mean Training Loss: 0.0010884811609264112




Natural acc: 18.87
Robust acc: 16.01
-------------
Epoch 40




Mean Training Loss: 0.0010746816063628478




Natural acc: 18.42
Robust acc: 15.79
-------------
Epoch 41




Mean Training Loss: 0.0010744586675975692




Natural acc: 17.8
Robust acc: 15.45
-------------
Epoch 42




Mean Training Loss: 0.0010579264741521948




Natural acc: 17.84
Robust acc: 14.82
-------------
Epoch 43




Mean Training Loss: 0.0010361638503111042




Natural acc: 17.91
Robust acc: 15.39
-------------
Epoch 44




Mean Training Loss: 0.0010358571486376092




Natural acc: 18.48
Robust acc: 15.83
-------------
Epoch 45




Mean Training Loss: 0.001015802541517836




Natural acc: 17.96
Robust acc: 15.31
-------------
Epoch 46




Mean Training Loss: 0.0010302600736939408




Natural acc: 18.59
Robust acc: 15.76
-------------
Epoch 47




Mean Training Loss: 0.0010213864962165446




Natural acc: 18.08
Robust acc: 15.44
-------------
Epoch 48




Mean Training Loss: 0.0010064429206930845




Natural acc: 17.74
Robust acc: 15.16
-------------
Epoch 49




Mean Training Loss: 0.00101935805302099




Natural acc: 18.52
Robust acc: 15.77
-------------
Epoch 50




Mean Training Loss: 0.00101626860683479




Natural acc: 17.64
Robust acc: 15.0
-------------
Epoch 51




Mean Training Loss: 0.0010175973719135974




Natural acc: 17.36
Robust acc: 15.12
-------------
Epoch 52




Mean Training Loss: 0.0009980936894369553




Natural acc: 17.8
Robust acc: 15.44
-------------
Epoch 53




Mean Training Loss: 0.0009827350371676828




Natural acc: 17.97
Robust acc: 15.33
-------------
Epoch 54




Mean Training Loss: 0.0009848396783596967




Natural acc: 17.89
Robust acc: 14.95
-------------
Epoch 55




Mean Training Loss: 0.0009912760261936908




Natural acc: 17.76
Robust acc: 15.4
-------------
Epoch 56




Mean Training Loss: 0.0009673181846809319




Natural acc: 18.4
Robust acc: 15.64
-------------
Epoch 57




Mean Training Loss: 0.0009898588545453708




Natural acc: 17.95
Robust acc: 15.62
-------------
Epoch 58




Mean Training Loss: 0.000983194341017243




Natural acc: 17.76
Robust acc: 15.32
-------------
Epoch 59




Mean Training Loss: 0.0009678790068594486




Natural acc: 17.79
Robust acc: 15.35
-------------
Epoch 60




Mean Training Loss: 0.0009832321258280855




Natural acc: 18.65
Robust acc: 15.88
-------------
Epoch 61




Mean Training Loss: 0.0009839531018451103




Natural acc: 18.17
Robust acc: 15.65
-------------
Epoch 62




Mean Training Loss: 0.0009558082994697687




Natural acc: 18.84
Robust acc: 15.87
-------------
Epoch 63




Mean Training Loss: 0.0009602356307289523




Natural acc: 17.08
Robust acc: 14.86
-------------
Epoch 64




Mean Training Loss: 0.0009697773154405758




Natural acc: 17.73
Robust acc: 15.5
-------------
Epoch 65




Mean Training Loss: 0.0009516667849455706




Natural acc: 18.75
Robust acc: 15.91
-------------
Epoch 66




Mean Training Loss: 0.0009707082869828014




Natural acc: 18.39
Robust acc: 15.95
-------------
Epoch 67




Mean Training Loss: 0.000953183900636366




Natural acc: 18.77
Robust acc: 16.13
-------------
Epoch 68




Mean Training Loss: 0.000936264823496942




Natural acc: 18.57
Robust acc: 16.11
-------------
Epoch 69




Mean Training Loss: 0.0009694720345699345




Natural acc: 18.26
Robust acc: 15.69
-------------
Epoch 70




Mean Training Loss: 0.0009750318257947979




Natural acc: 18.34
Robust acc: 15.98
-------------
Epoch 71




Mean Training Loss: 0.0009459947581853136




Natural acc: 18.61
Robust acc: 15.89
-------------
Epoch 72




Mean Training Loss: 0.000946520677650981




Natural acc: 18.17
Robust acc: 15.99
-------------
Epoch 73




Mean Training Loss: 0.000945328013992885




Natural acc: 17.64
Robust acc: 15.42
-------------
Epoch 74




Mean Training Loss: 0.0009468394814564101




Natural acc: 18.29
Robust acc: 15.87
-------------
Epoch 75




Mean Training Loss: 0.0009380098354295277




Natural acc: 18.4
Robust acc: 15.82
-------------
Epoch 76




Mean Training Loss: 0.0009446561344258506




Natural acc: 18.31
Robust acc: 15.51
-------------
Epoch 77




Mean Training Loss: 0.0009401047989652227




Natural acc: 18.04
Robust acc: 15.62
-------------
Epoch 78




Mean Training Loss: 0.0009425935136568744




Natural acc: 18.14
Robust acc: 15.46
-------------
Epoch 79




Mean Training Loss: 0.000947173043509083




Natural acc: 17.71
Robust acc: 15.56
-------------
Epoch 80




Mean Training Loss: 0.0009321497185775043




Natural acc: 18.97
Robust acc: 16.11
-------------
Epoch 81




Mean Training Loss: 0.0009523069766192885




Natural acc: 17.98
Robust acc: 15.77
-------------
Epoch 82




Mean Training Loss: 0.000930586094965639




Natural acc: 19.44
Robust acc: 16.44
-------------
Epoch 83




Mean Training Loss: 0.000927172789988501




Natural acc: 17.18
Robust acc: 14.8
-------------
Epoch 84




Mean Training Loss: 0.0009472090395136506




Natural acc: 18.27
Robust acc: 15.98
-------------
Epoch 85




Mean Training Loss: 0.0009446374141870786




Natural acc: 18.47
Robust acc: 15.83
-------------
Epoch 86




Mean Training Loss: 0.0009427406172187108




Natural acc: 17.94
Robust acc: 15.49
-------------
Epoch 87




Mean Training Loss: 0.0009264862447705529




Natural acc: 17.52
Robust acc: 15.26
-------------
Epoch 88




Mean Training Loss: 0.0009306678737518485




Natural acc: 17.77
Robust acc: 15.67
-------------
Epoch 89




Mean Training Loss: 0.0009132989745079766




Natural acc: 18.08
Robust acc: 15.46
-------------
Epoch 90




Mean Training Loss: 0.0009274225547442884




Natural acc: 17.97
Robust acc: 15.38
-------------
Epoch 91




Mean Training Loss: 0.0009294424360544156




Natural acc: 18.63
Robust acc: 15.99
-------------
Epoch 92




Mean Training Loss: 0.0009181221046120576




Natural acc: 18.2
Robust acc: 15.66
-------------
Epoch 93




Mean Training Loss: 0.0009190510287630799




Natural acc: 18.11
Robust acc: 16.08
-------------
Epoch 94




Mean Training Loss: 0.0009189620429454633




Natural acc: 18.64
Robust acc: 16.06
-------------
Epoch 95




Mean Training Loss: 0.0009305288336630387




Natural acc: 17.77
Robust acc: 15.35
-------------
Epoch 96




Mean Training Loss: 0.0009003900267812602




Natural acc: 18.87
Robust acc: 15.93
-------------
Epoch 97




Mean Training Loss: 0.000923567573460119




Natural acc: 18.29
Robust acc: 15.81
-------------
Epoch 98




Mean Training Loss: 0.0009246952575690988




Natural acc: 17.53
Robust acc: 15.01
-------------
Epoch 99




Mean Training Loss: 0.0009062418660870694




Natural acc: 18.39
Robust acc: 15.79
-------------
Epoch 100




Mean Training Loss: 0.0009324781574628047




Natural acc: 18.28
Robust acc: 15.75
-------------
Epoch 101




Mean Training Loss: 0.000786594747889625




Natural acc: 18.27
Robust acc: 16.0
-------------
Epoch 102




Mean Training Loss: 0.000775626752480312




Natural acc: 18.39
Robust acc: 15.93
-------------
Epoch 103




Mean Training Loss: 0.0007690348444492235




Natural acc: 17.98
Robust acc: 15.8
-------------
Epoch 104




Mean Training Loss: 0.0007610421098085106




Natural acc: 18.45
Robust acc: 15.95
-------------
Epoch 105




Mean Training Loss: 0.0007666662940398202




Natural acc: 17.7
Robust acc: 15.38
-------------
Epoch 106




Mean Training Loss: 0.0007614528282743205




Natural acc: 18.6
Robust acc: 15.94
-------------
Epoch 107




Mean Training Loss: 0.0007536070772072733




Natural acc: 18.48
Robust acc: 15.97
-------------
Epoch 108




Mean Training Loss: 0.0007602459431895058




Natural acc: 18.3
Robust acc: 15.67
-------------
Epoch 109




Mean Training Loss: 0.0007539628742886779




Natural acc: 18.22
Robust acc: 15.81
-------------
Epoch 110




Mean Training Loss: 0.0007610862821285777




Natural acc: 18.86
Robust acc: 16.04
-------------
Epoch 111




Mean Training Loss: 0.0007453641492177915




Natural acc: 18.67
Robust acc: 16.02
-------------
Epoch 112




Mean Training Loss: 0.0007548791255451777




Natural acc: 17.97
Robust acc: 15.71
-------------
Epoch 113




Mean Training Loss: 0.0007552806338023804




Natural acc: 18.53
Robust acc: 15.93
-------------
Epoch 114




Mean Training Loss: 0.0007437630310890448




Natural acc: 18.45
Robust acc: 15.86
-------------
Epoch 115




Mean Training Loss: 0.0007471716060610417




Natural acc: 18.13
Robust acc: 15.76
-------------
Epoch 116




Mean Training Loss: 0.0007507180175928594




Natural acc: 18.28
Robust acc: 16.03
-------------
Epoch 117




Mean Training Loss: 0.000744418930196825




Natural acc: 18.11
Robust acc: 15.82
-------------
Epoch 118




Mean Training Loss: 0.0007422434901782905




Natural acc: 19.38
Robust acc: 16.42
-------------
Epoch 119




Mean Training Loss: 0.0007458230744942527




Natural acc: 18.17
Robust acc: 15.68
-------------
Epoch 120




Mean Training Loss: 0.0007447410006280941




Natural acc: 18.49
Robust acc: 15.88
-------------
Epoch 121




Mean Training Loss: 0.0007483016384992977




Natural acc: 18.79
Robust acc: 16.11
-------------
Epoch 122




Mean Training Loss: 0.0007566484091791522




Natural acc: 18.23
Robust acc: 15.95
-------------
Epoch 123




Mean Training Loss: 0.0007448500390593296




Natural acc: 18.22
Robust acc: 15.54
-------------
Epoch 124




Mean Training Loss: 0.0007452569877231003




Natural acc: 18.43
Robust acc: 16.02
-------------
Epoch 125




Mean Training Loss: 0.0007504618901144856




Natural acc: 18.14
Robust acc: 15.75
-------------
Epoch 126




Mean Training Loss: 0.0007464506983747492




Natural acc: 18.22
Robust acc: 15.87
-------------
Epoch 127




Mean Training Loss: 0.0007452512800972194




Natural acc: 18.93
Robust acc: 16.22
-------------
Epoch 128




Mean Training Loss: 0.0007426501321725433




Natural acc: 18.33
Robust acc: 15.87
-------------
Epoch 129




Mean Training Loss: 0.0007461428856941611




Natural acc: 18.05
Robust acc: 15.5
-------------
Epoch 130




Mean Training Loss: 0.0007439918144394065




Natural acc: 18.79
Robust acc: 16.0
-------------
Epoch 131




Mean Training Loss: 0.0007446340194878304




Natural acc: 18.45
Robust acc: 15.89
-------------
Epoch 132




Mean Training Loss: 0.0007455139648695679




Natural acc: 18.5
Robust acc: 15.98
-------------
Epoch 133




Mean Training Loss: 0.0007331414546286378




Natural acc: 18.7
Robust acc: 15.96
-------------
Epoch 134




Mean Training Loss: 0.0007381561940805058




Natural acc: 18.71
Robust acc: 16.05
-------------
Epoch 135




Mean Training Loss: 0.0007469006498460955




Natural acc: 18.89
Robust acc: 16.07
-------------
Epoch 136




Mean Training Loss: 0.0007488437978279255




Natural acc: 18.64
Robust acc: 16.03
-------------
Epoch 137


0.0007267424953170121:   6% 23/391 [00:03<01:03,  5.80it/s]