# Boundary Guided Adversarial Training (BGAT)

In [1]:
import os
import sys
sys.path.append('..')
import yaml
import shutil
import argparse
import numpy as np
from tqdm import tqdm
from collections import OrderedDict

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, random_split

from utils import *

## Parameter setting

In [2]:
gpu = '4,5,6,7'
dataset = 'cifar10'
model_type = 'wrn34-10'
checkpoint = './checkpoint/bgat/%s/%s' % (model_type, dataset)
num_classes = 10
lr = 0.1
momentum = 0.9
weight_decay = 2e-4
batch_size = 128
total_epochs = 100
lam = 1
epsilon = 0.031
alpha = 0.007
num_repeats = 10

## Inner maximization

In [3]:
def inner_max(model, xent, inputs, targets, epsilon, alpha, num_repeats):
    noise = torch.FloatTensor(inputs.shape).uniform_(-epsilon, epsilon).cuda()
    x = torch.clamp(inputs + noise, min=0, max=1)
    
    for _ in range(num_repeats):
        x.requires_grad_()
        logits = model(x)
        loss = xent(logits, targets)
        loss.backward()
        grads = x.grad.data
        x = x.detach() + alpha*torch.sign(grads).detach()
        x = torch.min(torch.max(x, inputs-epsilon), inputs+epsilon).clamp(min=0, max=1)
    return x

## Training (Outer minimization)

In [4]:
def standard_training(epoch, model, dataloader, optimizer):
    model.train()
    total = 0
    total_loss = 0
    total_correct = 0
    xent = nn.CrossEntropyLoss()
    
    for idx, (inputs, targets) in enumerate(dataloader):
        inputs, targets = inputs.cuda(), targets.cuda()
        batch = inputs.size(0)
        logits = model(inputs)
        loss = xent(logits, targets)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total += batch
        total_loss += loss.item()
        num_correct = logits.softmax(dim=1).argmax(dim=1).eq(targets).sum().item()
        total_correct += num_correct
        
        if idx % 100 == 0:
            print('Epoch %d [%d/%d] | loss: %.4f (avg: %.4f) | acc: %.4f (avg: %.4f) |'\
                  % (epoch, idx, len(dataloader), loss.item(), 
                     total_loss/len(dataloader), 
                     num_correct/batch, total_correct/total))

In [5]:
def training(epoch, model_nat, model_rob, dataloader, optimizer, num_classes, 
             lam=1, epsilon=8/255, alpha=2/255, num_repeats=10):
    model_nat.eval()
    model_rob.train()
    total = 0
    total_loss = 0
    total_correct = 0
        
    mse = nn.MSELoss()
    xent = nn.CrossEntropyLoss()
    for idx, (inputs, targets) in enumerate(dataloader):
        inputs, targets = inputs.cuda(), targets.cuda()
        batch = inputs.size(0)
        
        with torch.no_grad():
            logits_nat = model_nat(inputs)
        x = inner_max(model_rob, xent, inputs, targets, epsilon, alpha, num_repeats)
        logits_rob = model_rob(x)
        
        loss = mse(logits_rob, logits_nat)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
            
        total += batch
        total_loss += loss.item()
        num_correct = torch.argmax(logits_rob.data, dim=1).eq(targets.data).cpu().sum()
        total_correct += num_correct
        
        if idx % 100 == 0:
            print('Epoch %d [%d/%d] | loss: %.4f (avg: %.4f) | acc nat: %.4f (avg: %.4f) |'\
                  % (epoch, idx, len(dataloader), loss.item(), 
                     total_loss/len(dataloader), 
                     num_correct/batch, total_correct/total))

In [6]:
def evaluation(epoch, model, dataloader, alpha, epsilon, num_repeats, use_robust=False):
    model.eval()
    total_correct_nat = 0
    total_correct_adv = 0
    
    xent = nn.CrossEntropyLoss()
    with tqdm(dataloader) as pbar:
        for index, (inputs, targets) in enumerate(pbar):
            inputs, targets = inputs.cuda(), targets.cuda()
            batch = inputs.size(0)
            pbar.set_description("[eval] %d/%d" % (index+1, len(dataloader.dataset)))
            if use_robust:
                with torch.enable_grad():
                    x = inner_max(model, xent, inputs, targets, epsilon, alpha, num_repeats)
            
                with torch.no_grad():
                    logits_nat = model(inputs)
                    logits_adv = model(x)
                total_correct_nat += torch.argmax(logits_nat.data, dim=1).eq(targets.data).cpu().sum().item()
                total_correct_adv += torch.argmax(logits_adv.data, dim=1).eq(targets.data).cpu().sum().item()
                pbar.set_postfix(OrderedDict(acc_nat=total_correct_nat / len(dataloader.dataset),
                                             acc_rob=total_correct_adv / len(dataloader.dataset)))
            else:
                with torch.no_grad():
                    logits_nat = model(inputs)
        
                total_correct_nat += torch.argmax(logits_nat.data, dim=1).eq(targets.data).cpu().sum().item()
                pbar.set_postfix(OrderedDict(acc_nat=total_correct_nat / len(dataloader.dataset)))
    
        return (total_correct_nat / len(dataloader.dataset)), (total_correct_adv / len(dataloader.dataset))


In [None]:
os.environ['CUDA_VISIBLE_DEVICES'] = gpu
os.makedirs(checkpoint, exist_ok=True)

train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor()])
train_dataset, _ = get_dataloader(dataset, batch_size)
num_samples = len(train_dataset)
num_samples_for_train = int(num_samples * 0.98)
num_samples_for_valid = num_samples - num_samples_for_train
train_set, valid_set = random_split(train_dataset, [num_samples_for_train, num_samples_for_valid])
train_dataloader = DataLoader(train_set, batch_size=batch_size, shuffle=True, drop_last=False)
valid_dataloader = DataLoader(valid_set, batch_size=1, shuffle=True, drop_last=False)

model = nn.DataParallel(get_network(model_type, num_classes).cuda())
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)

scheduler = [75, 90]
adjust_learning_rate = lr_scheduler.MultiStepLR(optimizer, scheduler, gamma=0.1)
best_acc = 0
# Training natural model
for epoch in range(total_epochs):
    standard_training(epoch, model, train_dataloader, optimizer)
    test_acc_nat, _ = evaluation(epoch, model, valid_dataloader, alpha, epsilon, num_repeats, use_robust=False)
    is_best = best_acc < test_acc_nat
    best_acc = max(best_acc, test_acc_nat)
    save_checkpoint = {'state_dict': model.state_dict(),
                       'best_acc': best_acc,
                       'optimizer': optimizer.state_dict(),
                       'model_type': model_type,
                       'dataset': dataset}
    torch.save(save_checkpoint, os.path.join(checkpoint, 'model_nat'))
    if is_best:
        torch.save(save_checkpoint, os.path.join(checkpoint, 'best_model_nat'))
    adjust_learning_rate.step()
    
print('Start BGAT.')
model_nat = nn.DataParallel(get_network(model_type, num_classes).cuda())
state_dict_nat = torch.load(os.path.join(checkpoint, 'best_model_nat'))['state_dict']
model_nat.load_state_dict(state_dict_nat)
model_rob = nn.DataParallel(get_network(model_type, num_classes).cuda())
optimizer_rob = optim.SGD(model_rob.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
adjust_learning_rate = lr_scheduler.MultiStepLR(optimizer_rob, scheduler, gamma=0.1)

best_acc_nat = 0
best_acc_rob = 0
# Training robust model using natural model
for epoch in range(total_epochs):
    training(epoch, model_nat, model_rob, train_dataloader, optimizer_rob, num_classes, lam, epsilon, alpha, num_repeats)
    test_acc_nat, test_acc_rob = evaluation(epoch, model_rob, valid_dataloader, alpha, epsilon, num_repeats, use_robust=True)
    is_best = best_acc_nat < test_acc_nat and best_acc_rob < test_acc_rob
    best_acc_nat = max(best_acc_nat, test_acc_nat)
    best_acc_rob = max(best_acc_rob, test_acc_rob)
    save_checkpoint = {'state_dict_rob': model_rob.state_dict(),
                       'best_acc_rob': best_acc_rob,
                       'optimizer': optimizer_rob.state_dict(),
                       'model_type': model_type,
                       'dataset': dataset}
    torch.save(save_checkpoint, os.path.join(checkpoint, 'model_rob'))
    if is_best:
        torch.save(save_checkpoint, os.path.join(checkpoint, 'best_model_rob'))
    adjust_learning_rate.step()

Files already downloaded and verified
Files already downloaded and verified
Epoch 0 [0/383] | loss: 2.3289 (avg: 0.0061) | acc: 0.0859 (avg: 0.0859) |
Epoch 0 [100/383] | loss: 1.7107 (avg: 0.5210) | acc: 0.3125 (avg: 0.2710) |
Epoch 0 [200/383] | loss: 1.6129 (avg: 0.9442) | acc: 0.4062 (avg: 0.3336) |
Epoch 0 [300/383] | loss: 1.3646 (avg: 1.3225) | acc: 0.5078 (avg: 0.3776) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:20<00:00, 47.67it/s, acc_nat=0.412]


Epoch 1 [0/383] | loss: 1.3437 (avg: 0.0035) | acc: 0.5156 (avg: 0.5156) |
Epoch 1 [100/383] | loss: 1.0840 (avg: 0.3121) | acc: 0.5781 (avg: 0.5723) |
Epoch 1 [200/383] | loss: 1.0257 (avg: 0.5955) | acc: 0.6641 (avg: 0.5923) |
Epoch 1 [300/383] | loss: 1.2346 (avg: 0.8613) | acc: 0.5703 (avg: 0.6077) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:20<00:00, 47.81it/s, acc_nat=0.64]


Epoch 2 [0/383] | loss: 0.9155 (avg: 0.0024) | acc: 0.6562 (avg: 0.6562) |
Epoch 2 [100/383] | loss: 0.7442 (avg: 0.2353) | acc: 0.7031 (avg: 0.6863) |
Epoch 2 [200/383] | loss: 0.9118 (avg: 0.4479) | acc: 0.6484 (avg: 0.7017) |
Epoch 2 [300/383] | loss: 0.7649 (avg: 0.6489) | acc: 0.7422 (avg: 0.7114) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:20<00:00, 48.04it/s, acc_nat=0.68]


Epoch 3 [0/383] | loss: 0.6657 (avg: 0.0017) | acc: 0.7969 (avg: 0.7969) |
Epoch 3 [100/383] | loss: 0.5432 (avg: 0.1782) | acc: 0.7891 (avg: 0.7678) |
Epoch 3 [200/383] | loss: 0.4971 (avg: 0.3478) | acc: 0.8125 (avg: 0.7705) |
Epoch 3 [300/383] | loss: 0.5838 (avg: 0.5117) | acc: 0.7891 (avg: 0.7760) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:21<00:00, 46.53it/s, acc_nat=0.765]


Epoch 4 [0/383] | loss: 0.4672 (avg: 0.0012) | acc: 0.8672 (avg: 0.8672) |
Epoch 4 [100/383] | loss: 0.5602 (avg: 0.1490) | acc: 0.8125 (avg: 0.8051) |
Epoch 4 [200/383] | loss: 0.5838 (avg: 0.2966) | acc: 0.7734 (avg: 0.8069) |
Epoch 4 [300/383] | loss: 0.4404 (avg: 0.4401) | acc: 0.8281 (avg: 0.8076) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:14<00:00, 68.67it/s, acc_nat=0.791]


Epoch 5 [0/383] | loss: 0.5662 (avg: 0.0015) | acc: 0.8359 (avg: 0.8359) |
Epoch 5 [100/383] | loss: 0.4014 (avg: 0.1290) | acc: 0.8672 (avg: 0.8274) |
Epoch 5 [200/383] | loss: 0.5170 (avg: 0.2569) | acc: 0.8438 (avg: 0.8288) |
Epoch 5 [300/383] | loss: 0.3460 (avg: 0.3799) | acc: 0.9141 (avg: 0.8321) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:14<00:00, 69.37it/s, acc_nat=0.786]


Epoch 6 [0/383] | loss: 0.3509 (avg: 0.0009) | acc: 0.8984 (avg: 0.8984) |
Epoch 6 [100/383] | loss: 0.3563 (avg: 0.1165) | acc: 0.8828 (avg: 0.8489) |
Epoch 6 [200/383] | loss: 0.4387 (avg: 0.2306) | acc: 0.8594 (avg: 0.8506) |
Epoch 6 [300/383] | loss: 0.4895 (avg: 0.3457) | acc: 0.8047 (avg: 0.8503) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:14<00:00, 71.10it/s, acc_nat=0.814]


Epoch 7 [0/383] | loss: 0.4592 (avg: 0.0012) | acc: 0.8359 (avg: 0.8359) |
Epoch 7 [100/383] | loss: 0.3552 (avg: 0.1049) | acc: 0.8828 (avg: 0.8636) |
Epoch 7 [200/383] | loss: 0.3498 (avg: 0.2128) | acc: 0.8672 (avg: 0.8610) |
Epoch 7 [300/383] | loss: 0.5029 (avg: 0.3214) | acc: 0.8438 (avg: 0.8604) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 71.83it/s, acc_nat=0.821]


Epoch 8 [0/383] | loss: 0.3774 (avg: 0.0010) | acc: 0.8438 (avg: 0.8438) |
Epoch 8 [100/383] | loss: 0.2933 (avg: 0.0964) | acc: 0.8750 (avg: 0.8737) |
Epoch 8 [200/383] | loss: 0.4221 (avg: 0.1962) | acc: 0.8281 (avg: 0.8706) |
Epoch 8 [300/383] | loss: 0.5171 (avg: 0.2935) | acc: 0.8438 (avg: 0.8717) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 71.53it/s, acc_nat=0.793]


Epoch 9 [0/383] | loss: 0.3333 (avg: 0.0009) | acc: 0.8906 (avg: 0.8906) |
Epoch 9 [100/383] | loss: 0.2873 (avg: 0.0906) | acc: 0.9219 (avg: 0.8832) |
Epoch 9 [200/383] | loss: 0.4340 (avg: 0.1830) | acc: 0.8594 (avg: 0.8799) |
Epoch 9 [300/383] | loss: 0.2817 (avg: 0.2757) | acc: 0.8906 (avg: 0.8798) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 73.19it/s, acc_nat=0.845]


Epoch 10 [0/383] | loss: 0.4115 (avg: 0.0011) | acc: 0.8750 (avg: 0.8750) |
Epoch 10 [100/383] | loss: 0.2846 (avg: 0.0826) | acc: 0.8906 (avg: 0.8943) |
Epoch 10 [200/383] | loss: 0.2899 (avg: 0.1702) | acc: 0.8672 (avg: 0.8897) |
Epoch 10 [300/383] | loss: 0.3657 (avg: 0.2557) | acc: 0.8750 (avg: 0.8890) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 71.46it/s, acc_nat=0.829]


Epoch 11 [0/383] | loss: 0.3798 (avg: 0.0010) | acc: 0.8906 (avg: 0.8906) |
Epoch 11 [100/383] | loss: 0.2951 (avg: 0.0774) | acc: 0.9062 (avg: 0.8988) |
Epoch 11 [200/383] | loss: 0.1807 (avg: 0.1567) | acc: 0.9375 (avg: 0.8978) |
Epoch 11 [300/383] | loss: 0.2766 (avg: 0.2370) | acc: 0.8984 (avg: 0.8974) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 72.64it/s, acc_nat=0.838]


Epoch 12 [0/383] | loss: 0.2501 (avg: 0.0007) | acc: 0.9219 (avg: 0.9219) |
Epoch 12 [100/383] | loss: 0.3034 (avg: 0.0744) | acc: 0.9219 (avg: 0.9023) |
Epoch 12 [200/383] | loss: 0.2790 (avg: 0.1481) | acc: 0.9141 (avg: 0.9029) |
Epoch 12 [300/383] | loss: 0.3418 (avg: 0.2268) | acc: 0.9062 (avg: 0.9002) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:14<00:00, 71.00it/s, acc_nat=0.86]


Epoch 13 [0/383] | loss: 0.1972 (avg: 0.0005) | acc: 0.9375 (avg: 0.9375) |
Epoch 13 [100/383] | loss: 0.1851 (avg: 0.0691) | acc: 0.9297 (avg: 0.9117) |
Epoch 13 [200/383] | loss: 0.2425 (avg: 0.1428) | acc: 0.9062 (avg: 0.9078) |
Epoch 13 [300/383] | loss: 0.3624 (avg: 0.2184) | acc: 0.8672 (avg: 0.9065) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:14<00:00, 70.41it/s, acc_nat=0.857]


Epoch 14 [0/383] | loss: 0.2344 (avg: 0.0006) | acc: 0.9219 (avg: 0.9219) |
Epoch 14 [100/383] | loss: 0.1997 (avg: 0.0664) | acc: 0.9453 (avg: 0.9137) |
Epoch 14 [200/383] | loss: 0.1681 (avg: 0.1361) | acc: 0.9688 (avg: 0.9108) |
Epoch 14 [300/383] | loss: 0.1556 (avg: 0.2070) | acc: 0.9453 (avg: 0.9086) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 71.78it/s, acc_nat=0.826]


Epoch 15 [0/383] | loss: 0.3331 (avg: 0.0009) | acc: 0.8750 (avg: 0.8750) |
Epoch 15 [100/383] | loss: 0.3484 (avg: 0.0653) | acc: 0.8828 (avg: 0.9162) |
Epoch 15 [200/383] | loss: 0.1280 (avg: 0.1323) | acc: 0.9453 (avg: 0.9141) |
Epoch 15 [300/383] | loss: 0.1969 (avg: 0.1997) | acc: 0.9297 (avg: 0.9127) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:14<00:00, 68.13it/s, acc_nat=0.853]


Epoch 16 [0/383] | loss: 0.2174 (avg: 0.0006) | acc: 0.8984 (avg: 0.8984) |
Epoch 16 [100/383] | loss: 0.1335 (avg: 0.0590) | acc: 0.9531 (avg: 0.9220) |
Epoch 16 [200/383] | loss: 0.2404 (avg: 0.1257) | acc: 0.9297 (avg: 0.9186) |
Epoch 16 [300/383] | loss: 0.2260 (avg: 0.1932) | acc: 0.9062 (avg: 0.9162) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 72.66it/s, acc_nat=0.851]


Epoch 17 [0/383] | loss: 0.2192 (avg: 0.0006) | acc: 0.9219 (avg: 0.9219) |
Epoch 17 [100/383] | loss: 0.2694 (avg: 0.0546) | acc: 0.9219 (avg: 0.9313) |
Epoch 17 [200/383] | loss: 0.1763 (avg: 0.1148) | acc: 0.9297 (avg: 0.9249) |
Epoch 17 [300/383] | loss: 0.2790 (avg: 0.1784) | acc: 0.8828 (avg: 0.9218) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 71.99it/s, acc_nat=0.852]


Epoch 18 [0/383] | loss: 0.1238 (avg: 0.0003) | acc: 0.9766 (avg: 0.9766) |
Epoch 18 [100/383] | loss: 0.1412 (avg: 0.0598) | acc: 0.9609 (avg: 0.9237) |
Epoch 18 [200/383] | loss: 0.1591 (avg: 0.1186) | acc: 0.9453 (avg: 0.9248) |
Epoch 18 [300/383] | loss: 0.3602 (avg: 0.1772) | acc: 0.8750 (avg: 0.9245) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 72.57it/s, acc_nat=0.877]


Epoch 19 [0/383] | loss: 0.0974 (avg: 0.0003) | acc: 0.9609 (avg: 0.9609) |
Epoch 19 [100/383] | loss: 0.2171 (avg: 0.0521) | acc: 0.9062 (avg: 0.9321) |
Epoch 19 [200/383] | loss: 0.2544 (avg: 0.1122) | acc: 0.8906 (avg: 0.9259) |
Epoch 19 [300/383] | loss: 0.2044 (avg: 0.1724) | acc: 0.9453 (avg: 0.9238) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 71.97it/s, acc_nat=0.889]


Epoch 20 [0/383] | loss: 0.2352 (avg: 0.0006) | acc: 0.9219 (avg: 0.9219) |
Epoch 20 [100/383] | loss: 0.2970 (avg: 0.0548) | acc: 0.9219 (avg: 0.9276) |
Epoch 20 [200/383] | loss: 0.1391 (avg: 0.1122) | acc: 0.9688 (avg: 0.9253) |
Epoch 20 [300/383] | loss: 0.2146 (avg: 0.1705) | acc: 0.9297 (avg: 0.9238) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 73.33it/s, acc_nat=0.866]


Epoch 21 [0/383] | loss: 0.2255 (avg: 0.0006) | acc: 0.9375 (avg: 0.9375) |
Epoch 21 [100/383] | loss: 0.1393 (avg: 0.0525) | acc: 0.9453 (avg: 0.9330) |
Epoch 21 [200/383] | loss: 0.2326 (avg: 0.1090) | acc: 0.9297 (avg: 0.9294) |
Epoch 21 [300/383] | loss: 0.2197 (avg: 0.1654) | acc: 0.9219 (avg: 0.9288) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:14<00:00, 71.15it/s, acc_nat=0.867]


Epoch 22 [0/383] | loss: 0.2685 (avg: 0.0007) | acc: 0.8828 (avg: 0.8828) |
Epoch 22 [100/383] | loss: 0.2307 (avg: 0.0523) | acc: 0.9219 (avg: 0.9324) |
Epoch 22 [200/383] | loss: 0.1746 (avg: 0.1064) | acc: 0.9375 (avg: 0.9297) |
Epoch 22 [300/383] | loss: 0.1934 (avg: 0.1588) | acc: 0.9297 (avg: 0.9298) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 73.72it/s, acc_nat=0.854]


Epoch 23 [0/383] | loss: 0.1499 (avg: 0.0004) | acc: 0.9531 (avg: 0.9531) |
Epoch 23 [100/383] | loss: 0.2580 (avg: 0.0527) | acc: 0.9141 (avg: 0.9330) |
Epoch 23 [200/383] | loss: 0.1741 (avg: 0.1032) | acc: 0.9453 (avg: 0.9328) |
Epoch 23 [300/383] | loss: 0.2411 (avg: 0.1608) | acc: 0.9297 (avg: 0.9306) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 75.12it/s, acc_nat=0.849]


Epoch 24 [0/383] | loss: 0.1719 (avg: 0.0004) | acc: 0.9453 (avg: 0.9453) |
Epoch 24 [100/383] | loss: 0.1555 (avg: 0.0482) | acc: 0.9297 (avg: 0.9356) |
Epoch 24 [200/383] | loss: 0.2226 (avg: 0.1017) | acc: 0.9219 (avg: 0.9329) |
Epoch 24 [300/383] | loss: 0.1433 (avg: 0.1498) | acc: 0.9531 (avg: 0.9348) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 74.52it/s, acc_nat=0.846]


Epoch 25 [0/383] | loss: 0.1319 (avg: 0.0003) | acc: 0.9453 (avg: 0.9453) |
Epoch 25 [100/383] | loss: 0.1417 (avg: 0.0504) | acc: 0.9531 (avg: 0.9346) |
Epoch 25 [200/383] | loss: 0.1771 (avg: 0.1018) | acc: 0.9453 (avg: 0.9338) |
Epoch 25 [300/383] | loss: 0.1828 (avg: 0.1544) | acc: 0.9531 (avg: 0.9317) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 72.88it/s, acc_nat=0.838]


Epoch 26 [0/383] | loss: 0.1677 (avg: 0.0004) | acc: 0.9297 (avg: 0.9297) |
Epoch 26 [100/383] | loss: 0.1503 (avg: 0.0448) | acc: 0.9375 (avg: 0.9439) |
Epoch 26 [200/383] | loss: 0.1272 (avg: 0.0926) | acc: 0.9531 (avg: 0.9401) |
Epoch 26 [300/383] | loss: 0.1580 (avg: 0.1426) | acc: 0.9688 (avg: 0.9379) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 75.01it/s, acc_nat=0.859]


Epoch 27 [0/383] | loss: 0.0953 (avg: 0.0002) | acc: 0.9766 (avg: 0.9766) |
Epoch 27 [100/383] | loss: 0.1147 (avg: 0.0441) | acc: 0.9609 (avg: 0.9440) |
Epoch 27 [200/383] | loss: 0.2238 (avg: 0.0917) | acc: 0.9062 (avg: 0.9392) |
Epoch 27 [300/383] | loss: 0.1423 (avg: 0.1403) | acc: 0.9609 (avg: 0.9384) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:14<00:00, 69.60it/s, acc_nat=0.83]


Epoch 28 [0/383] | loss: 0.1238 (avg: 0.0003) | acc: 0.9688 (avg: 0.9688) |
Epoch 28 [100/383] | loss: 0.1731 (avg: 0.0455) | acc: 0.9453 (avg: 0.9410) |
Epoch 28 [200/383] | loss: 0.1173 (avg: 0.0915) | acc: 0.9453 (avg: 0.9398) |
Epoch 28 [300/383] | loss: 0.2671 (avg: 0.1426) | acc: 0.9141 (avg: 0.9378) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 71.99it/s, acc_nat=0.879]


Epoch 29 [0/383] | loss: 0.1808 (avg: 0.0005) | acc: 0.9297 (avg: 0.9297) |
Epoch 29 [100/383] | loss: 0.1742 (avg: 0.0426) | acc: 0.9531 (avg: 0.9447) |
Epoch 29 [200/383] | loss: 0.2085 (avg: 0.0856) | acc: 0.9141 (avg: 0.9447) |
Epoch 29 [300/383] | loss: 0.1340 (avg: 0.1372) | acc: 0.9531 (avg: 0.9406) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:14<00:00, 71.36it/s, acc_nat=0.852]


Epoch 30 [0/383] | loss: 0.1747 (avg: 0.0005) | acc: 0.9297 (avg: 0.9297) |
Epoch 30 [100/383] | loss: 0.2341 (avg: 0.0437) | acc: 0.9375 (avg: 0.9438) |
Epoch 30 [200/383] | loss: 0.1236 (avg: 0.0908) | acc: 0.9688 (avg: 0.9417) |
Epoch 30 [300/383] | loss: 0.1215 (avg: 0.1372) | acc: 0.9531 (avg: 0.9405) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 72.61it/s, acc_nat=0.869]


Epoch 31 [0/383] | loss: 0.2181 (avg: 0.0006) | acc: 0.9219 (avg: 0.9219) |
Epoch 31 [100/383] | loss: 0.1705 (avg: 0.0421) | acc: 0.9297 (avg: 0.9462) |
Epoch 31 [200/383] | loss: 0.2192 (avg: 0.0847) | acc: 0.9297 (avg: 0.9461) |
Epoch 31 [300/383] | loss: 0.1589 (avg: 0.1300) | acc: 0.9297 (avg: 0.9441) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:14<00:00, 67.16it/s, acc_nat=0.877]


Epoch 32 [0/383] | loss: 0.1716 (avg: 0.0004) | acc: 0.9453 (avg: 0.9453) |
Epoch 32 [100/383] | loss: 0.0660 (avg: 0.0402) | acc: 0.9766 (avg: 0.9482) |
Epoch 32 [200/383] | loss: 0.1341 (avg: 0.0856) | acc: 0.9688 (avg: 0.9447) |
Epoch 32 [300/383] | loss: 0.1894 (avg: 0.1295) | acc: 0.9297 (avg: 0.9439) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 71.59it/s, acc_nat=0.86]


Epoch 33 [0/383] | loss: 0.1617 (avg: 0.0004) | acc: 0.9375 (avg: 0.9375) |
Epoch 33 [100/383] | loss: 0.2314 (avg: 0.0436) | acc: 0.9062 (avg: 0.9416) |
Epoch 33 [200/383] | loss: 0.2111 (avg: 0.0882) | acc: 0.9219 (avg: 0.9417) |
Epoch 33 [300/383] | loss: 0.1668 (avg: 0.1340) | acc: 0.9453 (avg: 0.9414) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 71.45it/s, acc_nat=0.883]


Epoch 34 [0/383] | loss: 0.1007 (avg: 0.0003) | acc: 0.9688 (avg: 0.9688) |
Epoch 34 [100/383] | loss: 0.1438 (avg: 0.0407) | acc: 0.9609 (avg: 0.9463) |
Epoch 34 [200/383] | loss: 0.1770 (avg: 0.0807) | acc: 0.9297 (avg: 0.9468) |
Epoch 34 [300/383] | loss: 0.2346 (avg: 0.1280) | acc: 0.9219 (avg: 0.9428) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 73.46it/s, acc_nat=0.898]


Epoch 35 [0/383] | loss: 0.1802 (avg: 0.0005) | acc: 0.9297 (avg: 0.9297) |
Epoch 35 [100/383] | loss: 0.1000 (avg: 0.0419) | acc: 0.9688 (avg: 0.9455) |
Epoch 35 [200/383] | loss: 0.1519 (avg: 0.0808) | acc: 0.9531 (avg: 0.9476) |
Epoch 35 [300/383] | loss: 0.1167 (avg: 0.1279) | acc: 0.9609 (avg: 0.9446) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 72.77it/s, acc_nat=0.809]


Epoch 36 [0/383] | loss: 0.1418 (avg: 0.0004) | acc: 0.9609 (avg: 0.9609) |
Epoch 36 [100/383] | loss: 0.1813 (avg: 0.0412) | acc: 0.9375 (avg: 0.9460) |
Epoch 36 [200/383] | loss: 0.2038 (avg: 0.0824) | acc: 0.9375 (avg: 0.9459) |
Epoch 36 [300/383] | loss: 0.1607 (avg: 0.1244) | acc: 0.9375 (avg: 0.9454) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 73.09it/s, acc_nat=0.851]


Epoch 37 [0/383] | loss: 0.1263 (avg: 0.0003) | acc: 0.9688 (avg: 0.9688) |
Epoch 37 [100/383] | loss: 0.2429 (avg: 0.0387) | acc: 0.9062 (avg: 0.9492) |
Epoch 37 [200/383] | loss: 0.1694 (avg: 0.0817) | acc: 0.9375 (avg: 0.9455) |
Epoch 37 [300/383] | loss: 0.2416 (avg: 0.1260) | acc: 0.9219 (avg: 0.9441) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:14<00:00, 70.22it/s, acc_nat=0.856]


Epoch 38 [0/383] | loss: 0.1153 (avg: 0.0003) | acc: 0.9531 (avg: 0.9531) |
Epoch 38 [100/383] | loss: 0.1235 (avg: 0.0372) | acc: 0.9688 (avg: 0.9524) |
Epoch 38 [200/383] | loss: 0.0865 (avg: 0.0772) | acc: 0.9766 (avg: 0.9502) |
Epoch 38 [300/383] | loss: 0.1405 (avg: 0.1222) | acc: 0.9453 (avg: 0.9473) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 73.80it/s, acc_nat=0.878]


Epoch 39 [0/383] | loss: 0.1548 (avg: 0.0004) | acc: 0.9297 (avg: 0.9297) |
Epoch 39 [100/383] | loss: 0.0536 (avg: 0.0364) | acc: 0.9766 (avg: 0.9516) |
Epoch 39 [200/383] | loss: 0.2106 (avg: 0.0791) | acc: 0.9453 (avg: 0.9484) |
Epoch 39 [300/383] | loss: 0.1682 (avg: 0.1196) | acc: 0.9453 (avg: 0.9475) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 73.82it/s, acc_nat=0.836]


Epoch 40 [0/383] | loss: 0.1677 (avg: 0.0004) | acc: 0.9375 (avg: 0.9375) |
Epoch 40 [100/383] | loss: 0.1550 (avg: 0.0374) | acc: 0.9609 (avg: 0.9524) |
Epoch 40 [200/383] | loss: 0.2217 (avg: 0.0781) | acc: 0.9375 (avg: 0.9504) |
Epoch 40 [300/383] | loss: 0.1911 (avg: 0.1201) | acc: 0.9375 (avg: 0.9487) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 71.84it/s, acc_nat=0.846]


Epoch 41 [0/383] | loss: 0.1092 (avg: 0.0003) | acc: 0.9609 (avg: 0.9609) |
Epoch 41 [100/383] | loss: 0.1116 (avg: 0.0375) | acc: 0.9609 (avg: 0.9513) |
Epoch 41 [200/383] | loss: 0.1563 (avg: 0.0797) | acc: 0.9531 (avg: 0.9472) |
Epoch 41 [300/383] | loss: 0.1025 (avg: 0.1205) | acc: 0.9609 (avg: 0.9468) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:14<00:00, 71.38it/s, acc_nat=0.867]


Epoch 42 [0/383] | loss: 0.1072 (avg: 0.0003) | acc: 0.9609 (avg: 0.9609) |
Epoch 42 [100/383] | loss: 0.2211 (avg: 0.0367) | acc: 0.9297 (avg: 0.9541) |
Epoch 42 [200/383] | loss: 0.1066 (avg: 0.0776) | acc: 0.9531 (avg: 0.9506) |
Epoch 42 [300/383] | loss: 0.3819 (avg: 0.1199) | acc: 0.8906 (avg: 0.9478) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 72.38it/s, acc_nat=0.853]


Epoch 43 [0/383] | loss: 0.1133 (avg: 0.0003) | acc: 0.9453 (avg: 0.9453) |
Epoch 43 [100/383] | loss: 0.1172 (avg: 0.0369) | acc: 0.9688 (avg: 0.9513) |
Epoch 43 [200/383] | loss: 0.1960 (avg: 0.0737) | acc: 0.9375 (avg: 0.9507) |
Epoch 43 [300/383] | loss: 0.2352 (avg: 0.1142) | acc: 0.9297 (avg: 0.9494) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 72.47it/s, acc_nat=0.877]


Epoch 44 [0/383] | loss: 0.0925 (avg: 0.0002) | acc: 0.9766 (avg: 0.9766) |
Epoch 44 [100/383] | loss: 0.1516 (avg: 0.0400) | acc: 0.9297 (avg: 0.9475) |
Epoch 44 [200/383] | loss: 0.0704 (avg: 0.0771) | acc: 0.9766 (avg: 0.9497) |
Epoch 44 [300/383] | loss: 0.2002 (avg: 0.1158) | acc: 0.9219 (avg: 0.9490) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 73.01it/s, acc_nat=0.892]


Epoch 45 [0/383] | loss: 0.1870 (avg: 0.0005) | acc: 0.9375 (avg: 0.9375) |
Epoch 45 [100/383] | loss: 0.1932 (avg: 0.0349) | acc: 0.9297 (avg: 0.9555) |
Epoch 45 [200/383] | loss: 0.1782 (avg: 0.0749) | acc: 0.9297 (avg: 0.9509) |
Epoch 45 [300/383] | loss: 0.1149 (avg: 0.1178) | acc: 0.9688 (avg: 0.9481) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:14<00:00, 71.39it/s, acc_nat=0.867]


Epoch 46 [0/383] | loss: 0.1283 (avg: 0.0003) | acc: 0.9375 (avg: 0.9375) |
Epoch 46 [100/383] | loss: 0.1832 (avg: 0.0338) | acc: 0.9297 (avg: 0.9554) |
Epoch 46 [200/383] | loss: 0.1614 (avg: 0.0710) | acc: 0.9453 (avg: 0.9529) |
Epoch 46 [300/383] | loss: 0.1429 (avg: 0.1126) | acc: 0.9297 (avg: 0.9504) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:14<00:00, 69.03it/s, acc_nat=0.878]


Epoch 47 [0/383] | loss: 0.1915 (avg: 0.0005) | acc: 0.9531 (avg: 0.9531) |
Epoch 47 [100/383] | loss: 0.1132 (avg: 0.0333) | acc: 0.9609 (avg: 0.9585) |
Epoch 47 [200/383] | loss: 0.1838 (avg: 0.0716) | acc: 0.9297 (avg: 0.9546) |
Epoch 47 [300/383] | loss: 0.1391 (avg: 0.1090) | acc: 0.9375 (avg: 0.9534) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 72.04it/s, acc_nat=0.855]


Epoch 48 [0/383] | loss: 0.2300 (avg: 0.0006) | acc: 0.9297 (avg: 0.9297) |
Epoch 48 [100/383] | loss: 0.0803 (avg: 0.0340) | acc: 0.9766 (avg: 0.9566) |
Epoch 48 [200/383] | loss: 0.0766 (avg: 0.0711) | acc: 0.9922 (avg: 0.9531) |
Epoch 48 [300/383] | loss: 0.0918 (avg: 0.1100) | acc: 0.9766 (avg: 0.9518) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:14<00:00, 70.96it/s, acc_nat=0.865]


Epoch 49 [0/383] | loss: 0.2060 (avg: 0.0005) | acc: 0.9453 (avg: 0.9453) |
Epoch 49 [100/383] | loss: 0.1333 (avg: 0.0346) | acc: 0.9531 (avg: 0.9549) |
Epoch 49 [200/383] | loss: 0.1599 (avg: 0.0730) | acc: 0.9375 (avg: 0.9520) |
Epoch 49 [300/383] | loss: 0.1885 (avg: 0.1126) | acc: 0.9375 (avg: 0.9504) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 72.03it/s, acc_nat=0.884]


Epoch 50 [0/383] | loss: 0.1764 (avg: 0.0005) | acc: 0.9375 (avg: 0.9375) |
Epoch 50 [100/383] | loss: 0.1471 (avg: 0.0382) | acc: 0.9688 (avg: 0.9483) |
Epoch 50 [200/383] | loss: 0.1186 (avg: 0.0750) | acc: 0.9531 (avg: 0.9499) |
Epoch 50 [300/383] | loss: 0.1222 (avg: 0.1109) | acc: 0.9453 (avg: 0.9502) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:14<00:00, 69.06it/s, acc_nat=0.882]


Epoch 51 [0/383] | loss: 0.1593 (avg: 0.0004) | acc: 0.9453 (avg: 0.9453) |
Epoch 51 [100/383] | loss: 0.1327 (avg: 0.0361) | acc: 0.9531 (avg: 0.9537) |
Epoch 51 [200/383] | loss: 0.1184 (avg: 0.0713) | acc: 0.9609 (avg: 0.9536) |
Epoch 51 [300/383] | loss: 0.1902 (avg: 0.1111) | acc: 0.9453 (avg: 0.9523) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 72.18it/s, acc_nat=0.886]


Epoch 52 [0/383] | loss: 0.0741 (avg: 0.0002) | acc: 0.9922 (avg: 0.9922) |
Epoch 52 [100/383] | loss: 0.1182 (avg: 0.0355) | acc: 0.9609 (avg: 0.9565) |
Epoch 52 [200/383] | loss: 0.1673 (avg: 0.0705) | acc: 0.9375 (avg: 0.9547) |
Epoch 52 [300/383] | loss: 0.1971 (avg: 0.1070) | acc: 0.9297 (avg: 0.9538) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 73.79it/s, acc_nat=0.887]


Epoch 53 [0/383] | loss: 0.1807 (avg: 0.0005) | acc: 0.9531 (avg: 0.9531) |
Epoch 53 [100/383] | loss: 0.2095 (avg: 0.0343) | acc: 0.9297 (avg: 0.9575) |
Epoch 53 [200/383] | loss: 0.1289 (avg: 0.0674) | acc: 0.9688 (avg: 0.9575) |
Epoch 53 [300/383] | loss: 0.1330 (avg: 0.1027) | acc: 0.9766 (avg: 0.9563) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 71.56it/s, acc_nat=0.868]


Epoch 54 [0/383] | loss: 0.1851 (avg: 0.0005) | acc: 0.9375 (avg: 0.9375) |
Epoch 54 [100/383] | loss: 0.1276 (avg: 0.0332) | acc: 0.9609 (avg: 0.9579) |
Epoch 54 [200/383] | loss: 0.1469 (avg: 0.0732) | acc: 0.9609 (avg: 0.9526) |
Epoch 54 [300/383] | loss: 0.1742 (avg: 0.1116) | acc: 0.9375 (avg: 0.9515) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:14<00:00, 70.64it/s, acc_nat=0.866]


Epoch 55 [0/383] | loss: 0.1666 (avg: 0.0004) | acc: 0.9531 (avg: 0.9531) |
Epoch 55 [100/383] | loss: 0.0889 (avg: 0.0320) | acc: 0.9766 (avg: 0.9584) |
Epoch 55 [200/383] | loss: 0.0722 (avg: 0.0722) | acc: 0.9766 (avg: 0.9532) |
Epoch 55 [300/383] | loss: 0.1941 (avg: 0.1082) | acc: 0.9531 (avg: 0.9529) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 73.97it/s, acc_nat=0.882]


Epoch 56 [0/383] | loss: 0.1254 (avg: 0.0003) | acc: 0.9688 (avg: 0.9688) |
Epoch 56 [100/383] | loss: 0.1504 (avg: 0.0344) | acc: 0.9453 (avg: 0.9568) |
Epoch 56 [200/383] | loss: 0.0892 (avg: 0.0729) | acc: 0.9688 (avg: 0.9534) |
Epoch 56 [300/383] | loss: 0.1889 (avg: 0.1080) | acc: 0.9375 (avg: 0.9535) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:20<00:00, 48.58it/s, acc_nat=0.868]


Epoch 57 [0/383] | loss: 0.0951 (avg: 0.0002) | acc: 0.9609 (avg: 0.9609) |
Epoch 57 [100/383] | loss: 0.1333 (avg: 0.0341) | acc: 0.9531 (avg: 0.9547) |
Epoch 57 [200/383] | loss: 0.2171 (avg: 0.0665) | acc: 0.9297 (avg: 0.9566) |
Epoch 57 [300/383] | loss: 0.2111 (avg: 0.1018) | acc: 0.9141 (avg: 0.9556) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:20<00:00, 48.36it/s, acc_nat=0.891]


Epoch 58 [0/383] | loss: 0.0750 (avg: 0.0002) | acc: 0.9844 (avg: 0.9844) |
Epoch 58 [100/383] | loss: 0.1234 (avg: 0.0307) | acc: 0.9531 (avg: 0.9623) |
Epoch 58 [200/383] | loss: 0.1269 (avg: 0.0665) | acc: 0.9531 (avg: 0.9584) |
Epoch 58 [300/383] | loss: 0.1338 (avg: 0.1030) | acc: 0.9453 (avg: 0.9568) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:20<00:00, 47.64it/s, acc_nat=0.896]


Epoch 59 [0/383] | loss: 0.1441 (avg: 0.0004) | acc: 0.9531 (avg: 0.9531) |
Epoch 59 [100/383] | loss: 0.1129 (avg: 0.0285) | acc: 0.9688 (avg: 0.9624) |
Epoch 59 [200/383] | loss: 0.1489 (avg: 0.0643) | acc: 0.9609 (avg: 0.9576) |
Epoch 59 [300/383] | loss: 0.1703 (avg: 0.1040) | acc: 0.9219 (avg: 0.9546) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:20<00:00, 47.71it/s, acc_nat=0.883]


Epoch 60 [0/383] | loss: 0.0561 (avg: 0.0001) | acc: 0.9766 (avg: 0.9766) |
Epoch 60 [100/383] | loss: 0.0717 (avg: 0.0318) | acc: 0.9766 (avg: 0.9613) |
Epoch 60 [200/383] | loss: 0.1084 (avg: 0.0642) | acc: 0.9609 (avg: 0.9598) |
Epoch 60 [300/383] | loss: 0.1271 (avg: 0.1009) | acc: 0.9688 (avg: 0.9571) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:21<00:00, 46.57it/s, acc_nat=0.887]


Epoch 61 [0/383] | loss: 0.0547 (avg: 0.0001) | acc: 0.9688 (avg: 0.9688) |
Epoch 61 [100/383] | loss: 0.1009 (avg: 0.0323) | acc: 0.9609 (avg: 0.9580) |
Epoch 61 [200/383] | loss: 0.1421 (avg: 0.0681) | acc: 0.9688 (avg: 0.9561) |
Epoch 61 [300/383] | loss: 0.0980 (avg: 0.1057) | acc: 0.9531 (avg: 0.9537) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:20<00:00, 49.10it/s, acc_nat=0.892]


Epoch 62 [0/383] | loss: 0.1283 (avg: 0.0003) | acc: 0.9609 (avg: 0.9609) |
Epoch 62 [100/383] | loss: 0.0716 (avg: 0.0302) | acc: 0.9766 (avg: 0.9615) |
Epoch 62 [200/383] | loss: 0.1022 (avg: 0.0636) | acc: 0.9766 (avg: 0.9587) |
Epoch 62 [300/383] | loss: 0.2076 (avg: 0.1003) | acc: 0.9141 (avg: 0.9564) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:12<00:00, 77.66it/s, acc_nat=0.889]


Epoch 63 [0/383] | loss: 0.1432 (avg: 0.0004) | acc: 0.9531 (avg: 0.9531) |
Epoch 63 [100/383] | loss: 0.1398 (avg: 0.0325) | acc: 0.9453 (avg: 0.9586) |
Epoch 63 [200/383] | loss: 0.1815 (avg: 0.0671) | acc: 0.9297 (avg: 0.9564) |
Epoch 63 [300/383] | loss: 0.1383 (avg: 0.1036) | acc: 0.9375 (avg: 0.9549) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 76.51it/s, acc_nat=0.908]


Epoch 64 [0/383] | loss: 0.1168 (avg: 0.0003) | acc: 0.9375 (avg: 0.9375) |
Epoch 64 [100/383] | loss: 0.1332 (avg: 0.0311) | acc: 0.9609 (avg: 0.9593) |
Epoch 64 [200/383] | loss: 0.1084 (avg: 0.0673) | acc: 0.9688 (avg: 0.9558) |
Epoch 64 [300/383] | loss: 0.1024 (avg: 0.1038) | acc: 0.9688 (avg: 0.9544) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 76.32it/s, acc_nat=0.872]


Epoch 65 [0/383] | loss: 0.1089 (avg: 0.0003) | acc: 0.9688 (avg: 0.9688) |
Epoch 65 [100/383] | loss: 0.1434 (avg: 0.0319) | acc: 0.9609 (avg: 0.9604) |
Epoch 65 [200/383] | loss: 0.1993 (avg: 0.0631) | acc: 0.9219 (avg: 0.9598) |
Epoch 65 [300/383] | loss: 0.1685 (avg: 0.0987) | acc: 0.9297 (avg: 0.9575) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 76.44it/s, acc_nat=0.89]


Epoch 66 [0/383] | loss: 0.0985 (avg: 0.0003) | acc: 0.9688 (avg: 0.9688) |
Epoch 66 [100/383] | loss: 0.1661 (avg: 0.0317) | acc: 0.9375 (avg: 0.9599) |
Epoch 66 [200/383] | loss: 0.1116 (avg: 0.0664) | acc: 0.9609 (avg: 0.9565) |
Epoch 66 [300/383] | loss: 0.1103 (avg: 0.1023) | acc: 0.9531 (avg: 0.9556) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:14<00:00, 70.10it/s, acc_nat=0.878]


Epoch 67 [0/383] | loss: 0.0423 (avg: 0.0001) | acc: 0.9922 (avg: 0.9922) |
Epoch 67 [100/383] | loss: 0.0915 (avg: 0.0307) | acc: 0.9844 (avg: 0.9609) |
Epoch 67 [200/383] | loss: 0.2032 (avg: 0.0659) | acc: 0.9219 (avg: 0.9573) |
Epoch 67 [300/383] | loss: 0.1350 (avg: 0.1022) | acc: 0.9453 (avg: 0.9559) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:13<00:00, 76.17it/s, acc_nat=0.852]


Epoch 68 [0/383] | loss: 0.1946 (avg: 0.0005) | acc: 0.9375 (avg: 0.9375) |
Epoch 68 [100/383] | loss: 0.2069 (avg: 0.0329) | acc: 0.9531 (avg: 0.9575) |
Epoch 68 [200/383] | loss: 0.2016 (avg: 0.0697) | acc: 0.9219 (avg: 0.9551) |
Epoch 68 [300/383] | loss: 0.1598 (avg: 0.1044) | acc: 0.9297 (avg: 0.9547) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:14<00:00, 67.82it/s, acc_nat=0.865]


Epoch 69 [0/383] | loss: 0.0988 (avg: 0.0003) | acc: 0.9531 (avg: 0.9531) |
Epoch 69 [100/383] | loss: 0.1368 (avg: 0.0312) | acc: 0.9453 (avg: 0.9596) |
Epoch 69 [200/383] | loss: 0.0947 (avg: 0.0647) | acc: 0.9766 (avg: 0.9583) |
Epoch 69 [300/383] | loss: 0.0967 (avg: 0.1013) | acc: 0.9766 (avg: 0.9561) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:20<00:00, 47.97it/s, acc_nat=0.884]


Epoch 70 [0/383] | loss: 0.0902 (avg: 0.0002) | acc: 0.9844 (avg: 0.9844) |
Epoch 70 [100/383] | loss: 0.0763 (avg: 0.0267) | acc: 0.9766 (avg: 0.9658) |
Epoch 70 [200/383] | loss: 0.2321 (avg: 0.0601) | acc: 0.9219 (avg: 0.9613) |
Epoch 70 [300/383] | loss: 0.1890 (avg: 0.0961) | acc: 0.9453 (avg: 0.9584) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:20<00:00, 48.25it/s, acc_nat=0.895]


Epoch 71 [0/383] | loss: 0.1110 (avg: 0.0003) | acc: 0.9688 (avg: 0.9688) |
Epoch 71 [100/383] | loss: 0.0977 (avg: 0.0321) | acc: 0.9766 (avg: 0.9594) |
Epoch 71 [200/383] | loss: 0.1357 (avg: 0.0643) | acc: 0.9453 (avg: 0.9584) |
Epoch 71 [300/383] | loss: 0.2312 (avg: 0.0992) | acc: 0.9531 (avg: 0.9569) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:20<00:00, 48.51it/s, acc_nat=0.87]


Epoch 72 [0/383] | loss: 0.1328 (avg: 0.0003) | acc: 0.9531 (avg: 0.9531) |
Epoch 72 [100/383] | loss: 0.0854 (avg: 0.0315) | acc: 0.9844 (avg: 0.9606) |
Epoch 72 [200/383] | loss: 0.1318 (avg: 0.0633) | acc: 0.9453 (avg: 0.9586) |
Epoch 72 [300/383] | loss: 0.1363 (avg: 0.0984) | acc: 0.9453 (avg: 0.9571) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:20<00:00, 48.32it/s, acc_nat=0.889]


Epoch 73 [0/383] | loss: 0.1293 (avg: 0.0003) | acc: 0.9531 (avg: 0.9531) |
Epoch 73 [100/383] | loss: 0.1222 (avg: 0.0314) | acc: 0.9531 (avg: 0.9599) |
Epoch 73 [200/383] | loss: 0.1065 (avg: 0.0658) | acc: 0.9688 (avg: 0.9575) |
Epoch 73 [300/383] | loss: 0.2002 (avg: 0.0977) | acc: 0.9297 (avg: 0.9577) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:20<00:00, 48.29it/s, acc_nat=0.877]


Epoch 74 [0/383] | loss: 0.0876 (avg: 0.0002) | acc: 0.9688 (avg: 0.9688) |
Epoch 74 [100/383] | loss: 0.0766 (avg: 0.0268) | acc: 0.9688 (avg: 0.9647) |
Epoch 74 [200/383] | loss: 0.1678 (avg: 0.0611) | acc: 0.9297 (avg: 0.9597) |
Epoch 74 [300/383] | loss: 0.1064 (avg: 0.0955) | acc: 0.9766 (avg: 0.9578) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:20<00:00, 47.91it/s, acc_nat=0.907]


Epoch 75 [0/383] | loss: 0.1487 (avg: 0.0004) | acc: 0.9453 (avg: 0.9453) |
Epoch 75 [100/383] | loss: 0.0185 (avg: 0.0194) | acc: 1.0000 (avg: 0.9758) |
Epoch 75 [200/383] | loss: 0.0228 (avg: 0.0302) | acc: 0.9922 (avg: 0.9813) |
Epoch 75 [300/383] | loss: 0.0279 (avg: 0.0392) | acc: 0.9922 (avg: 0.9841) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:19<00:00, 51.81it/s, acc_nat=0.935]


Epoch 76 [0/383] | loss: 0.0214 (avg: 0.0001) | acc: 1.0000 (avg: 1.0000) |
Epoch 76 [100/383] | loss: 0.0531 (avg: 0.0060) | acc: 0.9844 (avg: 0.9938) |
Epoch 76 [200/383] | loss: 0.0115 (avg: 0.0112) | acc: 1.0000 (avg: 0.9940) |
Epoch 76 [300/383] | loss: 0.0158 (avg: 0.0161) | acc: 0.9922 (avg: 0.9942) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:19<00:00, 51.11it/s, acc_nat=0.955]


Epoch 77 [0/383] | loss: 0.0189 (avg: 0.0000) | acc: 1.0000 (avg: 1.0000) |
Epoch 77 [100/383] | loss: 0.0306 (avg: 0.0041) | acc: 0.9922 (avg: 0.9954) |
Epoch 77 [200/383] | loss: 0.0449 (avg: 0.0081) | acc: 0.9844 (avg: 0.9954) |
Epoch 77 [300/383] | loss: 0.0057 (avg: 0.0116) | acc: 1.0000 (avg: 0.9957) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:20<00:00, 48.10it/s, acc_nat=0.946]


Epoch 78 [0/383] | loss: 0.0108 (avg: 0.0000) | acc: 1.0000 (avg: 1.0000) |
Epoch 78 [100/383] | loss: 0.0062 (avg: 0.0033) | acc: 1.0000 (avg: 0.9966) |
Epoch 78 [200/383] | loss: 0.0040 (avg: 0.0063) | acc: 1.0000 (avg: 0.9967) |
Epoch 78 [300/383] | loss: 0.0030 (avg: 0.0091) | acc: 1.0000 (avg: 0.9970) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:20<00:00, 48.50it/s, acc_nat=0.951]


Epoch 79 [0/383] | loss: 0.0033 (avg: 0.0000) | acc: 1.0000 (avg: 1.0000) |
Epoch 79 [100/383] | loss: 0.0021 (avg: 0.0023) | acc: 1.0000 (avg: 0.9977) |
Epoch 79 [200/383] | loss: 0.0120 (avg: 0.0047) | acc: 0.9922 (avg: 0.9976) |
Epoch 79 [300/383] | loss: 0.0038 (avg: 0.0072) | acc: 1.0000 (avg: 0.9977) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:21<00:00, 46.42it/s, acc_nat=0.952]


Epoch 80 [0/383] | loss: 0.0266 (avg: 0.0001) | acc: 0.9922 (avg: 0.9922) |
Epoch 80 [100/383] | loss: 0.0073 (avg: 0.0021) | acc: 0.9922 (avg: 0.9984) |
Epoch 80 [200/383] | loss: 0.0104 (avg: 0.0041) | acc: 1.0000 (avg: 0.9983) |
Epoch 80 [300/383] | loss: 0.0108 (avg: 0.0064) | acc: 0.9922 (avg: 0.9983) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:20<00:00, 47.79it/s, acc_nat=0.952]


Epoch 81 [0/383] | loss: 0.0006 (avg: 0.0000) | acc: 1.0000 (avg: 1.0000) |
Epoch 81 [100/383] | loss: 0.0138 (avg: 0.0016) | acc: 0.9922 (avg: 0.9986) |
Epoch 81 [200/383] | loss: 0.0059 (avg: 0.0035) | acc: 1.0000 (avg: 0.9983) |
Epoch 81 [300/383] | loss: 0.0031 (avg: 0.0052) | acc: 1.0000 (avg: 0.9984) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:20<00:00, 47.90it/s, acc_nat=0.948]


Epoch 82 [0/383] | loss: 0.0163 (avg: 0.0000) | acc: 1.0000 (avg: 1.0000) |
Epoch 82 [100/383] | loss: 0.0017 (avg: 0.0016) | acc: 1.0000 (avg: 0.9989) |
Epoch 82 [200/383] | loss: 0.0298 (avg: 0.0031) | acc: 0.9922 (avg: 0.9989) |
Epoch 82 [300/383] | loss: 0.0036 (avg: 0.0046) | acc: 1.0000 (avg: 0.9989) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:20<00:00, 47.79it/s, acc_nat=0.947]


Epoch 83 [0/383] | loss: 0.0177 (avg: 0.0000) | acc: 0.9922 (avg: 0.9922) |
Epoch 83 [100/383] | loss: 0.0035 (avg: 0.0013) | acc: 1.0000 (avg: 0.9985) |
Epoch 83 [200/383] | loss: 0.0043 (avg: 0.0027) | acc: 1.0000 (avg: 0.9986) |
Epoch 83 [300/383] | loss: 0.0031 (avg: 0.0040) | acc: 1.0000 (avg: 0.9987) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:19<00:00, 51.82it/s, acc_nat=0.946]


Epoch 84 [0/383] | loss: 0.0033 (avg: 0.0000) | acc: 1.0000 (avg: 1.0000) |
Epoch 84 [100/383] | loss: 0.0182 (avg: 0.0012) | acc: 0.9922 (avg: 0.9990) |
Epoch 84 [200/383] | loss: 0.0065 (avg: 0.0024) | acc: 1.0000 (avg: 0.9991) |
Epoch 84 [300/383] | loss: 0.0039 (avg: 0.0037) | acc: 1.0000 (avg: 0.9990) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:19<00:00, 51.82it/s, acc_nat=0.949]


Epoch 85 [0/383] | loss: 0.0030 (avg: 0.0000) | acc: 1.0000 (avg: 1.0000) |
Epoch 85 [100/383] | loss: 0.0011 (avg: 0.0011) | acc: 1.0000 (avg: 0.9989) |
Epoch 85 [200/383] | loss: 0.0079 (avg: 0.0024) | acc: 1.0000 (avg: 0.9987) |
Epoch 85 [300/383] | loss: 0.0031 (avg: 0.0035) | acc: 1.0000 (avg: 0.9988) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:19<00:00, 52.28it/s, acc_nat=0.943]


Epoch 86 [0/383] | loss: 0.0054 (avg: 0.0000) | acc: 1.0000 (avg: 1.0000) |
Epoch 86 [100/383] | loss: 0.0027 (avg: 0.0010) | acc: 1.0000 (avg: 0.9995) |
Epoch 86 [200/383] | loss: 0.0066 (avg: 0.0022) | acc: 1.0000 (avg: 0.9992) |
Epoch 86 [300/383] | loss: 0.0012 (avg: 0.0033) | acc: 1.0000 (avg: 0.9993) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:20<00:00, 47.64it/s, acc_nat=0.942]


Epoch 87 [0/383] | loss: 0.0012 (avg: 0.0000) | acc: 1.0000 (avg: 1.0000) |
Epoch 87 [100/383] | loss: 0.0011 (avg: 0.0008) | acc: 1.0000 (avg: 0.9997) |
Epoch 87 [200/383] | loss: 0.0021 (avg: 0.0018) | acc: 1.0000 (avg: 0.9994) |
Epoch 87 [300/383] | loss: 0.0054 (avg: 0.0028) | acc: 1.0000 (avg: 0.9992) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:21<00:00, 46.94it/s, acc_nat=0.947]


Epoch 88 [0/383] | loss: 0.0028 (avg: 0.0000) | acc: 1.0000 (avg: 1.0000) |
Epoch 88 [100/383] | loss: 0.0011 (avg: 0.0009) | acc: 1.0000 (avg: 0.9994) |
Epoch 88 [200/383] | loss: 0.0027 (avg: 0.0018) | acc: 1.0000 (avg: 0.9994) |
Epoch 88 [300/383] | loss: 0.0008 (avg: 0.0028) | acc: 1.0000 (avg: 0.9994) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:21<00:00, 47.20it/s, acc_nat=0.944]


Epoch 89 [0/383] | loss: 0.0027 (avg: 0.0000) | acc: 1.0000 (avg: 1.0000) |
Epoch 89 [100/383] | loss: 0.0010 (avg: 0.0006) | acc: 1.0000 (avg: 0.9999) |
Epoch 89 [200/383] | loss: 0.0015 (avg: 0.0014) | acc: 1.0000 (avg: 0.9996) |
Epoch 89 [300/383] | loss: 0.0012 (avg: 0.0021) | acc: 1.0000 (avg: 0.9996) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:21<00:00, 46.91it/s, acc_nat=0.946]


Epoch 90 [0/383] | loss: 0.0009 (avg: 0.0000) | acc: 1.0000 (avg: 1.0000) |
Epoch 90 [100/383] | loss: 0.0024 (avg: 0.0007) | acc: 1.0000 (avg: 0.9998) |
Epoch 90 [200/383] | loss: 0.0010 (avg: 0.0014) | acc: 1.0000 (avg: 0.9998) |
Epoch 90 [300/383] | loss: 0.0016 (avg: 0.0021) | acc: 1.0000 (avg: 0.9998) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:21<00:00, 46.95it/s, acc_nat=0.939]


Epoch 91 [0/383] | loss: 0.0014 (avg: 0.0000) | acc: 1.0000 (avg: 1.0000) |
Epoch 91 [100/383] | loss: 0.0056 (avg: 0.0007) | acc: 1.0000 (avg: 0.9996) |
Epoch 91 [200/383] | loss: 0.0038 (avg: 0.0016) | acc: 1.0000 (avg: 0.9995) |
Epoch 91 [300/383] | loss: 0.0026 (avg: 0.0021) | acc: 1.0000 (avg: 0.9996) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:19<00:00, 51.36it/s, acc_nat=0.946]


Epoch 92 [0/383] | loss: 0.0022 (avg: 0.0000) | acc: 1.0000 (avg: 1.0000) |
Epoch 92 [100/383] | loss: 0.0021 (avg: 0.0008) | acc: 1.0000 (avg: 0.9993) |
Epoch 92 [200/383] | loss: 0.0013 (avg: 0.0014) | acc: 1.0000 (avg: 0.9996) |
Epoch 92 [300/383] | loss: 0.0189 (avg: 0.0021) | acc: 0.9922 (avg: 0.9995) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:20<00:00, 49.88it/s, acc_nat=0.954]


Epoch 93 [0/383] | loss: 0.0008 (avg: 0.0000) | acc: 1.0000 (avg: 1.0000) |
Epoch 93 [100/383] | loss: 0.0055 (avg: 0.0006) | acc: 1.0000 (avg: 0.9998) |
Epoch 93 [200/383] | loss: 0.0020 (avg: 0.0012) | acc: 1.0000 (avg: 0.9998) |
Epoch 93 [300/383] | loss: 0.0022 (avg: 0.0018) | acc: 1.0000 (avg: 0.9998) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:19<00:00, 50.74it/s, acc_nat=0.951]


Epoch 94 [0/383] | loss: 0.0009 (avg: 0.0000) | acc: 1.0000 (avg: 1.0000) |
Epoch 94 [100/383] | loss: 0.0019 (avg: 0.0006) | acc: 1.0000 (avg: 0.9996) |
Epoch 94 [200/383] | loss: 0.0095 (avg: 0.0013) | acc: 0.9922 (avg: 0.9996) |
Epoch 94 [300/383] | loss: 0.0012 (avg: 0.0018) | acc: 1.0000 (avg: 0.9997) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:21<00:00, 47.48it/s, acc_nat=0.942]


Epoch 95 [0/383] | loss: 0.0074 (avg: 0.0000) | acc: 1.0000 (avg: 1.0000) |
Epoch 95 [100/383] | loss: 0.0053 (avg: 0.0009) | acc: 1.0000 (avg: 0.9993) |
Epoch 95 [200/383] | loss: 0.0008 (avg: 0.0014) | acc: 1.0000 (avg: 0.9996) |
Epoch 95 [300/383] | loss: 0.0011 (avg: 0.0020) | acc: 1.0000 (avg: 0.9996) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:21<00:00, 47.33it/s, acc_nat=0.951]


Epoch 96 [0/383] | loss: 0.0004 (avg: 0.0000) | acc: 1.0000 (avg: 1.0000) |
Epoch 96 [100/383] | loss: 0.0011 (avg: 0.0005) | acc: 1.0000 (avg: 0.9998) |
Epoch 96 [200/383] | loss: 0.0086 (avg: 0.0011) | acc: 0.9922 (avg: 0.9997) |
Epoch 96 [300/383] | loss: 0.0008 (avg: 0.0016) | acc: 1.0000 (avg: 0.9998) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:20<00:00, 48.07it/s, acc_nat=0.949]


Epoch 97 [0/383] | loss: 0.0018 (avg: 0.0000) | acc: 1.0000 (avg: 1.0000) |
Epoch 97 [100/383] | loss: 0.0038 (avg: 0.0005) | acc: 1.0000 (avg: 0.9998) |
Epoch 97 [200/383] | loss: 0.0038 (avg: 0.0012) | acc: 1.0000 (avg: 0.9997) |
Epoch 97 [300/383] | loss: 0.0010 (avg: 0.0018) | acc: 1.0000 (avg: 0.9997) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:21<00:00, 47.06it/s, acc_nat=0.935]


Epoch 98 [0/383] | loss: 0.0013 (avg: 0.0000) | acc: 1.0000 (avg: 1.0000) |
Epoch 98 [100/383] | loss: 0.0009 (avg: 0.0005) | acc: 1.0000 (avg: 0.9998) |
Epoch 98 [200/383] | loss: 0.0016 (avg: 0.0010) | acc: 1.0000 (avg: 0.9997) |
Epoch 98 [300/383] | loss: 0.0059 (avg: 0.0018) | acc: 1.0000 (avg: 0.9996) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:21<00:00, 47.37it/s, acc_nat=0.953]


Epoch 99 [0/383] | loss: 0.0008 (avg: 0.0000) | acc: 1.0000 (avg: 1.0000) |
Epoch 99 [100/383] | loss: 0.0010 (avg: 0.0006) | acc: 1.0000 (avg: 0.9996) |
Epoch 99 [200/383] | loss: 0.0018 (avg: 0.0011) | acc: 1.0000 (avg: 0.9997) |
Epoch 99 [300/383] | loss: 0.0020 (avg: 0.0016) | acc: 1.0000 (avg: 0.9997) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [00:20<00:00, 49.96it/s, acc_nat=0.947]


Epoch 0 [0/383] | loss: 18.4221 (avg: 0.0481) | acc nat: 0.0000 (avg: 0.0000) |
Epoch 0 [100/383] | loss: 14.3831 (avg: 4.0562) | acc nat: 0.1562 (avg: 0.1514) |
Epoch 0 [200/383] | loss: 14.9888 (avg: 7.8071) | acc nat: 0.1719 (avg: 0.1756) |
Epoch 0 [300/383] | loss: 13.7805 (avg: 11.4513) | acc nat: 0.2031 (avg: 0.1920) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:59<00:00,  3.34it/s, acc_nat=0.37, acc_rob=0.245]


Epoch 1 [0/383] | loss: 13.4739 (avg: 0.0352) | acc nat: 0.2500 (avg: 0.2500) |
Epoch 1 [100/383] | loss: 13.2785 (avg: 3.5642) | acc nat: 0.2031 (avg: 0.2350) |
Epoch 1 [200/383] | loss: 14.1346 (avg: 7.0393) | acc nat: 0.1562 (avg: 0.2397) |
Epoch 1 [300/383] | loss: 13.4007 (avg: 10.4738) | acc nat: 0.2812 (avg: 0.2464) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [05:00<00:00,  3.33it/s, acc_nat=0.431, acc_rob=0.267]


Epoch 2 [0/383] | loss: 12.5835 (avg: 0.0329) | acc nat: 0.3281 (avg: 0.3281) |
Epoch 2 [100/383] | loss: 11.9160 (avg: 3.3453) | acc nat: 0.2734 (avg: 0.2712) |
Epoch 2 [200/383] | loss: 11.9754 (avg: 6.6279) | acc nat: 0.3125 (avg: 0.2743) |
Epoch 2 [300/383] | loss: 11.7610 (avg: 9.8552) | acc nat: 0.2812 (avg: 0.2784) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [05:00<00:00,  3.33it/s, acc_nat=0.507, acc_rob=0.308]


Epoch 3 [0/383] | loss: 11.1429 (avg: 0.0291) | acc nat: 0.3906 (avg: 0.3906) |
Epoch 3 [100/383] | loss: 12.1944 (avg: 3.1684) | acc nat: 0.3438 (avg: 0.2973) |
Epoch 3 [200/383] | loss: 12.1194 (avg: 6.2588) | acc nat: 0.2266 (avg: 0.3011) |
Epoch 3 [300/383] | loss: 12.0372 (avg: 9.2924) | acc nat: 0.3203 (avg: 0.3061) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [05:00<00:00,  3.33it/s, acc_nat=0.568, acc_rob=0.336]


Epoch 4 [0/383] | loss: 12.3594 (avg: 0.0323) | acc nat: 0.2578 (avg: 0.2578) |
Epoch 4 [100/383] | loss: 10.9725 (avg: 3.0014) | acc nat: 0.3047 (avg: 0.3227) |
Epoch 4 [200/383] | loss: 10.8699 (avg: 5.9077) | acc nat: 0.3750 (avg: 0.3317) |
Epoch 4 [300/383] | loss: 10.8909 (avg: 8.8167) | acc nat: 0.2969 (avg: 0.3356) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [05:00<00:00,  3.33it/s, acc_nat=0.577, acc_rob=0.321]


Epoch 5 [0/383] | loss: 11.2886 (avg: 0.0295) | acc nat: 0.3594 (avg: 0.3594) |
Epoch 5 [100/383] | loss: 11.0039 (avg: 2.8501) | acc nat: 0.3281 (avg: 0.3547) |
Epoch 5 [200/383] | loss: 10.5309 (avg: 5.6270) | acc nat: 0.3672 (avg: 0.3613) |
Epoch 5 [300/383] | loss: 10.4981 (avg: 8.3738) | acc nat: 0.3828 (avg: 0.3643) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:59<00:00,  3.34it/s, acc_nat=0.632, acc_rob=0.33]


Epoch 6 [0/383] | loss: 10.5095 (avg: 0.0274) | acc nat: 0.3828 (avg: 0.3828) |
Epoch 6 [100/383] | loss: 10.2859 (avg: 2.7310) | acc nat: 0.4062 (avg: 0.3831) |
Epoch 6 [200/383] | loss: 10.0637 (avg: 5.3806) | acc nat: 0.4297 (avg: 0.3839) |
Epoch 6 [300/383] | loss: 9.4844 (avg: 8.0005) | acc nat: 0.3828 (avg: 0.3861) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:59<00:00,  3.33it/s, acc_nat=0.667, acc_rob=0.385]


Epoch 7 [0/383] | loss: 10.4174 (avg: 0.0272) | acc nat: 0.3516 (avg: 0.3516) |
Epoch 7 [100/383] | loss: 9.3354 (avg: 2.6203) | acc nat: 0.4062 (avg: 0.4011) |
Epoch 7 [200/383] | loss: 10.3361 (avg: 5.1864) | acc nat: 0.4688 (avg: 0.4047) |
Epoch 7 [300/383] | loss: 9.3211 (avg: 7.7553) | acc nat: 0.4062 (avg: 0.4051) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [05:00<00:00,  3.33it/s, acc_nat=0.723, acc_rob=0.406]


Epoch 8 [0/383] | loss: 9.9008 (avg: 0.0259) | acc nat: 0.3672 (avg: 0.3672) |
Epoch 8 [100/383] | loss: 9.5094 (avg: 2.5182) | acc nat: 0.3828 (avg: 0.4160) |
Epoch 8 [200/383] | loss: 8.7460 (avg: 5.0222) | acc nat: 0.5000 (avg: 0.4214) |
Epoch 8 [300/383] | loss: 10.1459 (avg: 7.4942) | acc nat: 0.3984 (avg: 0.4244) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [05:00<00:00,  3.33it/s, acc_nat=0.704, acc_rob=0.396]


Epoch 9 [0/383] | loss: 9.5439 (avg: 0.0249) | acc nat: 0.3906 (avg: 0.3906) |
Epoch 9 [100/383] | loss: 9.5970 (avg: 2.4508) | acc nat: 0.4453 (avg: 0.4375) |
Epoch 9 [200/383] | loss: 8.8623 (avg: 4.8799) | acc nat: 0.4844 (avg: 0.4352) |
Epoch 9 [300/383] | loss: 8.9586 (avg: 7.2817) | acc nat: 0.3984 (avg: 0.4359) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:59<00:00,  3.34it/s, acc_nat=0.721, acc_rob=0.39]


Epoch 10 [0/383] | loss: 9.1185 (avg: 0.0238) | acc nat: 0.3516 (avg: 0.3516) |
Epoch 10 [100/383] | loss: 8.9712 (avg: 2.4062) | acc nat: 0.4531 (avg: 0.4457) |
Epoch 10 [200/383] | loss: 8.9895 (avg: 4.7825) | acc nat: 0.4688 (avg: 0.4419) |
Epoch 10 [300/383] | loss: 9.2745 (avg: 7.1036) | acc nat: 0.4141 (avg: 0.4457) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:59<00:00,  3.34it/s, acc_nat=0.737, acc_rob=0.41]


Epoch 11 [0/383] | loss: 9.4433 (avg: 0.0247) | acc nat: 0.4219 (avg: 0.4219) |
Epoch 11 [100/383] | loss: 8.2934 (avg: 2.3211) | acc nat: 0.5156 (avg: 0.4609) |
Epoch 11 [200/383] | loss: 8.6861 (avg: 4.6171) | acc nat: 0.4531 (avg: 0.4605) |
Epoch 11 [300/383] | loss: 8.7785 (avg: 6.9105) | acc nat: 0.4531 (avg: 0.4595) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:57<00:00,  3.36it/s, acc_nat=0.754, acc_rob=0.431]


Epoch 12 [0/383] | loss: 8.7223 (avg: 0.0228) | acc nat: 0.5312 (avg: 0.5312) |
Epoch 12 [100/383] | loss: 8.3968 (avg: 2.2640) | acc nat: 0.4766 (avg: 0.4737) |
Epoch 12 [200/383] | loss: 8.2979 (avg: 4.4643) | acc nat: 0.5156 (avg: 0.4739) |
Epoch 12 [300/383] | loss: 8.7392 (avg: 6.7102) | acc nat: 0.3906 (avg: 0.4717) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:54<00:00,  3.40it/s, acc_nat=0.759, acc_rob=0.418]


Epoch 13 [0/383] | loss: 8.5839 (avg: 0.0224) | acc nat: 0.4219 (avg: 0.4219) |
Epoch 13 [100/383] | loss: 8.4393 (avg: 2.2207) | acc nat: 0.4453 (avg: 0.4736) |
Epoch 13 [200/383] | loss: 7.6259 (avg: 4.3977) | acc nat: 0.5078 (avg: 0.4827) |
Epoch 13 [300/383] | loss: 8.4620 (avg: 6.6032) | acc nat: 0.4531 (avg: 0.4795) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:51<00:00,  3.43it/s, acc_nat=0.764, acc_rob=0.446]


Epoch 14 [0/383] | loss: 8.4769 (avg: 0.0221) | acc nat: 0.4688 (avg: 0.4688) |
Epoch 14 [100/383] | loss: 7.2945 (avg: 2.2045) | acc nat: 0.5547 (avg: 0.4851) |
Epoch 14 [200/383] | loss: 7.8346 (avg: 4.3327) | acc nat: 0.4531 (avg: 0.4883) |
Epoch 14 [300/383] | loss: 7.5194 (avg: 6.4952) | acc nat: 0.5625 (avg: 0.4886) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:49<00:00,  3.46it/s, acc_nat=0.786, acc_rob=0.467]


Epoch 15 [0/383] | loss: 7.8247 (avg: 0.0204) | acc nat: 0.5312 (avg: 0.5312) |
Epoch 15 [100/383] | loss: 8.2579 (avg: 2.1127) | acc nat: 0.4922 (avg: 0.4990) |
Epoch 15 [200/383] | loss: 8.3655 (avg: 4.2276) | acc nat: 0.5078 (avg: 0.4977) |
Epoch 15 [300/383] | loss: 8.6912 (avg: 6.3406) | acc nat: 0.5000 (avg: 0.4985) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:44<00:00,  3.51it/s, acc_nat=0.785, acc_rob=0.474]


Epoch 16 [0/383] | loss: 7.6164 (avg: 0.0199) | acc nat: 0.5234 (avg: 0.5234) |
Epoch 16 [100/383] | loss: 7.4526 (avg: 2.0714) | acc nat: 0.5234 (avg: 0.5162) |
Epoch 16 [200/383] | loss: 7.9863 (avg: 4.1477) | acc nat: 0.4844 (avg: 0.5094) |
Epoch 16 [300/383] | loss: 9.1826 (avg: 6.2380) | acc nat: 0.4922 (avg: 0.5064) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:43<00:00,  3.53it/s, acc_nat=0.794, acc_rob=0.453]


Epoch 17 [0/383] | loss: 7.2127 (avg: 0.0188) | acc nat: 0.5156 (avg: 0.5156) |
Epoch 17 [100/383] | loss: 8.2055 (avg: 2.0417) | acc nat: 0.4844 (avg: 0.5118) |
Epoch 17 [200/383] | loss: 7.6945 (avg: 4.1137) | acc nat: 0.5234 (avg: 0.5085) |
Epoch 17 [300/383] | loss: 7.4057 (avg: 6.1859) | acc nat: 0.5625 (avg: 0.5095) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:41<00:00,  3.56it/s, acc_nat=0.785, acc_rob=0.45]


Epoch 18 [0/383] | loss: 8.1897 (avg: 0.0214) | acc nat: 0.4922 (avg: 0.4922) |
Epoch 18 [100/383] | loss: 6.5707 (avg: 1.9936) | acc nat: 0.5234 (avg: 0.5254) |
Epoch 18 [200/383] | loss: 7.6400 (avg: 3.9959) | acc nat: 0.4766 (avg: 0.5244) |
Epoch 18 [300/383] | loss: 7.9413 (avg: 6.0352) | acc nat: 0.5000 (avg: 0.5197) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:37<00:00,  3.60it/s, acc_nat=0.796, acc_rob=0.465]


Epoch 19 [0/383] | loss: 6.9790 (avg: 0.0182) | acc nat: 0.5391 (avg: 0.5391) |
Epoch 19 [100/383] | loss: 7.3611 (avg: 1.9712) | acc nat: 0.5078 (avg: 0.5289) |
Epoch 19 [200/383] | loss: 7.8010 (avg: 3.9533) | acc nat: 0.5391 (avg: 0.5285) |
Epoch 19 [300/383] | loss: 7.2560 (avg: 5.9472) | acc nat: 0.6172 (avg: 0.5273) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:37<00:00,  3.60it/s, acc_nat=0.788, acc_rob=0.464]


Epoch 20 [0/383] | loss: 7.5286 (avg: 0.0197) | acc nat: 0.4922 (avg: 0.4922) |
Epoch 20 [100/383] | loss: 7.8780 (avg: 1.9644) | acc nat: 0.5234 (avg: 0.5358) |
Epoch 20 [200/383] | loss: 6.9842 (avg: 3.9077) | acc nat: 0.5703 (avg: 0.5358) |
Epoch 20 [300/383] | loss: 7.9723 (avg: 5.8605) | acc nat: 0.4453 (avg: 0.5346) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:41<00:00,  3.55it/s, acc_nat=0.791, acc_rob=0.445]


Epoch 21 [0/383] | loss: 7.2950 (avg: 0.0190) | acc nat: 0.5781 (avg: 0.5781) |
Epoch 21 [100/383] | loss: 7.0928 (avg: 1.9180) | acc nat: 0.5625 (avg: 0.5475) |
Epoch 21 [200/383] | loss: 7.5057 (avg: 3.8383) | acc nat: 0.5156 (avg: 0.5424) |
Epoch 21 [300/383] | loss: 7.6474 (avg: 5.7829) | acc nat: 0.5469 (avg: 0.5382) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:44<00:00,  3.51it/s, acc_nat=0.797, acc_rob=0.443]


Epoch 22 [0/383] | loss: 8.0958 (avg: 0.0211) | acc nat: 0.5234 (avg: 0.5234) |
Epoch 22 [100/383] | loss: 6.7350 (avg: 1.8946) | acc nat: 0.6172 (avg: 0.5495) |
Epoch 22 [200/383] | loss: 8.1110 (avg: 3.8184) | acc nat: 0.5391 (avg: 0.5424) |
Epoch 22 [300/383] | loss: 7.3240 (avg: 5.7610) | acc nat: 0.5703 (avg: 0.5417) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:46<00:00,  3.49it/s, acc_nat=0.783, acc_rob=0.47]


Epoch 23 [0/383] | loss: 6.6990 (avg: 0.0175) | acc nat: 0.5781 (avg: 0.5781) |
Epoch 23 [100/383] | loss: 7.3445 (avg: 1.8656) | acc nat: 0.5234 (avg: 0.5547) |
Epoch 23 [200/383] | loss: 7.0866 (avg: 3.7294) | acc nat: 0.5156 (avg: 0.5512) |
Epoch 23 [300/383] | loss: 6.0636 (avg: 5.6265) | acc nat: 0.6172 (avg: 0.5501) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:49<00:00,  3.46it/s, acc_nat=0.808, acc_rob=0.452]


Epoch 24 [0/383] | loss: 7.4234 (avg: 0.0194) | acc nat: 0.5000 (avg: 0.5000) |
Epoch 24 [100/383] | loss: 7.3527 (avg: 1.8554) | acc nat: 0.5078 (avg: 0.5540) |
Epoch 24 [200/383] | loss: 6.8188 (avg: 3.7134) | acc nat: 0.5938 (avg: 0.5522) |
Epoch 24 [300/383] | loss: 6.8282 (avg: 5.5608) | acc nat: 0.6094 (avg: 0.5532) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:52<00:00,  3.42it/s, acc_nat=0.809, acc_rob=0.481]


Epoch 25 [0/383] | loss: 7.3287 (avg: 0.0191) | acc nat: 0.5391 (avg: 0.5391) |
Epoch 25 [100/383] | loss: 7.7608 (avg: 1.8312) | acc nat: 0.5391 (avg: 0.5592) |
Epoch 25 [200/383] | loss: 6.2172 (avg: 3.6610) | acc nat: 0.5859 (avg: 0.5587) |
Epoch 25 [300/383] | loss: 8.5789 (avg: 5.5483) | acc nat: 0.4922 (avg: 0.5560) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:56<00:00,  3.37it/s, acc_nat=0.817, acc_rob=0.47]


Epoch 26 [0/383] | loss: 7.1708 (avg: 0.0187) | acc nat: 0.5469 (avg: 0.5469) |
Epoch 26 [100/383] | loss: 6.6880 (avg: 1.7900) | acc nat: 0.5703 (avg: 0.5753) |
Epoch 26 [200/383] | loss: 6.7432 (avg: 3.6132) | acc nat: 0.5781 (avg: 0.5664) |
Epoch 26 [300/383] | loss: 7.2753 (avg: 5.4601) | acc nat: 0.5859 (avg: 0.5635) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:58<00:00,  3.35it/s, acc_nat=0.811, acc_rob=0.466]


Epoch 27 [0/383] | loss: 6.2806 (avg: 0.0164) | acc nat: 0.6094 (avg: 0.6094) |
Epoch 27 [100/383] | loss: 7.1592 (avg: 1.7501) | acc nat: 0.5547 (avg: 0.5773) |
Epoch 27 [200/383] | loss: 6.8842 (avg: 3.5717) | acc nat: 0.5859 (avg: 0.5684) |
Epoch 27 [300/383] | loss: 6.3693 (avg: 5.4013) | acc nat: 0.6328 (avg: 0.5654) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:59<00:00,  3.34it/s, acc_nat=0.808, acc_rob=0.489]


Epoch 28 [0/383] | loss: 6.5067 (avg: 0.0170) | acc nat: 0.6172 (avg: 0.6172) |
Epoch 28 [100/383] | loss: 7.2299 (avg: 1.7640) | acc nat: 0.5312 (avg: 0.5728) |
Epoch 28 [200/383] | loss: 6.5043 (avg: 3.5475) | acc nat: 0.5469 (avg: 0.5707) |
Epoch 28 [300/383] | loss: 6.5792 (avg: 5.3470) | acc nat: 0.5859 (avg: 0.5675) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [05:00<00:00,  3.33it/s, acc_nat=0.817, acc_rob=0.467]


Epoch 29 [0/383] | loss: 6.4828 (avg: 0.0169) | acc nat: 0.6172 (avg: 0.6172) |
Epoch 29 [100/383] | loss: 6.8438 (avg: 1.7692) | acc nat: 0.5625 (avg: 0.5776) |
Epoch 29 [200/383] | loss: 6.8567 (avg: 3.4956) | acc nat: 0.5547 (avg: 0.5769) |
Epoch 29 [300/383] | loss: 7.4718 (avg: 5.2779) | acc nat: 0.5391 (avg: 0.5741) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:59<00:00,  3.33it/s, acc_nat=0.798, acc_rob=0.459]


Epoch 30 [0/383] | loss: 7.0082 (avg: 0.0183) | acc nat: 0.5859 (avg: 0.5859) |
Epoch 30 [100/383] | loss: 6.6932 (avg: 1.7174) | acc nat: 0.5703 (avg: 0.5822) |
Epoch 30 [200/383] | loss: 6.6982 (avg: 3.4598) | acc nat: 0.5625 (avg: 0.5791) |
Epoch 30 [300/383] | loss: 6.4453 (avg: 5.2221) | acc nat: 0.5938 (avg: 0.5778) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [05:00<00:00,  3.33it/s, acc_nat=0.797, acc_rob=0.466]


Epoch 31 [0/383] | loss: 6.4709 (avg: 0.0169) | acc nat: 0.6250 (avg: 0.6250) |
Epoch 31 [100/383] | loss: 6.3283 (avg: 1.7135) | acc nat: 0.6328 (avg: 0.5853) |
Epoch 31 [200/383] | loss: 6.4973 (avg: 3.4432) | acc nat: 0.5781 (avg: 0.5809) |
Epoch 31 [300/383] | loss: 7.2624 (avg: 5.1897) | acc nat: 0.5234 (avg: 0.5814) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:59<00:00,  3.34it/s, acc_nat=0.815, acc_rob=0.474]


Epoch 32 [0/383] | loss: 6.8432 (avg: 0.0179) | acc nat: 0.5938 (avg: 0.5938) |
Epoch 32 [100/383] | loss: 6.5405 (avg: 1.6850) | acc nat: 0.5547 (avg: 0.5938) |
Epoch 32 [200/383] | loss: 6.7516 (avg: 3.3877) | acc nat: 0.5781 (avg: 0.5881) |
Epoch 32 [300/383] | loss: 6.4339 (avg: 5.1212) | acc nat: 0.5625 (avg: 0.5838) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:59<00:00,  3.34it/s, acc_nat=0.813, acc_rob=0.487]


Epoch 33 [0/383] | loss: 6.0813 (avg: 0.0159) | acc nat: 0.6094 (avg: 0.6094) |
Epoch 33 [100/383] | loss: 6.0520 (avg: 1.6947) | acc nat: 0.6172 (avg: 0.5860) |
Epoch 33 [200/383] | loss: 6.5667 (avg: 3.3926) | acc nat: 0.6094 (avg: 0.5842) |
Epoch 33 [300/383] | loss: 6.0634 (avg: 5.1012) | acc nat: 0.6016 (avg: 0.5859) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:59<00:00,  3.34it/s, acc_nat=0.829, acc_rob=0.483]


Epoch 34 [0/383] | loss: 6.3155 (avg: 0.0165) | acc nat: 0.5703 (avg: 0.5703) |
Epoch 34 [100/383] | loss: 6.7011 (avg: 1.6464) | acc nat: 0.5859 (avg: 0.6014) |
Epoch 34 [200/383] | loss: 7.0571 (avg: 3.3324) | acc nat: 0.5312 (avg: 0.5933) |
Epoch 34 [300/383] | loss: 6.5015 (avg: 5.0489) | acc nat: 0.5859 (avg: 0.5913) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:59<00:00,  3.33it/s, acc_nat=0.809, acc_rob=0.462]


Epoch 35 [0/383] | loss: 4.8318 (avg: 0.0126) | acc nat: 0.6797 (avg: 0.6797) |
Epoch 35 [100/383] | loss: 6.4496 (avg: 1.6348) | acc nat: 0.5781 (avg: 0.6125) |
Epoch 35 [200/383] | loss: 7.3587 (avg: 3.2701) | acc nat: 0.5234 (avg: 0.6048) |
Epoch 35 [300/383] | loss: 6.9818 (avg: 4.9583) | acc nat: 0.5391 (avg: 0.5983) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [05:00<00:00,  3.33it/s, acc_nat=0.815, acc_rob=0.488]


Epoch 36 [0/383] | loss: 6.0519 (avg: 0.0158) | acc nat: 0.6641 (avg: 0.6641) |
Epoch 36 [100/383] | loss: 5.4453 (avg: 1.6117) | acc nat: 0.6797 (avg: 0.6097) |
Epoch 36 [200/383] | loss: 5.9253 (avg: 3.2458) | acc nat: 0.6250 (avg: 0.6070) |
Epoch 36 [300/383] | loss: 5.6676 (avg: 4.9117) | acc nat: 0.6172 (avg: 0.6016) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:59<00:00,  3.34it/s, acc_nat=0.833, acc_rob=0.498]


Epoch 37 [0/383] | loss: 6.2794 (avg: 0.0164) | acc nat: 0.6328 (avg: 0.6328) |
Epoch 37 [100/383] | loss: 6.0402 (avg: 1.6089) | acc nat: 0.6016 (avg: 0.6119) |
Epoch 37 [200/383] | loss: 5.9825 (avg: 3.2518) | acc nat: 0.6172 (avg: 0.6058) |
Epoch 37 [300/383] | loss: 7.1218 (avg: 4.9029) | acc nat: 0.6094 (avg: 0.6023) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:59<00:00,  3.34it/s, acc_nat=0.829, acc_rob=0.468]


Epoch 38 [0/383] | loss: 5.5947 (avg: 0.0146) | acc nat: 0.6250 (avg: 0.6250) |
Epoch 38 [100/383] | loss: 5.7924 (avg: 1.6071) | acc nat: 0.6172 (avg: 0.6120) |
Epoch 38 [200/383] | loss: 5.7827 (avg: 3.2512) | acc nat: 0.6406 (avg: 0.6059) |
Epoch 38 [300/383] | loss: 6.1981 (avg: 4.9254) | acc nat: 0.6562 (avg: 0.6023) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:57<00:00,  3.37it/s, acc_nat=0.823, acc_rob=0.497]


Epoch 39 [0/383] | loss: 6.0328 (avg: 0.0158) | acc nat: 0.6328 (avg: 0.6328) |
Epoch 39 [100/383] | loss: 6.3338 (avg: 1.5706) | acc nat: 0.5938 (avg: 0.6172) |
Epoch 39 [200/383] | loss: 6.8124 (avg: 3.1801) | acc nat: 0.6016 (avg: 0.6147) |
Epoch 39 [300/383] | loss: 6.4205 (avg: 4.7995) | acc nat: 0.5625 (avg: 0.6100) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:53<00:00,  3.40it/s, acc_nat=0.828, acc_rob=0.473]


Epoch 40 [0/383] | loss: 5.7441 (avg: 0.0150) | acc nat: 0.6250 (avg: 0.6250) |
Epoch 40 [100/383] | loss: 6.3266 (avg: 1.5659) | acc nat: 0.5469 (avg: 0.6205) |
Epoch 40 [200/383] | loss: 6.2367 (avg: 3.1782) | acc nat: 0.5234 (avg: 0.6117) |
Epoch 40 [300/383] | loss: 6.6527 (avg: 4.8278) | acc nat: 0.5469 (avg: 0.6074) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:51<00:00,  3.44it/s, acc_nat=0.83, acc_rob=0.499]


Epoch 41 [0/383] | loss: 6.2634 (avg: 0.0164) | acc nat: 0.6250 (avg: 0.6250) |
Epoch 41 [100/383] | loss: 6.3807 (avg: 1.5578) | acc nat: 0.5859 (avg: 0.6226) |
Epoch 41 [200/383] | loss: 6.5646 (avg: 3.1570) | acc nat: 0.5703 (avg: 0.6154) |
Epoch 41 [300/383] | loss: 6.3309 (avg: 4.7727) | acc nat: 0.5938 (avg: 0.6123) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:48<00:00,  3.47it/s, acc_nat=0.844, acc_rob=0.482]


Epoch 42 [0/383] | loss: 5.1812 (avg: 0.0135) | acc nat: 0.6172 (avg: 0.6172) |
Epoch 42 [100/383] | loss: 5.9139 (avg: 1.5871) | acc nat: 0.6094 (avg: 0.6144) |
Epoch 42 [200/383] | loss: 6.9454 (avg: 3.1923) | acc nat: 0.5547 (avg: 0.6099) |
Epoch 42 [300/383] | loss: 6.5317 (avg: 4.7917) | acc nat: 0.6250 (avg: 0.6095) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:44<00:00,  3.51it/s, acc_nat=0.833, acc_rob=0.477]


Epoch 43 [0/383] | loss: 6.4741 (avg: 0.0169) | acc nat: 0.6016 (avg: 0.6016) |
Epoch 43 [100/383] | loss: 6.0638 (avg: 1.5365) | acc nat: 0.5859 (avg: 0.6269) |
Epoch 43 [200/383] | loss: 6.3012 (avg: 3.0969) | acc nat: 0.5625 (avg: 0.6231) |
Epoch 43 [300/383] | loss: 6.6145 (avg: 4.6873) | acc nat: 0.5312 (avg: 0.6189) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:42<00:00,  3.54it/s, acc_nat=0.829, acc_rob=0.483]


Epoch 44 [0/383] | loss: 5.2383 (avg: 0.0137) | acc nat: 0.6641 (avg: 0.6641) |
Epoch 44 [100/383] | loss: 5.9904 (avg: 1.5660) | acc nat: 0.5703 (avg: 0.6188) |
Epoch 44 [200/383] | loss: 6.6578 (avg: 3.1181) | acc nat: 0.6406 (avg: 0.6204) |
Epoch 44 [300/383] | loss: 6.1697 (avg: 4.6993) | acc nat: 0.5859 (avg: 0.6181) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:41<00:00,  3.55it/s, acc_nat=0.835, acc_rob=0.509]


Epoch 45 [0/383] | loss: 5.6399 (avg: 0.0147) | acc nat: 0.6016 (avg: 0.6016) |
Epoch 45 [100/383] | loss: 5.4838 (avg: 1.5213) | acc nat: 0.6484 (avg: 0.6320) |
Epoch 45 [200/383] | loss: 6.3417 (avg: 3.0813) | acc nat: 0.6250 (avg: 0.6208) |
Epoch 45 [300/383] | loss: 6.9266 (avg: 4.6441) | acc nat: 0.5859 (avg: 0.6205) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:37<00:00,  3.61it/s, acc_nat=0.818, acc_rob=0.51]


Epoch 46 [0/383] | loss: 5.2237 (avg: 0.0136) | acc nat: 0.6641 (avg: 0.6641) |
Epoch 46 [100/383] | loss: 5.7315 (avg: 1.5009) | acc nat: 0.6016 (avg: 0.6358) |
Epoch 46 [200/383] | loss: 6.0589 (avg: 3.0561) | acc nat: 0.5859 (avg: 0.6259) |
Epoch 46 [300/383] | loss: 5.5862 (avg: 4.6432) | acc nat: 0.6328 (avg: 0.6217) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:37<00:00,  3.60it/s, acc_nat=0.84, acc_rob=0.476]


Epoch 47 [0/383] | loss: 5.9441 (avg: 0.0155) | acc nat: 0.6094 (avg: 0.6094) |
Epoch 47 [100/383] | loss: 6.1757 (avg: 1.5164) | acc nat: 0.6250 (avg: 0.6290) |
Epoch 47 [200/383] | loss: 6.3646 (avg: 3.0303) | acc nat: 0.5703 (avg: 0.6257) |
Epoch 47 [300/383] | loss: 5.8962 (avg: 4.5737) | acc nat: 0.5938 (avg: 0.6252) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:40<00:00,  3.56it/s, acc_nat=0.839, acc_rob=0.476]


Epoch 48 [0/383] | loss: 6.0967 (avg: 0.0159) | acc nat: 0.5938 (avg: 0.5938) |
Epoch 48 [100/383] | loss: 5.8529 (avg: 1.5139) | acc nat: 0.6250 (avg: 0.6272) |
Epoch 48 [200/383] | loss: 6.5466 (avg: 3.0221) | acc nat: 0.5703 (avg: 0.6260) |
Epoch 48 [300/383] | loss: 5.6764 (avg: 4.5788) | acc nat: 0.6484 (avg: 0.6205) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:43<00:00,  3.53it/s, acc_nat=0.844, acc_rob=0.455]


Epoch 49 [0/383] | loss: 5.3298 (avg: 0.0139) | acc nat: 0.6875 (avg: 0.6875) |
Epoch 49 [100/383] | loss: 6.0090 (avg: 1.4675) | acc nat: 0.6328 (avg: 0.6388) |
Epoch 49 [200/383] | loss: 6.4959 (avg: 2.9907) | acc nat: 0.5469 (avg: 0.6311) |
Epoch 49 [300/383] | loss: 5.7901 (avg: 4.5424) | acc nat: 0.6016 (avg: 0.6253) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:47<00:00,  3.48it/s, acc_nat=0.821, acc_rob=0.482]


Epoch 50 [0/383] | loss: 4.8431 (avg: 0.0126) | acc nat: 0.6875 (avg: 0.6875) |
Epoch 50 [100/383] | loss: 5.6009 (avg: 1.4807) | acc nat: 0.6406 (avg: 0.6392) |
Epoch 50 [200/383] | loss: 5.5227 (avg: 2.9806) | acc nat: 0.6250 (avg: 0.6330) |
Epoch 50 [300/383] | loss: 6.0978 (avg: 4.5075) | acc nat: 0.6016 (avg: 0.6289) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:50<00:00,  3.45it/s, acc_nat=0.828, acc_rob=0.493]


Epoch 51 [0/383] | loss: 5.5438 (avg: 0.0145) | acc nat: 0.6641 (avg: 0.6641) |
Epoch 51 [100/383] | loss: 6.1607 (avg: 1.4582) | acc nat: 0.6484 (avg: 0.6471) |
Epoch 51 [200/383] | loss: 5.9652 (avg: 2.9600) | acc nat: 0.6484 (avg: 0.6379) |
Epoch 51 [300/383] | loss: 6.4416 (avg: 4.4996) | acc nat: 0.5391 (avg: 0.6325) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:53<00:00,  3.41it/s, acc_nat=0.845, acc_rob=0.467]


Epoch 52 [0/383] | loss: 5.0399 (avg: 0.0132) | acc nat: 0.6953 (avg: 0.6953) |
Epoch 52 [100/383] | loss: 5.1551 (avg: 1.4726) | acc nat: 0.6484 (avg: 0.6448) |
Epoch 52 [200/383] | loss: 5.3566 (avg: 2.9630) | acc nat: 0.5938 (avg: 0.6403) |
Epoch 52 [300/383] | loss: 5.4833 (avg: 4.4790) | acc nat: 0.6562 (avg: 0.6367) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:56<00:00,  3.37it/s, acc_nat=0.825, acc_rob=0.48]


Epoch 53 [0/383] | loss: 5.0428 (avg: 0.0132) | acc nat: 0.6719 (avg: 0.6719) |
Epoch 53 [100/383] | loss: 5.1543 (avg: 1.4291) | acc nat: 0.6016 (avg: 0.6495) |
Epoch 53 [200/383] | loss: 5.3790 (avg: 2.9181) | acc nat: 0.6562 (avg: 0.6396) |
Epoch 53 [300/383] | loss: 5.6304 (avg: 4.4363) | acc nat: 0.6641 (avg: 0.6346) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:59<00:00,  3.34it/s, acc_nat=0.828, acc_rob=0.491]


Epoch 54 [0/383] | loss: 5.2571 (avg: 0.0137) | acc nat: 0.6172 (avg: 0.6172) |
Epoch 54 [100/383] | loss: 5.8472 (avg: 1.4314) | acc nat: 0.5859 (avg: 0.6448) |
Epoch 54 [200/383] | loss: 5.6097 (avg: 2.9037) | acc nat: 0.6016 (avg: 0.6401) |
Epoch 54 [300/383] | loss: 5.7750 (avg: 4.4119) | acc nat: 0.6562 (avg: 0.6361) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [05:00<00:00,  3.33it/s, acc_nat=0.842, acc_rob=0.463]


Epoch 55 [0/383] | loss: 5.9219 (avg: 0.0155) | acc nat: 0.6484 (avg: 0.6484) |
Epoch 55 [100/383] | loss: 4.8298 (avg: 1.4229) | acc nat: 0.6797 (avg: 0.6461) |
Epoch 55 [200/383] | loss: 5.9359 (avg: 2.9167) | acc nat: 0.5938 (avg: 0.6401) |
Epoch 55 [300/383] | loss: 6.3300 (avg: 4.4195) | acc nat: 0.6016 (avg: 0.6369) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:59<00:00,  3.34it/s, acc_nat=0.826, acc_rob=0.482]


Epoch 56 [0/383] | loss: 5.5611 (avg: 0.0145) | acc nat: 0.6328 (avg: 0.6328) |
Epoch 56 [100/383] | loss: 5.8490 (avg: 1.3982) | acc nat: 0.7031 (avg: 0.6557) |
Epoch 56 [200/383] | loss: 5.5904 (avg: 2.8695) | acc nat: 0.6562 (avg: 0.6477) |
Epoch 56 [300/383] | loss: 5.6509 (avg: 4.3528) | acc nat: 0.6562 (avg: 0.6440) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:59<00:00,  3.34it/s, acc_nat=0.853, acc_rob=0.488]


Epoch 57 [0/383] | loss: 5.0766 (avg: 0.0133) | acc nat: 0.7266 (avg: 0.7266) |
Epoch 57 [100/383] | loss: 5.7889 (avg: 1.4094) | acc nat: 0.6172 (avg: 0.6470) |
Epoch 57 [200/383] | loss: 5.9951 (avg: 2.8478) | acc nat: 0.6172 (avg: 0.6447) |
Epoch 57 [300/383] | loss: 5.5302 (avg: 4.3570) | acc nat: 0.6250 (avg: 0.6401) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:59<00:00,  3.34it/s, acc_nat=0.829, acc_rob=0.488]


Epoch 58 [0/383] | loss: 5.5681 (avg: 0.0145) | acc nat: 0.6719 (avg: 0.6719) |
Epoch 58 [100/383] | loss: 4.9883 (avg: 1.4199) | acc nat: 0.6562 (avg: 0.6522) |
Epoch 58 [200/383] | loss: 5.3049 (avg: 2.8629) | acc nat: 0.6172 (avg: 0.6460) |
Epoch 58 [300/383] | loss: 5.5198 (avg: 4.3623) | acc nat: 0.6562 (avg: 0.6393) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:59<00:00,  3.34it/s, acc_nat=0.833, acc_rob=0.483]


Epoch 59 [0/383] | loss: 5.0259 (avg: 0.0131) | acc nat: 0.6484 (avg: 0.6484) |
Epoch 59 [100/383] | loss: 5.8769 (avg: 1.4020) | acc nat: 0.6172 (avg: 0.6549) |
Epoch 59 [200/383] | loss: 4.9876 (avg: 2.8326) | acc nat: 0.7109 (avg: 0.6484) |
Epoch 59 [300/383] | loss: 5.8165 (avg: 4.2982) | acc nat: 0.5859 (avg: 0.6451) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [05:00<00:00,  3.33it/s, acc_nat=0.834, acc_rob=0.463]


Epoch 60 [0/383] | loss: 4.3691 (avg: 0.0114) | acc nat: 0.7422 (avg: 0.7422) |
Epoch 60 [100/383] | loss: 5.6931 (avg: 1.3961) | acc nat: 0.6016 (avg: 0.6631) |
Epoch 60 [200/383] | loss: 5.9749 (avg: 2.8459) | acc nat: 0.5938 (avg: 0.6480) |
Epoch 60 [300/383] | loss: 5.0553 (avg: 4.3092) | acc nat: 0.6328 (avg: 0.6443) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [05:00<00:00,  3.33it/s, acc_nat=0.828, acc_rob=0.477]


Epoch 61 [0/383] | loss: 5.5740 (avg: 0.0146) | acc nat: 0.6250 (avg: 0.6250) |
Epoch 61 [100/383] | loss: 5.4619 (avg: 1.4217) | acc nat: 0.6094 (avg: 0.6559) |
Epoch 61 [200/383] | loss: 5.2998 (avg: 2.8387) | acc nat: 0.7031 (avg: 0.6507) |
Epoch 61 [300/383] | loss: 6.2490 (avg: 4.3223) | acc nat: 0.5547 (avg: 0.6455) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [05:00<00:00,  3.33it/s, acc_nat=0.816, acc_rob=0.486]


Epoch 62 [0/383] | loss: 5.7114 (avg: 0.0149) | acc nat: 0.6562 (avg: 0.6562) |
Epoch 62 [100/383] | loss: 5.5654 (avg: 1.3924) | acc nat: 0.6250 (avg: 0.6549) |
Epoch 62 [200/383] | loss: 6.0256 (avg: 2.8112) | acc nat: 0.6016 (avg: 0.6538) |
Epoch 62 [300/383] | loss: 5.7634 (avg: 4.2723) | acc nat: 0.6250 (avg: 0.6495) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:58<00:00,  3.35it/s, acc_nat=0.837, acc_rob=0.471]


Epoch 63 [0/383] | loss: 5.9016 (avg: 0.0154) | acc nat: 0.6016 (avg: 0.6016) |
Epoch 63 [100/383] | loss: 5.2308 (avg: 1.3802) | acc nat: 0.6562 (avg: 0.6661) |
Epoch 63 [200/383] | loss: 5.3580 (avg: 2.8037) | acc nat: 0.6953 (avg: 0.6573) |
Epoch 63 [300/383] | loss: 6.5371 (avg: 4.2567) | acc nat: 0.5938 (avg: 0.6517) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:59<00:00,  3.34it/s, acc_nat=0.848, acc_rob=0.472]


Epoch 64 [0/383] | loss: 5.1344 (avg: 0.0134) | acc nat: 0.6406 (avg: 0.6406) |
Epoch 64 [100/383] | loss: 5.1369 (avg: 1.3889) | acc nat: 0.6719 (avg: 0.6532) |
Epoch 64 [200/383] | loss: 4.5382 (avg: 2.8072) | acc nat: 0.7578 (avg: 0.6526) |
Epoch 64 [300/383] | loss: 5.7618 (avg: 4.2458) | acc nat: 0.5312 (avg: 0.6498) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:58<00:00,  3.35it/s, acc_nat=0.843, acc_rob=0.48]


Epoch 65 [0/383] | loss: 4.9509 (avg: 0.0129) | acc nat: 0.6797 (avg: 0.6797) |
Epoch 65 [100/383] | loss: 5.7903 (avg: 1.3751) | acc nat: 0.6641 (avg: 0.6605) |
Epoch 65 [200/383] | loss: 5.0675 (avg: 2.8062) | acc nat: 0.7188 (avg: 0.6539) |
Epoch 65 [300/383] | loss: 5.8145 (avg: 4.2620) | acc nat: 0.6250 (avg: 0.6479) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:56<00:00,  3.37it/s, acc_nat=0.829, acc_rob=0.464]


Epoch 66 [0/383] | loss: 4.7828 (avg: 0.0125) | acc nat: 0.6875 (avg: 0.6875) |
Epoch 66 [100/383] | loss: 5.3079 (avg: 1.3758) | acc nat: 0.6094 (avg: 0.6620) |
Epoch 66 [200/383] | loss: 5.4496 (avg: 2.7831) | acc nat: 0.5859 (avg: 0.6536) |
Epoch 66 [300/383] | loss: 5.9137 (avg: 4.2292) | acc nat: 0.6328 (avg: 0.6503) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:53<00:00,  3.41it/s, acc_nat=0.835, acc_rob=0.493]


Epoch 67 [0/383] | loss: 5.3189 (avg: 0.0139) | acc nat: 0.5938 (avg: 0.5938) |
Epoch 67 [100/383] | loss: 5.1332 (avg: 1.3415) | acc nat: 0.6641 (avg: 0.6744) |
Epoch 67 [200/383] | loss: 5.2916 (avg: 2.7445) | acc nat: 0.6641 (avg: 0.6604) |
Epoch 67 [300/383] | loss: 5.8923 (avg: 4.2070) | acc nat: 0.6016 (avg: 0.6534) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:50<00:00,  3.44it/s, acc_nat=0.832, acc_rob=0.473]


Epoch 68 [0/383] | loss: 5.5832 (avg: 0.0146) | acc nat: 0.6875 (avg: 0.6875) |
Epoch 68 [100/383] | loss: 4.5695 (avg: 1.3952) | acc nat: 0.6875 (avg: 0.6564) |
Epoch 68 [200/383] | loss: 5.4856 (avg: 2.8122) | acc nat: 0.6719 (avg: 0.6536) |
Epoch 68 [300/383] | loss: 6.1255 (avg: 4.2417) | acc nat: 0.6172 (avg: 0.6529) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:47<00:00,  3.48it/s, acc_nat=0.825, acc_rob=0.476]


Epoch 69 [0/383] | loss: 5.2811 (avg: 0.0138) | acc nat: 0.6797 (avg: 0.6797) |
Epoch 69 [100/383] | loss: 5.2632 (avg: 1.3749) | acc nat: 0.6641 (avg: 0.6603) |
Epoch 69 [200/383] | loss: 5.0337 (avg: 2.7539) | acc nat: 0.6328 (avg: 0.6585) |
Epoch 69 [300/383] | loss: 5.5387 (avg: 4.1822) | acc nat: 0.6562 (avg: 0.6544) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:45<00:00,  3.51it/s, acc_nat=0.826, acc_rob=0.471]


Epoch 70 [0/383] | loss: 4.9635 (avg: 0.0130) | acc nat: 0.6641 (avg: 0.6641) |
Epoch 70 [100/383] | loss: 5.0927 (avg: 1.3346) | acc nat: 0.7031 (avg: 0.6684) |
Epoch 70 [200/383] | loss: 5.6543 (avg: 2.6940) | acc nat: 0.6016 (avg: 0.6659) |
Epoch 70 [300/383] | loss: 5.9722 (avg: 4.1192) | acc nat: 0.6250 (avg: 0.6610) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:41<00:00,  3.56it/s, acc_nat=0.851, acc_rob=0.465]


Epoch 71 [0/383] | loss: 4.8870 (avg: 0.0128) | acc nat: 0.6719 (avg: 0.6719) |
Epoch 71 [100/383] | loss: 4.9324 (avg: 1.3578) | acc nat: 0.6328 (avg: 0.6647) |
Epoch 71 [200/383] | loss: 5.7308 (avg: 2.7409) | acc nat: 0.6172 (avg: 0.6610) |
Epoch 71 [300/383] | loss: 5.0415 (avg: 4.1593) | acc nat: 0.6406 (avg: 0.6567) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:38<00:00,  3.59it/s, acc_nat=0.82, acc_rob=0.479]


Epoch 72 [0/383] | loss: 5.1451 (avg: 0.0134) | acc nat: 0.6875 (avg: 0.6875) |
Epoch 72 [100/383] | loss: 5.3649 (avg: 1.3396) | acc nat: 0.6641 (avg: 0.6691) |
Epoch 72 [200/383] | loss: 4.8105 (avg: 2.7141) | acc nat: 0.6562 (avg: 0.6629) |
Epoch 72 [300/383] | loss: 5.0495 (avg: 4.1366) | acc nat: 0.6562 (avg: 0.6584) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:35<00:00,  3.62it/s, acc_nat=0.837, acc_rob=0.495]


Epoch 73 [0/383] | loss: 4.9435 (avg: 0.0129) | acc nat: 0.7031 (avg: 0.7031) |
Epoch 73 [100/383] | loss: 4.9510 (avg: 1.3422) | acc nat: 0.7188 (avg: 0.6675) |
Epoch 73 [200/383] | loss: 5.7411 (avg: 2.7219) | acc nat: 0.6719 (avg: 0.6613) |
Epoch 73 [300/383] | loss: 5.2312 (avg: 4.1359) | acc nat: 0.6406 (avg: 0.6572) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:37<00:00,  3.61it/s, acc_nat=0.824, acc_rob=0.473]


Epoch 74 [0/383] | loss: 5.5018 (avg: 0.0144) | acc nat: 0.6719 (avg: 0.6719) |
Epoch 74 [100/383] | loss: 5.5800 (avg: 1.3352) | acc nat: 0.6016 (avg: 0.6740) |
Epoch 74 [200/383] | loss: 5.2496 (avg: 2.7110) | acc nat: 0.6484 (avg: 0.6669) |
Epoch 74 [300/383] | loss: 5.4548 (avg: 4.1057) | acc nat: 0.6484 (avg: 0.6633) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:41<00:00,  3.55it/s, acc_nat=0.826, acc_rob=0.492]


Epoch 75 [0/383] | loss: 5.4463 (avg: 0.0142) | acc nat: 0.6094 (avg: 0.6094) |
Epoch 75 [100/383] | loss: 3.6432 (avg: 1.1068) | acc nat: 0.7500 (avg: 0.7171) |
Epoch 75 [200/383] | loss: 3.2616 (avg: 2.0979) | acc nat: 0.7734 (avg: 0.7289) |
Epoch 75 [300/383] | loss: 3.9030 (avg: 3.0446) | acc nat: 0.7031 (avg: 0.7372) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:45<00:00,  3.51it/s, acc_nat=0.869, acc_rob=0.518]


Epoch 76 [0/383] | loss: 3.4343 (avg: 0.0090) | acc nat: 0.7969 (avg: 0.7969) |
Epoch 76 [100/383] | loss: 3.3438 (avg: 0.8807) | acc nat: 0.7734 (avg: 0.7717) |
Epoch 76 [200/383] | loss: 3.1237 (avg: 1.7475) | acc nat: 0.7812 (avg: 0.7723) |
Epoch 76 [300/383] | loss: 3.9202 (avg: 2.6039) | acc nat: 0.7109 (avg: 0.7722) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:47<00:00,  3.48it/s, acc_nat=0.873, acc_rob=0.535]


Epoch 77 [0/383] | loss: 3.3919 (avg: 0.0089) | acc nat: 0.7969 (avg: 0.7969) |
Epoch 77 [100/383] | loss: 2.6981 (avg: 0.8248) | acc nat: 0.8047 (avg: 0.7864) |
Epoch 77 [200/383] | loss: 2.8144 (avg: 1.6335) | acc nat: 0.8594 (avg: 0.7863) |
Epoch 77 [300/383] | loss: 3.4051 (avg: 2.4443) | acc nat: 0.7188 (avg: 0.7872) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:50<00:00,  3.44it/s, acc_nat=0.874, acc_rob=0.541]


Epoch 78 [0/383] | loss: 3.4082 (avg: 0.0089) | acc nat: 0.7891 (avg: 0.7891) |
Epoch 78 [100/383] | loss: 3.0241 (avg: 0.7757) | acc nat: 0.7578 (avg: 0.8024) |
Epoch 78 [200/383] | loss: 2.9809 (avg: 1.5399) | acc nat: 0.7812 (avg: 0.8017) |
Epoch 78 [300/383] | loss: 3.0510 (avg: 2.3049) | acc nat: 0.7812 (avg: 0.8024) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:53<00:00,  3.40it/s, acc_nat=0.877, acc_rob=0.524]


Epoch 79 [0/383] | loss: 2.2071 (avg: 0.0058) | acc nat: 0.8281 (avg: 0.8281) |
Epoch 79 [100/383] | loss: 2.6229 (avg: 0.7419) | acc nat: 0.7812 (avg: 0.8061) |
Epoch 79 [200/383] | loss: 2.8805 (avg: 1.4850) | acc nat: 0.7734 (avg: 0.8079) |
Epoch 79 [300/383] | loss: 2.5330 (avg: 2.2197) | acc nat: 0.8438 (avg: 0.8068) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:57<00:00,  3.37it/s, acc_nat=0.878, acc_rob=0.529]


Epoch 80 [0/383] | loss: 2.5751 (avg: 0.0067) | acc nat: 0.8359 (avg: 0.8359) |
Epoch 80 [100/383] | loss: 2.2912 (avg: 0.7012) | acc nat: 0.8047 (avg: 0.8164) |
Epoch 80 [200/383] | loss: 2.5401 (avg: 1.4087) | acc nat: 0.8672 (avg: 0.8185) |
Epoch 80 [300/383] | loss: 2.5529 (avg: 2.1291) | acc nat: 0.8516 (avg: 0.8167) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:59<00:00,  3.34it/s, acc_nat=0.88, acc_rob=0.538]


Epoch 81 [0/383] | loss: 2.5516 (avg: 0.0067) | acc nat: 0.8516 (avg: 0.8516) |
Epoch 81 [100/383] | loss: 2.8920 (avg: 0.6845) | acc nat: 0.8047 (avg: 0.8282) |
Epoch 81 [200/383] | loss: 3.0587 (avg: 1.3717) | acc nat: 0.7422 (avg: 0.8266) |
Epoch 81 [300/383] | loss: 2.3657 (avg: 2.0643) | acc nat: 0.7969 (avg: 0.8243) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:59<00:00,  3.33it/s, acc_nat=0.867, acc_rob=0.532]


Epoch 82 [0/383] | loss: 2.1173 (avg: 0.0055) | acc nat: 0.8750 (avg: 0.8750) |
Epoch 82 [100/383] | loss: 2.4827 (avg: 0.6771) | acc nat: 0.8281 (avg: 0.8312) |
Epoch 82 [200/383] | loss: 2.3552 (avg: 1.3446) | acc nat: 0.8750 (avg: 0.8325) |
Epoch 82 [300/383] | loss: 2.2700 (avg: 2.0143) | acc nat: 0.8594 (avg: 0.8312) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:59<00:00,  3.34it/s, acc_nat=0.887, acc_rob=0.521]


Epoch 83 [0/383] | loss: 2.3234 (avg: 0.0061) | acc nat: 0.8516 (avg: 0.8516) |
Epoch 83 [100/383] | loss: 2.5298 (avg: 0.6373) | acc nat: 0.8203 (avg: 0.8385) |
Epoch 83 [200/383] | loss: 2.7002 (avg: 1.2905) | acc nat: 0.8125 (avg: 0.8354) |
Epoch 83 [300/383] | loss: 2.3398 (avg: 1.9350) | acc nat: 0.8594 (avg: 0.8339) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:58<00:00,  3.35it/s, acc_nat=0.868, acc_rob=0.52]


Epoch 84 [0/383] | loss: 1.8922 (avg: 0.0049) | acc nat: 0.8906 (avg: 0.8906) |
Epoch 84 [100/383] | loss: 2.2575 (avg: 0.6189) | acc nat: 0.8516 (avg: 0.8492) |
Epoch 84 [200/383] | loss: 2.4404 (avg: 1.2442) | acc nat: 0.8203 (avg: 0.8431) |
Epoch 84 [300/383] | loss: 2.4796 (avg: 1.8755) | acc nat: 0.7656 (avg: 0.8408) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:58<00:00,  3.35it/s, acc_nat=0.885, acc_rob=0.52]


Epoch 85 [0/383] | loss: 2.4661 (avg: 0.0064) | acc nat: 0.7969 (avg: 0.7969) |
Epoch 85 [100/383] | loss: 2.0349 (avg: 0.5992) | acc nat: 0.8828 (avg: 0.8496) |
Epoch 85 [200/383] | loss: 2.3224 (avg: 1.1995) | acc nat: 0.8203 (avg: 0.8495) |
Epoch 85 [300/383] | loss: 2.3396 (avg: 1.8144) | acc nat: 0.8672 (avg: 0.8475) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:59<00:00,  3.34it/s, acc_nat=0.873, acc_rob=0.525]


Epoch 86 [0/383] | loss: 2.2903 (avg: 0.0060) | acc nat: 0.8203 (avg: 0.8203) |
Epoch 86 [100/383] | loss: 3.0204 (avg: 0.5887) | acc nat: 0.8281 (avg: 0.8541) |
Epoch 86 [200/383] | loss: 2.3234 (avg: 1.1798) | acc nat: 0.8438 (avg: 0.8525) |
Epoch 86 [300/383] | loss: 2.3157 (avg: 1.7682) | acc nat: 0.8516 (avg: 0.8522) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:58<00:00,  3.35it/s, acc_nat=0.883, acc_rob=0.519]


Epoch 87 [0/383] | loss: 2.0091 (avg: 0.0052) | acc nat: 0.8750 (avg: 0.8750) |
Epoch 87 [100/383] | loss: 2.4214 (avg: 0.5749) | acc nat: 0.8359 (avg: 0.8556) |
Epoch 87 [200/383] | loss: 2.4094 (avg: 1.1498) | acc nat: 0.8750 (avg: 0.8549) |
Epoch 87 [300/383] | loss: 2.2584 (avg: 1.7401) | acc nat: 0.8828 (avg: 0.8537) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:58<00:00,  3.34it/s, acc_nat=0.877, acc_rob=0.528]


Epoch 88 [0/383] | loss: 2.0381 (avg: 0.0053) | acc nat: 0.8281 (avg: 0.8281) |
Epoch 88 [100/383] | loss: 2.1718 (avg: 0.5607) | acc nat: 0.8516 (avg: 0.8568) |
Epoch 88 [200/383] | loss: 1.7347 (avg: 1.1256) | acc nat: 0.8672 (avg: 0.8579) |
Epoch 88 [300/383] | loss: 2.0237 (avg: 1.6871) | acc nat: 0.8672 (avg: 0.8583) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:58<00:00,  3.35it/s, acc_nat=0.877, acc_rob=0.52]


Epoch 89 [0/383] | loss: 2.1460 (avg: 0.0056) | acc nat: 0.8594 (avg: 0.8594) |
Epoch 89 [100/383] | loss: 1.9599 (avg: 0.5534) | acc nat: 0.8984 (avg: 0.8625) |
Epoch 89 [200/383] | loss: 1.9111 (avg: 1.0929) | acc nat: 0.8594 (avg: 0.8642) |
Epoch 89 [300/383] | loss: 2.1310 (avg: 1.6518) | acc nat: 0.8906 (avg: 0.8639) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:59<00:00,  3.34it/s, acc_nat=0.874, acc_rob=0.522]


Epoch 90 [0/383] | loss: 1.8681 (avg: 0.0049) | acc nat: 0.8672 (avg: 0.8672) |
Epoch 90 [100/383] | loss: 1.8333 (avg: 0.5216) | acc nat: 0.9219 (avg: 0.8768) |
Epoch 90 [200/383] | loss: 1.5630 (avg: 1.0305) | acc nat: 0.9141 (avg: 0.8758) |
Epoch 90 [300/383] | loss: 1.3965 (avg: 1.5305) | acc nat: 0.9219 (avg: 0.8759) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:58<00:00,  3.35it/s, acc_nat=0.874, acc_rob=0.508]


Epoch 91 [0/383] | loss: 2.0926 (avg: 0.0055) | acc nat: 0.8672 (avg: 0.8672) |
Epoch 91 [100/383] | loss: 2.3642 (avg: 0.5102) | acc nat: 0.8047 (avg: 0.8782) |
Epoch 91 [200/383] | loss: 2.1410 (avg: 1.0117) | acc nat: 0.8516 (avg: 0.8771) |
Epoch 91 [300/383] | loss: 2.1351 (avg: 1.5118) | acc nat: 0.8438 (avg: 0.8771) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:56<00:00,  3.37it/s, acc_nat=0.876, acc_rob=0.517]


Epoch 92 [0/383] | loss: 1.9961 (avg: 0.0052) | acc nat: 0.9062 (avg: 0.9062) |
Epoch 92 [100/383] | loss: 1.8118 (avg: 0.5017) | acc nat: 0.8672 (avg: 0.8779) |
Epoch 92 [200/383] | loss: 1.9796 (avg: 0.9944) | acc nat: 0.8750 (avg: 0.8795) |
Epoch 92 [300/383] | loss: 1.4668 (avg: 1.4905) | acc nat: 0.9062 (avg: 0.8798) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:53<00:00,  3.40it/s, acc_nat=0.875, acc_rob=0.522]


Epoch 93 [0/383] | loss: 1.8962 (avg: 0.0050) | acc nat: 0.8750 (avg: 0.8750) |
Epoch 93 [100/383] | loss: 1.9200 (avg: 0.5005) | acc nat: 0.8594 (avg: 0.8787) |
Epoch 93 [200/383] | loss: 1.4915 (avg: 0.9917) | acc nat: 0.8750 (avg: 0.8787) |
Epoch 93 [300/383] | loss: 1.8244 (avg: 1.4833) | acc nat: 0.8906 (avg: 0.8772) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:50<00:00,  3.44it/s, acc_nat=0.875, acc_rob=0.526]


Epoch 94 [0/383] | loss: 1.8440 (avg: 0.0048) | acc nat: 0.9062 (avg: 0.9062) |
Epoch 94 [100/383] | loss: 1.4957 (avg: 0.4918) | acc nat: 0.9141 (avg: 0.8814) |
Epoch 94 [200/383] | loss: 2.1528 (avg: 0.9720) | acc nat: 0.8359 (avg: 0.8836) |
Epoch 94 [300/383] | loss: 1.6164 (avg: 1.4622) | acc nat: 0.8828 (avg: 0.8813) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:48<00:00,  3.47it/s, acc_nat=0.881, acc_rob=0.53]


Epoch 95 [0/383] | loss: 1.9201 (avg: 0.0050) | acc nat: 0.8750 (avg: 0.8750) |
Epoch 95 [100/383] | loss: 1.8392 (avg: 0.4904) | acc nat: 0.8906 (avg: 0.8823) |
Epoch 95 [200/383] | loss: 1.7290 (avg: 0.9726) | acc nat: 0.9297 (avg: 0.8819) |
Epoch 95 [300/383] | loss: 1.6055 (avg: 1.4520) | acc nat: 0.8984 (avg: 0.8817) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:45<00:00,  3.51it/s, acc_nat=0.877, acc_rob=0.508]


Epoch 96 [0/383] | loss: 1.7884 (avg: 0.0047) | acc nat: 0.8906 (avg: 0.8906) |
Epoch 96 [100/383] | loss: 1.7445 (avg: 0.4887) | acc nat: 0.8750 (avg: 0.8817) |
Epoch 96 [200/383] | loss: 1.9921 (avg: 0.9685) | acc nat: 0.8750 (avg: 0.8827) |
Epoch 96 [300/383] | loss: 1.6199 (avg: 1.4451) | acc nat: 0.8906 (avg: 0.8835) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:40<00:00,  3.56it/s, acc_nat=0.876, acc_rob=0.525]


Epoch 97 [0/383] | loss: 2.0179 (avg: 0.0053) | acc nat: 0.8359 (avg: 0.8359) |
Epoch 97 [100/383] | loss: 1.8333 (avg: 0.4802) | acc nat: 0.8984 (avg: 0.8848) |
Epoch 97 [200/383] | loss: 1.7947 (avg: 0.9585) | acc nat: 0.8750 (avg: 0.8837) |
Epoch 97 [300/383] | loss: 1.6031 (avg: 1.4393) | acc nat: 0.8984 (avg: 0.8828) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:37<00:00,  3.60it/s, acc_nat=0.876, acc_rob=0.531]


Epoch 98 [0/383] | loss: 1.5365 (avg: 0.0040) | acc nat: 0.9062 (avg: 0.9062) |
Epoch 98 [100/383] | loss: 1.9924 (avg: 0.4812) | acc nat: 0.8828 (avg: 0.8834) |
Epoch 98 [200/383] | loss: 1.8344 (avg: 0.9532) | acc nat: 0.8906 (avg: 0.8837) |
Epoch 98 [300/383] | loss: 1.8751 (avg: 1.4320) | acc nat: 0.8984 (avg: 0.8833) |


[eval] 1000/1000: 100%|██████████| 1000/1000 [04:35<00:00,  3.63it/s, acc_nat=0.888, acc_rob=0.512]


Epoch 99 [0/383] | loss: 2.1739 (avg: 0.0057) | acc nat: 0.8594 (avg: 0.8594) |
Epoch 99 [100/383] | loss: 1.8550 (avg: 0.4715) | acc nat: 0.8828 (avg: 0.8876) |
Epoch 99 [200/383] | loss: 1.7761 (avg: 0.9473) | acc nat: 0.8750 (avg: 0.8858) |
Epoch 99 [300/383] | loss: 1.4049 (avg: 1.4237) | acc nat: 0.9219 (avg: 0.8846) |


[eval] 900/1000:  90%|████████▉ | 899/1000 [04:09<00:27,  3.64it/s, acc_nat=0.779, acc_rob=0.469]