In [1]:
import sys
sys.path.append("..")

import os.path
import json

import time
import numpy as np
import matplotlib.pyplot as plt

import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F

import seaborn as sns # conda install seaborn
import pandas as pd # ^^ this will automatically install pandas

import pyro
from pyro.infer.mcmc import MCMC
import pyro.distributions as dist

from kernel.sghmc import SGHMC
from kernel.sgld import SGLD
from kernel.sgd import SGD
from kernel.sgnuts import NUTS as SGNUTS

# Seed the random number generators through Pyro so that repeated runs of the
# notebook are comparable.
pyro.set_rng_seed(101)

# Render matplotlib figures at 300 dpi.
plt.rcParams['figure.dpi'] = 300

In [2]:
# Prefer the GPU but fall back to the CPU instead of aborting the notebook.
# A bare `assert torch.cuda.is_available()` used to precede this line, which
# made the "cpu" branch below unreachable.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type != "cuda":
    # Make the fallback visible: the sampling cells are expected to be much
    # slower without a GPU.
    print("WARNING: CUDA is not available; falling back to the CPU.")

In [3]:
# Directory that holds the JSON result files, one per inference method.
RESULTS_DIR = os.path.join("results", "bnn")
# Create the directory up front (no-op if it already exists) so that a long
# sampling run cannot fail at the very end when its results are written.
os.makedirs(RESULTS_DIR, exist_ok=True)

RESULTS_SGHMC = os.path.join(RESULTS_DIR, "sghmc.json")
RESULTS_SGLD = os.path.join(RESULTS_DIR, "sgld.json")
RESULTS_SGD = os.path.join(RESULTS_DIR, "sgd.json")
RESULTS_SGDMOM = os.path.join(RESULTS_DIR, "sgdmom.json")

In [4]:
# Simple dataset wrapper class

class Dataset(torch.utils.data.Dataset):
    """Minimal map-style dataset pairing a container of inputs with its labels.

    Args:
        data: indexable container of inputs; its length defines the dataset size.
        targets: indexable container of labels, indexed in parallel with ``data``.
    """

    def __init__(self, data, targets):
        self.data, self.targets = data, targets

    def __len__(self):
        # Only ``data`` is consulted for the size; ``targets`` is not checked.
        return len(self.data)

    def __getitem__(self, idx):
        # Return the (input, label) pair stored at position ``idx``.
        sample = self.data[idx]
        label = self.targets[idx]
        return sample, label

### Hyperparams

These hyperparameters were fixed during the hyperparameter search. All other hyperparameters in this notebook are the best ones we found during the hyperparameter search.

In [5]:
# Mini-batch size; passed to the data loaders and to the SGHMC kernel.
BATCH_SIZE = 500
# Number of epochs run in addition to the warm-up epochs.
NUM_EPOCHS = 800
# Evaluation of the sampler starts once the epoch counter reaches this value.
WARMUP_EPOCHS = 50
# Width of the hidden layer of the network.
HIDDEN_SIZE = 100

### Download MNIST and setup datasets / dataloaders

In [6]:
# Load the MNIST train / test splits from ./data (download=True fetches them if needed).
train_dataset = datasets.MNIST('./data', train=True, download=True)

test_dataset = datasets.MNIST('./data', train=False, download=True)

# Number of training images held out for validation.
nvalid = 10000

# NOTE(review): despite its name, `perm` is the identity ordering (arange), so the
# validation split is simply the first `nvalid` training images, not a random subset.
perm = torch.arange(len(train_dataset))
train_idx = perm[nvalid:]
val_idx = perm[:nvalid]
    
# NOTE(review): `mean` and `std` are not used anywhere in the visible part of this
# notebook; the inputs below are only divided by 255 -- confirm whether
# standardisation was intended.
mean = 0.1307
std = 0.3081

# scale the datasets (raw pixel values are divided by 255)
X_train = train_dataset.data[train_idx] / 255.0
Y_train = train_dataset.targets[train_idx]

X_val = train_dataset.data[val_idx] / 255.0
Y_val = train_dataset.targets[val_idx]

X_test = test_dataset.data / 255.0
Y_test = test_dataset.targets

# redefine the datasets
# (the names train_dataset / test_dataset are rebound from the torchvision
# objects to the thin Dataset wrapper around the scaled tensors)
train_dataset = Dataset(X_train, Y_train)
val_dataset = Dataset(X_val, Y_val)
test_dataset = Dataset(X_test, Y_test)

# setup the dataloaders
# (only the training loader shuffles; validation / test keep their order)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

### Define the Bayesian neural network model

In [7]:
PyroLinear = pyro.nn.PyroModule[torch.nn.Linear]


class BNN(pyro.nn.PyroModule):
    """Fully connected Bayesian neural network with one hidden layer.

    Both linear layers carry zero-mean Normal priors over their weights and
    biases; the observation model is a Categorical over the ``output_size``
    classes.

    Args:
        input_size: number of features per example; inputs are flattened to
            ``(-1, input_size)`` in ``forward``.
        hidden_size: width of the hidden layer.
        output_size: number of classes.
        prec: multiplier applied to the unit prior scale of every weight and
            bias. NOTE(review): despite the name, the value is used directly
            as the Normal *scale* (standard deviation), not as a precision --
            confirm this is intended. Meant to be changed only by SGD, where
            it sets the regularization strength.
        device: device on which the prior tensors are created and to which
            inputs and labels are moved in ``forward``.
    """

    def __init__(self, input_size, hidden_size, output_size, prec=1., device='cpu'):
        super().__init__()
        # prec is a kwarg that should only be used by SGD to set the regularization strength
        # recall that a Gaussian prior over the weights is equivalent to L2 norm regularization in the non-Bayes setting

        self.device = device
        # Remembered so that forward() flattens inputs to the configured width
        # instead of a hard-coded 28*28.
        self.input_size = input_size

        # TODO add gamma priors to precision terms

        def normal_prior(*shape):
            # Zero-mean Normal with scale `prec`; every dimension of `shape`
            # is declared an event dimension.
            loc = torch.zeros(shape, device=self.device)
            scale = torch.ones(shape, device=self.device) * prec
            return dist.Normal(loc, scale).to_event(len(shape))

        self.fc1 = PyroLinear(input_size, hidden_size)
        self.fc1.weight = pyro.nn.PyroSample(normal_prior(hidden_size, input_size))
        self.fc1.bias = pyro.nn.PyroSample(normal_prior(hidden_size))

        self.fc2 = PyroLinear(hidden_size, output_size)
        self.fc2.weight = pyro.nn.PyroSample(normal_prior(output_size, hidden_size))
        self.fc2.bias = pyro.nn.PyroSample(normal_prior(output_size))

        self.relu = torch.nn.ReLU()
        self.log_softmax = torch.nn.LogSoftmax(dim=1)

    def forward(self, x, y=None):
        """Run the network on ``x`` and register the ``"obs"`` sample site.

        Args:
            x: batch of inputs; flattened to ``(-1, input_size)`` and moved to
                ``self.device``.
            y: optional labels. When given they are moved to ``self.device``
                and used as the observed values of ``"obs"``; when ``None``
                the site is sampled.

        Returns:
            None. The result is exposed through the ``"obs"`` sample site.
        """
        x = x.view(-1, self.input_size).to(self.device)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        x = self.log_softmax(x)  # output (log) softmax probabilities of each class

        if y is not None:
            y = y.to(self.device)

        # One Categorical observation per row of the batch.
        with pyro.plate("data", x.shape[0]):
            pyro.sample("obs", dist.Categorical(logits=x), obs=y)

### Run SGHMC 

We run SGHMC to sample approximately from the posterior distribution.

In [8]:
LR = 2e-6
MOMENTUM_DECAY = 0.01
RESAMPLE_EVERY_N = 0
NUM_STEPS = 1 # fixed during hyperparameter search
NUM_CLASSES = 10 # number of MNIST digit classes

pyro.clear_param_store()

bnn = BNN(28*28, HIDDEN_SIZE, NUM_CLASSES, device=device).to(device)

sghmc = SGHMC(bnn,
              subsample_positions=[0, 1],
              batch_size=BATCH_SIZE,
              learning_rate=LR,
              momentum_decay=MOMENTUM_DECAY,
              num_steps=NUM_STEPS,
              resample_every_n=RESAMPLE_EVERY_N,
              obs_info_noise=True,
              device=device)

# Each run() draws len(train_dataset)//BATCH_SIZE samples
# (presumably one mini-batch step per sample, i.e. one pass over the training set).
sghmc_mcmc = MCMC(sghmc, num_samples=len(train_dataset)//BATCH_SIZE, warmup_steps=0)

sghmc_test_errs = []

# full posterior predictive: running per-class vote counts for every validation
# example, accumulated over all evaluated epochs. Sized from the validation
# labels rather than a hard-coded 10000.
full_predictive = torch.zeros(Y_val.shape[0], NUM_CLASSES, dtype=torch.float32)

for epoch in range(1, 1+NUM_EPOCHS + WARMUP_EPOCHS):
    sghmc_mcmc.run(X_train, Y_train)

    # NOTE(review): epochs are numbered from 1, so `>=` starts evaluating at
    # epoch WARMUP_EPOCHS itself: the log begins at "Epoch [0/...]" and
    # NUM_EPOCHS + 1 errors are recorded. Left unchanged so the length of the
    # saved list stays the same -- confirm whether `>` was intended.
    if epoch >= WARMUP_EPOCHS:

        sghmc_samples = sghmc_mcmc.get_samples()
        predictive = pyro.infer.Predictive(bnn, posterior_samples=sghmc_samples)
        start = time.time()

        with torch.no_grad():
            # Predict every validation batch, then join the batches once along
            # the example axis (dim=1); dim 0 indexes the posterior samples.
            batch_predictions = [
                predictive(x)['obs'].to(torch.int64).to("cpu")
                for x, _ in val_loader
            ]
            epoch_predictive = torch.cat(batch_predictions, dim=1)

            # Add one vote per posterior sample to the predicted class of each example.
            full_predictive += F.one_hot(epoch_predictive, num_classes=NUM_CLASSES).sum(dim=0)

            # Ensemble prediction = class with the most votes so far.
            full_y_hat = torch.argmax(full_predictive, dim=1)
            total = Y_val.shape[0]
            correct = int((full_y_hat == Y_val).sum())

        end = time.time()

        sghmc_test_errs.append(1.0 - correct/total)

        # NOTE(review): the metric is computed on the validation split
        # (val_loader / Y_val) although the message says "test" -- confirm
        # which of the two was intended.
        print("Epoch [{}/{}] test accuracy: {:.4f} time: {:.2f}".format(epoch-WARMUP_EPOCHS, NUM_EPOCHS, correct/total, end - start))

        # Epoch [800/800] test accuracy: 0.9714 time: 4.26

# Save the errors to a file
# Make sure the target directory exists (no-op if it already does) so the
# results of the run are not lost to a FileNotFoundError.
os.makedirs(RESULTS_DIR, exist_ok=True)
with open(RESULTS_SGHMC, "w") as f:
    json.dump(sghmc_test_errs, f)

Sample: 100%|██████████| 100/100 [00:01, 58.08it/s, lr=2.00e-06]
Sample: 100%|██████████| 100/100 [00:01, 71.08it/s, lr=2.00e-06]
Sample: 100%|██████████| 100/100 [00:01, 75.61it/s, lr=2.00e-06]
Sample: 100%|██████████| 100/100 [00:01, 75.59it/s, lr=2.00e-06]
Sample: 100%|██████████| 100/100 [00:01, 63.21it/s, lr=2.00e-06]
Sample: 100%|██████████| 100/100 [00:01, 62.50it/s, lr=2.00e-06]
Sample: 100%|██████████| 100/100 [00:01, 62.42it/s, lr=2.00e-06]
Sample: 100%|██████████| 100/100 [00:01, 58.28it/s, lr=2.00e-06]
Sample: 100%|██████████| 100/100 [00:02, 38.93it/s, lr=2.00e-06]
Sample: 100%|██████████| 100/100 [00:01, 58.79it/s, lr=2.00e-06]
Sample: 100%|██████████| 100/100 [00:01, 62.31it/s, lr=2.00e-06]
Sample: 100%|██████████| 100/100 [00:01, 64.03it/s, lr=2.00e-06]
Sample: 100%|██████████| 100/100 [00:01, 61.82it/s, lr=2.00e-06]
Sample: 100%|██████████| 100/100 [00:01, 53.80it/s, lr=2.00e-06]
Sample: 100%|██████████| 100/100 [00:01, 64.06it/s, lr=2.00e-06]
Sample: 100%|██████████| 

Epoch [0/800] test accuracy: 0.7321 time: 3.17


Sample: 100%|██████████| 100/100 [00:01, 70.74it/s, lr=2.00e-06]


Epoch [1/800] test accuracy: 0.7721 time: 2.89


Sample: 100%|██████████| 100/100 [00:02, 42.99it/s, lr=2.00e-06]


Epoch [2/800] test accuracy: 0.8148 time: 3.58


Sample: 100%|██████████| 100/100 [00:02, 42.01it/s, lr=2.00e-06]


Epoch [3/800] test accuracy: 0.8332 time: 3.22


Sample: 100%|██████████| 100/100 [00:01, 72.78it/s, lr=2.00e-06]


Epoch [4/800] test accuracy: 0.8479 time: 3.32


Sample: 100%|██████████| 100/100 [00:01, 71.36it/s, lr=2.00e-06]


Epoch [5/800] test accuracy: 0.8568 time: 3.61


Sample: 100%|██████████| 100/100 [00:01, 66.81it/s, lr=2.00e-06]


Epoch [6/800] test accuracy: 0.8633 time: 3.08


Sample: 100%|██████████| 100/100 [00:01, 81.70it/s, lr=2.00e-06]


Epoch [7/800] test accuracy: 0.8681 time: 3.16


Sample: 100%|██████████| 100/100 [00:01, 66.69it/s, lr=2.00e-06]


Epoch [8/800] test accuracy: 0.8728 time: 3.42


Sample: 100%|██████████| 100/100 [00:01, 50.38it/s, lr=2.00e-06]


Epoch [9/800] test accuracy: 0.8759 time: 3.86


Sample: 100%|██████████| 100/100 [00:01, 58.27it/s, lr=2.00e-06]


Epoch [10/800] test accuracy: 0.8776 time: 4.04


Sample: 100%|██████████| 100/100 [00:02, 47.34it/s, lr=2.00e-06]


Epoch [11/800] test accuracy: 0.8806 time: 3.44


Sample: 100%|██████████| 100/100 [00:01, 55.66it/s, lr=2.00e-06]


Epoch [12/800] test accuracy: 0.8828 time: 4.21


Sample: 100%|██████████| 100/100 [00:01, 54.83it/s, lr=2.00e-06]


Epoch [13/800] test accuracy: 0.8854 time: 3.24


Sample: 100%|██████████| 100/100 [00:02, 40.40it/s, lr=2.00e-06]


Epoch [14/800] test accuracy: 0.8850 time: 3.30


Sample: 100%|██████████| 100/100 [00:01, 57.26it/s, lr=2.00e-06]


Epoch [15/800] test accuracy: 0.8859 time: 3.15


Sample: 100%|██████████| 100/100 [00:01, 59.35it/s, lr=2.00e-06]


Epoch [16/800] test accuracy: 0.8862 time: 3.51


Sample: 100%|██████████| 100/100 [00:01, 63.08it/s, lr=2.00e-06]


Epoch [17/800] test accuracy: 0.8862 time: 3.68


Sample: 100%|██████████| 100/100 [00:01, 65.34it/s, lr=2.00e-06]


Epoch [18/800] test accuracy: 0.8875 time: 3.88


Sample: 100%|██████████| 100/100 [00:02, 48.72it/s, lr=2.00e-06]


Epoch [19/800] test accuracy: 0.8874 time: 3.52


Sample: 100%|██████████| 100/100 [00:03, 32.73it/s, lr=2.00e-06]


Epoch [20/800] test accuracy: 0.8882 time: 3.12


Sample: 100%|██████████| 100/100 [00:01, 60.56it/s, lr=2.00e-06]


Epoch [21/800] test accuracy: 0.8903 time: 3.34


Sample: 100%|██████████| 100/100 [00:02, 38.15it/s, lr=2.00e-06]


Epoch [22/800] test accuracy: 0.8920 time: 3.41


Sample: 100%|██████████| 100/100 [00:01, 61.07it/s, lr=2.00e-06]


Epoch [23/800] test accuracy: 0.8939 time: 3.05


Sample: 100%|██████████| 100/100 [00:01, 63.02it/s, lr=2.00e-06]


Epoch [24/800] test accuracy: 0.8958 time: 3.17


Sample: 100%|██████████| 100/100 [00:01, 60.91it/s, lr=2.00e-06]


Epoch [25/800] test accuracy: 0.8993 time: 3.02


Sample: 100%|██████████| 100/100 [00:01, 64.46it/s, lr=2.00e-06]


Epoch [26/800] test accuracy: 0.9017 time: 3.29


Sample: 100%|██████████| 100/100 [00:03, 27.05it/s, lr=2.00e-06]


Epoch [27/800] test accuracy: 0.9031 time: 4.25


Sample: 100%|██████████| 100/100 [00:02, 48.52it/s, lr=2.00e-06]


Epoch [28/800] test accuracy: 0.9053 time: 3.86


Sample: 100%|██████████| 100/100 [00:02, 38.79it/s, lr=2.00e-06]


Epoch [29/800] test accuracy: 0.9058 time: 3.10


Sample: 100%|██████████| 100/100 [00:03, 31.44it/s, lr=2.00e-06]


Epoch [30/800] test accuracy: 0.9074 time: 3.12


Sample: 100%|██████████| 100/100 [00:02, 49.20it/s, lr=2.00e-06]


Epoch [31/800] test accuracy: 0.9085 time: 3.24


Sample: 100%|██████████| 100/100 [00:01, 62.50it/s, lr=2.00e-06]


Epoch [32/800] test accuracy: 0.9084 time: 2.99


Sample: 100%|██████████| 100/100 [00:01, 56.09it/s, lr=2.00e-06]


Epoch [33/800] test accuracy: 0.9091 time: 3.21


Sample: 100%|██████████| 100/100 [00:01, 58.42it/s, lr=2.00e-06]


Epoch [34/800] test accuracy: 0.9097 time: 2.92


Sample: 100%|██████████| 100/100 [00:01, 58.86it/s, lr=2.00e-06]


Epoch [35/800] test accuracy: 0.9092 time: 3.29


Sample: 100%|██████████| 100/100 [00:01, 55.36it/s, lr=2.00e-06]


Epoch [36/800] test accuracy: 0.9092 time: 4.32


Sample: 100%|██████████| 100/100 [00:01, 58.45it/s, lr=2.00e-06]


Epoch [37/800] test accuracy: 0.9093 time: 2.86


Sample: 100%|██████████| 100/100 [00:01, 63.12it/s, lr=2.00e-06]


Epoch [38/800] test accuracy: 0.9090 time: 3.08


Sample: 100%|██████████| 100/100 [00:01, 60.70it/s, lr=2.00e-06]


Epoch [39/800] test accuracy: 0.9093 time: 3.35


Sample: 100%|██████████| 100/100 [00:01, 64.00it/s, lr=2.00e-06]


Epoch [40/800] test accuracy: 0.9094 time: 2.98


Sample: 100%|██████████| 100/100 [00:01, 56.84it/s, lr=2.00e-06]


Epoch [41/800] test accuracy: 0.9101 time: 3.20


Sample: 100%|██████████| 100/100 [00:01, 61.99it/s, lr=2.00e-06]


Epoch [42/800] test accuracy: 0.9103 time: 2.88


Sample: 100%|██████████| 100/100 [00:01, 60.31it/s, lr=2.00e-06]


Epoch [43/800] test accuracy: 0.9109 time: 3.12


Sample: 100%|██████████| 100/100 [00:01, 68.37it/s, lr=2.00e-06]


Epoch [44/800] test accuracy: 0.9111 time: 3.11


Sample: 100%|██████████| 100/100 [00:01, 66.23it/s, lr=2.00e-06]


Epoch [45/800] test accuracy: 0.9112 time: 3.63


Sample: 100%|██████████| 100/100 [00:03, 29.30it/s, lr=2.00e-06]


Epoch [46/800] test accuracy: 0.9109 time: 3.16


Sample: 100%|██████████| 100/100 [00:01, 76.91it/s, lr=2.00e-06]


Epoch [47/800] test accuracy: 0.9108 time: 3.73


Sample: 100%|██████████| 100/100 [00:01, 85.83it/s, lr=2.00e-06]


Epoch [48/800] test accuracy: 0.9113 time: 3.86


Sample: 100%|██████████| 100/100 [00:03, 31.04it/s, lr=2.00e-06]


Epoch [49/800] test accuracy: 0.9114 time: 3.17


Sample: 100%|██████████| 100/100 [00:02, 34.90it/s, lr=2.00e-06]


Epoch [50/800] test accuracy: 0.9123 time: 3.97


Sample: 100%|██████████| 100/100 [00:03, 30.34it/s, lr=2.00e-06]


Epoch [51/800] test accuracy: 0.9125 time: 3.83


Sample: 100%|██████████| 100/100 [00:01, 63.04it/s, lr=2.00e-06]


Epoch [52/800] test accuracy: 0.9122 time: 3.82


Sample: 100%|██████████| 100/100 [00:02, 37.36it/s, lr=2.00e-06]


Epoch [53/800] test accuracy: 0.9125 time: 4.03


Sample: 100%|██████████| 100/100 [00:01, 57.88it/s, lr=2.00e-06]


Epoch [54/800] test accuracy: 0.9134 time: 3.88


Sample: 100%|██████████| 100/100 [00:01, 65.36it/s, lr=2.00e-06]


Epoch [55/800] test accuracy: 0.9143 time: 3.94


Sample: 100%|██████████| 100/100 [00:01, 63.56it/s, lr=2.00e-06]


Epoch [56/800] test accuracy: 0.9142 time: 3.62


Sample: 100%|██████████| 100/100 [00:01, 60.02it/s, lr=2.00e-06]


Epoch [57/800] test accuracy: 0.9152 time: 3.10


Sample: 100%|██████████| 100/100 [00:02, 47.67it/s, lr=2.00e-06]


Epoch [58/800] test accuracy: 0.9153 time: 3.84


Sample: 100%|██████████| 100/100 [00:01, 71.40it/s, lr=2.00e-06]


Epoch [59/800] test accuracy: 0.9157 time: 2.90


Sample: 100%|██████████| 100/100 [00:01, 64.58it/s, lr=2.00e-06]


Epoch [60/800] test accuracy: 0.9160 time: 3.55


Sample: 100%|██████████| 100/100 [00:02, 41.70it/s, lr=2.00e-06]


Epoch [61/800] test accuracy: 0.9170 time: 3.88


Sample: 100%|██████████| 100/100 [00:01, 64.47it/s, lr=2.00e-06]


Epoch [62/800] test accuracy: 0.9178 time: 3.07


Sample: 100%|██████████| 100/100 [00:01, 69.35it/s, lr=2.00e-06]


Epoch [63/800] test accuracy: 0.9181 time: 3.15


Sample: 100%|██████████| 100/100 [00:01, 63.45it/s, lr=2.00e-06]


Epoch [64/800] test accuracy: 0.9184 time: 3.13


Sample: 100%|██████████| 100/100 [00:01, 56.87it/s, lr=2.00e-06]


Epoch [65/800] test accuracy: 0.9185 time: 2.78


Sample: 100%|██████████| 100/100 [00:01, 75.29it/s, lr=2.00e-06]


Epoch [66/800] test accuracy: 0.9192 time: 2.98


Sample: 100%|██████████| 100/100 [00:01, 60.56it/s, lr=2.00e-06]


Epoch [67/800] test accuracy: 0.9195 time: 2.88


Sample: 100%|██████████| 100/100 [00:01, 53.99it/s, lr=2.00e-06]


Epoch [68/800] test accuracy: 0.9197 time: 3.27


Sample: 100%|██████████| 100/100 [00:03, 32.16it/s, lr=2.00e-06]


Epoch [69/800] test accuracy: 0.9198 time: 3.63


Sample: 100%|██████████| 100/100 [00:03, 30.79it/s, lr=2.00e-06]


Epoch [70/800] test accuracy: 0.9206 time: 3.17


Sample: 100%|██████████| 100/100 [00:01, 69.59it/s, lr=2.00e-06]


Epoch [71/800] test accuracy: 0.9206 time: 3.60


Sample: 100%|██████████| 100/100 [00:01, 59.06it/s, lr=2.00e-06]


Epoch [72/800] test accuracy: 0.9215 time: 2.93


Sample: 100%|██████████| 100/100 [00:01, 68.39it/s, lr=2.00e-06]


Epoch [73/800] test accuracy: 0.9216 time: 2.90


Sample: 100%|██████████| 100/100 [00:01, 66.91it/s, lr=2.00e-06]


Epoch [74/800] test accuracy: 0.9220 time: 3.24


Sample: 100%|██████████| 100/100 [00:01, 63.82it/s, lr=2.00e-06]


Epoch [75/800] test accuracy: 0.9223 time: 3.33


Sample: 100%|██████████| 100/100 [00:01, 64.64it/s, lr=2.00e-06]


Epoch [76/800] test accuracy: 0.9223 time: 3.01


Sample: 100%|██████████| 100/100 [00:01, 62.47it/s, lr=2.00e-06]


Epoch [77/800] test accuracy: 0.9227 time: 3.09


Sample: 100%|██████████| 100/100 [00:01, 54.51it/s, lr=2.00e-06]


Epoch [78/800] test accuracy: 0.9226 time: 3.01


Sample: 100%|██████████| 100/100 [00:01, 77.49it/s, lr=2.00e-06]


Epoch [79/800] test accuracy: 0.9226 time: 3.30


Sample: 100%|██████████| 100/100 [00:01, 63.22it/s, lr=2.00e-06]


Epoch [80/800] test accuracy: 0.9231 time: 3.19


Sample: 100%|██████████| 100/100 [00:01, 66.00it/s, lr=2.00e-06]


Epoch [81/800] test accuracy: 0.9232 time: 2.99


Sample: 100%|██████████| 100/100 [00:01, 63.93it/s, lr=2.00e-06]


Epoch [82/800] test accuracy: 0.9235 time: 3.49


Sample: 100%|██████████| 100/100 [00:01, 57.42it/s, lr=2.00e-06]


Epoch [83/800] test accuracy: 0.9238 time: 2.90


Sample: 100%|██████████| 100/100 [00:01, 57.02it/s, lr=2.00e-06]


Epoch [84/800] test accuracy: 0.9241 time: 3.87


Sample: 100%|██████████| 100/100 [00:01, 58.77it/s, lr=2.00e-06]


Epoch [85/800] test accuracy: 0.9243 time: 3.53


Sample: 100%|██████████| 100/100 [00:01, 61.76it/s, lr=2.00e-06]


Epoch [86/800] test accuracy: 0.9242 time: 3.53


Sample: 100%|██████████| 100/100 [00:01, 61.76it/s, lr=2.00e-06]


Epoch [87/800] test accuracy: 0.9245 time: 2.94


Sample: 100%|██████████| 100/100 [00:01, 57.90it/s, lr=2.00e-06]


Epoch [88/800] test accuracy: 0.9244 time: 3.20


Sample: 100%|██████████| 100/100 [00:01, 69.33it/s, lr=2.00e-06]


Epoch [89/800] test accuracy: 0.9242 time: 2.90


Sample: 100%|██████████| 100/100 [00:01, 62.42it/s, lr=2.00e-06]


Epoch [90/800] test accuracy: 0.9246 time: 3.19


Sample: 100%|██████████| 100/100 [00:01, 57.66it/s, lr=2.00e-06]


Epoch [91/800] test accuracy: 0.9249 time: 3.27


Sample: 100%|██████████| 100/100 [00:01, 55.70it/s, lr=2.00e-06]


Epoch [92/800] test accuracy: 0.9249 time: 2.83


Sample: 100%|██████████| 100/100 [00:01, 87.34it/s, lr=2.00e-06]


Epoch [93/800] test accuracy: 0.9249 time: 3.87


Sample: 100%|██████████| 100/100 [00:03, 26.32it/s, lr=2.00e-06]


Epoch [94/800] test accuracy: 0.9248 time: 3.69


Sample: 100%|██████████| 100/100 [00:02, 36.33it/s, lr=2.00e-06]


Epoch [95/800] test accuracy: 0.9249 time: 3.90


Sample: 100%|██████████| 100/100 [00:03, 25.79it/s, lr=2.00e-06]


Epoch [96/800] test accuracy: 0.9249 time: 3.44


Sample: 100%|██████████| 100/100 [00:02, 46.97it/s, lr=2.00e-06]


Epoch [97/800] test accuracy: 0.9248 time: 3.30


Sample: 100%|██████████| 100/100 [00:01, 66.20it/s, lr=2.00e-06]


Epoch [98/800] test accuracy: 0.9245 time: 3.60


Sample: 100%|██████████| 100/100 [00:01, 71.96it/s, lr=2.00e-06]


Epoch [99/800] test accuracy: 0.9244 time: 3.06


Sample: 100%|██████████| 100/100 [00:01, 59.76it/s, lr=2.00e-06]


Epoch [100/800] test accuracy: 0.9246 time: 2.82


Sample: 100%|██████████| 100/100 [00:01, 59.62it/s, lr=2.00e-06]


Epoch [101/800] test accuracy: 0.9251 time: 2.74


Sample: 100%|██████████| 100/100 [00:01, 63.34it/s, lr=2.00e-06]


Epoch [102/800] test accuracy: 0.9247 time: 3.11


Sample: 100%|██████████| 100/100 [00:01, 55.92it/s, lr=2.00e-06]


Epoch [103/800] test accuracy: 0.9248 time: 3.23


Sample: 100%|██████████| 100/100 [00:01, 84.94it/s, lr=2.00e-06]


Epoch [104/800] test accuracy: 0.9249 time: 3.47


Sample: 100%|██████████| 100/100 [00:01, 52.16it/s, lr=2.00e-06]


Epoch [105/800] test accuracy: 0.9248 time: 3.58


Sample: 100%|██████████| 100/100 [00:01, 58.79it/s, lr=2.00e-06]


Epoch [106/800] test accuracy: 0.9251 time: 3.00


Sample: 100%|██████████| 100/100 [00:01, 73.38it/s, lr=2.00e-06]


Epoch [107/800] test accuracy: 0.9254 time: 3.45


Sample: 100%|██████████| 100/100 [00:01, 63.41it/s, lr=2.00e-06]


Epoch [108/800] test accuracy: 0.9258 time: 3.11


Sample: 100%|██████████| 100/100 [00:03, 26.73it/s, lr=2.00e-06]


Epoch [109/800] test accuracy: 0.9261 time: 3.48


Sample: 100%|██████████| 100/100 [00:01, 62.22it/s, lr=2.00e-06]


Epoch [110/800] test accuracy: 0.9263 time: 3.09


Sample: 100%|██████████| 100/100 [00:01, 56.67it/s, lr=2.00e-06]


Epoch [111/800] test accuracy: 0.9269 time: 3.52


Sample: 100%|██████████| 100/100 [00:01, 57.89it/s, lr=2.00e-06]


Epoch [112/800] test accuracy: 0.9269 time: 2.95


Sample: 100%|██████████| 100/100 [00:01, 57.06it/s, lr=2.00e-06]


Epoch [113/800] test accuracy: 0.9265 time: 3.41


Sample: 100%|██████████| 100/100 [00:02, 48.88it/s, lr=2.00e-06]


Epoch [114/800] test accuracy: 0.9266 time: 3.96


Sample: 100%|██████████| 100/100 [00:02, 49.69it/s, lr=2.00e-06]


Epoch [115/800] test accuracy: 0.9270 time: 3.36


Sample: 100%|██████████| 100/100 [00:02, 42.48it/s, lr=2.00e-06]


Epoch [116/800] test accuracy: 0.9276 time: 4.32


Sample: 100%|██████████| 100/100 [00:02, 41.96it/s, lr=2.00e-06]


Epoch [117/800] test accuracy: 0.9274 time: 3.21


Sample: 100%|██████████| 100/100 [00:01, 60.34it/s, lr=2.00e-06]


Epoch [118/800] test accuracy: 0.9280 time: 3.60


Sample: 100%|██████████| 100/100 [00:01, 52.85it/s, lr=2.00e-06]


Epoch [119/800] test accuracy: 0.9286 time: 3.25


Sample: 100%|██████████| 100/100 [00:01, 73.05it/s, lr=2.00e-06]


Epoch [120/800] test accuracy: 0.9287 time: 3.45


Sample: 100%|██████████| 100/100 [00:02, 39.10it/s, lr=2.00e-06]


Epoch [121/800] test accuracy: 0.9287 time: 3.78


Sample: 100%|██████████| 100/100 [00:01, 55.22it/s, lr=2.00e-06]


Epoch [122/800] test accuracy: 0.9289 time: 3.01


Sample: 100%|██████████| 100/100 [00:01, 53.58it/s, lr=2.00e-06]


Epoch [123/800] test accuracy: 0.9292 time: 3.43


Sample: 100%|██████████| 100/100 [00:01, 64.26it/s, lr=2.00e-06]


Epoch [124/800] test accuracy: 0.9293 time: 2.87


Sample: 100%|██████████| 100/100 [00:01, 52.74it/s, lr=2.00e-06]


Epoch [125/800] test accuracy: 0.9300 time: 3.01


Sample: 100%|██████████| 100/100 [00:01, 57.95it/s, lr=2.00e-06]


Epoch [126/800] test accuracy: 0.9298 time: 2.87


Sample: 100%|██████████| 100/100 [00:01, 55.53it/s, lr=2.00e-06]


Epoch [127/800] test accuracy: 0.9301 time: 3.10


Sample: 100%|██████████| 100/100 [00:01, 51.34it/s, lr=2.00e-06]


Epoch [128/800] test accuracy: 0.9300 time: 3.49


Sample: 100%|██████████| 100/100 [00:01, 56.57it/s, lr=2.00e-06]


Epoch [129/800] test accuracy: 0.9301 time: 3.41


Sample: 100%|██████████| 100/100 [00:01, 52.38it/s, lr=2.00e-06]


Epoch [130/800] test accuracy: 0.9302 time: 4.07


Sample: 100%|██████████| 100/100 [00:01, 64.86it/s, lr=2.00e-06]


Epoch [131/800] test accuracy: 0.9303 time: 3.67


Sample: 100%|██████████| 100/100 [00:01, 57.99it/s, lr=2.00e-06]


Epoch [132/800] test accuracy: 0.9304 time: 3.32


Sample: 100%|██████████| 100/100 [00:02, 40.19it/s, lr=2.00e-06]


Epoch [133/800] test accuracy: 0.9303 time: 3.18


Sample: 100%|██████████| 100/100 [00:01, 68.34it/s, lr=2.00e-06]


Epoch [134/800] test accuracy: 0.9302 time: 3.07


Sample: 100%|██████████| 100/100 [00:01, 67.62it/s, lr=2.00e-06]


Epoch [135/800] test accuracy: 0.9304 time: 3.44


Sample: 100%|██████████| 100/100 [00:01, 50.30it/s, lr=2.00e-06]


Epoch [136/800] test accuracy: 0.9303 time: 3.53


Sample: 100%|██████████| 100/100 [00:01, 61.98it/s, lr=2.00e-06]


Epoch [137/800] test accuracy: 0.9302 time: 4.21


Sample: 100%|██████████| 100/100 [00:02, 40.81it/s, lr=2.00e-06]


Epoch [138/800] test accuracy: 0.9303 time: 6.48


Sample: 100%|██████████| 100/100 [00:01, 60.07it/s, lr=2.00e-06]


Epoch [139/800] test accuracy: 0.9304 time: 3.22


Sample: 100%|██████████| 100/100 [00:01, 52.80it/s, lr=2.00e-06]


Epoch [140/800] test accuracy: 0.9306 time: 3.07


Sample: 100%|██████████| 100/100 [00:01, 65.63it/s, lr=2.00e-06]


Epoch [141/800] test accuracy: 0.9308 time: 3.69


Sample: 100%|██████████| 100/100 [00:01, 60.82it/s, lr=2.00e-06]


Epoch [142/800] test accuracy: 0.9311 time: 3.39


Sample: 100%|██████████| 100/100 [00:02, 46.63it/s, lr=2.00e-06]


Epoch [143/800] test accuracy: 0.9314 time: 3.32


Sample: 100%|██████████| 100/100 [00:01, 52.30it/s, lr=2.00e-06]


Epoch [144/800] test accuracy: 0.9313 time: 3.47


Sample: 100%|██████████| 100/100 [00:01, 57.87it/s, lr=2.00e-06]


Epoch [145/800] test accuracy: 0.9314 time: 3.05


Sample: 100%|██████████| 100/100 [00:01, 71.63it/s, lr=2.00e-06]


Epoch [146/800] test accuracy: 0.9315 time: 3.50


Sample: 100%|██████████| 100/100 [00:01, 61.51it/s, lr=2.00e-06]


Epoch [147/800] test accuracy: 0.9314 time: 3.48


Sample: 100%|██████████| 100/100 [00:01, 70.76it/s, lr=2.00e-06]


Epoch [148/800] test accuracy: 0.9314 time: 2.75


Sample: 100%|██████████| 100/100 [00:01, 88.00it/s, lr=2.00e-06]


Epoch [149/800] test accuracy: 0.9315 time: 3.54


Sample: 100%|██████████| 100/100 [00:01, 75.61it/s, lr=2.00e-06]


Epoch [150/800] test accuracy: 0.9316 time: 4.01


Sample: 100%|██████████| 100/100 [00:01, 61.52it/s, lr=2.00e-06]


Epoch [151/800] test accuracy: 0.9318 time: 3.57


Sample: 100%|██████████| 100/100 [00:01, 65.88it/s, lr=2.00e-06]


Epoch [152/800] test accuracy: 0.9319 time: 2.94


Sample: 100%|██████████| 100/100 [00:01, 52.94it/s, lr=2.00e-06]


Epoch [153/800] test accuracy: 0.9322 time: 3.20


Sample: 100%|██████████| 100/100 [00:01, 69.53it/s, lr=2.00e-06]


Epoch [154/800] test accuracy: 0.9320 time: 3.53


Sample: 100%|██████████| 100/100 [00:01, 62.54it/s, lr=2.00e-06]


Epoch [155/800] test accuracy: 0.9320 time: 3.01


Sample: 100%|██████████| 100/100 [00:01, 62.94it/s, lr=2.00e-06]


Epoch [156/800] test accuracy: 0.9322 time: 3.56


Sample: 100%|██████████| 100/100 [00:01, 67.67it/s, lr=2.00e-06]


Epoch [157/800] test accuracy: 0.9324 time: 3.68


Sample: 100%|██████████| 100/100 [00:01, 63.95it/s, lr=2.00e-06]


Epoch [158/800] test accuracy: 0.9324 time: 3.29


Sample: 100%|██████████| 100/100 [00:01, 60.88it/s, lr=2.00e-06]


Epoch [159/800] test accuracy: 0.9325 time: 3.91


Sample: 100%|██████████| 100/100 [00:01, 60.71it/s, lr=2.00e-06]


Epoch [160/800] test accuracy: 0.9326 time: 3.10


Sample: 100%|██████████| 100/100 [00:01, 74.92it/s, lr=2.00e-06]


Epoch [161/800] test accuracy: 0.9327 time: 3.25


Sample: 100%|██████████| 100/100 [00:01, 87.97it/s, lr=2.00e-06]


Epoch [162/800] test accuracy: 0.9322 time: 2.96


Sample: 100%|██████████| 100/100 [00:01, 79.12it/s, lr=2.00e-06]


Epoch [163/800] test accuracy: 0.9320 time: 2.78


Sample: 100%|██████████| 100/100 [00:01, 64.75it/s, lr=2.00e-06]


Epoch [164/800] test accuracy: 0.9323 time: 3.29


Sample: 100%|██████████| 100/100 [00:01, 76.76it/s, lr=2.00e-06]


Epoch [165/800] test accuracy: 0.9325 time: 2.94


Sample: 100%|██████████| 100/100 [00:01, 73.37it/s, lr=2.00e-06]


Epoch [166/800] test accuracy: 0.9327 time: 2.80


Sample: 100%|██████████| 100/100 [00:01, 61.17it/s, lr=2.00e-06]


Epoch [167/800] test accuracy: 0.9329 time: 3.04


Sample: 100%|██████████| 100/100 [00:01, 84.46it/s, lr=2.00e-06]


Epoch [168/800] test accuracy: 0.9328 time: 3.31


Sample: 100%|██████████| 100/100 [00:01, 65.23it/s, lr=2.00e-06]


Epoch [169/800] test accuracy: 0.9327 time: 3.39


Sample: 100%|██████████| 100/100 [00:01, 88.33it/s, lr=2.00e-06]


Epoch [170/800] test accuracy: 0.9331 time: 2.83


Sample: 100%|██████████| 100/100 [00:01, 88.58it/s, lr=2.00e-06]


Epoch [171/800] test accuracy: 0.9334 time: 3.37


Sample: 100%|██████████| 100/100 [00:01, 75.97it/s, lr=2.00e-06]


Epoch [172/800] test accuracy: 0.9334 time: 3.59


Sample: 100%|██████████| 100/100 [00:01, 77.89it/s, lr=2.00e-06]


Epoch [173/800] test accuracy: 0.9333 time: 2.94


Sample: 100%|██████████| 100/100 [00:01, 70.18it/s, lr=2.00e-06]


Epoch [174/800] test accuracy: 0.9332 time: 2.89


Sample: 100%|██████████| 100/100 [00:01, 74.34it/s, lr=2.00e-06]


Epoch [175/800] test accuracy: 0.9336 time: 3.12


Sample: 100%|██████████| 100/100 [00:01, 51.63it/s, lr=2.00e-06]


Epoch [176/800] test accuracy: 0.9339 time: 2.84


Sample: 100%|██████████| 100/100 [00:01, 68.91it/s, lr=2.00e-06]


Epoch [177/800] test accuracy: 0.9341 time: 3.08


Sample: 100%|██████████| 100/100 [00:01, 66.31it/s, lr=2.00e-06]


Epoch [178/800] test accuracy: 0.9341 time: 3.07


Sample: 100%|██████████| 100/100 [00:01, 69.92it/s, lr=2.00e-06]


Epoch [179/800] test accuracy: 0.9343 time: 2.86


Sample: 100%|██████████| 100/100 [00:01, 74.77it/s, lr=2.00e-06]


Epoch [180/800] test accuracy: 0.9344 time: 2.98


Sample: 100%|██████████| 100/100 [00:01, 73.00it/s, lr=2.00e-06]


Epoch [181/800] test accuracy: 0.9344 time: 3.38


Sample: 100%|██████████| 100/100 [00:01, 51.88it/s, lr=2.00e-06]


Epoch [182/800] test accuracy: 0.9346 time: 3.51


Sample: 100%|██████████| 100/100 [00:02, 33.76it/s, lr=2.00e-06]


Epoch [183/800] test accuracy: 0.9346 time: 3.71


Sample: 100%|██████████| 100/100 [00:02, 49.45it/s, lr=2.00e-06]


Epoch [184/800] test accuracy: 0.9346 time: 3.23


Sample: 100%|██████████| 100/100 [00:01, 59.22it/s, lr=2.00e-06]


Epoch [185/800] test accuracy: 0.9347 time: 3.35


Sample: 100%|██████████| 100/100 [00:01, 65.35it/s, lr=2.00e-06]


Epoch [186/800] test accuracy: 0.9349 time: 3.41


Sample: 100%|██████████| 100/100 [00:01, 52.29it/s, lr=2.00e-06]


Epoch [187/800] test accuracy: 0.9349 time: 3.14


Sample: 100%|██████████| 100/100 [00:02, 40.58it/s, lr=2.00e-06]


Epoch [188/800] test accuracy: 0.9351 time: 3.74


Sample: 100%|██████████| 100/100 [00:02, 47.10it/s, lr=2.00e-06]


Epoch [189/800] test accuracy: 0.9351 time: 3.22


Sample: 100%|██████████| 100/100 [00:01, 70.29it/s, lr=2.00e-06]


Epoch [190/800] test accuracy: 0.9351 time: 3.03


Sample: 100%|██████████| 100/100 [00:01, 61.90it/s, lr=2.00e-06]


Epoch [191/800] test accuracy: 0.9352 time: 2.86


Sample: 100%|██████████| 100/100 [00:01, 79.28it/s, lr=2.00e-06]


Epoch [192/800] test accuracy: 0.9352 time: 3.70


Sample: 100%|██████████| 100/100 [00:01, 64.66it/s, lr=2.00e-06]


Epoch [193/800] test accuracy: 0.9353 time: 3.41


Sample: 100%|██████████| 100/100 [00:03, 33.09it/s, lr=2.00e-06]


Epoch [194/800] test accuracy: 0.9355 time: 3.57


Sample: 100%|██████████| 100/100 [00:01, 67.92it/s, lr=2.00e-06]


Epoch [195/800] test accuracy: 0.9355 time: 3.10


Sample: 100%|██████████| 100/100 [00:01, 69.75it/s, lr=2.00e-06]


Epoch [196/800] test accuracy: 0.9356 time: 2.88


Sample: 100%|██████████| 100/100 [00:01, 71.95it/s, lr=2.00e-06]


Epoch [197/800] test accuracy: 0.9356 time: 3.34


Sample: 100%|██████████| 100/100 [00:01, 56.76it/s, lr=2.00e-06]


Epoch [198/800] test accuracy: 0.9357 time: 3.71


Sample: 100%|██████████| 100/100 [00:01, 69.26it/s, lr=2.00e-06]


Epoch [199/800] test accuracy: 0.9358 time: 3.01


Sample: 100%|██████████| 100/100 [00:01, 65.39it/s, lr=2.00e-06]


Epoch [200/800] test accuracy: 0.9359 time: 2.96


Sample: 100%|██████████| 100/100 [00:01, 68.14it/s, lr=2.00e-06]


Epoch [201/800] test accuracy: 0.9360 time: 2.91


Sample: 100%|██████████| 100/100 [00:01, 68.72it/s, lr=2.00e-06]


Epoch [202/800] test accuracy: 0.9359 time: 3.13


Sample: 100%|██████████| 100/100 [00:01, 68.90it/s, lr=2.00e-06]


Epoch [203/800] test accuracy: 0.9361 time: 2.92


Sample: 100%|██████████| 100/100 [00:01, 68.96it/s, lr=2.00e-06]


Epoch [204/800] test accuracy: 0.9363 time: 3.06


Sample: 100%|██████████| 100/100 [00:01, 80.26it/s, lr=2.00e-06]


Epoch [205/800] test accuracy: 0.9365 time: 3.04


Sample: 100%|██████████| 100/100 [00:01, 65.89it/s, lr=2.00e-06]


Epoch [206/800] test accuracy: 0.9365 time: 2.81


Sample: 100%|██████████| 100/100 [00:02, 39.29it/s, lr=2.00e-06]


Epoch [207/800] test accuracy: 0.9365 time: 3.60


Sample: 100%|██████████| 100/100 [00:01, 67.49it/s, lr=2.00e-06]


Epoch [208/800] test accuracy: 0.9366 time: 2.89


Sample: 100%|██████████| 100/100 [00:01, 64.49it/s, lr=2.00e-06]


Epoch [209/800] test accuracy: 0.9363 time: 2.88


Sample: 100%|██████████| 100/100 [00:01, 80.92it/s, lr=2.00e-06]


Epoch [210/800] test accuracy: 0.9364 time: 2.94


Sample: 100%|██████████| 100/100 [00:01, 84.79it/s, lr=2.00e-06]


Epoch [211/800] test accuracy: 0.9364 time: 2.81


Sample: 100%|██████████| 100/100 [00:01, 71.69it/s, lr=2.00e-06]


Epoch [212/800] test accuracy: 0.9365 time: 2.95


Sample: 100%|██████████| 100/100 [00:01, 67.71it/s, lr=2.00e-06]


Epoch [213/800] test accuracy: 0.9367 time: 2.83


Sample: 100%|██████████| 100/100 [00:01, 76.49it/s, lr=2.00e-06]


Epoch [214/800] test accuracy: 0.9369 time: 2.86


Sample: 100%|██████████| 100/100 [00:01, 66.07it/s, lr=2.00e-06]


Epoch [215/800] test accuracy: 0.9370 time: 2.90


Sample: 100%|██████████| 100/100 [00:01, 65.66it/s, lr=2.00e-06]


Epoch [216/800] test accuracy: 0.9369 time: 3.71


Sample: 100%|██████████| 100/100 [00:01, 90.27it/s, lr=2.00e-06]


Epoch [217/800] test accuracy: 0.9371 time: 3.37


Sample: 100%|██████████| 100/100 [00:01, 76.52it/s, lr=2.00e-06]


Epoch [218/800] test accuracy: 0.9372 time: 2.77


Sample: 100%|██████████| 100/100 [00:01, 68.86it/s, lr=2.00e-06]


Epoch [219/800] test accuracy: 0.9371 time: 3.05


Sample: 100%|██████████| 100/100 [00:01, 61.52it/s, lr=2.00e-06]


Epoch [220/800] test accuracy: 0.9373 time: 2.82


Sample: 100%|██████████| 100/100 [00:01, 78.15it/s, lr=2.00e-06]


Epoch [221/800] test accuracy: 0.9373 time: 2.84


Sample: 100%|██████████| 100/100 [00:01, 67.44it/s, lr=2.00e-06]


Epoch [222/800] test accuracy: 0.9373 time: 2.87


Sample: 100%|██████████| 100/100 [00:01, 76.04it/s, lr=2.00e-06]


Epoch [223/800] test accuracy: 0.9375 time: 2.89


Sample: 100%|██████████| 100/100 [00:01, 58.96it/s, lr=2.00e-06]


Epoch [224/800] test accuracy: 0.9375 time: 3.40


Sample: 100%|██████████| 100/100 [00:01, 64.61it/s, lr=2.00e-06]


Epoch [225/800] test accuracy: 0.9377 time: 2.93


Sample: 100%|██████████| 100/100 [00:03, 32.05it/s, lr=2.00e-06]


Epoch [226/800] test accuracy: 0.9380 time: 3.36


Sample: 100%|██████████| 100/100 [00:01, 58.57it/s, lr=2.00e-06]


Epoch [227/800] test accuracy: 0.9382 time: 3.38


Sample: 100%|██████████| 100/100 [00:01, 68.57it/s, lr=2.00e-06]


Epoch [228/800] test accuracy: 0.9383 time: 3.37


Sample: 100%|██████████| 100/100 [00:01, 52.07it/s, lr=2.00e-06]


Epoch [229/800] test accuracy: 0.9382 time: 3.09


Sample: 100%|██████████| 100/100 [00:01, 63.81it/s, lr=2.00e-06]


Epoch [230/800] test accuracy: 0.9384 time: 3.00


Sample: 100%|██████████| 100/100 [00:01, 53.97it/s, lr=2.00e-06]


Epoch [231/800] test accuracy: 0.9383 time: 2.93


Sample: 100%|██████████| 100/100 [00:01, 61.98it/s, lr=2.00e-06]


Epoch [232/800] test accuracy: 0.9383 time: 3.21


Sample: 100%|██████████| 100/100 [00:01, 63.66it/s, lr=2.00e-06]


Epoch [233/800] test accuracy: 0.9383 time: 3.55


Sample: 100%|██████████| 100/100 [00:01, 66.63it/s, lr=2.00e-06]


Epoch [234/800] test accuracy: 0.9384 time: 4.48


Sample: 100%|██████████| 100/100 [00:01, 69.47it/s, lr=2.00e-06]


Epoch [235/800] test accuracy: 0.9387 time: 3.09


Sample: 100%|██████████| 100/100 [00:01, 69.47it/s, lr=2.00e-06]


Epoch [236/800] test accuracy: 0.9389 time: 2.78


Sample: 100%|██████████| 100/100 [00:01, 64.42it/s, lr=2.00e-06]


Epoch [237/800] test accuracy: 0.9390 time: 2.94


Sample: 100%|██████████| 100/100 [00:01, 64.05it/s, lr=2.00e-06]


Epoch [238/800] test accuracy: 0.9391 time: 2.84


Sample: 100%|██████████| 100/100 [00:01, 59.86it/s, lr=2.00e-06]


Epoch [239/800] test accuracy: 0.9390 time: 3.16


Sample: 100%|██████████| 100/100 [00:01, 84.79it/s, lr=2.00e-06]


Epoch [240/800] test accuracy: 0.9390 time: 2.92


Sample: 100%|██████████| 100/100 [00:01, 67.38it/s, lr=2.00e-06]


Epoch [241/800] test accuracy: 0.9390 time: 2.97


Sample: 100%|██████████| 100/100 [00:01, 67.90it/s, lr=2.00e-06]


Epoch [242/800] test accuracy: 0.9390 time: 2.80


Sample: 100%|██████████| 100/100 [00:01, 68.36it/s, lr=2.00e-06]


Epoch [243/800] test accuracy: 0.9390 time: 3.06


Sample: 100%|██████████| 100/100 [00:01, 80.59it/s, lr=2.00e-06]


Epoch [244/800] test accuracy: 0.9391 time: 3.24


Sample: 100%|██████████| 100/100 [00:01, 88.56it/s, lr=2.00e-06]


Epoch [245/800] test accuracy: 0.9391 time: 2.80


Sample: 100%|██████████| 100/100 [00:01, 60.92it/s, lr=2.00e-06]


Epoch [246/800] test accuracy: 0.9391 time: 2.85


Sample: 100%|██████████| 100/100 [00:01, 69.63it/s, lr=2.00e-06]


Epoch [247/800] test accuracy: 0.9392 time: 3.12


Sample: 100%|██████████| 100/100 [00:01, 53.05it/s, lr=2.00e-06]


Epoch [248/800] test accuracy: 0.9393 time: 3.43


Sample: 100%|██████████| 100/100 [00:01, 85.14it/s, lr=2.00e-06]


Epoch [249/800] test accuracy: 0.9393 time: 2.88


Sample: 100%|██████████| 100/100 [00:01, 86.74it/s, lr=2.00e-06]


Epoch [250/800] test accuracy: 0.9392 time: 2.85


Sample: 100%|██████████| 100/100 [00:01, 71.92it/s, lr=2.00e-06]


Epoch [251/800] test accuracy: 0.9390 time: 3.38


Sample: 100%|██████████| 100/100 [00:01, 64.67it/s, lr=2.00e-06]


Epoch [252/800] test accuracy: 0.9389 time: 3.25


Sample: 100%|██████████| 100/100 [00:01, 67.11it/s, lr=2.00e-06]


Epoch [253/800] test accuracy: 0.9389 time: 2.84


Sample: 100%|██████████| 100/100 [00:01, 68.11it/s, lr=2.00e-06]


Epoch [254/800] test accuracy: 0.9387 time: 3.62


Sample: 100%|██████████| 100/100 [00:01, 65.85it/s, lr=2.00e-06]


Epoch [255/800] test accuracy: 0.9390 time: 3.10


Sample: 100%|██████████| 100/100 [00:01, 63.24it/s, lr=2.00e-06]


Epoch [256/800] test accuracy: 0.9390 time: 2.79


Sample: 100%|██████████| 100/100 [00:01, 89.40it/s, lr=2.00e-06]


Epoch [257/800] test accuracy: 0.9390 time: 2.74


Sample: 100%|██████████| 100/100 [00:01, 85.08it/s, lr=2.00e-06]


Epoch [258/800] test accuracy: 0.9391 time: 2.76


Sample: 100%|██████████| 100/100 [00:01, 65.95it/s, lr=2.00e-06]


Epoch [259/800] test accuracy: 0.9390 time: 2.89


Sample: 100%|██████████| 100/100 [00:01, 78.00it/s, lr=2.00e-06]


Epoch [260/800] test accuracy: 0.9392 time: 2.80


Sample: 100%|██████████| 100/100 [00:01, 71.55it/s, lr=2.00e-06]


Epoch [261/800] test accuracy: 0.9394 time: 2.89


Sample: 100%|██████████| 100/100 [00:01, 69.52it/s, lr=2.00e-06]


Epoch [262/800] test accuracy: 0.9394 time: 3.84


Sample: 100%|██████████| 100/100 [00:01, 68.04it/s, lr=2.00e-06]


Epoch [263/800] test accuracy: 0.9394 time: 2.78


Sample: 100%|██████████| 100/100 [00:01, 68.81it/s, lr=2.00e-06]


Epoch [264/800] test accuracy: 0.9396 time: 3.23


Sample: 100%|██████████| 100/100 [00:01, 66.61it/s, lr=2.00e-06]


Epoch [265/800] test accuracy: 0.9398 time: 2.89


Sample: 100%|██████████| 100/100 [00:01, 88.98it/s, lr=2.00e-06]


Epoch [266/800] test accuracy: 0.9399 time: 2.81


Sample: 100%|██████████| 100/100 [00:01, 73.54it/s, lr=2.00e-06]


Epoch [267/800] test accuracy: 0.9398 time: 3.56


Sample: 100%|██████████| 100/100 [00:01, 74.19it/s, lr=2.00e-06]


Epoch [268/800] test accuracy: 0.9398 time: 3.02


Sample: 100%|██████████| 100/100 [00:01, 74.23it/s, lr=2.00e-06]


Epoch [269/800] test accuracy: 0.9399 time: 3.70


Sample: 100%|██████████| 100/100 [00:01, 60.15it/s, lr=2.00e-06]


Epoch [270/800] test accuracy: 0.9400 time: 3.06


Sample: 100%|██████████| 100/100 [00:01, 68.54it/s, lr=2.00e-06]


Epoch [271/800] test accuracy: 0.9401 time: 3.53


Sample: 100%|██████████| 100/100 [00:01, 68.83it/s, lr=2.00e-06]


Epoch [272/800] test accuracy: 0.9404 time: 3.04


Sample: 100%|██████████| 100/100 [00:01, 61.27it/s, lr=2.00e-06]


Epoch [273/800] test accuracy: 0.9403 time: 2.97


Sample: 100%|██████████| 100/100 [00:01, 78.09it/s, lr=2.00e-06]


Epoch [274/800] test accuracy: 0.9404 time: 3.04


Sample: 100%|██████████| 100/100 [00:01, 70.06it/s, lr=2.00e-06]


Epoch [275/800] test accuracy: 0.9403 time: 3.30


Sample: 100%|██████████| 100/100 [00:01, 89.34it/s, lr=2.00e-06]


Epoch [276/800] test accuracy: 0.9406 time: 2.83


Sample: 100%|██████████| 100/100 [00:01, 57.93it/s, lr=2.00e-06]


Epoch [277/800] test accuracy: 0.9408 time: 3.41


Sample: 100%|██████████| 100/100 [00:01, 75.49it/s, lr=2.00e-06]


Epoch [278/800] test accuracy: 0.9409 time: 4.45


Sample: 100%|██████████| 100/100 [00:02, 42.38it/s, lr=2.00e-06]


Epoch [279/800] test accuracy: 0.9411 time: 4.56


Sample: 100%|██████████| 100/100 [00:01, 62.71it/s, lr=2.00e-06]


Epoch [280/800] test accuracy: 0.9411 time: 3.81


Sample: 100%|██████████| 100/100 [00:01, 60.00it/s, lr=2.00e-06]


Epoch [281/800] test accuracy: 0.9411 time: 3.00


Sample: 100%|██████████| 100/100 [00:01, 66.20it/s, lr=2.00e-06]


Epoch [282/800] test accuracy: 0.9411 time: 3.05


Sample: 100%|██████████| 100/100 [00:01, 71.73it/s, lr=2.00e-06]


Epoch [283/800] test accuracy: 0.9410 time: 2.96


Sample: 100%|██████████| 100/100 [00:02, 43.58it/s, lr=2.00e-06]


Epoch [284/800] test accuracy: 0.9411 time: 3.37


Sample: 100%|██████████| 100/100 [00:01, 71.80it/s, lr=2.00e-06]


Epoch [285/800] test accuracy: 0.9412 time: 3.12


Sample: 100%|██████████| 100/100 [00:02, 46.01it/s, lr=2.00e-06]


Epoch [286/800] test accuracy: 0.9412 time: 3.02


Sample: 100%|██████████| 100/100 [00:01, 63.51it/s, lr=2.00e-06]


Epoch [287/800] test accuracy: 0.9413 time: 3.66


Sample: 100%|██████████| 100/100 [00:01, 62.80it/s, lr=2.00e-06]


Epoch [288/800] test accuracy: 0.9415 time: 3.67


Sample: 100%|██████████| 100/100 [00:01, 60.18it/s, lr=2.00e-06]


Epoch [289/800] test accuracy: 0.9414 time: 3.55


Sample: 100%|██████████| 100/100 [00:01, 58.87it/s, lr=2.00e-06]


Epoch [290/800] test accuracy: 0.9415 time: 3.35


Sample: 100%|██████████| 100/100 [00:01, 65.87it/s, lr=2.00e-06]


Epoch [291/800] test accuracy: 0.9417 time: 2.93


Sample: 100%|██████████| 100/100 [00:01, 62.68it/s, lr=2.00e-06]


Epoch [292/800] test accuracy: 0.9418 time: 3.10


Sample: 100%|██████████| 100/100 [00:01, 83.21it/s, lr=2.00e-06]


Epoch [293/800] test accuracy: 0.9419 time: 2.97


Sample: 100%|██████████| 100/100 [00:01, 65.28it/s, lr=2.00e-06]


Epoch [294/800] test accuracy: 0.9419 time: 3.09


Sample: 100%|██████████| 100/100 [00:01, 61.95it/s, lr=2.00e-06]


Epoch [295/800] test accuracy: 0.9420 time: 2.99


Sample: 100%|██████████| 100/100 [00:01, 68.07it/s, lr=2.00e-06]


Epoch [296/800] test accuracy: 0.9420 time: 3.28


Sample: 100%|██████████| 100/100 [00:01, 65.35it/s, lr=2.00e-06]


Epoch [297/800] test accuracy: 0.9420 time: 3.11


Sample: 100%|██████████| 100/100 [00:01, 79.08it/s, lr=2.00e-06]


Epoch [298/800] test accuracy: 0.9420 time: 2.85


Sample: 100%|██████████| 100/100 [00:01, 63.71it/s, lr=2.00e-06]


Epoch [299/800] test accuracy: 0.9420 time: 3.03


Sample: 100%|██████████| 100/100 [00:01, 74.21it/s, lr=2.00e-06]


Epoch [300/800] test accuracy: 0.9421 time: 2.98


Sample: 100%|██████████| 100/100 [00:01, 70.95it/s, lr=2.00e-06]


Epoch [301/800] test accuracy: 0.9422 time: 2.81


Sample: 100%|██████████| 100/100 [00:01, 82.85it/s, lr=2.00e-06]


Epoch [302/800] test accuracy: 0.9425 time: 2.79


Sample: 100%|██████████| 100/100 [00:01, 82.36it/s, lr=2.00e-06]


Epoch [303/800] test accuracy: 0.9426 time: 2.76


Sample: 100%|██████████| 100/100 [00:01, 74.91it/s, lr=2.00e-06]


Epoch [304/800] test accuracy: 0.9426 time: 2.88


Sample: 100%|██████████| 100/100 [00:01, 72.77it/s, lr=2.00e-06]


Epoch [305/800] test accuracy: 0.9426 time: 3.00


Sample: 100%|██████████| 100/100 [00:01, 68.59it/s, lr=2.00e-06]


Epoch [306/800] test accuracy: 0.9427 time: 3.22


Sample: 100%|██████████| 100/100 [00:01, 80.38it/s, lr=2.00e-06]


Epoch [307/800] test accuracy: 0.9428 time: 3.04


Sample: 100%|██████████| 100/100 [00:01, 66.95it/s, lr=2.00e-06]


Epoch [308/800] test accuracy: 0.9428 time: 3.21


Sample: 100%|██████████| 100/100 [00:01, 77.37it/s, lr=2.00e-06]


Epoch [309/800] test accuracy: 0.9427 time: 3.39


Sample: 100%|██████████| 100/100 [00:01, 59.93it/s, lr=2.00e-06]


Epoch [310/800] test accuracy: 0.9426 time: 3.03


Sample: 100%|██████████| 100/100 [00:01, 80.40it/s, lr=2.00e-06]


Epoch [311/800] test accuracy: 0.9427 time: 3.59


Sample: 100%|██████████| 100/100 [00:01, 67.98it/s, lr=2.00e-06]


Epoch [312/800] test accuracy: 0.9429 time: 4.05


Sample: 100%|██████████| 100/100 [00:01, 66.41it/s, lr=2.00e-06]


Epoch [313/800] test accuracy: 0.9430 time: 3.36


Sample: 100%|██████████| 100/100 [00:01, 63.73it/s, lr=2.00e-06]


Epoch [314/800] test accuracy: 0.9430 time: 3.18


Sample: 100%|██████████| 100/100 [00:01, 84.02it/s, lr=2.00e-06]


Epoch [315/800] test accuracy: 0.9432 time: 2.84


Sample: 100%|██████████| 100/100 [00:01, 89.61it/s, lr=2.00e-06]


Epoch [316/800] test accuracy: 0.9434 time: 2.84


Sample: 100%|██████████| 100/100 [00:01, 85.84it/s, lr=2.00e-06]


Epoch [317/800] test accuracy: 0.9435 time: 2.88


Sample: 100%|██████████| 100/100 [00:01, 64.30it/s, lr=2.00e-06]


Epoch [318/800] test accuracy: 0.9437 time: 2.89


Sample: 100%|██████████| 100/100 [00:01, 72.54it/s, lr=2.00e-06]


Epoch [319/800] test accuracy: 0.9437 time: 3.42


Sample: 100%|██████████| 100/100 [00:01, 70.39it/s, lr=2.00e-06]


Epoch [320/800] test accuracy: 0.9438 time: 2.98


Sample: 100%|██████████| 100/100 [00:01, 75.43it/s, lr=2.00e-06]


Epoch [321/800] test accuracy: 0.9439 time: 3.13


Sample: 100%|██████████| 100/100 [00:01, 64.97it/s, lr=2.00e-06]


Epoch [322/800] test accuracy: 0.9438 time: 3.81


Sample: 100%|██████████| 100/100 [00:01, 60.08it/s, lr=2.00e-06]


Epoch [323/800] test accuracy: 0.9437 time: 3.93


Sample: 100%|██████████| 100/100 [00:01, 56.56it/s, lr=2.00e-06]


Epoch [324/800] test accuracy: 0.9437 time: 4.17


Sample: 100%|██████████| 100/100 [00:02, 44.22it/s, lr=2.00e-06]


Epoch [325/800] test accuracy: 0.9436 time: 3.44


Sample: 100%|██████████| 100/100 [00:01, 52.63it/s, lr=2.00e-06]


Epoch [326/800] test accuracy: 0.9437 time: 3.56


Sample: 100%|██████████| 100/100 [00:01, 60.36it/s, lr=2.00e-06]


Epoch [327/800] test accuracy: 0.9437 time: 3.10


Sample: 100%|██████████| 100/100 [00:01, 73.37it/s, lr=2.00e-06]


Epoch [328/800] test accuracy: 0.9438 time: 2.75


Sample: 100%|██████████| 100/100 [00:01, 87.93it/s, lr=2.00e-06]


Epoch [329/800] test accuracy: 0.9437 time: 2.77


Sample: 100%|██████████| 100/100 [00:01, 72.34it/s, lr=2.00e-06]


Epoch [330/800] test accuracy: 0.9437 time: 2.95


Sample: 100%|██████████| 100/100 [00:01, 83.08it/s, lr=2.00e-06]


Epoch [331/800] test accuracy: 0.9437 time: 3.29


Sample: 100%|██████████| 100/100 [00:01, 73.17it/s, lr=2.00e-06]


Epoch [332/800] test accuracy: 0.9438 time: 3.64


Sample: 100%|██████████| 100/100 [00:01, 79.19it/s, lr=2.00e-06]


Epoch [333/800] test accuracy: 0.9438 time: 2.72


Sample: 100%|██████████| 100/100 [00:01, 70.62it/s, lr=2.00e-06]


Epoch [334/800] test accuracy: 0.9438 time: 2.82


Sample: 100%|██████████| 100/100 [00:01, 71.48it/s, lr=2.00e-06]


Epoch [335/800] test accuracy: 0.9438 time: 2.84


Sample: 100%|██████████| 100/100 [00:01, 69.87it/s, lr=2.00e-06]


Epoch [336/800] test accuracy: 0.9438 time: 3.39


Sample: 100%|██████████| 100/100 [00:01, 67.07it/s, lr=2.00e-06]


Epoch [337/800] test accuracy: 0.9437 time: 3.30


Sample: 100%|██████████| 100/100 [00:01, 63.15it/s, lr=2.00e-06]


Epoch [338/800] test accuracy: 0.9438 time: 3.33


Sample: 100%|██████████| 100/100 [00:01, 62.87it/s, lr=2.00e-06]


Epoch [339/800] test accuracy: 0.9438 time: 3.10


Sample: 100%|██████████| 100/100 [00:01, 87.12it/s, lr=2.00e-06]


Epoch [340/800] test accuracy: 0.9437 time: 3.26


Sample: 100%|██████████| 100/100 [00:01, 69.09it/s, lr=2.00e-06]


Epoch [341/800] test accuracy: 0.9437 time: 3.15


Sample: 100%|██████████| 100/100 [00:01, 60.02it/s, lr=2.00e-06]


Epoch [342/800] test accuracy: 0.9437 time: 3.13


Sample: 100%|██████████| 100/100 [00:01, 73.31it/s, lr=2.00e-06]


Epoch [343/800] test accuracy: 0.9437 time: 3.25


Sample: 100%|██████████| 100/100 [00:01, 74.24it/s, lr=2.00e-06]


Epoch [344/800] test accuracy: 0.9438 time: 3.11


Sample: 100%|██████████| 100/100 [00:01, 67.23it/s, lr=2.00e-06]


Epoch [345/800] test accuracy: 0.9436 time: 2.87


Sample: 100%|██████████| 100/100 [00:01, 70.15it/s, lr=2.00e-06]


Epoch [346/800] test accuracy: 0.9438 time: 2.88


Sample: 100%|██████████| 100/100 [00:01, 73.00it/s, lr=2.00e-06]


Epoch [347/800] test accuracy: 0.9439 time: 2.92


Sample: 100%|██████████| 100/100 [00:01, 75.81it/s, lr=2.00e-06]


Epoch [348/800] test accuracy: 0.9440 time: 2.85


Sample: 100%|██████████| 100/100 [00:01, 73.28it/s, lr=2.00e-06]


Epoch [349/800] test accuracy: 0.9441 time: 3.71


Sample: 100%|██████████| 100/100 [00:01, 68.41it/s, lr=2.00e-06]


Epoch [350/800] test accuracy: 0.9441 time: 2.88


Sample: 100%|██████████| 100/100 [00:01, 64.84it/s, lr=2.00e-06]


Epoch [351/800] test accuracy: 0.9443 time: 2.82


Sample: 100%|██████████| 100/100 [00:01, 69.22it/s, lr=2.00e-06]


Epoch [352/800] test accuracy: 0.9441 time: 2.88


Sample: 100%|██████████| 100/100 [00:01, 78.21it/s, lr=2.00e-06]


Epoch [353/800] test accuracy: 0.9441 time: 2.74


Sample: 100%|██████████| 100/100 [00:01, 85.88it/s, lr=2.00e-06]


Epoch [354/800] test accuracy: 0.9444 time: 3.08


Sample: 100%|██████████| 100/100 [00:01, 74.39it/s, lr=2.00e-06]


Epoch [355/800] test accuracy: 0.9445 time: 2.98


Sample: 100%|██████████| 100/100 [00:01, 61.81it/s, lr=2.00e-06]


Epoch [356/800] test accuracy: 0.9444 time: 3.13


Sample: 100%|██████████| 100/100 [00:01, 64.07it/s, lr=2.00e-06]


Epoch [357/800] test accuracy: 0.9447 time: 2.86


Sample: 100%|██████████| 100/100 [00:01, 85.90it/s, lr=2.00e-06]


Epoch [358/800] test accuracy: 0.9446 time: 2.78


Sample: 100%|██████████| 100/100 [00:01, 53.30it/s, lr=2.00e-06]


Epoch [359/800] test accuracy: 0.9443 time: 3.10


Sample: 100%|██████████| 100/100 [00:01, 76.57it/s, lr=2.00e-06]


Epoch [360/800] test accuracy: 0.9444 time: 3.13


Sample: 100%|██████████| 100/100 [00:01, 69.19it/s, lr=2.00e-06]


Epoch [361/800] test accuracy: 0.9443 time: 2.94


Sample: 100%|██████████| 100/100 [00:01, 80.38it/s, lr=2.00e-06]


Epoch [362/800] test accuracy: 0.9445 time: 3.12


Sample: 100%|██████████| 100/100 [00:01, 73.50it/s, lr=2.00e-06]


Epoch [363/800] test accuracy: 0.9444 time: 2.88


Sample: 100%|██████████| 100/100 [00:01, 60.17it/s, lr=2.00e-06]


Epoch [364/800] test accuracy: 0.9444 time: 2.84


Sample: 100%|██████████| 100/100 [00:01, 75.62it/s, lr=2.00e-06]


Epoch [365/800] test accuracy: 0.9443 time: 2.90


Sample: 100%|██████████| 100/100 [00:01, 51.89it/s, lr=2.00e-06]


Epoch [366/800] test accuracy: 0.9444 time: 3.35


Sample: 100%|██████████| 100/100 [00:01, 81.85it/s, lr=2.00e-06]


Epoch [367/800] test accuracy: 0.9443 time: 2.90


Sample: 100%|██████████| 100/100 [00:01, 84.36it/s, lr=2.00e-06]


Epoch [368/800] test accuracy: 0.9444 time: 3.18


Sample: 100%|██████████| 100/100 [00:01, 86.38it/s, lr=2.00e-06]


Epoch [369/800] test accuracy: 0.9443 time: 3.20


Sample: 100%|██████████| 100/100 [00:01, 79.59it/s, lr=2.00e-06]


Epoch [370/800] test accuracy: 0.9443 time: 2.88


Sample: 100%|██████████| 100/100 [00:01, 84.29it/s, lr=2.00e-06]


Epoch [371/800] test accuracy: 0.9442 time: 3.18


Sample: 100%|██████████| 100/100 [00:01, 76.33it/s, lr=2.00e-06]


Epoch [372/800] test accuracy: 0.9442 time: 3.10


Sample: 100%|██████████| 100/100 [00:01, 84.72it/s, lr=2.00e-06]


Epoch [373/800] test accuracy: 0.9443 time: 3.25


Sample: 100%|██████████| 100/100 [00:01, 73.14it/s, lr=2.00e-06]


Epoch [374/800] test accuracy: 0.9442 time: 4.09


Sample: 100%|██████████| 100/100 [00:01, 69.57it/s, lr=2.00e-06]


Epoch [375/800] test accuracy: 0.9440 time: 4.12


Sample: 100%|██████████| 100/100 [00:01, 66.92it/s, lr=2.00e-06]


Epoch [376/800] test accuracy: 0.9440 time: 3.04


Sample: 100%|██████████| 100/100 [00:01, 70.78it/s, lr=2.00e-06]


Epoch [377/800] test accuracy: 0.9440 time: 3.25


Sample: 100%|██████████| 100/100 [00:01, 66.98it/s, lr=2.00e-06]


Epoch [378/800] test accuracy: 0.9438 time: 3.04


Sample: 100%|██████████| 100/100 [00:01, 71.29it/s, lr=2.00e-06]


Epoch [379/800] test accuracy: 0.9438 time: 2.89


Sample: 100%|██████████| 100/100 [00:01, 59.29it/s, lr=2.00e-06]


Epoch [380/800] test accuracy: 0.9439 time: 3.17


Sample: 100%|██████████| 100/100 [00:01, 75.67it/s, lr=2.00e-06]


Epoch [381/800] test accuracy: 0.9442 time: 2.83


Sample: 100%|██████████| 100/100 [00:01, 57.21it/s, lr=2.00e-06]


Epoch [382/800] test accuracy: 0.9439 time: 3.27


Sample: 100%|██████████| 100/100 [00:01, 61.13it/s, lr=2.00e-06]


Epoch [383/800] test accuracy: 0.9439 time: 3.31


Sample: 100%|██████████| 100/100 [00:01, 67.79it/s, lr=2.00e-06]


Epoch [384/800] test accuracy: 0.9438 time: 2.75


Sample: 100%|██████████| 100/100 [00:01, 61.55it/s, lr=2.00e-06]


Epoch [385/800] test accuracy: 0.9438 time: 3.58


Sample: 100%|██████████| 100/100 [00:01, 65.50it/s, lr=2.00e-06]


Epoch [386/800] test accuracy: 0.9439 time: 3.49


Sample: 100%|██████████| 100/100 [00:03, 26.78it/s, lr=2.00e-06]


Epoch [387/800] test accuracy: 0.9438 time: 3.58


Sample: 100%|██████████| 100/100 [00:01, 57.60it/s, lr=2.00e-06]


Epoch [388/800] test accuracy: 0.9440 time: 3.33


Sample: 100%|██████████| 100/100 [00:01, 60.71it/s, lr=2.00e-06]


Epoch [389/800] test accuracy: 0.9439 time: 3.41


Sample: 100%|██████████| 100/100 [00:01, 70.16it/s, lr=2.00e-06]


Epoch [390/800] test accuracy: 0.9441 time: 3.34


Sample: 100%|██████████| 100/100 [00:01, 55.35it/s, lr=2.00e-06]


Epoch [391/800] test accuracy: 0.9440 time: 3.28


Sample: 100%|██████████| 100/100 [00:01, 58.01it/s, lr=2.00e-06]


Epoch [392/800] test accuracy: 0.9442 time: 3.66


Sample: 100%|██████████| 100/100 [00:02, 36.03it/s, lr=2.00e-06]


Epoch [393/800] test accuracy: 0.9444 time: 2.75


Sample: 100%|██████████| 100/100 [00:01, 60.80it/s, lr=2.00e-06]


Epoch [394/800] test accuracy: 0.9443 time: 3.24


Sample: 100%|██████████| 100/100 [00:01, 55.83it/s, lr=2.00e-06]


Epoch [395/800] test accuracy: 0.9444 time: 3.96


Sample: 100%|██████████| 100/100 [00:01, 63.82it/s, lr=2.00e-06]


Epoch [396/800] test accuracy: 0.9444 time: 3.49


Sample: 100%|██████████| 100/100 [00:01, 60.41it/s, lr=2.00e-06]


Epoch [397/800] test accuracy: 0.9445 time: 2.87


Sample: 100%|██████████| 100/100 [00:01, 63.72it/s, lr=2.00e-06]


Epoch [398/800] test accuracy: 0.9445 time: 2.98


Sample: 100%|██████████| 100/100 [00:01, 69.44it/s, lr=2.00e-06]


Epoch [399/800] test accuracy: 0.9446 time: 2.76


Sample: 100%|██████████| 100/100 [00:01, 69.94it/s, lr=2.00e-06]


Epoch [400/800] test accuracy: 0.9446 time: 3.98


Sample: 100%|██████████| 100/100 [00:01, 81.49it/s, lr=2.00e-06]


Epoch [401/800] test accuracy: 0.9446 time: 2.88


Sample: 100%|██████████| 100/100 [00:01, 59.21it/s, lr=2.00e-06]


Epoch [402/800] test accuracy: 0.9448 time: 3.71


Sample: 100%|██████████| 100/100 [00:01, 64.82it/s, lr=2.00e-06]


Epoch [403/800] test accuracy: 0.9449 time: 3.95


Sample: 100%|██████████| 100/100 [00:02, 44.28it/s, lr=2.00e-06]


Epoch [404/800] test accuracy: 0.9448 time: 3.56


Sample: 100%|██████████| 100/100 [00:01, 88.60it/s, lr=2.00e-06]


Epoch [405/800] test accuracy: 0.9449 time: 2.87


Sample: 100%|██████████| 100/100 [00:01, 86.22it/s, lr=2.00e-06]


Epoch [406/800] test accuracy: 0.9449 time: 3.20


Sample: 100%|██████████| 100/100 [00:01, 54.62it/s, lr=2.00e-06]


Epoch [407/800] test accuracy: 0.9448 time: 3.53


Sample: 100%|██████████| 100/100 [00:01, 68.66it/s, lr=2.00e-06]


Epoch [408/800] test accuracy: 0.9447 time: 3.16


Sample: 100%|██████████| 100/100 [00:01, 66.55it/s, lr=2.00e-06]


Epoch [409/800] test accuracy: 0.9447 time: 3.15


Sample: 100%|██████████| 100/100 [00:01, 70.89it/s, lr=2.00e-06]


Epoch [410/800] test accuracy: 0.9447 time: 2.93


Sample: 100%|██████████| 100/100 [00:01, 86.93it/s, lr=2.00e-06]


Epoch [411/800] test accuracy: 0.9448 time: 3.57


Sample: 100%|██████████| 100/100 [00:02, 46.58it/s, lr=2.00e-06]


Epoch [412/800] test accuracy: 0.9448 time: 4.47


Sample: 100%|██████████| 100/100 [00:02, 40.37it/s, lr=2.00e-06]


Epoch [413/800] test accuracy: 0.9449 time: 4.08


Sample: 100%|██████████| 100/100 [00:01, 69.17it/s, lr=2.00e-06]


Epoch [414/800] test accuracy: 0.9450 time: 3.15


Sample: 100%|██████████| 100/100 [00:02, 39.29it/s, lr=2.00e-06]


Epoch [415/800] test accuracy: 0.9450 time: 3.44


Sample: 100%|██████████| 100/100 [00:02, 41.66it/s, lr=2.00e-06]


Epoch [416/800] test accuracy: 0.9450 time: 3.61


Sample: 100%|██████████| 100/100 [00:01, 67.83it/s, lr=2.00e-06]


Epoch [417/800] test accuracy: 0.9451 time: 3.78


Sample: 100%|██████████| 100/100 [00:01, 69.49it/s, lr=2.00e-06]


Epoch [418/800] test accuracy: 0.9452 time: 3.40


Sample: 100%|██████████| 100/100 [00:01, 59.29it/s, lr=2.00e-06]


Epoch [419/800] test accuracy: 0.9452 time: 2.77


Sample: 100%|██████████| 100/100 [00:01, 56.96it/s, lr=2.00e-06]


Epoch [420/800] test accuracy: 0.9453 time: 3.76


Sample: 100%|██████████| 100/100 [00:01, 60.07it/s, lr=2.00e-06]


Epoch [421/800] test accuracy: 0.9455 time: 3.00


Sample: 100%|██████████| 100/100 [00:01, 79.75it/s, lr=2.00e-06]


Epoch [422/800] test accuracy: 0.9455 time: 3.20


Sample: 100%|██████████| 100/100 [00:01, 68.48it/s, lr=2.00e-06]


Epoch [423/800] test accuracy: 0.9455 time: 3.28


Sample: 100%|██████████| 100/100 [00:01, 69.11it/s, lr=2.00e-06]


Epoch [424/800] test accuracy: 0.9455 time: 2.84


Sample: 100%|██████████| 100/100 [00:01, 64.13it/s, lr=2.00e-06]


Epoch [425/800] test accuracy: 0.9458 time: 2.91


Sample: 100%|██████████| 100/100 [00:01, 82.11it/s, lr=2.00e-06]


Epoch [426/800] test accuracy: 0.9458 time: 2.95


Sample: 100%|██████████| 100/100 [00:01, 66.50it/s, lr=2.00e-06]


Epoch [427/800] test accuracy: 0.9459 time: 3.15


Sample: 100%|██████████| 100/100 [00:01, 59.59it/s, lr=2.00e-06]


Epoch [428/800] test accuracy: 0.9459 time: 3.81


Sample: 100%|██████████| 100/100 [00:01, 66.03it/s, lr=2.00e-06]


Epoch [429/800] test accuracy: 0.9459 time: 3.73


Sample: 100%|██████████| 100/100 [00:02, 48.25it/s, lr=2.00e-06]


Epoch [430/800] test accuracy: 0.9460 time: 2.88


Sample: 100%|██████████| 100/100 [00:01, 88.41it/s, lr=2.00e-06]


Epoch [431/800] test accuracy: 0.9459 time: 3.11


Sample: 100%|██████████| 100/100 [00:01, 88.66it/s, lr=2.00e-06]


Epoch [432/800] test accuracy: 0.9460 time: 2.74


Sample: 100%|██████████| 100/100 [00:01, 88.66it/s, lr=2.00e-06]


Epoch [433/800] test accuracy: 0.9461 time: 3.38


Sample: 100%|██████████| 100/100 [00:01, 89.27it/s, lr=2.00e-06]


Epoch [434/800] test accuracy: 0.9462 time: 2.79


Sample: 100%|██████████| 100/100 [00:01, 88.86it/s, lr=2.00e-06]


Epoch [435/800] test accuracy: 0.9461 time: 2.81


Sample: 100%|██████████| 100/100 [00:01, 74.28it/s, lr=2.00e-06]


Epoch [436/800] test accuracy: 0.9461 time: 3.15


Sample: 100%|██████████| 100/100 [00:01, 66.01it/s, lr=2.00e-06]


Epoch [437/800] test accuracy: 0.9463 time: 2.95


Sample: 100%|██████████| 100/100 [00:01, 76.46it/s, lr=2.00e-06]


Epoch [438/800] test accuracy: 0.9464 time: 4.22


Sample: 100%|██████████| 100/100 [00:01, 63.39it/s, lr=2.00e-06]


Epoch [439/800] test accuracy: 0.9464 time: 3.44


Sample: 100%|██████████| 100/100 [00:01, 64.72it/s, lr=2.00e-06]


Epoch [440/800] test accuracy: 0.9463 time: 2.87


Sample: 100%|██████████| 100/100 [00:01, 67.24it/s, lr=2.00e-06]


Epoch [441/800] test accuracy: 0.9464 time: 2.83


Sample: 100%|██████████| 100/100 [00:01, 80.28it/s, lr=2.00e-06]


Epoch [442/800] test accuracy: 0.9465 time: 2.77


Sample: 100%|██████████| 100/100 [00:01, 69.88it/s, lr=2.00e-06]


Epoch [443/800] test accuracy: 0.9466 time: 2.89


Sample: 100%|██████████| 100/100 [00:01, 60.48it/s, lr=2.00e-06]


Epoch [444/800] test accuracy: 0.9466 time: 2.89


Sample: 100%|██████████| 100/100 [00:01, 69.21it/s, lr=2.00e-06]


Epoch [445/800] test accuracy: 0.9469 time: 2.82


Sample: 100%|██████████| 100/100 [00:01, 63.08it/s, lr=2.00e-06]


Epoch [446/800] test accuracy: 0.9470 time: 2.74


Sample: 100%|██████████| 100/100 [00:01, 70.48it/s, lr=2.00e-06]


Epoch [447/800] test accuracy: 0.9472 time: 3.00


Sample: 100%|██████████| 100/100 [00:01, 57.20it/s, lr=2.00e-06]


Epoch [448/800] test accuracy: 0.9472 time: 3.18


Sample: 100%|██████████| 100/100 [00:01, 63.50it/s, lr=2.00e-06]


Epoch [449/800] test accuracy: 0.9472 time: 3.10


Sample: 100%|██████████| 100/100 [00:01, 70.46it/s, lr=2.00e-06]


Epoch [450/800] test accuracy: 0.9473 time: 2.83


Sample: 100%|██████████| 100/100 [00:01, 70.08it/s, lr=2.00e-06]


Epoch [451/800] test accuracy: 0.9475 time: 3.06


Sample: 100%|██████████| 100/100 [00:01, 63.61it/s, lr=2.00e-06]


Epoch [452/800] test accuracy: 0.9475 time: 3.37


Sample: 100%|██████████| 100/100 [00:01, 62.49it/s, lr=2.00e-06]


Epoch [453/800] test accuracy: 0.9477 time: 3.14


Sample: 100%|██████████| 100/100 [00:01, 68.21it/s, lr=2.00e-06]


Epoch [454/800] test accuracy: 0.9477 time: 2.73


Sample: 100%|██████████| 100/100 [00:01, 85.15it/s, lr=2.00e-06]


Epoch [455/800] test accuracy: 0.9477 time: 3.13


Sample: 100%|██████████| 100/100 [00:01, 77.26it/s, lr=2.00e-06]


Epoch [456/800] test accuracy: 0.9479 time: 3.00


Sample: 100%|██████████| 100/100 [00:01, 74.88it/s, lr=2.00e-06]


Epoch [457/800] test accuracy: 0.9480 time: 3.52


Sample: 100%|██████████| 100/100 [00:01, 64.42it/s, lr=2.00e-06]


Epoch [458/800] test accuracy: 0.9480 time: 4.07


Sample: 100%|██████████| 100/100 [00:01, 60.95it/s, lr=2.00e-06]


Epoch [459/800] test accuracy: 0.9480 time: 3.74


Sample: 100%|██████████| 100/100 [00:02, 40.86it/s, lr=2.00e-06]


Epoch [460/800] test accuracy: 0.9480 time: 3.78


Sample: 100%|██████████| 100/100 [00:02, 46.94it/s, lr=2.00e-06]


Epoch [461/800] test accuracy: 0.9481 time: 3.86


Sample: 100%|██████████| 100/100 [00:02, 40.79it/s, lr=2.00e-06]


Epoch [462/800] test accuracy: 0.9484 time: 3.63


Sample: 100%|██████████| 100/100 [00:01, 62.28it/s, lr=2.00e-06]


Epoch [463/800] test accuracy: 0.9484 time: 3.57


Sample: 100%|██████████| 100/100 [00:02, 41.94it/s, lr=2.00e-06]


Epoch [464/800] test accuracy: 0.9486 time: 3.85


Sample: 100%|██████████| 100/100 [00:01, 50.16it/s, lr=2.00e-06]


Epoch [465/800] test accuracy: 0.9487 time: 4.25


Sample: 100%|██████████| 100/100 [00:01, 67.65it/s, lr=2.00e-06]


Epoch [466/800] test accuracy: 0.9490 time: 3.05


Sample: 100%|██████████| 100/100 [00:01, 64.67it/s, lr=2.00e-06]


Epoch [467/800] test accuracy: 0.9490 time: 3.00


Sample: 100%|██████████| 100/100 [00:01, 84.19it/s, lr=2.00e-06]


Epoch [468/800] test accuracy: 0.9491 time: 2.83


Sample: 100%|██████████| 100/100 [00:01, 65.42it/s, lr=2.00e-06]


Epoch [469/800] test accuracy: 0.9490 time: 2.91


Sample: 100%|██████████| 100/100 [00:01, 65.55it/s, lr=2.00e-06]


Epoch [470/800] test accuracy: 0.9491 time: 3.25


Sample: 100%|██████████| 100/100 [00:01, 71.02it/s, lr=2.00e-06]


Epoch [471/800] test accuracy: 0.9492 time: 2.80


Sample: 100%|██████████| 100/100 [00:01, 90.05it/s, lr=2.00e-06]


Epoch [472/800] test accuracy: 0.9492 time: 2.79


Sample: 100%|██████████| 100/100 [00:01, 67.86it/s, lr=2.00e-06]


Epoch [473/800] test accuracy: 0.9492 time: 2.86


Sample: 100%|██████████| 100/100 [00:01, 76.81it/s, lr=2.00e-06]


Epoch [474/800] test accuracy: 0.9494 time: 3.41


Sample: 100%|██████████| 100/100 [00:01, 65.27it/s, lr=2.00e-06]


Epoch [475/800] test accuracy: 0.9494 time: 3.39


Sample: 100%|██████████| 100/100 [00:01, 69.36it/s, lr=2.00e-06]


Epoch [476/800] test accuracy: 0.9497 time: 2.98


Sample: 100%|██████████| 100/100 [00:01, 71.76it/s, lr=2.00e-06]


Epoch [477/800] test accuracy: 0.9497 time: 3.05


Sample: 100%|██████████| 100/100 [00:01, 65.34it/s, lr=2.00e-06]


Epoch [478/800] test accuracy: 0.9499 time: 3.09


Sample: 100%|██████████| 100/100 [00:01, 61.02it/s, lr=2.00e-06]


Epoch [479/800] test accuracy: 0.9500 time: 3.70


Sample: 100%|██████████| 100/100 [00:01, 69.61it/s, lr=2.00e-06]


Epoch [480/800] test accuracy: 0.9501 time: 3.07


Sample: 100%|██████████| 100/100 [00:01, 69.40it/s, lr=2.00e-06]


Epoch [481/800] test accuracy: 0.9503 time: 3.09


Sample: 100%|██████████| 100/100 [00:01, 69.61it/s, lr=2.00e-06]


Epoch [482/800] test accuracy: 0.9506 time: 2.79


Sample: 100%|██████████| 100/100 [00:01, 52.77it/s, lr=2.00e-06]


Epoch [483/800] test accuracy: 0.9510 time: 3.45


Sample: 100%|██████████| 100/100 [00:01, 65.40it/s, lr=2.00e-06]


Epoch [484/800] test accuracy: 0.9511 time: 3.15


Sample: 100%|██████████| 100/100 [00:01, 77.12it/s, lr=2.00e-06]


Epoch [485/800] test accuracy: 0.9511 time: 2.90


Sample: 100%|██████████| 100/100 [00:01, 70.94it/s, lr=2.00e-06]


Epoch [486/800] test accuracy: 0.9512 time: 2.99


Sample: 100%|██████████| 100/100 [00:01, 66.41it/s, lr=2.00e-06]


Epoch [487/800] test accuracy: 0.9513 time: 3.00


Sample: 100%|██████████| 100/100 [00:01, 62.93it/s, lr=2.00e-06]


Epoch [488/800] test accuracy: 0.9514 time: 3.11


Sample: 100%|██████████| 100/100 [00:01, 67.07it/s, lr=2.00e-06]


Epoch [489/800] test accuracy: 0.9518 time: 3.59


Sample: 100%|██████████| 100/100 [00:01, 59.29it/s, lr=2.00e-06]


Epoch [490/800] test accuracy: 0.9518 time: 3.11


Sample: 100%|██████████| 100/100 [00:01, 81.39it/s, lr=2.00e-06]


Epoch [491/800] test accuracy: 0.9517 time: 3.40


Sample: 100%|██████████| 100/100 [00:01, 52.00it/s, lr=2.00e-06]


Epoch [492/800] test accuracy: 0.9518 time: 4.63


Sample: 100%|██████████| 100/100 [00:01, 67.00it/s, lr=2.00e-06]


Epoch [493/800] test accuracy: 0.9518 time: 3.60


Sample: 100%|██████████| 100/100 [00:01, 72.12it/s, lr=2.00e-06]


Epoch [494/800] test accuracy: 0.9520 time: 3.89


Sample: 100%|██████████| 100/100 [00:01, 63.26it/s, lr=2.00e-06]


Epoch [495/800] test accuracy: 0.9520 time: 3.17


Sample: 100%|██████████| 100/100 [00:01, 66.89it/s, lr=2.00e-06]


Epoch [496/800] test accuracy: 0.9521 time: 3.06


Sample: 100%|██████████| 100/100 [00:01, 67.41it/s, lr=2.00e-06]


Epoch [497/800] test accuracy: 0.9522 time: 3.19


Sample: 100%|██████████| 100/100 [00:01, 79.69it/s, lr=2.00e-06]


Epoch [498/800] test accuracy: 0.9523 time: 2.87


Sample: 100%|██████████| 100/100 [00:01, 60.51it/s, lr=2.00e-06]


Epoch [499/800] test accuracy: 0.9524 time: 4.13


Sample: 100%|██████████| 100/100 [00:01, 63.09it/s, lr=2.00e-06]


Epoch [500/800] test accuracy: 0.9526 time: 3.51


Sample: 100%|██████████| 100/100 [00:02, 39.98it/s, lr=2.00e-06]


Epoch [501/800] test accuracy: 0.9526 time: 3.30


Sample: 100%|██████████| 100/100 [00:01, 60.81it/s, lr=2.00e-06]


Epoch [502/800] test accuracy: 0.9527 time: 3.01


Sample: 100%|██████████| 100/100 [00:01, 85.61it/s, lr=2.00e-06]


Epoch [503/800] test accuracy: 0.9529 time: 3.22


Sample: 100%|██████████| 100/100 [00:02, 42.68it/s, lr=2.00e-06]


Epoch [504/800] test accuracy: 0.9531 time: 3.68


Sample: 100%|██████████| 100/100 [00:01, 62.98it/s, lr=2.00e-06]


Epoch [505/800] test accuracy: 0.9531 time: 3.73


Sample: 100%|██████████| 100/100 [00:01, 55.03it/s, lr=2.00e-06]


Epoch [506/800] test accuracy: 0.9533 time: 3.89


Sample: 100%|██████████| 100/100 [00:03, 28.03it/s, lr=2.00e-06]


Epoch [507/800] test accuracy: 0.9535 time: 3.76


Sample: 100%|██████████| 100/100 [00:01, 53.16it/s, lr=2.00e-06]


Epoch [508/800] test accuracy: 0.9535 time: 4.63


Sample: 100%|██████████| 100/100 [00:01, 60.21it/s, lr=2.00e-06]


Epoch [509/800] test accuracy: 0.9536 time: 3.93


Sample: 100%|██████████| 100/100 [00:01, 62.50it/s, lr=2.00e-06]


Epoch [510/800] test accuracy: 0.9537 time: 3.56


Sample: 100%|██████████| 100/100 [00:01, 56.08it/s, lr=2.00e-06]


Epoch [511/800] test accuracy: 0.9539 time: 2.80


Sample: 100%|██████████| 100/100 [00:01, 68.49it/s, lr=2.00e-06]


Epoch [512/800] test accuracy: 0.9541 time: 2.91


Sample: 100%|██████████| 100/100 [00:01, 59.08it/s, lr=2.00e-06]


Epoch [513/800] test accuracy: 0.9541 time: 3.04


Sample: 100%|██████████| 100/100 [00:01, 62.56it/s, lr=2.00e-06]


Epoch [514/800] test accuracy: 0.9543 time: 3.29


Sample: 100%|██████████| 100/100 [00:02, 43.08it/s, lr=2.00e-06]


Epoch [515/800] test accuracy: 0.9545 time: 3.44


Sample: 100%|██████████| 100/100 [00:01, 67.82it/s, lr=2.00e-06]


Epoch [516/800] test accuracy: 0.9545 time: 3.75


Sample: 100%|██████████| 100/100 [00:01, 85.68it/s, lr=2.00e-06]


Epoch [517/800] test accuracy: 0.9545 time: 3.47


Sample: 100%|██████████| 100/100 [00:01, 87.76it/s, lr=2.00e-06]


Epoch [518/800] test accuracy: 0.9545 time: 2.81


Sample: 100%|██████████| 100/100 [00:01, 89.92it/s, lr=2.00e-06]


Epoch [519/800] test accuracy: 0.9545 time: 2.80


Sample: 100%|██████████| 100/100 [00:01, 80.92it/s, lr=2.00e-06]


Epoch [520/800] test accuracy: 0.9545 time: 3.44


Sample: 100%|██████████| 100/100 [00:01, 89.34it/s, lr=2.00e-06]


Epoch [521/800] test accuracy: 0.9547 time: 2.81


Sample: 100%|██████████| 100/100 [00:01, 88.32it/s, lr=2.00e-06]


Epoch [522/800] test accuracy: 0.9548 time: 2.87


Sample: 100%|██████████| 100/100 [00:01, 86.52it/s, lr=2.00e-06]


Epoch [523/800] test accuracy: 0.9548 time: 2.90


Sample: 100%|██████████| 100/100 [00:01, 66.51it/s, lr=2.00e-06]


Epoch [524/800] test accuracy: 0.9550 time: 3.13


Sample: 100%|██████████| 100/100 [00:01, 65.55it/s, lr=2.00e-06]


Epoch [525/800] test accuracy: 0.9550 time: 2.84


Sample: 100%|██████████| 100/100 [00:01, 89.32it/s, lr=2.00e-06]


Epoch [526/800] test accuracy: 0.9551 time: 3.09


Sample: 100%|██████████| 100/100 [00:01, 89.20it/s, lr=2.00e-06]


Epoch [527/800] test accuracy: 0.9551 time: 3.04


Sample: 100%|██████████| 100/100 [00:01, 65.97it/s, lr=2.00e-06]


Epoch [528/800] test accuracy: 0.9551 time: 2.83


Sample: 100%|██████████| 100/100 [00:01, 60.31it/s, lr=2.00e-06]


Epoch [529/800] test accuracy: 0.9551 time: 2.84


Sample: 100%|██████████| 100/100 [00:01, 50.80it/s, lr=2.00e-06]


Epoch [530/800] test accuracy: 0.9552 time: 3.02


Sample: 100%|██████████| 100/100 [00:01, 64.27it/s, lr=2.00e-06]


Epoch [531/800] test accuracy: 0.9552 time: 3.29


Sample: 100%|██████████| 100/100 [00:01, 66.40it/s, lr=2.00e-06]


Epoch [532/800] test accuracy: 0.9553 time: 3.31


Sample: 100%|██████████| 100/100 [00:01, 65.47it/s, lr=2.00e-06]


Epoch [533/800] test accuracy: 0.9553 time: 2.92


Sample: 100%|██████████| 100/100 [00:01, 82.07it/s, lr=2.00e-06]


Epoch [534/800] test accuracy: 0.9555 time: 2.88


Sample: 100%|██████████| 100/100 [00:01, 66.87it/s, lr=2.00e-06]


Epoch [535/800] test accuracy: 0.9557 time: 4.09


Sample: 100%|██████████| 100/100 [00:01, 66.50it/s, lr=2.00e-06]


Epoch [536/800] test accuracy: 0.9557 time: 3.30


Sample: 100%|██████████| 100/100 [00:01, 71.07it/s, lr=2.00e-06]


Epoch [537/800] test accuracy: 0.9558 time: 2.84


Sample: 100%|██████████| 100/100 [00:01, 59.72it/s, lr=2.00e-06]


Epoch [538/800] test accuracy: 0.9557 time: 2.89


Sample: 100%|██████████| 100/100 [00:01, 76.31it/s, lr=2.00e-06]


Epoch [539/800] test accuracy: 0.9556 time: 2.82


Sample: 100%|██████████| 100/100 [00:01, 69.59it/s, lr=2.00e-06]


Epoch [540/800] test accuracy: 0.9559 time: 2.91


Sample: 100%|██████████| 100/100 [00:01, 70.73it/s, lr=2.00e-06]


Epoch [541/800] test accuracy: 0.9559 time: 2.88


Sample: 100%|██████████| 100/100 [00:01, 84.17it/s, lr=2.00e-06]


Epoch [542/800] test accuracy: 0.9561 time: 2.86


Sample: 100%|██████████| 100/100 [00:01, 73.32it/s, lr=2.00e-06]


Epoch [543/800] test accuracy: 0.9561 time: 2.90


Sample: 100%|██████████| 100/100 [00:01, 64.50it/s, lr=2.00e-06]


Epoch [544/800] test accuracy: 0.9561 time: 2.84


Sample: 100%|██████████| 100/100 [00:01, 80.12it/s, lr=2.00e-06]


Epoch [545/800] test accuracy: 0.9561 time: 4.10


Sample: 100%|██████████| 100/100 [00:01, 66.69it/s, lr=2.00e-06]


Epoch [546/800] test accuracy: 0.9563 time: 3.14


Sample: 100%|██████████| 100/100 [00:01, 62.53it/s, lr=2.00e-06]


Epoch [547/800] test accuracy: 0.9563 time: 3.02


Sample: 100%|██████████| 100/100 [00:01, 73.34it/s, lr=2.00e-06]


Epoch [548/800] test accuracy: 0.9563 time: 3.07


Sample: 100%|██████████| 100/100 [00:01, 79.57it/s, lr=2.00e-06]


Epoch [549/800] test accuracy: 0.9563 time: 3.34


Sample: 100%|██████████| 100/100 [00:01, 53.58it/s, lr=2.00e-06]


Epoch [550/800] test accuracy: 0.9565 time: 3.11


Sample: 100%|██████████| 100/100 [00:01, 86.82it/s, lr=2.00e-06]


Epoch [551/800] test accuracy: 0.9567 time: 2.93


Sample: 100%|██████████| 100/100 [00:01, 68.88it/s, lr=2.00e-06]


Epoch [552/800] test accuracy: 0.9568 time: 3.10


Sample: 100%|██████████| 100/100 [00:01, 69.26it/s, lr=2.00e-06]


Epoch [553/800] test accuracy: 0.9568 time: 3.23


Sample: 100%|██████████| 100/100 [00:01, 62.69it/s, lr=2.00e-06]


Epoch [554/800] test accuracy: 0.9569 time: 3.65


Sample: 100%|██████████| 100/100 [00:01, 64.77it/s, lr=2.00e-06]


Epoch [555/800] test accuracy: 0.9572 time: 3.54


Sample: 100%|██████████| 100/100 [00:01, 73.73it/s, lr=2.00e-06]


Epoch [556/800] test accuracy: 0.9572 time: 3.03


Sample: 100%|██████████| 100/100 [00:01, 66.85it/s, lr=2.00e-06]


Epoch [557/800] test accuracy: 0.9572 time: 3.57


Sample: 100%|██████████| 100/100 [00:01, 88.14it/s, lr=2.00e-06]


Epoch [558/800] test accuracy: 0.9572 time: 2.77


Sample: 100%|██████████| 100/100 [00:01, 90.04it/s, lr=2.00e-06]


Epoch [559/800] test accuracy: 0.9573 time: 2.90


Sample: 100%|██████████| 100/100 [00:01, 90.58it/s, lr=2.00e-06]


Epoch [560/800] test accuracy: 0.9575 time: 2.90


Sample: 100%|██████████| 100/100 [00:01, 68.28it/s, lr=2.00e-06]


Epoch [561/800] test accuracy: 0.9577 time: 2.83


Sample: 100%|██████████| 100/100 [00:01, 71.01it/s, lr=2.00e-06]


Epoch [562/800] test accuracy: 0.9577 time: 2.94


Sample: 100%|██████████| 100/100 [00:01, 68.83it/s, lr=2.00e-06]


Epoch [563/800] test accuracy: 0.9577 time: 3.15


Sample: 100%|██████████| 100/100 [00:03, 32.93it/s, lr=2.00e-06]


Epoch [564/800] test accuracy: 0.9579 time: 2.93


Sample: 100%|██████████| 100/100 [00:01, 65.66it/s, lr=2.00e-06]


Epoch [565/800] test accuracy: 0.9579 time: 2.78


Sample: 100%|██████████| 100/100 [00:01, 64.48it/s, lr=2.00e-06]


Epoch [566/800] test accuracy: 0.9578 time: 3.11


Sample: 100%|██████████| 100/100 [00:01, 75.94it/s, lr=2.00e-06]


Epoch [567/800] test accuracy: 0.9578 time: 2.89


Sample: 100%|██████████| 100/100 [00:01, 70.38it/s, lr=2.00e-06]


Epoch [568/800] test accuracy: 0.9578 time: 2.79


Sample: 100%|██████████| 100/100 [00:01, 65.40it/s, lr=2.00e-06]


Epoch [569/800] test accuracy: 0.9578 time: 2.97


Sample: 100%|██████████| 100/100 [00:01, 85.78it/s, lr=2.00e-06]


Epoch [570/800] test accuracy: 0.9578 time: 2.91


Sample: 100%|██████████| 100/100 [00:01, 73.83it/s, lr=2.00e-06]


Epoch [571/800] test accuracy: 0.9579 time: 2.84


Sample: 100%|██████████| 100/100 [00:01, 79.90it/s, lr=2.00e-06]


Epoch [572/800] test accuracy: 0.9580 time: 2.91


Sample: 100%|██████████| 100/100 [00:01, 71.77it/s, lr=2.00e-06]


Epoch [573/800] test accuracy: 0.9580 time: 3.90


Sample: 100%|██████████| 100/100 [00:01, 66.52it/s, lr=2.00e-06]


Epoch [574/800] test accuracy: 0.9579 time: 3.40


Sample: 100%|██████████| 100/100 [00:01, 66.94it/s, lr=2.00e-06]


Epoch [575/800] test accuracy: 0.9579 time: 2.82


Sample: 100%|██████████| 100/100 [00:01, 67.09it/s, lr=2.00e-06]


Epoch [576/800] test accuracy: 0.9581 time: 2.86


Sample: 100%|██████████| 100/100 [00:01, 78.37it/s, lr=2.00e-06]


Epoch [577/800] test accuracy: 0.9582 time: 2.99


Sample: 100%|██████████| 100/100 [00:01, 84.13it/s, lr=2.00e-06]


Epoch [578/800] test accuracy: 0.9584 time: 3.04


Sample: 100%|██████████| 100/100 [00:01, 60.12it/s, lr=2.00e-06]


Epoch [579/800] test accuracy: 0.9586 time: 3.13


Sample: 100%|██████████| 100/100 [00:01, 68.48it/s, lr=2.00e-06]


Epoch [580/800] test accuracy: 0.9586 time: 2.96


Sample: 100%|██████████| 100/100 [00:01, 68.83it/s, lr=2.00e-06]


Epoch [581/800] test accuracy: 0.9586 time: 3.07


Sample: 100%|██████████| 100/100 [00:01, 80.25it/s, lr=2.00e-06]


Epoch [582/800] test accuracy: 0.9587 time: 5.21


Sample: 100%|██████████| 100/100 [00:01, 61.73it/s, lr=2.00e-06]


Epoch [583/800] test accuracy: 0.9588 time: 3.48


Sample: 100%|██████████| 100/100 [00:01, 64.57it/s, lr=2.00e-06]


Epoch [584/800] test accuracy: 0.9589 time: 2.84


Sample: 100%|██████████| 100/100 [00:01, 68.22it/s, lr=2.00e-06]


Epoch [585/800] test accuracy: 0.9590 time: 2.86


Sample: 100%|██████████| 100/100 [00:01, 78.33it/s, lr=2.00e-06]


Epoch [586/800] test accuracy: 0.9590 time: 2.76


Sample: 100%|██████████| 100/100 [00:01, 82.90it/s, lr=2.00e-06]


Epoch [587/800] test accuracy: 0.9590 time: 2.80


Sample: 100%|██████████| 100/100 [00:01, 86.15it/s, lr=2.00e-06]


Epoch [588/800] test accuracy: 0.9590 time: 2.94


Sample: 100%|██████████| 100/100 [00:01, 78.68it/s, lr=2.00e-06]


Epoch [589/800] test accuracy: 0.9591 time: 2.76


Sample: 100%|██████████| 100/100 [00:01, 73.32it/s, lr=2.00e-06]


Epoch [590/800] test accuracy: 0.9591 time: 3.58


Sample: 100%|██████████| 100/100 [00:01, 66.57it/s, lr=2.00e-06]


Epoch [591/800] test accuracy: 0.9591 time: 2.80


Sample: 100%|██████████| 100/100 [00:03, 26.34it/s, lr=2.00e-06]


Epoch [592/800] test accuracy: 0.9593 time: 3.21


Sample: 100%|██████████| 100/100 [00:01, 57.89it/s, lr=2.00e-06]


Epoch [593/800] test accuracy: 0.9593 time: 2.86


Sample: 100%|██████████| 100/100 [00:01, 83.80it/s, lr=2.00e-06]


Epoch [594/800] test accuracy: 0.9594 time: 2.93


Sample: 100%|██████████| 100/100 [00:01, 73.19it/s, lr=2.00e-06]


Epoch [595/800] test accuracy: 0.9595 time: 2.86


Sample: 100%|██████████| 100/100 [00:01, 74.59it/s, lr=2.00e-06]


Epoch [596/800] test accuracy: 0.9596 time: 3.00


Sample: 100%|██████████| 100/100 [00:01, 68.10it/s, lr=2.00e-06]


Epoch [597/800] test accuracy: 0.9597 time: 2.89


Sample: 100%|██████████| 100/100 [00:01, 73.03it/s, lr=2.00e-06]


Epoch [598/800] test accuracy: 0.9599 time: 3.64


Sample: 100%|██████████| 100/100 [00:02, 46.37it/s, lr=2.00e-06]


Epoch [599/800] test accuracy: 0.9600 time: 3.81


Sample: 100%|██████████| 100/100 [00:01, 57.72it/s, lr=2.00e-06]


Epoch [600/800] test accuracy: 0.9600 time: 4.09


Sample: 100%|██████████| 100/100 [00:01, 62.40it/s, lr=2.00e-06]


Epoch [601/800] test accuracy: 0.9600 time: 3.51


Sample: 100%|██████████| 100/100 [00:02, 46.53it/s, lr=2.00e-06]


Epoch [602/800] test accuracy: 0.9602 time: 3.62


Sample: 100%|██████████| 100/100 [00:02, 38.95it/s, lr=2.00e-06]


Epoch [603/800] test accuracy: 0.9603 time: 3.43


Sample: 100%|██████████| 100/100 [00:02, 34.04it/s, lr=2.00e-06]


Epoch [604/800] test accuracy: 0.9604 time: 3.39


Sample: 100%|██████████| 100/100 [00:01, 54.60it/s, lr=2.00e-06]


Epoch [605/800] test accuracy: 0.9606 time: 2.97


Sample: 100%|██████████| 100/100 [00:01, 80.40it/s, lr=2.00e-06]


Epoch [606/800] test accuracy: 0.9607 time: 3.08


Sample: 100%|██████████| 100/100 [00:01, 80.03it/s, lr=2.00e-06]


Epoch [607/800] test accuracy: 0.9608 time: 3.00


Sample: 100%|██████████| 100/100 [00:01, 53.82it/s, lr=2.00e-06]


Epoch [608/800] test accuracy: 0.9608 time: 3.82


Sample: 100%|██████████| 100/100 [00:01, 57.17it/s, lr=2.00e-06]


Epoch [609/800] test accuracy: 0.9608 time: 3.37


Sample: 100%|██████████| 100/100 [00:01, 89.23it/s, lr=2.00e-06]


Epoch [610/800] test accuracy: 0.9608 time: 4.00


Sample: 100%|██████████| 100/100 [00:01, 54.99it/s, lr=2.00e-06]


Epoch [611/800] test accuracy: 0.9608 time: 3.76


Sample: 100%|██████████| 100/100 [00:01, 69.75it/s, lr=2.00e-06]


Epoch [612/800] test accuracy: 0.9609 time: 3.73


Sample: 100%|██████████| 100/100 [00:02, 46.98it/s, lr=2.00e-06]


Epoch [613/800] test accuracy: 0.9609 time: 3.16


Sample: 100%|██████████| 100/100 [00:01, 69.92it/s, lr=2.00e-06]


Epoch [614/800] test accuracy: 0.9610 time: 3.25


Sample: 100%|██████████| 100/100 [00:01, 86.13it/s, lr=2.00e-06]


Epoch [615/800] test accuracy: 0.9610 time: 2.83


Sample: 100%|██████████| 100/100 [00:01, 78.19it/s, lr=2.00e-06]


Epoch [616/800] test accuracy: 0.9611 time: 3.18


Sample: 100%|██████████| 100/100 [00:01, 79.88it/s, lr=2.00e-06]


Epoch [617/800] test accuracy: 0.9611 time: 3.33


Sample: 100%|██████████| 100/100 [00:01, 78.11it/s, lr=2.00e-06]


Epoch [618/800] test accuracy: 0.9613 time: 3.03


Sample: 100%|██████████| 100/100 [00:01, 82.36it/s, lr=2.00e-06]


Epoch [619/800] test accuracy: 0.9613 time: 2.82


Sample: 100%|██████████| 100/100 [00:01, 82.68it/s, lr=2.00e-06]


Epoch [620/800] test accuracy: 0.9614 time: 2.73


Sample: 100%|██████████| 100/100 [00:01, 86.07it/s, lr=2.00e-06]


Epoch [621/800] test accuracy: 0.9614 time: 2.88


Sample: 100%|██████████| 100/100 [00:01, 80.01it/s, lr=2.00e-06]


Epoch [622/800] test accuracy: 0.9614 time: 2.76


Sample: 100%|██████████| 100/100 [00:01, 83.85it/s, lr=2.00e-06]


Epoch [623/800] test accuracy: 0.9615 time: 2.84


Sample: 100%|██████████| 100/100 [00:01, 74.10it/s, lr=2.00e-06]


Epoch [624/800] test accuracy: 0.9617 time: 2.96


Sample: 100%|██████████| 100/100 [00:01, 74.80it/s, lr=2.00e-06]


Epoch [625/800] test accuracy: 0.9619 time: 2.90


Sample: 100%|██████████| 100/100 [00:01, 66.34it/s, lr=2.00e-06]


Epoch [626/800] test accuracy: 0.9619 time: 3.82


Sample: 100%|██████████| 100/100 [00:01, 76.43it/s, lr=2.00e-06]


Epoch [627/800] test accuracy: 0.9621 time: 2.82


Sample: 100%|██████████| 100/100 [00:02, 46.57it/s, lr=2.00e-06]


Epoch [628/800] test accuracy: 0.9621 time: 3.81


Sample: 100%|██████████| 100/100 [00:02, 45.97it/s, lr=2.00e-06]


Epoch [629/800] test accuracy: 0.9621 time: 3.90


Sample: 100%|██████████| 100/100 [00:02, 39.04it/s, lr=2.00e-06]


Epoch [630/800] test accuracy: 0.9622 time: 2.81


Sample: 100%|██████████| 100/100 [00:01, 70.04it/s, lr=2.00e-06]


Epoch [631/800] test accuracy: 0.9622 time: 3.15


Sample: 100%|██████████| 100/100 [00:02, 36.55it/s, lr=2.00e-06]


Epoch [632/800] test accuracy: 0.9623 time: 3.41


Sample: 100%|██████████| 100/100 [00:02, 44.29it/s, lr=2.00e-06]


Epoch [633/800] test accuracy: 0.9623 time: 3.68


Sample: 100%|██████████| 100/100 [00:01, 79.33it/s, lr=2.00e-06]


Epoch [634/800] test accuracy: 0.9624 time: 4.57


Sample: 100%|██████████| 100/100 [00:02, 43.02it/s, lr=2.00e-06]


Epoch [635/800] test accuracy: 0.9624 time: 3.23


Sample: 100%|██████████| 100/100 [00:01, 86.54it/s, lr=2.00e-06]


Epoch [636/800] test accuracy: 0.9624 time: 2.76


Sample: 100%|██████████| 100/100 [00:01, 88.80it/s, lr=2.00e-06]


Epoch [637/800] test accuracy: 0.9625 time: 3.04


Sample: 100%|██████████| 100/100 [00:01, 90.02it/s, lr=2.00e-06]


Epoch [638/800] test accuracy: 0.9629 time: 3.44


Sample: 100%|██████████| 100/100 [00:01, 71.68it/s, lr=2.00e-06]


Epoch [639/800] test accuracy: 0.9629 time: 3.80


Sample: 100%|██████████| 100/100 [00:01, 59.79it/s, lr=2.00e-06]


Epoch [640/800] test accuracy: 0.9629 time: 3.58


Sample: 100%|██████████| 100/100 [00:01, 73.58it/s, lr=2.00e-06]


Epoch [641/800] test accuracy: 0.9631 time: 2.74


Sample: 100%|██████████| 100/100 [00:01, 76.87it/s, lr=2.00e-06]


Epoch [642/800] test accuracy: 0.9633 time: 3.62


Sample: 100%|██████████| 100/100 [00:01, 52.43it/s, lr=2.00e-06]


Epoch [643/800] test accuracy: 0.9634 time: 3.47


Sample: 100%|██████████| 100/100 [00:01, 86.38it/s, lr=2.00e-06]


Epoch [644/800] test accuracy: 0.9634 time: 3.35


Sample: 100%|██████████| 100/100 [00:02, 49.11it/s, lr=2.00e-06]


Epoch [645/800] test accuracy: 0.9634 time: 3.59


Sample: 100%|██████████| 100/100 [00:01, 72.03it/s, lr=2.00e-06]


Epoch [646/800] test accuracy: 0.9634 time: 3.34


Sample: 100%|██████████| 100/100 [00:01, 72.38it/s, lr=2.00e-06]


Epoch [647/800] test accuracy: 0.9634 time: 3.08


Sample: 100%|██████████| 100/100 [00:01, 72.06it/s, lr=2.00e-06]


Epoch [648/800] test accuracy: 0.9635 time: 3.79


Sample: 100%|██████████| 100/100 [00:01, 63.27it/s, lr=2.00e-06]


Epoch [649/800] test accuracy: 0.9636 time: 3.81


Sample: 100%|██████████| 100/100 [00:02, 43.52it/s, lr=2.00e-06]


Epoch [650/800] test accuracy: 0.9638 time: 3.02


Sample: 100%|██████████| 100/100 [00:01, 63.27it/s, lr=2.00e-06]


Epoch [651/800] test accuracy: 0.9637 time: 2.92


Sample: 100%|██████████| 100/100 [00:01, 77.49it/s, lr=2.00e-06]


Epoch [652/800] test accuracy: 0.9637 time: 3.32


Sample: 100%|██████████| 100/100 [00:01, 60.53it/s, lr=2.00e-06]


Epoch [653/800] test accuracy: 0.9637 time: 3.04


Sample: 100%|██████████| 100/100 [00:01, 80.25it/s, lr=2.00e-06]


Epoch [654/800] test accuracy: 0.9639 time: 2.91


Sample: 100%|██████████| 100/100 [00:01, 80.94it/s, lr=2.00e-06]


Epoch [655/800] test accuracy: 0.9639 time: 2.84


Sample: 100%|██████████| 100/100 [00:01, 60.47it/s, lr=2.00e-06]


Epoch [656/800] test accuracy: 0.9641 time: 3.32


Sample: 100%|██████████| 100/100 [00:01, 70.66it/s, lr=2.00e-06]


Epoch [657/800] test accuracy: 0.9641 time: 2.92


Sample: 100%|██████████| 100/100 [00:01, 67.34it/s, lr=2.00e-06]


Epoch [658/800] test accuracy: 0.9641 time: 2.77


Sample: 100%|██████████| 100/100 [00:01, 83.99it/s, lr=2.00e-06]


Epoch [659/800] test accuracy: 0.9641 time: 2.91


Sample: 100%|██████████| 100/100 [00:01, 58.40it/s, lr=2.00e-06]


Epoch [660/800] test accuracy: 0.9641 time: 3.66


Sample: 100%|██████████| 100/100 [00:01, 67.34it/s, lr=2.00e-06]


Epoch [661/800] test accuracy: 0.9641 time: 3.12


Sample: 100%|██████████| 100/100 [00:01, 67.35it/s, lr=2.00e-06]


Epoch [662/800] test accuracy: 0.9643 time: 3.00


Sample: 100%|██████████| 100/100 [00:01, 75.68it/s, lr=2.00e-06]


Epoch [663/800] test accuracy: 0.9643 time: 3.23


Sample: 100%|██████████| 100/100 [00:01, 74.55it/s, lr=2.00e-06]


Epoch [664/800] test accuracy: 0.9643 time: 2.81


Sample: 100%|██████████| 100/100 [00:01, 81.60it/s, lr=2.00e-06]


Epoch [665/800] test accuracy: 0.9644 time: 2.77


Sample: 100%|██████████| 100/100 [00:01, 61.57it/s, lr=2.00e-06]


Epoch [666/800] test accuracy: 0.9645 time: 2.74


Sample: 100%|██████████| 100/100 [00:01, 61.86it/s, lr=2.00e-06]


Epoch [667/800] test accuracy: 0.9647 time: 2.90


Sample: 100%|██████████| 100/100 [00:01, 87.11it/s, lr=2.00e-06]


Epoch [668/800] test accuracy: 0.9648 time: 3.22


Sample: 100%|██████████| 100/100 [00:01, 68.05it/s, lr=2.00e-06]


Epoch [669/800] test accuracy: 0.9648 time: 3.53


Sample: 100%|██████████| 100/100 [00:02, 40.52it/s, lr=2.00e-06]


Epoch [670/800] test accuracy: 0.9648 time: 3.39


Sample: 100%|██████████| 100/100 [00:02, 46.45it/s, lr=2.00e-06]


Epoch [671/800] test accuracy: 0.9648 time: 3.25


Sample: 100%|██████████| 100/100 [00:01, 87.63it/s, lr=2.00e-06]


Epoch [672/800] test accuracy: 0.9649 time: 2.88


Sample: 100%|██████████| 100/100 [00:01, 80.77it/s, lr=2.00e-06]


Epoch [673/800] test accuracy: 0.9650 time: 2.92


Sample: 100%|██████████| 100/100 [00:01, 65.83it/s, lr=2.00e-06]


Epoch [674/800] test accuracy: 0.9650 time: 3.40


Sample: 100%|██████████| 100/100 [00:01, 67.60it/s, lr=2.00e-06]


Epoch [675/800] test accuracy: 0.9650 time: 3.06


Sample: 100%|██████████| 100/100 [00:01, 66.50it/s, lr=2.00e-06]


Epoch [676/800] test accuracy: 0.9652 time: 3.05


Sample: 100%|██████████| 100/100 [00:01, 72.30it/s, lr=2.00e-06]


Epoch [677/800] test accuracy: 0.9652 time: 3.46


Sample: 100%|██████████| 100/100 [00:01, 66.57it/s, lr=2.00e-06]


Epoch [678/800] test accuracy: 0.9652 time: 4.06


Sample: 100%|██████████| 100/100 [00:01, 81.84it/s, lr=2.00e-06]


Epoch [679/800] test accuracy: 0.9655 time: 2.73


Sample: 100%|██████████| 100/100 [00:01, 70.27it/s, lr=2.00e-06]


Epoch [680/800] test accuracy: 0.9654 time: 2.99


Sample: 100%|██████████| 100/100 [00:01, 67.38it/s, lr=2.00e-06]


Epoch [681/800] test accuracy: 0.9654 time: 2.90


Sample: 100%|██████████| 100/100 [00:01, 69.91it/s, lr=2.00e-06]


Epoch [682/800] test accuracy: 0.9654 time: 2.83


Sample: 100%|██████████| 100/100 [00:01, 73.77it/s, lr=2.00e-06]


Epoch [683/800] test accuracy: 0.9656 time: 2.81


Sample: 100%|██████████| 100/100 [00:01, 90.14it/s, lr=2.00e-06]


Epoch [684/800] test accuracy: 0.9659 time: 2.91


Sample: 100%|██████████| 100/100 [00:01, 67.84it/s, lr=2.00e-06]


Epoch [685/800] test accuracy: 0.9661 time: 2.79


Sample: 100%|██████████| 100/100 [00:01, 66.41it/s, lr=2.00e-06]


Epoch [686/800] test accuracy: 0.9661 time: 2.93


Sample: 100%|██████████| 100/100 [00:01, 73.61it/s, lr=2.00e-06]


Epoch [687/800] test accuracy: 0.9661 time: 3.84


Sample: 100%|██████████| 100/100 [00:01, 55.03it/s, lr=2.00e-06]


Epoch [688/800] test accuracy: 0.9661 time: 3.63


Sample: 100%|██████████| 100/100 [00:01, 68.05it/s, lr=2.00e-06]


Epoch [689/800] test accuracy: 0.9663 time: 3.08


Sample: 100%|██████████| 100/100 [00:01, 64.05it/s, lr=2.00e-06]


Epoch [690/800] test accuracy: 0.9663 time: 3.50


Sample: 100%|██████████| 100/100 [00:01, 73.65it/s, lr=2.00e-06]


Epoch [691/800] test accuracy: 0.9664 time: 3.49


Sample: 100%|██████████| 100/100 [00:01, 69.53it/s, lr=2.00e-06]


Epoch [692/800] test accuracy: 0.9664 time: 4.04


Sample: 100%|██████████| 100/100 [00:01, 77.76it/s, lr=2.00e-06]


Epoch [693/800] test accuracy: 0.9664 time: 3.56


Sample: 100%|██████████| 100/100 [00:01, 63.78it/s, lr=2.00e-06]


Epoch [694/800] test accuracy: 0.9664 time: 3.74


Sample: 100%|██████████| 100/100 [00:01, 58.85it/s, lr=2.00e-06]


Epoch [695/800] test accuracy: 0.9665 time: 3.48


Sample: 100%|██████████| 100/100 [00:01, 69.23it/s, lr=2.00e-06]


Epoch [696/800] test accuracy: 0.9665 time: 3.18


Sample: 100%|██████████| 100/100 [00:01, 69.07it/s, lr=2.00e-06]


Epoch [697/800] test accuracy: 0.9666 time: 2.80


Sample: 100%|██████████| 100/100 [00:01, 71.81it/s, lr=2.00e-06]


Epoch [698/800] test accuracy: 0.9667 time: 2.84


Sample: 100%|██████████| 100/100 [00:01, 60.97it/s, lr=2.00e-06]


Epoch [699/800] test accuracy: 0.9668 time: 2.96


Sample: 100%|██████████| 100/100 [00:01, 72.42it/s, lr=2.00e-06]


Epoch [700/800] test accuracy: 0.9668 time: 2.95


Sample: 100%|██████████| 100/100 [00:01, 64.86it/s, lr=2.00e-06]


Epoch [701/800] test accuracy: 0.9669 time: 2.99


Sample: 100%|██████████| 100/100 [00:01, 67.25it/s, lr=2.00e-06]


Epoch [702/800] test accuracy: 0.9669 time: 2.79


Sample: 100%|██████████| 100/100 [00:01, 73.05it/s, lr=2.00e-06]


Epoch [703/800] test accuracy: 0.9669 time: 2.97


Sample: 100%|██████████| 100/100 [00:01, 89.50it/s, lr=2.00e-06]


Epoch [704/800] test accuracy: 0.9669 time: 2.83


Sample: 100%|██████████| 100/100 [00:01, 65.03it/s, lr=2.00e-06]


Epoch [705/800] test accuracy: 0.9670 time: 4.30


Sample: 100%|██████████| 100/100 [00:01, 67.32it/s, lr=2.00e-06]


Epoch [706/800] test accuracy: 0.9671 time: 2.87


Sample: 100%|██████████| 100/100 [00:01, 69.25it/s, lr=2.00e-06]


Epoch [707/800] test accuracy: 0.9671 time: 2.79


Sample: 100%|██████████| 100/100 [00:01, 68.07it/s, lr=2.00e-06]


Epoch [708/800] test accuracy: 0.9671 time: 2.91


Sample: 100%|██████████| 100/100 [00:01, 89.99it/s, lr=2.00e-06]


Epoch [709/800] test accuracy: 0.9672 time: 2.88


Sample: 100%|██████████| 100/100 [00:01, 68.42it/s, lr=2.00e-06]


Epoch [710/800] test accuracy: 0.9673 time: 2.81


Sample: 100%|██████████| 100/100 [00:01, 63.64it/s, lr=2.00e-06]


Epoch [711/800] test accuracy: 0.9674 time: 3.04


Sample: 100%|██████████| 100/100 [00:01, 61.46it/s, lr=2.00e-06]


Epoch [712/800] test accuracy: 0.9674 time: 3.09


Sample: 100%|██████████| 100/100 [00:01, 69.67it/s, lr=2.00e-06]


Epoch [713/800] test accuracy: 0.9674 time: 2.91


Sample: 100%|██████████| 100/100 [00:01, 76.54it/s, lr=2.00e-06]


Epoch [714/800] test accuracy: 0.9674 time: 3.55


Sample: 100%|██████████| 100/100 [00:01, 86.78it/s, lr=2.00e-06]


Epoch [715/800] test accuracy: 0.9675 time: 3.48


Sample: 100%|██████████| 100/100 [00:01, 72.60it/s, lr=2.00e-06]


Epoch [716/800] test accuracy: 0.9675 time: 3.21


Sample: 100%|██████████| 100/100 [00:01, 70.52it/s, lr=2.00e-06]


Epoch [717/800] test accuracy: 0.9676 time: 2.87


Sample: 100%|██████████| 100/100 [00:01, 80.30it/s, lr=2.00e-06]


Epoch [718/800] test accuracy: 0.9676 time: 2.91


Sample: 100%|██████████| 100/100 [00:01, 69.24it/s, lr=2.00e-06]


Epoch [719/800] test accuracy: 0.9676 time: 2.90


Sample: 100%|██████████| 100/100 [00:01, 63.77it/s, lr=2.00e-06]


Epoch [720/800] test accuracy: 0.9676 time: 2.84


Sample: 100%|██████████| 100/100 [00:01, 63.16it/s, lr=2.00e-06]


Epoch [721/800] test accuracy: 0.9677 time: 2.99


Sample: 100%|██████████| 100/100 [00:01, 63.63it/s, lr=2.00e-06]


Epoch [722/800] test accuracy: 0.9678 time: 3.07


Sample: 100%|██████████| 100/100 [00:01, 66.91it/s, lr=2.00e-06]


Epoch [723/800] test accuracy: 0.9680 time: 2.89


Sample: 100%|██████████| 100/100 [00:01, 59.90it/s, lr=2.00e-06]


Epoch [724/800] test accuracy: 0.9680 time: 3.21


Sample: 100%|██████████| 100/100 [00:01, 62.05it/s, lr=2.00e-06]


Epoch [725/800] test accuracy: 0.9681 time: 3.16


Sample: 100%|██████████| 100/100 [00:01, 68.35it/s, lr=2.00e-06]


Epoch [726/800] test accuracy: 0.9681 time: 2.92


Sample: 100%|██████████| 100/100 [00:01, 85.65it/s, lr=2.00e-06]


Epoch [727/800] test accuracy: 0.9681 time: 3.01


Sample: 100%|██████████| 100/100 [00:01, 61.70it/s, lr=2.00e-06]


Epoch [728/800] test accuracy: 0.9681 time: 3.50


Sample: 100%|██████████| 100/100 [00:01, 66.45it/s, lr=2.00e-06]


Epoch [729/800] test accuracy: 0.9681 time: 3.14


Sample: 100%|██████████| 100/100 [00:01, 65.57it/s, lr=2.00e-06]


Epoch [730/800] test accuracy: 0.9682 time: 2.91


Sample: 100%|██████████| 100/100 [00:01, 73.22it/s, lr=2.00e-06]


Epoch [731/800] test accuracy: 0.9682 time: 2.80


Sample: 100%|██████████| 100/100 [00:01, 71.71it/s, lr=2.00e-06]


Epoch [732/800] test accuracy: 0.9682 time: 2.77


Sample: 100%|██████████| 100/100 [00:01, 85.24it/s, lr=2.00e-06]


Epoch [733/800] test accuracy: 0.9683 time: 3.35


Sample: 100%|██████████| 100/100 [00:01, 89.23it/s, lr=2.00e-06]


Epoch [734/800] test accuracy: 0.9682 time: 3.04


Sample: 100%|██████████| 100/100 [00:01, 90.15it/s, lr=2.00e-06]


Epoch [735/800] test accuracy: 0.9683 time: 2.89


Sample: 100%|██████████| 100/100 [00:01, 75.20it/s, lr=2.00e-06]


Epoch [736/800] test accuracy: 0.9685 time: 2.89


Sample: 100%|██████████| 100/100 [00:01, 72.23it/s, lr=2.00e-06]


Epoch [737/800] test accuracy: 0.9685 time: 3.50


Sample: 100%|██████████| 100/100 [00:01, 86.01it/s, lr=2.00e-06]


Epoch [738/800] test accuracy: 0.9687 time: 2.93


Sample: 100%|██████████| 100/100 [00:01, 85.40it/s, lr=2.00e-06]


Epoch [739/800] test accuracy: 0.9687 time: 2.93


Sample: 100%|██████████| 100/100 [00:01, 78.51it/s, lr=2.00e-06]


Epoch [740/800] test accuracy: 0.9687 time: 3.09


Sample: 100%|██████████| 100/100 [00:01, 72.63it/s, lr=2.00e-06]


Epoch [741/800] test accuracy: 0.9687 time: 3.31


Sample: 100%|██████████| 100/100 [00:01, 64.60it/s, lr=2.00e-06]


Epoch [742/800] test accuracy: 0.9688 time: 3.20


Sample: 100%|██████████| 100/100 [00:02, 48.27it/s, lr=2.00e-06]


Epoch [743/800] test accuracy: 0.9689 time: 3.73


Sample: 100%|██████████| 100/100 [00:01, 58.60it/s, lr=2.00e-06]


Epoch [744/800] test accuracy: 0.9691 time: 3.38


Sample: 100%|██████████| 100/100 [00:01, 82.94it/s, lr=2.00e-06]


Epoch [745/800] test accuracy: 0.9691 time: 2.90


Sample: 100%|██████████| 100/100 [00:01, 65.31it/s, lr=2.00e-06]


Epoch [746/800] test accuracy: 0.9691 time: 2.79


Sample: 100%|██████████| 100/100 [00:01, 70.42it/s, lr=2.00e-06]


Epoch [747/800] test accuracy: 0.9692 time: 3.18


Sample: 100%|██████████| 100/100 [00:01, 63.95it/s, lr=2.00e-06]


Epoch [748/800] test accuracy: 0.9692 time: 3.69


Sample: 100%|██████████| 100/100 [00:01, 89.37it/s, lr=2.00e-06]


Epoch [749/800] test accuracy: 0.9693 time: 3.03


Sample: 100%|██████████| 100/100 [00:01, 70.65it/s, lr=2.00e-06]


Epoch [750/800] test accuracy: 0.9693 time: 3.77


Sample: 100%|██████████| 100/100 [00:01, 68.67it/s, lr=2.00e-06]


Epoch [751/800] test accuracy: 0.9693 time: 3.84


Sample: 100%|██████████| 100/100 [00:01, 81.18it/s, lr=2.00e-06]


Epoch [752/800] test accuracy: 0.9693 time: 3.04


Sample: 100%|██████████| 100/100 [00:01, 63.70it/s, lr=2.00e-06]


Epoch [753/800] test accuracy: 0.9692 time: 3.39


Sample: 100%|██████████| 100/100 [00:01, 61.42it/s, lr=2.00e-06]


Epoch [754/800] test accuracy: 0.9693 time: 3.17


Sample: 100%|██████████| 100/100 [00:01, 59.45it/s, lr=2.00e-06]


Epoch [755/800] test accuracy: 0.9692 time: 2.83


Sample: 100%|██████████| 100/100 [00:01, 73.76it/s, lr=2.00e-06]


Epoch [756/800] test accuracy: 0.9692 time: 3.19


Sample: 100%|██████████| 100/100 [00:01, 64.27it/s, lr=2.00e-06]


Epoch [757/800] test accuracy: 0.9692 time: 2.82


Sample: 100%|██████████| 100/100 [00:01, 60.00it/s, lr=2.00e-06]


Epoch [758/800] test accuracy: 0.9692 time: 3.24


Sample: 100%|██████████| 100/100 [00:01, 64.39it/s, lr=2.00e-06]


Epoch [759/800] test accuracy: 0.9693 time: 3.45


Sample: 100%|██████████| 100/100 [00:01, 60.67it/s, lr=2.00e-06]


Epoch [760/800] test accuracy: 0.9692 time: 3.45


Sample: 100%|██████████| 100/100 [00:01, 59.05it/s, lr=2.00e-06]


Epoch [761/800] test accuracy: 0.9695 time: 3.47


Sample: 100%|██████████| 100/100 [00:02, 39.24it/s, lr=2.00e-06]


Epoch [762/800] test accuracy: 0.9695 time: 3.10


Sample: 100%|██████████| 100/100 [00:02, 42.32it/s, lr=2.00e-06]


Epoch [763/800] test accuracy: 0.9698 time: 3.90


Sample: 100%|██████████| 100/100 [00:01, 57.44it/s, lr=2.00e-06]


Epoch [764/800] test accuracy: 0.9701 time: 3.48


Sample: 100%|██████████| 100/100 [00:01, 77.10it/s, lr=2.00e-06]


Epoch [765/800] test accuracy: 0.9701 time: 3.22


Sample: 100%|██████████| 100/100 [00:01, 64.55it/s, lr=2.00e-06]


Epoch [766/800] test accuracy: 0.9701 time: 3.16


Sample: 100%|██████████| 100/100 [00:01, 66.89it/s, lr=2.00e-06]


Epoch [767/800] test accuracy: 0.9701 time: 3.03


Sample: 100%|██████████| 100/100 [00:01, 51.09it/s, lr=2.00e-06]


Epoch [768/800] test accuracy: 0.9702 time: 3.13


Sample: 100%|██████████| 100/100 [00:01, 54.68it/s, lr=2.00e-06]


Epoch [769/800] test accuracy: 0.9702 time: 3.74


Sample: 100%|██████████| 100/100 [00:01, 65.12it/s, lr=2.00e-06]


Epoch [770/800] test accuracy: 0.9703 time: 3.40


Sample: 100%|██████████| 100/100 [00:01, 66.27it/s, lr=2.00e-06]


Epoch [771/800] test accuracy: 0.9703 time: 3.64


Sample: 100%|██████████| 100/100 [00:01, 74.80it/s, lr=2.00e-06]


Epoch [772/800] test accuracy: 0.9704 time: 4.05


Sample: 100%|██████████| 100/100 [00:01, 71.04it/s, lr=2.00e-06]


Epoch [773/800] test accuracy: 0.9704 time: 3.39


Sample: 100%|██████████| 100/100 [00:01, 56.97it/s, lr=2.00e-06]


Epoch [774/800] test accuracy: 0.9705 time: 3.46


Sample: 100%|██████████| 100/100 [00:02, 48.59it/s, lr=2.00e-06]


Epoch [775/800] test accuracy: 0.9706 time: 2.97


Sample: 100%|██████████| 100/100 [00:01, 53.80it/s, lr=2.00e-06]


Epoch [776/800] test accuracy: 0.9706 time: 4.05


Sample: 100%|██████████| 100/100 [00:01, 83.50it/s, lr=2.00e-06]


Epoch [777/800] test accuracy: 0.9706 time: 2.86


Sample: 100%|██████████| 100/100 [00:01, 70.50it/s, lr=2.00e-06]


Epoch [778/800] test accuracy: 0.9706 time: 2.87


Sample: 100%|██████████| 100/100 [00:01, 74.44it/s, lr=2.00e-06]


Epoch [779/800] test accuracy: 0.9706 time: 2.90


Sample: 100%|██████████| 100/100 [00:01, 65.09it/s, lr=2.00e-06]


Epoch [780/800] test accuracy: 0.9707 time: 3.33


Sample: 100%|██████████| 100/100 [00:02, 41.63it/s, lr=2.00e-06]


Epoch [781/800] test accuracy: 0.9707 time: 3.82


Sample: 100%|██████████| 100/100 [00:01, 59.59it/s, lr=2.00e-06]


Epoch [782/800] test accuracy: 0.9708 time: 3.25


Sample: 100%|██████████| 100/100 [00:01, 68.86it/s, lr=2.00e-06]


Epoch [783/800] test accuracy: 0.9708 time: 3.48


Sample: 100%|██████████| 100/100 [00:01, 86.45it/s, lr=2.00e-06]


Epoch [784/800] test accuracy: 0.9709 time: 3.67


Sample: 100%|██████████| 100/100 [00:01, 67.25it/s, lr=2.00e-06]


Epoch [785/800] test accuracy: 0.9710 time: 3.16


Sample: 100%|██████████| 100/100 [00:01, 54.85it/s, lr=2.00e-06]


Epoch [786/800] test accuracy: 0.9712 time: 3.41


Sample: 100%|██████████| 100/100 [00:01, 65.82it/s, lr=2.00e-06]


Epoch [787/800] test accuracy: 0.9714 time: 3.79


Sample: 100%|██████████| 100/100 [00:02, 38.41it/s, lr=2.00e-06]


Epoch [788/800] test accuracy: 0.9714 time: 3.03


Sample: 100%|██████████| 100/100 [00:01, 71.17it/s, lr=2.00e-06]


Epoch [789/800] test accuracy: 0.9714 time: 2.97


Sample: 100%|██████████| 100/100 [00:01, 72.54it/s, lr=2.00e-06]


Epoch [790/800] test accuracy: 0.9714 time: 2.90


Sample: 100%|██████████| 100/100 [00:01, 81.89it/s, lr=2.00e-06]


Epoch [791/800] test accuracy: 0.9714 time: 2.83


Sample: 100%|██████████| 100/100 [00:01, 63.70it/s, lr=2.00e-06]


Epoch [792/800] test accuracy: 0.9715 time: 3.77


Sample: 100%|██████████| 100/100 [00:01, 64.32it/s, lr=2.00e-06]


Epoch [793/800] test accuracy: 0.9715 time: 3.66


Sample: 100%|██████████| 100/100 [00:01, 62.44it/s, lr=2.00e-06]


Epoch [794/800] test accuracy: 0.9715 time: 3.78


Sample: 100%|██████████| 100/100 [00:02, 44.04it/s, lr=2.00e-06]


Epoch [795/800] test accuracy: 0.9716 time: 3.38


Sample: 100%|██████████| 100/100 [00:01, 69.88it/s, lr=2.00e-06]


Epoch [796/800] test accuracy: 0.9717 time: 2.81


Sample: 100%|██████████| 100/100 [00:01, 69.07it/s, lr=2.00e-06]


Epoch [797/800] test accuracy: 0.9717 time: 2.99


Sample: 100%|██████████| 100/100 [00:01, 62.86it/s, lr=2.00e-06]


Epoch [798/800] test accuracy: 0.9717 time: 2.91


Sample: 100%|██████████| 100/100 [00:01, 62.86it/s, lr=2.00e-06]


Epoch [799/800] test accuracy: 0.9717 time: 3.14


Sample: 100%|██████████| 100/100 [00:01, 71.15it/s, lr=2.00e-06]


Epoch [800/800] test accuracy: 0.9717 time: 2.91


### Run SGLD

We run SGLD to sample approximately from the posterior distribution.

In [None]:
# Run SGLD on the BNN and track the validation error of the running
# (accumulated over epochs) posterior predictive.
LR = 4e-5
LR_DECAY = False

if LR_DECAY:
    # Step-size schedule lr(t) = A / sqrt(B + t), with A and B chosen so that
    # lr(0) == LR and lr(NUM_EPOCHS) == D * LR.
    D = 0.25 # decay by 1/4
    B = (NUM_EPOCHS * D**2) / (1 - D**2)
    A = LR * np.sqrt(B)

NUM_STEPS = 1

pyro.clear_param_store()

bnn = BNN(28*28, HIDDEN_SIZE, 10)

sgld = SGLD(bnn,
            subsample_positions=[0, 1],
            batch_size=BATCH_SIZE,
            learning_rate=LR,
            num_steps=NUM_STEPS)

# One MCMC run draws len(train_dataset)//BATCH_SIZE samples, i.e. one "epoch".
sgld_mcmc = MCMC(sgld, num_samples=len(train_dataset)//BATCH_SIZE, warmup_steps=0)

sgld_test_errs = []

# full posterior predictive: per-example class vote counts accumulated over
# every post-warm-up epoch (sized from Y_val instead of a hard-coded 10000)
full_predictive = torch.zeros(Y_val.shape[0], 10)

for epoch in range(1, 1+NUM_EPOCHS + WARMUP_EPOCHS):
    sgld_mcmc.run(X_train, Y_train)

    # Strictly greater: the first WARMUP_EPOCHS epochs are discarded, so exactly
    # NUM_EPOCHS epochs are evaluated and the log starts at "Epoch [1/...]".
    # (With `>=` the last warm-up epoch was also evaluated and logged as epoch 0.)
    if epoch > WARMUP_EPOCHS:
        if LR_DECAY:
            LR = A / np.sqrt((B + (epoch-1)))
            sgld_mcmc.kernel.learning_rate = LR

        start = time.time()
        sgld_samples = sgld_mcmc.get_samples()
        predictive = pyro.infer.Predictive(bnn, posterior_samples=sgld_samples)

        with torch.no_grad():
            # Predicted labels for every validation batch, concatenated along
            # dim 1 (presumably (num_samples, num_val) overall -- TODO confirm).
            epoch_predictive = torch.cat(
                [predictive(x)['obs'].to(torch.int64) for x, _ in val_loader],
                dim=1)

            # Each posterior sample casts one vote per validation example.
            for sample in epoch_predictive:
                predictive_one_hot = F.one_hot(sample, num_classes=10)
                if LR_DECAY:
                    # weight the votes by the current step size
                    predictive_one_hot = predictive_one_hot * LR
                full_predictive = full_predictive + predictive_one_hot

            full_y_hat = torch.argmax(full_predictive, dim=1)
            total = Y_val.shape[0]
            correct = int((full_y_hat == Y_val).sum())

        end = time.time()

        sgld_test_errs.append(1.0 - correct/total)

        print("Epoch [{}/{}] test accuracy: {:.4f} time: {:.2f}".format(epoch-WARMUP_EPOCHS, NUM_EPOCHS, correct/total, end - start))

        # Epoch [800/800] test accuracy: 0.9563 time: 4.31

# Save the errors to a file (create the results directory first so a long run
# is not lost to a FileNotFoundError)
os.makedirs(os.path.dirname(RESULTS_SGLD), exist_ok=True)
with open(RESULTS_SGLD, "w") as f:
    json.dump(sgld_test_errs, f)

### Run SGD

We run SGD to optimise the weights of the BNN and use the most recent sample as a point estimate of the "best" parameters.

In [None]:
# Run plain SGD on the BNN and track the validation error of the most recent
# iterate (a point estimate) after every epoch.
LR = 1e-5
WEIGHT_DECAY=0.0
WITH_MOMENTUM=False
REGULARIZATION_TERM=1.

pyro.clear_param_store()

bnn = BNN(28*28, HIDDEN_SIZE, 10, prec=REGULARIZATION_TERM)

sgd = SGD(bnn,
          subsample_positions=[0, 1],
          batch_size=BATCH_SIZE,
          learning_rate=LR,
          weight_decay=WEIGHT_DECAY,
          with_momentum=WITH_MOMENTUM)

# One MCMC run performs len(train_dataset)//BATCH_SIZE updates, i.e. one "epoch".
sgd_mcmc = MCMC(sgd, num_samples=len(train_dataset)//BATCH_SIZE, warmup_steps=0)

sgd_test_errs = []

for epoch in range(1, 1+NUM_EPOCHS+WARMUP_EPOCHS):
    sgd_mcmc.run(X_train, Y_train)

    # Strictly greater: the first WARMUP_EPOCHS epochs are discarded, so exactly
    # NUM_EPOCHS epochs are evaluated and the log starts at "Epoch [1/...]".
    # (With `>=` the last warm-up epoch was also evaluated and logged as epoch 0.)
    if epoch > WARMUP_EPOCHS:

        sgd_samples = sgd_mcmc.get_samples()
        # Keep only the last sample of every site, with a leading sample
        # dimension of size 1, as the point estimate.
        point_estimate = {site : site_samples[-1:] for site, site_samples in sgd_samples.items()}
        predictive = pyro.infer.Predictive(bnn, posterior_samples=point_estimate)
        start = time.time()

        with torch.no_grad():
            total = 0
            correct = 0
            for x, y in val_loader:
                batch_predictive = predictive(x)['obs']
                # mode over dim 0 (the sample dimension; only one sample here)
                batch_y_hat = batch_predictive.mode(0)[0]
                total += y.shape[0]
                correct += int((batch_y_hat == y).sum())

        end = time.time()

        sgd_test_errs.append(1.0 - correct/total)

        print("Epoch [{}/{}] test accuracy: {:.4f} time: {:.2f}".format(epoch-WARMUP_EPOCHS, NUM_EPOCHS, correct/total, end - start))

        # Epoch [800/800] test accuracy: 0.9565 time: 0.34

# Save the errors to a file (create the results directory first so a long run
# is not lost to a FileNotFoundError)
os.makedirs(os.path.dirname(RESULTS_SGD), exist_ok=True)
with open(RESULTS_SGD, "w") as f:
    json.dump(sgd_test_errs, f)

### Run SGD with momentum

We run SGD with momentum to optimise the weights of the BNN and use the most recent sample as a point estimate of the "best" parameters.

In [None]:
# Run SGD with momentum on the BNN and track the validation error of the most
# recent iterate (a point estimate) after every epoch.
LR = 1e-6
WEIGHT_DECAY=0.0
WITH_MOMENTUM=True
MOMENTUM_DECAY=0.01
REGULARIZATION_TERM=1.

pyro.clear_param_store()

bnn = BNN(28*28, HIDDEN_SIZE, 10, prec=REGULARIZATION_TERM)

sgdmom = SGD(bnn,
             subsample_positions=[0, 1],
             batch_size=BATCH_SIZE,
             learning_rate=LR,
             weight_decay=WEIGHT_DECAY,
             with_momentum=WITH_MOMENTUM,
             momentum_decay=MOMENTUM_DECAY)

# One MCMC run performs len(train_dataset)//BATCH_SIZE updates, i.e. one "epoch".
sgdmom_mcmc = MCMC(sgdmom, num_samples=len(train_dataset)//BATCH_SIZE, warmup_steps=0)

sgdmom_test_errs = []

for epoch in range(1, 1+NUM_EPOCHS+WARMUP_EPOCHS):
    sgdmom_mcmc.run(X_train, Y_train)

    # Strictly greater: the first WARMUP_EPOCHS epochs are discarded, so exactly
    # NUM_EPOCHS epochs are evaluated and the log starts at "Epoch [1/...]".
    # (With `>=` the last warm-up epoch was also evaluated and logged as epoch 0.)
    if epoch > WARMUP_EPOCHS:

        sgdmom_samples = sgdmom_mcmc.get_samples()
        # Keep only the last sample of every site, with a leading sample
        # dimension of size 1, as the point estimate.
        point_estimate = {site : site_samples[-1:] for site, site_samples in sgdmom_samples.items()}
        predictive = pyro.infer.Predictive(bnn, posterior_samples=point_estimate)
        start = time.time()

        with torch.no_grad():
            total = 0
            correct = 0
            for x, y in val_loader:
                batch_predictive = predictive(x)['obs']
                # mode over dim 0 (the sample dimension; only one sample here)
                batch_y_hat = batch_predictive.mode(0)[0]
                total += y.shape[0]
                correct += int((batch_y_hat == y).sum())

        end = time.time()

        sgdmom_test_errs.append(1.0 - correct/total)

        print("Epoch [{}/{}] test accuracy: {:.4f} time: {:.2f}".format(epoch-WARMUP_EPOCHS, NUM_EPOCHS, correct/total, end - start))

        # Epoch [800/800] test accuracy: 0.9663 time: 0.26

# Save the errors to a file (create the results directory first so a long run
# is not lost to a FileNotFoundError)
os.makedirs(os.path.dirname(RESULTS_SGDMOM), exist_ok=True)
with open(RESULTS_SGDMOM, "w") as f:
    json.dump(sgdmom_test_errs, f)

### Plot the convergence dynamics

In [None]:
# Plot the per-epoch test error of every updater on a shared axis.
sns.set_style("dark")

# Load the previous results from the files
with open(RESULTS_SGHMC, "r") as f:
    sghmc_test_errs = json.load(f)
with open(RESULTS_SGLD, "r") as f:
    sgld_test_errs = json.load(f)
with open(RESULTS_SGD, "r") as f:
    sgd_test_errs = json.load(f)
with open(RESULTS_SGDMOM, "r") as f:
    sgdmom_test_errs = json.load(f)

sghmc_test_errs = np.array(sghmc_test_errs)
sgld_test_errs = np.array(sgld_test_errs)
sgd_test_errs = np.array(sgd_test_errs)
sgdmom_test_errs = np.array(sgdmom_test_errs)

err_dict = {'SGHMC' : sghmc_test_errs, 'SGLD' : sgld_test_errs, 'SGD' : sgd_test_errs, 'SGD with momentum' : sgdmom_test_errs}
x = np.arange(1, NUM_EPOCHS+1)
lst = []
for updater, errs in err_dict.items():
    if len(errs) < NUM_EPOCHS:
        raise ValueError("{} has only {} recorded epochs, expected at least {}".format(updater, len(errs), NUM_EPOCHS))
    # A results file may hold an extra leading "epoch 0" entry (runs that also
    # evaluated the last warm-up epoch). Keep the final NUM_EPOCHS entries so
    # entry k is plotted at epoch k and the last epoch is not dropped.
    for epoch_idx, err in zip(x, errs[-NUM_EPOCHS:]):
        lst.append([epoch_idx, updater, err])

df = pd.DataFrame(lst, columns=['iterations', 'updater','test error'])
# pivot() arguments must be passed by keyword on pandas >= 2.0
sns.lineplot(data=df.pivot(index="iterations", columns="updater", values="test error"))
plt.ylabel("test error")
plt.show() #dpi=300

### Stochastic Gradient NUTS 
*Experimental: this does not quite work yet.*

In [None]:
# Run the (experimental) stochastic-gradient NUTS kernel on the BNN and report
# the validation accuracy of the running (accumulated over epochs) posterior
# predictive.
LR = 2e-6
MOMENTUM_DECAY = 0.01
RESAMPLE_EVERY_N = 0
NUM_STEPS = 1

# NOTE(review): this overrides the notebook-wide WARMUP_EPOCHS set in the
# hyperparameter cell; re-run that cell before re-running the earlier samplers.
WARMUP_EPOCHS = 5

pyro.clear_param_store()

bnn = BNN(28*28, HIDDEN_SIZE, 10)

sgnuts = SGNUTS(bnn,
                subsample_positions=[0, 1],
                batch_size=BATCH_SIZE,
                learning_rate=LR,
                momentum_decay=MOMENTUM_DECAY,
                resample_every_n=RESAMPLE_EVERY_N,
                obs_info_noise=False,
                use_multinomial_sampling=True,
                max_tree_depth=10)

# No Pyro-level warm-up (warmup_steps=0); warm-up is handled below by skipping
# the first WARMUP_EPOCHS epochs. One MCMC run is one "epoch".
sgnuts_mcmc = MCMC(sgnuts, num_samples=len(train_dataset)//BATCH_SIZE, warmup_steps=0)

# full posterior predictive: per-example class vote counts accumulated over
# every post-warm-up epoch (sized from Y_val instead of a hard-coded 10000)
full_predictive = torch.zeros(Y_val.shape[0], 10)

for epoch in range(1, 1+NUM_EPOCHS + WARMUP_EPOCHS):
    sgnuts_mcmc.run(X_train, Y_train)

    # Strictly greater: the first WARMUP_EPOCHS epochs are discarded, so exactly
    # NUM_EPOCHS epochs are evaluated and the log starts at "Epoch [1/...]".
    # (With `>=` the last warm-up epoch was also evaluated and logged as epoch 0.)
    if epoch > WARMUP_EPOCHS:

        sgnuts_samples = sgnuts_mcmc.get_samples()
        predictive = pyro.infer.Predictive(bnn, posterior_samples=sgnuts_samples)
        start = time.time()

        with torch.no_grad():
            # Predicted labels for every validation batch, concatenated along
            # dim 1 (presumably (num_samples, num_val) overall -- TODO confirm).
            epoch_predictive = torch.cat(
                [predictive(x)['obs'].to(torch.int64) for x, _ in val_loader],
                dim=1)

            # Each posterior sample casts one vote per validation example.
            for sample in epoch_predictive:
                predictive_one_hot = F.one_hot(sample, num_classes=10)
                full_predictive = full_predictive + predictive_one_hot

            full_y_hat = torch.argmax(full_predictive, dim=1)
            total = Y_val.shape[0]
            correct = int((full_y_hat == Y_val).sum())

        end = time.time()

        # NUM_EPOCHS (the original referenced the undefined name NUM_EPOCH,
        # which raised a NameError on the first evaluated epoch)
        print("Epoch [{}/{}] test accuracy: {:.4f} time: {:.2f}".format(epoch-WARMUP_EPOCHS, NUM_EPOCHS, correct/total, end - start))