## Environment Setup

In [20]:
import sys
import os
import datetime
import ipywidgets as widgets
import torch

print('Python %s on %s' % (sys.version, sys.platform))

# The parent of the current working directory is used as the package root.
# os.getcwd() already returns an absolute path, so no user expansion is needed.
package_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
print(package_path)

# Add the neuralmagic-pytorch package root to sys.path so the package does not
# have to be installed. The membership check keeps the cell idempotent:
# re-running it no longer appends a duplicate entry each time.
if package_path not in sys.path:
    sys.path.append(package_path)

print('Added current package path to sys.path')
print('Be sure to install from requirements.txt and pytorch separately')


Python 3.7.4 (default, Aug 13 2019, 20:35:49) 
[GCC 7.3.0] on linux
/home/dan/oldcode/neuralmagicml-pytorch
Added current package path to sys.path
Be sure to install from requirements.txt and pytorch separately


## Dataset

In [21]:
print('\nEnter the local path where the dataset can be found')

# Default to a home-relative location rather than one user's absolute path;
# the '~' is resolved with os.path.expanduser when the value is read later on.
dataset_text = widgets.Text(
    value='~/Datasets/imagenette/',
    placeholder='Enter local path to dataset',
    description='Dataset Path',
)
display(dataset_text)

print('\nChoose the batch size to run through the model during train and test runs')
print('(be sure to press enter if/after inputting manually)')
train_batch_size_slider = widgets.IntSlider(
    value=256, min=1, max=1024, step=1, description='Train Batch Size:'
)
display(train_batch_size_slider)

# Without CUDA the test batch size defaults to 1 instead of 256.
test_batch_size_slider = widgets.IntSlider(
    value=256 if torch.cuda.is_available() else 1,
    min=1, max=1024, step=1, description='Test Batch Size:'
)
display(test_batch_size_slider)



Enter the local path where the dataset can be found


Text(value='/home/dan/Datasets/imagenette/', description='Dataset Path', placeholder='Enter local path to data…


Choose the batch size to run through the model during train and test runs
(be sure to press enter if/after inputting manually)


IntSlider(value=256, description='Train Batch Size:', max=1024, min=1)

IntSlider(value=256, description='Test Batch Size:', max=1024, min=1)

In [22]:
from neuralmagicML.datasets import ImagenetteDataset, ImageNetDataset, EarlyStopDataset
from torch.utils.data import Dataset, DataLoader

dataset_root = os.path.abspath(os.path.expanduser(dataset_text.value))
print('\nLoading dataset from {}'.format(dataset_root))

# The root has to be an existing directory; a regular file at that path is
# rejected here as well instead of failing later inside the dataset class.
if not os.path.isdir(dataset_root):
    raise FileNotFoundError('Folder must exist for dataset at {}'.format(dataset_root))

train_batch_size = train_batch_size_slider.value
test_batch_size = test_batch_size_slider.value

print('\nUsing train batch size of {} and test batch size of {}\n'
      .format(train_batch_size, test_batch_size))

# Number of worker processes shared by every DataLoader in this notebook.
loader_num_workers = 4

# Training split: random transforms enabled, shuffled every epoch.
train_dataset = ImagenetteDataset(dataset_root, train=True, rand_trans=True)
train_data_loader = DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True,
                               num_workers=loader_num_workers)
print('train dataset created: \n{}\n'.format(train_dataset))

# Validation split: no random transforms, fixed order.
val_dataset = ImagenetteDataset(dataset_root, train=False, rand_trans=False)
val_data_loader = DataLoader(val_dataset, batch_size=test_batch_size, shuffle=False,
                             num_workers=loader_num_workers)
print('validation test dataset created: \n{}\n'.format(val_dataset))

# Subset of the training split (without random transforms) used to measure
# training metrics: as many samples as the validation set when that has more
# than 1000 items, otherwise 10% of the training set.
if len(val_dataset) > 1000:
    train_test_size = len(val_dataset)
else:
    train_test_size = round(0.1 * len(train_dataset))

train_test_dataset = EarlyStopDataset(ImagenetteDataset(dataset_root, train=True, rand_trans=False),
                                      early_stop=train_test_size)
train_test_data_loader = DataLoader(train_test_dataset, batch_size=test_batch_size, shuffle=False,
                                    num_workers=loader_num_workers)
print('train test dataset created: \n{}\n'.format(train_test_dataset))



Loading dataset from /home/dan/Datasets/imagenette

Using train batch size of 256 and test batch size of 256

already downloaded imagenette of size ImagenetteSize.s160
train dataset created: 
Dataset ImagenetteDataset
    Number of datapoints: 12894
    Root location: /home/dan/Datasets/imagenette/imagenette-160/train

already downloaded imagenette of size ImagenetteSize.s160
validation test dataset created: 
Dataset ImagenetteDataset
    Number of datapoints: 500
    Root location: /home/dan/Datasets/imagenette/imagenette-160/val

already downloaded imagenette of size ImagenetteSize.s160
train test dataset created: 
Dataset ImagenetteDataset
    Number of datapoints: 1289
    Root location: /home/dan/Datasets/imagenette/imagenette-160/train



## Model Setup

In [23]:
from neuralmagicML.models import resnet18

# 10 output classes when training on ImagenetteDataset, 1000 otherwise.
if isinstance(train_dataset, ImagenetteDataset):
    num_classes = 10
else:
    num_classes = 1000

# Alternative kept for reference:
# pretrained = 'imagenette/dense' if isinstance(train_dataset, ImagenetteDataset) else True
pretrained = True
model = resnet18(num_classes=num_classes, pretrained=pretrained)

# Default model id: "<ClassName>-<timestamp>" with every '-' and ':' of the
# timestamp turned into '.'.
model_class_name = model.__class__.__name__
created_at = datetime.datetime.today().strftime('%Y-%m-%d-%H:%M:%S')
created_at = created_at.replace('-', '.').replace(':', '.')
model_id = '{}-{}'.format(model_class_name, created_at)
print('Created model {}'.format(model.__class__.__name__))

print('\nSet the model id')
model_id_text = widgets.Text(value=model_id)
display(model_id_text)

print('\nChoose the device to run on')
# 'cuda' is only offered (and listed first) when it is available.
if torch.cuda.is_available():
    device_options = ['cuda', 'cpu']
else:
    device_options = ['cpu']

device_choice = widgets.ToggleButtons(options=device_options, description='Device')
display(device_choice)


Created model ResNet

Set the model id


Text(value='ResNet-2019.08.23.16.49.21')


Choose the device to run on


ToggleButtons(description='Device', options=('cuda', 'cpu'), value='cuda')

## Hyperparams

In [24]:
# Read back the selections made in the model setup widgets.
model_id = model_id_text.value
device = device_choice.value

# Move the model to the chosen device before any optimizer is built on it.
model = model.to(device)

print('\nchoose which tensors to regularize: the inputs or outputs to each conv layer')
reg_tensor_choice = widgets.ToggleButtons(description='reg tens', options=['inp', 'out'])
display(reg_tensor_choice)

print('\nchoose which regularization function to use: l1, l2, relu for the tensors')
reg_func_choice = widgets.ToggleButtons(options=['l1', 'l2', 'relu'])
display(reg_func_choice)

print('\nchoose the alpha value to use for regularization of the activation values')
# Log-scale slider: min/max are exponents, value is the actual alpha.
alpha_slider = widgets.FloatLogSlider(
    description='alpha',
    value=0.000018,
    min=-9,
    max=-1,
    step=0.0001,
)
display(alpha_slider)



choose which tensors to regularize: the inputs or outputs to each conv layer


ToggleButtons(description='reg tens', options=('inp', 'out'), value='inp')


choose which regularization function to use: l1, l2, relu for the tensors


ToggleButtons(options=('l1', 'l2', 'relu'), value='l1')


choose the alpha value to use for regularization of the activation values


FloatLogSlider(value=1.8e-05, description='alpha', max=-1.0, min=-9.0, step=0.0001)

In [25]:
from neuralmagicML.utils import lr_analysis, lr_analysis_figure, CrossEntropyLossWrapper
%matplotlib inline
import matplotlib.pyplot as plt

# --- optimizer definitions -------------------------------------------------
momentum = 0.9
weight_decay = 1e-4
# ---------------------------------------------------------------------------

# Optional learning rate analysis, left disabled:
# print('\nrunning learning rate analysis...')
# batches_per_sample = round(500 / train_batch_size)  # make sure we have enough sample points per learning rate
# analysis = lr_analysis(model, device, train_data_loader, CrossEntropyLossWrapper(), batches_per_sample,
#                        init_lr=1e-7, final_lr=1e0, sgd_momentum=momentum, sgd_weight_decay=weight_decay)
# lr_analysis_figure(analysis)
# plt.show()

print('\nselect the initial learning rate')
# Log-scale slider: min/max are exponents, value is the actual learning rate.
lr_slider = widgets.FloatLogSlider(
    description='init lr',
    value=0.005,
    min=-7,
    max=1,
    step=0.0001,
)
display(lr_slider)

print('\nselect the number of epochs to train for')
finalize_epochs_text = widgets.IntText(description='num epochs', value=90)
display(finalize_epochs_text)

print('\nselect the final learning rate')
lr_final_slider = widgets.FloatLogSlider(
    description='final lr',
    value=0.0001,
    min=-7,
    max=1,
    step=0.0001,
)
display(lr_final_slider)

print('\nselect the number of exponential updates to apply to the learning rate over the epochs')
# Despite the name this is an integer text box, not a slider.
lr_updates_slider = widgets.IntText(description='lr updates', value=2)
display(lr_updates_slider)


select the initial learning rate


FloatLogSlider(value=0.005, description='init lr', max=1.0, min=-7.0, step=0.0001)


select the number of epochs to train for


IntText(value=90, description='num epochs')


select the final learning rate


FloatLogSlider(value=0.0001, description='final lr', max=1.0, min=-7.0, step=0.0001)


select the number of exponential updates to apply to the learning rate over the epochs


IntText(value=2, description='lr updates')

## Setup

In [26]:
from neuralmagicML.sparsity import (
    ASAnalyzerLayer, ASAnalyzerModule, ASRegModifier, LearningRateModifier,
    ScheduledModifierManager, ScheduledOptimizer
)
from neuralmagicML.utils import TopKAccuracy
from tensorboardX import SummaryWriter
from torch.nn.modules.conv import _ConvNd
from torch.nn import Linear
from torch.nn import ReLU
from torch import optim

reg_tens = reg_tensor_choice.value
reg_func = reg_func_choice.value
alpha = alpha_slider.value

print('using AS reg params of reg_tens:{} reg_func:{} and alpha:{}'
     .format(reg_tens, reg_func, alpha))

lr_init = lr_slider.value
lr_final = lr_final_slider.value
epochs = finalize_epochs_text.value
lr_updates = lr_updates_slider.value

# Zero updates would divide by zero below, and a negative count would give
# gamma > 1 (a growing learning rate), so reject both with a clear message.
if lr_updates < 1:
    raise ValueError('lr updates must be at least 1, given {}'.format(lr_updates))

# Split training into (lr_updates + 1) equally long phases and pick gamma so
# that lr_init * gamma ** lr_updates == lr_final.
lr_update_freq = epochs / (lr_updates + 1.0)
lr_gamma = (lr_final / lr_init) ** (1 / lr_updates)
print('using lr params of init:{} final:{} epochs:{} updates:{} update_freq:{} gamma:{}'
      .format(lr_init, lr_final, epochs, lr_updates, lr_update_freq, lr_gamma))

lr_modifier = LearningRateModifier(lr_class='ExponentialLR', lr_kwargs={'gamma': lr_gamma},
                                   start_epoch=0.0, end_epoch=epochs,
                                   update_frequency=lr_update_freq)

# Names of every ReLU module in the model; these are the layers handed to the
# activation sparsity regularizer.
modify_layers = [name for name, mod in model.named_modules() if isinstance(mod, ReLU)]

# Skip the first ReLU when regularizing inputs and the last one when
# regularizing outputs.
# NOTE(review): presumably these correspond to the network input / final
# output which should not be regularized -- confirm against ASRegModifier.
if reg_tens == 'inp':
    modify_layers = modify_layers[1:]
elif reg_tens == 'out':
    modify_layers = modify_layers[:-1]

as_reg_modifier = ASRegModifier(modify_layers, alpha, reg_func, reg_tens, start_epoch=0.0)

modifier_manager = ScheduledModifierManager([lr_modifier, as_reg_modifier])
print('\nCreated ScheduledModifierManager with exponential lr_modifier with gamma {} and AS reg modifier'
      .format(lr_gamma))

# Use the lr_init value read above so the optimizer and the printed schedule
# are guaranteed to agree.
optimizer = optim.SGD(model.parameters(), lr_init, momentum=momentum,
                      weight_decay=weight_decay, nesterov=True)
# NOTE(review): steps_per_epoch is given the number of samples, while
# optimizer.step() is called once per batch in train_epoch -- confirm which
# unit ScheduledOptimizer expects (len(train_data_loader) may be intended).
optimizer = ScheduledOptimizer(optimizer, model, modifier_manager, steps_per_epoch=len(train_dataset))
print('\nCreated scheudled optimizer with initial lr: {}, momentum: {}, weight decay: {}'
      .format(lr_init, momentum, weight_decay))

# Cross entropy loss that additionally reports top-1 accuracy under 'top1acc'.
loss = CrossEntropyLossWrapper(extras={'top1acc': TopKAccuracy(1)})
print('\nCreated loss wrapper\n{}'.format(loss))

logs_dir = os.path.abspath(os.path.expanduser(os.path.join('.', 'model_training_logs', model_id)))

# exist_ok avoids the check-then-create race and keeps the cell re-runnable.
os.makedirs(logs_dir, exist_ok=True)

writer = SummaryWriter(logdir=logs_dir, comment='imagenette training')
print('\nCreated summary writer logging to \n{}'.format(logs_dir))


using AS reg params of reg_tens:out reg_func:l1 and alpha:1.8e-05
using lr params of init:0.005 final:0.0001 epochs:90 updates:2 update_freq:30.0 gamma:0.1414213562373095

Created ScheduledModifierManager with exponential lr_modifier with gamma 0.1414213562373095 and AS reg modifier

Created scheudled optimizer with initial lr: 0.005, momentum: 0.9, weight decay: 0.0001

Created loss wrapper
CrossEntropyLossWrapper(Loss: cross_entropy; Extras: TopKAccuracy)

Created summary writer logging to 
/home/dan/oldcode/neuralmagicml-pytorch/notebooks/model_training_logs/ResNet-2019.08.23.16.49.21-relu-out


## Training

In [None]:
from tqdm import tqdm
import math
from neuralmagicML.models import save_model


def test_epoch(model, data_loader, loss, device, epoch):
    model.eval()
    results = {}
    
    with torch.no_grad():
        for batch, (*x_feature, y_lab) in enumerate(tqdm(data_loader)):
            y_lab = y_lab.to(device)
            x_feature = tuple([dat.to(device) for dat in x_feature])
            batch_size = y_lab.shape[0]
            y_pred = model(*x_feature)
            losses = loss(x_feature, y_lab, y_pred)
            
            for key, val in losses.items():
                if key not in results:
                    results[key] = []
                result = val.detach_().cpu()
                result = result.repeat(batch_size)
                results[key].append(result)
                
    return results

def test_epoch_writer(model, data_loader, loss, device, epoch, writer, key):
    losses = test_epoch(model, data_loader, loss, device, epoch)
    
    for loss, values in losses.items():
        val = torch.mean(torch.cat(values))
        writer.add_scalar(key.format(loss), val, epoch)
        print('{}: {}'.format(loss, val))
        
def test_as_values(as_model, data_loader, device, epoch, writer, sample_size=1000):
    as_model.eval()
    as_model.clear_layers()
    as_model.enable_layers()
    sample_count = 0
    
    with torch.no_grad():
        for batch, (*x_feature, y_lab) in enumerate(tqdm(data_loader)):
            y_lab = y_lab.to(device)
            x_feature = tuple([dat.to(device) for dat in x_feature])
            batch_size = y_lab.shape[0]
            y_pred = model(*x_feature)
            sample_count += batch_size
            
            if sample_count >= sample_size:
                break
        
    as_model.disable_layers()
    
    for name, layer in as_model.layers.items():
        writer.add_scalar('Act Sparsity/{}'.format(name), layer.inputs_sparsity_mean, epoch)
    
    as_model.clear_layers()
            

def train_epoch(model, data_loader, optimizer, loss, device, epoch, writer):
    """
    Train the model for one pass over the data loader and log per-batch values.

    :param model: the module to train; it is switched to train mode
    :param data_loader: sized iterable of batches shaped (*features, labels)
    :param optimizer: optimizer exposing zero_grad, loss_update, step and
        learning_rate
    :param loss: callable taking (features, labels, predictions) and returning
        a dict of tensors that contains the key 'loss'
    :param device: device the batch tensors are moved to
    :param epoch: index of the current epoch, used to compute the logging step
    :param writer: object exposing add_scalar(tag, value, step)
    """
    model.train()
    init_batch_size = None
    batches_per_epoch = len(data_loader)

    for batch, (*x_feature, y_lab) in enumerate(tqdm(data_loader)):
        y_lab = y_lab.to(device)
        x_feature = tuple(dat.to(device) for dat in x_feature)
        batch_size = y_lab.shape[0]
        if init_batch_size is None:
            # the first batch size is used for every step so a differently
            # sized later batch does not change the step spacing
            init_batch_size = batch_size

        optimizer.zero_grad()
        y_pred = model(*x_feature)
        losses = loss(x_feature, y_lab, y_pred)
        # update loss with the AS modifier regularization
        losses['loss'] = optimizer.loss_update(losses['loss'])
        losses['loss'].backward()
        optimizer.step(closure=None)

        # step expressed as init_batch_size times the global batch index
        step_count = init_batch_size * (epoch * batches_per_epoch + batch)
        for _loss, _value in losses.items():
            writer.add_scalar('Train/{}'.format(_loss), _value.item(), step_count)
        # logged once per step, outside the loop over the loss entries
        writer.add_scalar('Train/Learning Rate', optimizer.learning_rate, step_count)
            
print('Training model...')

# Track input sparsity for every convolution and linear layer of the model.
analyzer_model = ASAnalyzerModule(
    model, [ASAnalyzerLayer(name, division=0, track_inputs_sparsity=True)
            for name, mod in model.named_modules() if isinstance(mod, (_ConvNd, Linear))]
)
print('\nCreated AS analyzer module')

# Baseline measurements are logged at epoch -1, before any training step.
print('Running initial validation values for later comparison')
test_epoch_writer(model, val_data_loader, loss, device, -1, writer, 'Test/Validation/{}')
test_as_values(analyzer_model, val_data_loader, device, -1, writer)

# Keeps `epoch` defined for save_model even if the loop below runs zero times.
epoch = -1

for epoch in tqdm(range(math.ceil(modifier_manager.max_epochs))):
    print('Starting epoch {}'.format(epoch))
    optimizer.epoch_start()
    train_epoch(model, train_data_loader, optimizer, loss, device, epoch, writer)

    print('Completed training for epoch {}, testing validation dataset'.format(epoch))
    test_epoch_writer(model, val_data_loader, loss, device, epoch, writer, 'Test/Validation/{}')

    print('Completed testing validation dataset for epoch {}, testing training dataset'.format(epoch))
    test_epoch_writer(model, train_test_data_loader, loss, device, epoch, writer, 'Test/Training/{}')

    # this message follows the training-subset test, not the validation test
    print('Completed testing training dataset for epoch {}, testing activation sparsity'.format(epoch))
    test_as_values(analyzer_model, val_data_loader, device, epoch, writer)

    optimizer.epoch_end()

save_path = os.path.abspath(os.path.expanduser(os.path.join('.', '{}.pth'.format(model_id))))
print('Finished training, saving model to {}'.format(save_path))
save_model(save_path, model, optimizer, epoch)
print('Saved model')










  0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A

Training model...

Created AS analyzer module
Running initial validation values for later comparison










 50%|█████     | 1/2 [00:00<00:00,  1.24it/s][A[A[A[A[A[A[A[A







100%|██████████| 2/2 [00:00<00:00,  2.04it/s][A[A[A[A[A[A[A[A







  0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A

loss: 2.3126208782196045
top1acc: 12.200007438659668










 50%|█████     | 1/2 [00:00<00:00,  1.40it/s][A[A[A[A[A[A[A[A







100%|██████████| 2/2 [00:00<00:00,  1.86it/s][A[A[A[A[A[A[A[A







  0%|          | 0/90 [00:00<?, ?it/s][A[A[A[A[A[A[A[A








  0%|          | 0/51 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

Starting epoch 0











  2%|▏         | 1/51 [00:00<00:48,  1.03it/s][A[A[A[A[A[A[A[A[A








  4%|▍         | 2/51 [00:01<00:38,  1.26it/s][A[A[A[A[A[A[A[A[A








  6%|▌         | 3/51 [00:01<00:30,  1.57it/s][A[A[A[A[A[A[A[A[A








  8%|▊         | 4/51 [00:01<00:25,  1.87it/s][A[A[A[A[A[A[A[A[A








 10%|▉         | 5/51 [00:02<00:22,  2.06it/s][A[A[A[A[A[A[A[A[A








 12%|█▏        | 6/51 [00:02<00:18,  2.39it/s][A[A[A[A[A[A[A[A[A








 14%|█▎        | 7/51 [00:02<00:16,  2.61it/s][A[A[A[A[A[A[A[A[A








 16%|█▌        | 8/51 [00:03<00:16,  2.64it/s][A[A[A[A[A[A[A[A[A








 18%|█▊        | 9/51 [00:03<00:14,  2.89it/s][A[A[A[A[A[A[A[A[A








 20%|█▉        | 10/51 [00:03<00:13,  3.02it/s][A[A[A[A[A[A[A[A[A








 22%|██▏       | 11/51 [00:04<00:13,  2.93it/s][A[A[A[A[A[A[A[A[A








 24%|██▎       | 12/51 [00:04<00:12,  3.02it/s][A[A[A[A[A[A[A[A[A








 25%

Completed training for epoch 0, testing validation dataset











 50%|█████     | 1/2 [00:01<00:01,  1.05s/it][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:01<00:00,  1.29it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.10458210110664368
top1acc: 96.60005187988281
Completed testing validation dataset for epoch 0, testing training dataset











 17%|█▋        | 1/6 [00:00<00:03,  1.25it/s][A[A[A[A[A[A[A[A[A








 50%|█████     | 3/6 [00:00<00:01,  1.70it/s][A[A[A[A[A[A[A[A[A








 67%|██████▋   | 4/6 [00:01<00:00,  2.23it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 6/6 [00:01<00:00,  4.72it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.10183872282505035
top1acc: 96.66407775878906
Completed testing validation dataset for epoch 0, testing activation sparsity











 50%|█████     | 1/2 [00:00<00:00,  1.36it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  1.80it/s][A[A[A[A[A[A[A[A[A







  1%|          | 1/90 [00:20<30:11, 20.35s/it][A[A[A[A[A[A[A[A








  0%|          | 0/51 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

Starting epoch 1











  2%|▏         | 1/51 [00:01<00:56,  1.12s/it][A[A[A[A[A[A[A[A[A








  4%|▍         | 2/51 [00:01<00:42,  1.16it/s][A[A[A[A[A[A[A[A[A








  6%|▌         | 3/51 [00:01<00:33,  1.43it/s][A[A[A[A[A[A[A[A[A








  8%|▊         | 4/51 [00:02<00:28,  1.68it/s][A[A[A[A[A[A[A[A[A








 10%|▉         | 5/51 [00:02<00:23,  1.94it/s][A[A[A[A[A[A[A[A[A








 12%|█▏        | 6/51 [00:02<00:21,  2.12it/s][A[A[A[A[A[A[A[A[A








 14%|█▎        | 7/51 [00:03<00:18,  2.41it/s][A[A[A[A[A[A[A[A[A








 16%|█▌        | 8/51 [00:03<00:16,  2.56it/s][A[A[A[A[A[A[A[A[A








 18%|█▊        | 9/51 [00:03<00:16,  2.59it/s][A[A[A[A[A[A[A[A[A








 20%|█▉        | 10/51 [00:04<00:14,  2.85it/s][A[A[A[A[A[A[A[A[A








 22%|██▏       | 11/51 [00:04<00:13,  2.89it/s][A[A[A[A[A[A[A[A[A








 24%|██▎       | 12/51 [00:04<00:13,  2.92it/s][A[A[A[A[A[A[A[A[A








 25%

Completed training for epoch 1, testing validation dataset











 50%|█████     | 1/2 [00:00<00:00,  1.31it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  2.22it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.08283724635839462
top1acc: 97.79991912841797
Completed testing validation dataset for epoch 1, testing training dataset











 17%|█▋        | 1/6 [00:00<00:03,  1.36it/s][A[A[A[A[A[A[A[A[A








 50%|█████     | 3/6 [00:00<00:01,  1.83it/s][A[A[A[A[A[A[A[A[A








 67%|██████▋   | 4/6 [00:01<00:00,  2.37it/s][A[A[A[A[A[A[A[A[A








 83%|████████▎ | 5/6 [00:01<00:00,  2.99it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 6/6 [00:01<00:00,  4.55it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.07788778841495514
top1acc: 97.75019073486328
Completed testing validation dataset for epoch 1, testing activation sparsity











 50%|█████     | 1/2 [00:00<00:00,  1.25it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  1.69it/s][A[A[A[A[A[A[A[A[A







  2%|▏         | 2/90 [00:40<29:53, 20.38s/it][A[A[A[A[A[A[A[A








  0%|          | 0/51 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

Starting epoch 2











  2%|▏         | 1/51 [00:01<00:56,  1.13s/it][A[A[A[A[A[A[A[A[A








  4%|▍         | 2/51 [00:01<00:44,  1.11it/s][A[A[A[A[A[A[A[A[A








  6%|▌         | 3/51 [00:01<00:34,  1.40it/s][A[A[A[A[A[A[A[A[A








  8%|▊         | 4/51 [00:02<00:28,  1.67it/s][A[A[A[A[A[A[A[A[A








 10%|▉         | 5/51 [00:02<00:24,  1.89it/s][A[A[A[A[A[A[A[A[A








 12%|█▏        | 6/51 [00:02<00:20,  2.15it/s][A[A[A[A[A[A[A[A[A








 14%|█▎        | 7/51 [00:03<00:19,  2.29it/s][A[A[A[A[A[A[A[A[A








 16%|█▌        | 8/51 [00:03<00:16,  2.56it/s][A[A[A[A[A[A[A[A[A








 18%|█▊        | 9/51 [00:03<00:15,  2.75it/s][A[A[A[A[A[A[A[A[A








 20%|█▉        | 10/51 [00:04<00:15,  2.72it/s][A[A[A[A[A[A[A[A[A








 22%|██▏       | 11/51 [00:04<00:13,  2.94it/s][A[A[A[A[A[A[A[A[A








 24%|██▎       | 12/51 [00:04<00:12,  3.08it/s][A[A[A[A[A[A[A[A[A








 25%

Completed training for epoch 2, testing validation dataset











 50%|█████     | 1/2 [00:00<00:00,  1.29it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  2.14it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.07420095801353455
top1acc: 97.39993286132812
Completed testing validation dataset for epoch 2, testing training dataset











 17%|█▋        | 1/6 [00:00<00:03,  1.44it/s][A[A[A[A[A[A[A[A[A








 50%|█████     | 3/6 [00:00<00:01,  1.93it/s][A[A[A[A[A[A[A[A[A








 67%|██████▋   | 4/6 [00:01<00:00,  2.49it/s][A[A[A[A[A[A[A[A[A








 83%|████████▎ | 5/6 [00:01<00:00,  3.12it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 6/6 [00:01<00:00,  4.81it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.06416139751672745
top1acc: 98.06050872802734
Completed testing validation dataset for epoch 2, testing activation sparsity











 50%|█████     | 1/2 [00:00<00:00,  1.27it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  1.70it/s][A[A[A[A[A[A[A[A[A







  3%|▎         | 3/90 [01:00<29:27, 20.32s/it][A[A[A[A[A[A[A[A








  0%|          | 0/51 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

Starting epoch 3











  2%|▏         | 1/51 [00:01<00:54,  1.10s/it][A[A[A[A[A[A[A[A[A








  4%|▍         | 2/51 [00:01<00:43,  1.14it/s][A[A[A[A[A[A[A[A[A








  6%|▌         | 3/51 [00:01<00:33,  1.44it/s][A[A[A[A[A[A[A[A[A








  8%|▊         | 4/51 [00:02<00:27,  1.70it/s][A[A[A[A[A[A[A[A[A








 10%|▉         | 5/51 [00:02<00:24,  1.91it/s][A[A[A[A[A[A[A[A[A








 12%|█▏        | 6/51 [00:02<00:20,  2.22it/s][A[A[A[A[A[A[A[A[A








 14%|█▎        | 7/51 [00:03<00:18,  2.42it/s][A[A[A[A[A[A[A[A[A








 16%|█▌        | 8/51 [00:03<00:16,  2.59it/s][A[A[A[A[A[A[A[A[A








 18%|█▊        | 9/51 [00:03<00:15,  2.69it/s][A[A[A[A[A[A[A[A[A








 20%|█▉        | 10/51 [00:04<00:15,  2.68it/s][A[A[A[A[A[A[A[A[A








 22%|██▏       | 11/51 [00:04<00:13,  2.91it/s][A[A[A[A[A[A[A[A[A








 24%|██▎       | 12/51 [00:04<00:12,  3.04it/s][A[A[A[A[A[A[A[A[A








 25%

Completed training for epoch 3, testing validation dataset











 50%|█████     | 1/2 [00:00<00:00,  1.28it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  1.71it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.07224398106336594
top1acc: 97.00004577636719
Completed testing validation dataset for epoch 3, testing training dataset











 17%|█▋        | 1/6 [00:00<00:03,  1.31it/s][A[A[A[A[A[A[A[A[A








 33%|███▎      | 2/6 [00:00<00:02,  1.77it/s][A[A[A[A[A[A[A[A[A








 50%|█████     | 3/6 [00:00<00:01,  2.30it/s][A[A[A[A[A[A[A[A[A








 83%|████████▎ | 5/6 [00:01<00:00,  2.97it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 6/6 [00:01<00:00,  4.52it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.04295782372355461
top1acc: 99.06904602050781
Completed testing validation dataset for epoch 3, testing activation sparsity











 50%|█████     | 1/2 [00:00<00:00,  1.38it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  1.81it/s][A[A[A[A[A[A[A[A[A







  4%|▍         | 4/90 [01:21<29:12, 20.37s/it][A[A[A[A[A[A[A[A








  0%|          | 0/51 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

Starting epoch 4











  2%|▏         | 1/51 [00:01<00:58,  1.16s/it][A[A[A[A[A[A[A[A[A








  4%|▍         | 2/51 [00:01<00:44,  1.10it/s][A[A[A[A[A[A[A[A[A








  6%|▌         | 3/51 [00:01<00:35,  1.34it/s][A[A[A[A[A[A[A[A[A








  8%|▊         | 4/51 [00:02<00:28,  1.66it/s][A[A[A[A[A[A[A[A[A








 10%|▉         | 5/51 [00:02<00:23,  1.93it/s][A[A[A[A[A[A[A[A[A








 12%|█▏        | 6/51 [00:02<00:21,  2.11it/s][A[A[A[A[A[A[A[A[A








 14%|█▎        | 7/51 [00:03<00:18,  2.43it/s][A[A[A[A[A[A[A[A[A








 16%|█▌        | 8/51 [00:03<00:16,  2.57it/s][A[A[A[A[A[A[A[A[A








 18%|█▊        | 9/51 [00:03<00:15,  2.65it/s][A[A[A[A[A[A[A[A[A








 20%|█▉        | 10/51 [00:04<00:14,  2.79it/s][A[A[A[A[A[A[A[A[A








 22%|██▏       | 11/51 [00:04<00:14,  2.75it/s][A[A[A[A[A[A[A[A[A








 24%|██▎       | 12/51 [00:04<00:13,  2.98it/s][A[A[A[A[A[A[A[A[A








 25%

Completed training for epoch 4, testing validation dataset











 50%|█████     | 1/2 [00:00<00:00,  1.33it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  2.20it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.07226584106683731
top1acc: 96.80005645751953
Completed testing validation dataset for epoch 4, testing training dataset











 17%|█▋        | 1/6 [00:00<00:03,  1.38it/s][A[A[A[A[A[A[A[A[A








 50%|█████     | 3/6 [00:00<00:01,  1.86it/s][A[A[A[A[A[A[A[A[A








 67%|██████▋   | 4/6 [00:01<00:00,  2.41it/s][A[A[A[A[A[A[A[A[A








 83%|████████▎ | 5/6 [00:01<00:00,  3.04it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 6/6 [00:01<00:00,  4.64it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.04130096733570099
top1acc: 98.75872802734375
Completed testing validation dataset for epoch 4, testing activation sparsity











 50%|█████     | 1/2 [00:00<00:00,  1.09it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:01<00:00,  1.48it/s][A[A[A[A[A[A[A[A[A







  6%|▌         | 5/90 [01:42<28:57, 20.44s/it][A[A[A[A[A[A[A[A








  0%|          | 0/51 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

Starting epoch 5











  2%|▏         | 1/51 [00:00<00:49,  1.02it/s][A[A[A[A[A[A[A[A[A








  4%|▍         | 2/51 [00:01<00:39,  1.25it/s][A[A[A[A[A[A[A[A[A








  6%|▌         | 3/51 [00:01<00:30,  1.56it/s][A[A[A[A[A[A[A[A[A








  8%|▊         | 4/51 [00:01<00:25,  1.86it/s][A[A[A[A[A[A[A[A[A








 10%|▉         | 5/51 [00:02<00:22,  2.05it/s][A[A[A[A[A[A[A[A[A








 12%|█▏        | 6/51 [00:02<00:19,  2.35it/s][A[A[A[A[A[A[A[A[A








 14%|█▎        | 7/51 [00:02<00:16,  2.60it/s][A[A[A[A[A[A[A[A[A








 16%|█▌        | 8/51 [00:03<00:16,  2.63it/s][A[A[A[A[A[A[A[A[A








 18%|█▊        | 9/51 [00:03<00:14,  2.85it/s][A[A[A[A[A[A[A[A[A








 20%|█▉        | 10/51 [00:03<00:13,  2.99it/s][A[A[A[A[A[A[A[A[A








 22%|██▏       | 11/51 [00:04<00:13,  2.91it/s][A[A[A[A[A[A[A[A[A








 24%|██▎       | 12/51 [00:04<00:13,  2.97it/s][A[A[A[A[A[A[A[A[A








 25%

Completed training for epoch 5, testing validation dataset











 50%|█████     | 1/2 [00:00<00:00,  1.40it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  2.32it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.07475918531417847
top1acc: 97.40005493164062
Completed testing validation dataset for epoch 5, testing training dataset











 17%|█▋        | 1/6 [00:00<00:03,  1.37it/s][A[A[A[A[A[A[A[A[A








 50%|█████     | 3/6 [00:00<00:01,  1.85it/s][A[A[A[A[A[A[A[A[A








 67%|██████▋   | 4/6 [00:01<00:00,  2.40it/s][A[A[A[A[A[A[A[A[A








 83%|████████▎ | 5/6 [00:01<00:00,  3.04it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 6/6 [00:01<00:00,  4.72it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.03955966234207153
top1acc: 98.75872802734375
Completed testing validation dataset for epoch 5, testing activation sparsity











 50%|█████     | 1/2 [00:00<00:00,  1.27it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  1.68it/s][A[A[A[A[A[A[A[A[A







  7%|▋         | 6/90 [02:02<28:27, 20.33s/it][A[A[A[A[A[A[A[A








  0%|          | 0/51 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

Starting epoch 6











  2%|▏         | 1/51 [00:01<00:53,  1.07s/it][A[A[A[A[A[A[A[A[A








  4%|▍         | 2/51 [00:01<00:42,  1.16it/s][A[A[A[A[A[A[A[A[A








  6%|▌         | 3/51 [00:01<00:32,  1.46it/s][A[A[A[A[A[A[A[A[A








  8%|▊         | 4/51 [00:02<00:26,  1.77it/s][A[A[A[A[A[A[A[A[A








 10%|▉         | 5/51 [00:02<00:23,  1.96it/s][A[A[A[A[A[A[A[A[A








 12%|█▏        | 6/51 [00:02<00:19,  2.27it/s][A[A[A[A[A[A[A[A[A








 14%|█▎        | 7/51 [00:02<00:17,  2.53it/s][A[A[A[A[A[A[A[A[A








 16%|█▌        | 8/51 [00:03<00:16,  2.59it/s][A[A[A[A[A[A[A[A[A








 18%|█▊        | 9/51 [00:03<00:14,  2.82it/s][A[A[A[A[A[A[A[A[A








 20%|█▉        | 10/51 [00:03<00:13,  2.97it/s][A[A[A[A[A[A[A[A[A








 22%|██▏       | 11/51 [00:04<00:13,  2.88it/s][A[A[A[A[A[A[A[A[A








 24%|██▎       | 12/51 [00:04<00:13,  2.96it/s][A[A[A[A[A[A[A[A[A








 25%

Completed training for epoch 6, testing validation dataset











 50%|█████     | 1/2 [00:00<00:00,  1.27it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  1.71it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.0652557983994484
top1acc: 97.3999252319336
Completed testing validation dataset for epoch 6, testing training dataset











 17%|█▋        | 1/6 [00:00<00:03,  1.30it/s][A[A[A[A[A[A[A[A[A








 33%|███▎      | 2/6 [00:00<00:02,  1.74it/s][A[A[A[A[A[A[A[A[A








 67%|██████▋   | 4/6 [00:01<00:00,  2.32it/s][A[A[A[A[A[A[A[A[A








 83%|████████▎ | 5/6 [00:01<00:00,  2.95it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 6/6 [00:01<00:00,  4.59it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.032360490411520004
top1acc: 99.14662170410156
Completed testing validation dataset for epoch 6, testing activation sparsity











 50%|█████     | 1/2 [00:00<00:00,  1.39it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  2.27it/s][A[A[A[A[A[A[A[A[A







  8%|▊         | 7/90 [02:22<28:08, 20.35s/it][A[A[A[A[A[A[A[A








  0%|          | 0/51 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

Starting epoch 7











  2%|▏         | 1/51 [00:01<00:56,  1.14s/it][A[A[A[A[A[A[A[A[A








  4%|▍         | 2/51 [00:01<00:43,  1.14it/s][A[A[A[A[A[A[A[A[A








  6%|▌         | 3/51 [00:01<00:33,  1.42it/s][A[A[A[A[A[A[A[A[A








  8%|▊         | 4/51 [00:02<00:28,  1.65it/s][A[A[A[A[A[A[A[A[A








 10%|▉         | 5/51 [00:02<00:23,  1.99it/s][A[A[A[A[A[A[A[A[A








 12%|█▏        | 6/51 [00:02<00:19,  2.28it/s][A[A[A[A[A[A[A[A[A








 14%|█▎        | 7/51 [00:03<00:18,  2.39it/s][A[A[A[A[A[A[A[A[A








 16%|█▌        | 8/51 [00:03<00:16,  2.65it/s][A[A[A[A[A[A[A[A[A








 18%|█▊        | 9/51 [00:03<00:15,  2.78it/s][A[A[A[A[A[A[A[A[A








 20%|█▉        | 10/51 [00:03<00:14,  2.86it/s][A[A[A[A[A[A[A[A[A








 22%|██▏       | 11/51 [00:04<00:13,  2.93it/s][A[A[A[A[A[A[A[A[A








 24%|██▎       | 12/51 [00:04<00:13,  2.84it/s][A[A[A[A[A[A[A[A[A








 25%

Completed training for epoch 7, testing validation dataset











 50%|█████     | 1/2 [00:00<00:00,  1.28it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  2.15it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.07176816463470459
top1acc: 97.1999282836914
Completed testing validation dataset for epoch 7, testing training dataset











 17%|█▋        | 1/6 [00:00<00:03,  1.37it/s][A[A[A[A[A[A[A[A[A








 33%|███▎      | 2/6 [00:00<00:02,  1.83it/s][A[A[A[A[A[A[A[A[A








 67%|██████▋   | 4/6 [00:01<00:00,  2.39it/s][A[A[A[A[A[A[A[A[A








 83%|████████▎ | 5/6 [00:01<00:00,  3.01it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 6/6 [00:01<00:00,  4.56it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.03364945203065872
top1acc: 98.99146270751953
Completed testing validation dataset for epoch 7, testing activation sparsity











 50%|█████     | 1/2 [00:00<00:00,  1.23it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  1.62it/s][A[A[A[A[A[A[A[A[A







  9%|▉         | 8/90 [02:42<27:50, 20.37s/it][A[A[A[A[A[A[A[A








  0%|          | 0/51 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

Starting epoch 8











  2%|▏         | 1/51 [00:01<00:50,  1.01s/it][A[A[A[A[A[A[A[A[A








  4%|▍         | 2/51 [00:01<00:39,  1.24it/s][A[A[A[A[A[A[A[A[A








  6%|▌         | 3/51 [00:01<00:32,  1.48it/s][A[A[A[A[A[A[A[A[A








  8%|▊         | 4/51 [00:02<00:26,  1.77it/s][A[A[A[A[A[A[A[A[A








 10%|▉         | 5/51 [00:02<00:23,  1.97it/s][A[A[A[A[A[A[A[A[A








 12%|█▏        | 6/51 [00:02<00:19,  2.29it/s][A[A[A[A[A[A[A[A[A








 14%|█▎        | 7/51 [00:02<00:17,  2.45it/s][A[A[A[A[A[A[A[A[A








 16%|█▌        | 8/51 [00:03<00:16,  2.53it/s][A[A[A[A[A[A[A[A[A








 18%|█▊        | 9/51 [00:03<00:15,  2.80it/s][A[A[A[A[A[A[A[A[A








 20%|█▉        | 10/51 [00:03<00:14,  2.86it/s][A[A[A[A[A[A[A[A[A








 22%|██▏       | 11/51 [00:04<00:13,  2.87it/s][A[A[A[A[A[A[A[A[A








 24%|██▎       | 12/51 [00:04<00:13,  2.95it/s][A[A[A[A[A[A[A[A[A








 25%

Completed training for epoch 8, testing validation dataset











 50%|█████     | 1/2 [00:00<00:00,  1.38it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  2.25it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.07195153087377548
top1acc: 97.39993286132812
Completed testing validation dataset for epoch 8, testing training dataset











 17%|█▋        | 1/6 [00:00<00:03,  1.35it/s][A[A[A[A[A[A[A[A[A








 33%|███▎      | 2/6 [00:00<00:02,  1.76it/s][A[A[A[A[A[A[A[A[A








 67%|██████▋   | 4/6 [00:01<00:00,  2.34it/s][A[A[A[A[A[A[A[A[A








 83%|████████▎ | 5/6 [00:01<00:00,  2.95it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 6/6 [00:01<00:00,  4.46it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.030390609055757523
top1acc: 99.14662170410156
Completed testing validation dataset for epoch 8, testing activation sparsity











 50%|█████     | 1/2 [00:00<00:00,  1.25it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  1.66it/s][A[A[A[A[A[A[A[A[A







 10%|█         | 9/90 [03:03<27:24, 20.31s/it][A[A[A[A[A[A[A[A








  0%|          | 0/51 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

Starting epoch 9











  2%|▏         | 1/51 [00:01<00:51,  1.02s/it][A[A[A[A[A[A[A[A[A








  4%|▍         | 2/51 [00:01<00:39,  1.23it/s][A[A[A[A[A[A[A[A[A








  6%|▌         | 3/51 [00:01<00:32,  1.49it/s][A[A[A[A[A[A[A[A[A








  8%|▊         | 4/51 [00:02<00:26,  1.77it/s][A[A[A[A[A[A[A[A[A








 10%|▉         | 5/51 [00:02<00:23,  1.98it/s][A[A[A[A[A[A[A[A[A








 12%|█▏        | 6/51 [00:02<00:19,  2.30it/s][A[A[A[A[A[A[A[A[A








 14%|█▎        | 7/51 [00:02<00:17,  2.48it/s][A[A[A[A[A[A[A[A[A








 16%|█▌        | 8/51 [00:03<00:16,  2.55it/s][A[A[A[A[A[A[A[A[A








 18%|█▊        | 9/51 [00:03<00:14,  2.83it/s][A[A[A[A[A[A[A[A[A








 20%|█▉        | 10/51 [00:03<00:14,  2.89it/s][A[A[A[A[A[A[A[A[A








 22%|██▏       | 11/51 [00:04<00:13,  2.93it/s][A[A[A[A[A[A[A[A[A








 24%|██▎       | 12/51 [00:04<00:13,  2.99it/s][A[A[A[A[A[A[A[A[A








 25%

Completed training for epoch 9, testing validation dataset











 50%|█████     | 1/2 [00:00<00:00,  1.18it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  1.59it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.06409253925085068
top1acc: 97.00005340576172
Completed testing validation dataset for epoch 9, testing training dataset











 17%|█▋        | 1/6 [00:00<00:03,  1.29it/s][A[A[A[A[A[A[A[A[A








 33%|███▎      | 2/6 [00:00<00:02,  1.72it/s][A[A[A[A[A[A[A[A[A








 50%|█████     | 3/6 [00:01<00:01,  2.25it/s][A[A[A[A[A[A[A[A[A








 83%|████████▎ | 5/6 [00:01<00:00,  2.93it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 6/6 [00:01<00:00,  4.49it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.025333374738693237
top1acc: 99.45693969726562
Completed testing validation dataset for epoch 9, testing activation sparsity











 50%|█████     | 1/2 [00:00<00:00,  1.27it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  1.71it/s][A[A[A[A[A[A[A[A[A







 11%|█         | 10/90 [03:23<27:08, 20.36s/it][A[A[A[A[A[A[A[A








  0%|          | 0/51 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

Starting epoch 10











  2%|▏         | 1/51 [00:01<00:56,  1.13s/it][A[A[A[A[A[A[A[A[A








  4%|▍         | 2/51 [00:01<00:42,  1.15it/s][A[A[A[A[A[A[A[A[A








  6%|▌         | 3/51 [00:01<00:33,  1.41it/s][A[A[A[A[A[A[A[A[A








  8%|▊         | 4/51 [00:02<00:28,  1.65it/s][A[A[A[A[A[A[A[A[A








 10%|▉         | 5/51 [00:02<00:23,  1.94it/s][A[A[A[A[A[A[A[A[A








 12%|█▏        | 6/51 [00:02<00:21,  2.13it/s][A[A[A[A[A[A[A[A[A








 14%|█▎        | 7/51 [00:03<00:18,  2.42it/s][A[A[A[A[A[A[A[A[A








 16%|█▌        | 8/51 [00:03<00:16,  2.64it/s][A[A[A[A[A[A[A[A[A








 18%|█▊        | 9/51 [00:03<00:15,  2.65it/s][A[A[A[A[A[A[A[A[A








 20%|█▉        | 10/51 [00:03<00:14,  2.89it/s][A[A[A[A[A[A[A[A[A








 22%|██▏       | 11/51 [00:04<00:13,  3.00it/s][A[A[A[A[A[A[A[A[A








 24%|██▎       | 12/51 [00:04<00:13,  2.89it/s][A[A[A[A[A[A[A[A[A








 25%

Completed training for epoch 10, testing validation dataset











 50%|█████     | 1/2 [00:00<00:00,  1.40it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  1.86it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.06902569532394409
top1acc: 97.20005798339844
Completed testing validation dataset for epoch 10, testing training dataset











 17%|█▋        | 1/6 [00:00<00:03,  1.27it/s][A[A[A[A[A[A[A[A[A








 50%|█████     | 3/6 [00:01<00:01,  1.71it/s][A[A[A[A[A[A[A[A[A








 67%|██████▋   | 4/6 [00:01<00:00,  2.24it/s][A[A[A[A[A[A[A[A[A








 83%|████████▎ | 5/6 [00:01<00:00,  2.85it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 6/6 [00:01<00:00,  4.41it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.02335912548005581
top1acc: 99.37936401367188
Completed testing validation dataset for epoch 10, testing activation sparsity











 50%|█████     | 1/2 [00:00<00:00,  1.29it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  1.74it/s][A[A[A[A[A[A[A[A[A







 12%|█▏        | 11/90 [03:43<26:49, 20.37s/it][A[A[A[A[A[A[A[A








  0%|          | 0/51 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

Starting epoch 11











  2%|▏         | 1/51 [00:01<00:55,  1.11s/it][A[A[A[A[A[A[A[A[A








  4%|▍         | 2/51 [00:01<00:43,  1.12it/s][A[A[A[A[A[A[A[A[A








  6%|▌         | 3/51 [00:01<00:33,  1.42it/s][A[A[A[A[A[A[A[A[A








  8%|▊         | 4/51 [00:02<00:27,  1.71it/s][A[A[A[A[A[A[A[A[A








 10%|▉         | 5/51 [00:02<00:23,  1.92it/s][A[A[A[A[A[A[A[A[A








 12%|█▏        | 6/51 [00:02<00:18,  2.38it/s][A[A[A[A[A[A[A[A[A








 14%|█▎        | 7/51 [00:02<00:16,  2.72it/s][A[A[A[A[A[A[A[A[A








 16%|█▌        | 8/51 [00:03<00:15,  2.85it/s][A[A[A[A[A[A[A[A[A








 18%|█▊        | 9/51 [00:03<00:14,  2.80it/s][A[A[A[A[A[A[A[A[A








 20%|█▉        | 10/51 [00:03<00:14,  2.89it/s][A[A[A[A[A[A[A[A[A








 22%|██▏       | 11/51 [00:04<00:14,  2.83it/s][A[A[A[A[A[A[A[A[A








 24%|██▎       | 12/51 [00:04<00:12,  3.02it/s][A[A[A[A[A[A[A[A[A








 25%

Completed training for epoch 11, testing validation dataset











 50%|█████     | 1/2 [00:00<00:00,  1.40it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  2.35it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.06035646051168442
top1acc: 97.79991912841797
Completed testing validation dataset for epoch 11, testing training dataset











 17%|█▋        | 1/6 [00:01<00:05,  1.18s/it][A[A[A[A[A[A[A[A[A








 33%|███▎      | 2/6 [00:01<00:03,  1.16it/s][A[A[A[A[A[A[A[A[A








 67%|██████▋   | 4/6 [00:01<00:01,  1.57it/s][A[A[A[A[A[A[A[A[A








 83%|████████▎ | 5/6 [00:01<00:00,  2.07it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 6/6 [00:01<00:00,  3.44it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.02235257625579834
top1acc: 99.3017807006836
Completed testing validation dataset for epoch 11, testing activation sparsity











 50%|█████     | 1/2 [00:00<00:00,  1.41it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  2.35it/s][A[A[A[A[A[A[A[A[A







 13%|█▎        | 12/90 [04:04<26:33, 20.43s/it][A[A[A[A[A[A[A[A








  0%|          | 0/51 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

Starting epoch 12











  2%|▏         | 1/51 [00:01<00:52,  1.06s/it][A[A[A[A[A[A[A[A[A








  4%|▍         | 2/51 [00:01<00:41,  1.19it/s][A[A[A[A[A[A[A[A[A








  6%|▌         | 3/51 [00:01<00:33,  1.45it/s][A[A[A[A[A[A[A[A[A








  8%|▊         | 4/51 [00:02<00:28,  1.68it/s][A[A[A[A[A[A[A[A[A








 10%|▉         | 5/51 [00:02<00:22,  2.00it/s][A[A[A[A[A[A[A[A[A








 12%|█▏        | 6/51 [00:02<00:19,  2.28it/s][A[A[A[A[A[A[A[A[A








 14%|█▎        | 7/51 [00:03<00:18,  2.40it/s][A[A[A[A[A[A[A[A[A








 16%|█▌        | 8/51 [00:03<00:16,  2.66it/s][A[A[A[A[A[A[A[A[A








 18%|█▊        | 9/51 [00:03<00:14,  2.85it/s][A[A[A[A[A[A[A[A[A








 20%|█▉        | 10/51 [00:03<00:14,  2.81it/s][A[A[A[A[A[A[A[A[A








 22%|██▏       | 11/51 [00:04<00:13,  2.91it/s][A[A[A[A[A[A[A[A[A








 24%|██▎       | 12/51 [00:04<00:13,  2.83it/s][A[A[A[A[A[A[A[A[A








 25%

Completed training for epoch 12, testing validation dataset











 50%|█████     | 1/2 [00:00<00:00,  1.31it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  2.15it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.061384573578834534
top1acc: 97.60004425048828
Completed testing validation dataset for epoch 12, testing training dataset











 17%|█▋        | 1/6 [00:00<00:03,  1.32it/s][A[A[A[A[A[A[A[A[A








 33%|███▎      | 2/6 [00:00<00:02,  1.76it/s][A[A[A[A[A[A[A[A[A








 67%|██████▋   | 4/6 [00:01<00:00,  2.36it/s][A[A[A[A[A[A[A[A[A








 83%|████████▎ | 5/6 [00:01<00:00,  2.96it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 6/6 [00:01<00:00,  4.56it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.016865259036421776
top1acc: 99.68968200683594
Completed testing validation dataset for epoch 12, testing activation sparsity











 50%|█████     | 1/2 [00:00<00:00,  1.27it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  1.67it/s][A[A[A[A[A[A[A[A[A







 14%|█▍        | 13/90 [04:25<26:14, 20.45s/it][A[A[A[A[A[A[A[A








  0%|          | 0/51 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

Starting epoch 13











  2%|▏         | 1/51 [00:01<00:52,  1.04s/it][A[A[A[A[A[A[A[A[A








  4%|▍         | 2/51 [00:01<00:40,  1.22it/s][A[A[A[A[A[A[A[A[A








  6%|▌         | 3/51 [00:01<00:32,  1.46it/s][A[A[A[A[A[A[A[A[A








  8%|▊         | 4/51 [00:01<00:26,  1.77it/s][A[A[A[A[A[A[A[A[A








 10%|▉         | 5/51 [00:02<00:22,  2.01it/s][A[A[A[A[A[A[A[A[A








 12%|█▏        | 6/51 [00:02<00:19,  2.27it/s][A[A[A[A[A[A[A[A[A








 14%|█▎        | 7/51 [00:02<00:18,  2.43it/s][A[A[A[A[A[A[A[A[A








 16%|█▌        | 8/51 [00:03<00:17,  2.50it/s][A[A[A[A[A[A[A[A[A








 18%|█▊        | 9/51 [00:03<00:14,  2.84it/s][A[A[A[A[A[A[A[A[A








 20%|█▉        | 10/51 [00:03<00:14,  2.89it/s][A[A[A[A[A[A[A[A[A








 22%|██▏       | 11/51 [00:04<00:14,  2.83it/s][A[A[A[A[A[A[A[A[A








 24%|██▎       | 12/51 [00:04<00:12,  3.05it/s][A[A[A[A[A[A[A[A[A








 25%

Completed training for epoch 13, testing validation dataset











 50%|█████     | 1/2 [00:00<00:00,  1.32it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  1.76it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.06253994256258011
top1acc: 97.79991912841797
Completed testing validation dataset for epoch 13, testing training dataset











 17%|█▋        | 1/6 [00:00<00:03,  1.28it/s][A[A[A[A[A[A[A[A[A








 33%|███▎      | 2/6 [00:00<00:02,  1.70it/s][A[A[A[A[A[A[A[A[A








 50%|█████     | 3/6 [00:01<00:01,  2.23it/s][A[A[A[A[A[A[A[A[A








 83%|████████▎ | 5/6 [00:01<00:00,  2.93it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 6/6 [00:01<00:00,  4.52it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.020263610407710075
top1acc: 99.45693969726562
Completed testing validation dataset for epoch 13, testing activation sparsity











 50%|█████     | 1/2 [00:00<00:00,  1.19it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  2.01it/s][A[A[A[A[A[A[A[A[A







 16%|█▌        | 14/90 [04:45<25:52, 20.43s/it][A[A[A[A[A[A[A[A








  0%|          | 0/51 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

Starting epoch 14











  2%|▏         | 1/51 [00:01<00:57,  1.15s/it][A[A[A[A[A[A[A[A[A








  4%|▍         | 2/51 [00:01<00:43,  1.13it/s][A[A[A[A[A[A[A[A[A








  6%|▌         | 3/51 [00:01<00:34,  1.39it/s][A[A[A[A[A[A[A[A[A








  8%|▊         | 4/51 [00:02<00:28,  1.62it/s][A[A[A[A[A[A[A[A[A








 10%|▉         | 5/51 [00:02<00:22,  2.06it/s][A[A[A[A[A[A[A[A[A








 12%|█▏        | 6/51 [00:02<00:19,  2.36it/s][A[A[A[A[A[A[A[A[A








 14%|█▎        | 7/51 [00:02<00:17,  2.53it/s][A[A[A[A[A[A[A[A[A








 16%|█▌        | 8/51 [00:03<00:16,  2.68it/s][A[A[A[A[A[A[A[A[A








 18%|█▊        | 9/51 [00:03<00:15,  2.77it/s][A[A[A[A[A[A[A[A[A








 20%|█▉        | 10/51 [00:03<00:15,  2.73it/s][A[A[A[A[A[A[A[A[A








 22%|██▏       | 11/51 [00:04<00:13,  2.95it/s][A[A[A[A[A[A[A[A[A








 24%|██▎       | 12/51 [00:04<00:13,  2.96it/s][A[A[A[A[A[A[A[A[A








 25%

Completed training for epoch 14, testing validation dataset











 50%|█████     | 1/2 [00:00<00:00,  1.25it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  2.04it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.05891065299510956
top1acc: 97.59992218017578
Completed testing validation dataset for epoch 14, testing training dataset











 17%|█▋        | 1/6 [00:00<00:03,  1.34it/s][A[A[A[A[A[A[A[A[A








 50%|█████     | 3/6 [00:00<00:01,  1.80it/s][A[A[A[A[A[A[A[A[A








 83%|████████▎ | 5/6 [00:01<00:00,  2.36it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 6/6 [00:01<00:00,  4.62it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.016555197536945343
top1acc: 99.68968200683594
Completed testing validation dataset for epoch 14, testing activation sparsity











 50%|█████     | 1/2 [00:00<00:00,  1.17it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  1.56it/s][A[A[A[A[A[A[A[A[A







 17%|█▋        | 15/90 [05:05<25:34, 20.47s/it][A[A[A[A[A[A[A[A








  0%|          | 0/51 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

Starting epoch 15











  2%|▏         | 1/51 [00:01<00:50,  1.02s/it][A[A[A[A[A[A[A[A[A








  4%|▍         | 2/51 [00:01<00:39,  1.23it/s][A[A[A[A[A[A[A[A[A








  6%|▌         | 3/51 [00:01<00:32,  1.48it/s][A[A[A[A[A[A[A[A[A








  8%|▊         | 4/51 [00:01<00:26,  1.80it/s][A[A[A[A[A[A[A[A[A








 10%|▉         | 5/51 [00:02<00:22,  2.05it/s][A[A[A[A[A[A[A[A[A








 12%|█▏        | 6/51 [00:02<00:19,  2.29it/s][A[A[A[A[A[A[A[A[A








 14%|█▎        | 7/51 [00:02<00:17,  2.48it/s][A[A[A[A[A[A[A[A[A








 16%|█▌        | 8/51 [00:03<00:16,  2.53it/s][A[A[A[A[A[A[A[A[A








 18%|█▊        | 9/51 [00:03<00:15,  2.79it/s][A[A[A[A[A[A[A[A[A








 20%|█▉        | 10/51 [00:03<00:14,  2.86it/s][A[A[A[A[A[A[A[A[A








 22%|██▏       | 11/51 [00:04<00:14,  2.82it/s][A[A[A[A[A[A[A[A[A








 24%|██▎       | 12/51 [00:04<00:13,  3.00it/s][A[A[A[A[A[A[A[A[A








 25%

Completed training for epoch 15, testing validation dataset











 50%|█████     | 1/2 [00:00<00:00,  1.28it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  1.71it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.06352201849222183
top1acc: 97.60005187988281
Completed testing validation dataset for epoch 15, testing training dataset











 17%|█▋        | 1/6 [00:00<00:03,  1.27it/s][A[A[A[A[A[A[A[A[A








 50%|█████     | 3/6 [00:00<00:01,  1.73it/s][A[A[A[A[A[A[A[A[A








 67%|██████▋   | 4/6 [00:01<00:00,  2.26it/s][A[A[A[A[A[A[A[A[A








 83%|████████▎ | 5/6 [00:01<00:00,  2.87it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 6/6 [00:01<00:00,  4.50it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.016724426299333572
top1acc: 99.68968200683594
Completed testing validation dataset for epoch 15, testing activation sparsity











 50%|█████     | 1/2 [00:00<00:00,  1.24it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  1.66it/s][A[A[A[A[A[A[A[A[A







 18%|█▊        | 16/90 [05:26<25:09, 20.39s/it][A[A[A[A[A[A[A[A








  0%|          | 0/51 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

Starting epoch 16











  2%|▏         | 1/51 [00:01<00:51,  1.02s/it][A[A[A[A[A[A[A[A[A








  4%|▍         | 2/51 [00:01<00:40,  1.21it/s][A[A[A[A[A[A[A[A[A








  6%|▌         | 3/51 [00:01<00:31,  1.50it/s][A[A[A[A[A[A[A[A[A








  8%|▊         | 4/51 [00:02<00:26,  1.77it/s][A[A[A[A[A[A[A[A[A








 10%|▉         | 5/51 [00:02<00:23,  1.98it/s][A[A[A[A[A[A[A[A[A








 12%|█▏        | 6/51 [00:02<00:19,  2.29it/s][A[A[A[A[A[A[A[A[A








 14%|█▎        | 7/51 [00:02<00:17,  2.47it/s][A[A[A[A[A[A[A[A[A








 16%|█▌        | 8/51 [00:03<00:16,  2.59it/s][A[A[A[A[A[A[A[A[A








 18%|█▊        | 9/51 [00:03<00:15,  2.73it/s][A[A[A[A[A[A[A[A[A








 20%|█▉        | 10/51 [00:04<00:15,  2.73it/s][A[A[A[A[A[A[A[A[A








 22%|██▏       | 11/51 [00:04<00:13,  2.95it/s][A[A[A[A[A[A[A[A[A








 24%|██▎       | 12/51 [00:04<00:12,  3.09it/s][A[A[A[A[A[A[A[A[A








 25%

Completed training for epoch 16, testing validation dataset











 50%|█████     | 1/2 [00:00<00:00,  1.32it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  1.76it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.06208568438887596
top1acc: 97.7999267578125
Completed testing validation dataset for epoch 16, testing training dataset











 17%|█▋        | 1/6 [00:00<00:03,  1.28it/s][A[A[A[A[A[A[A[A[A








 50%|█████     | 3/6 [00:00<00:01,  1.73it/s][A[A[A[A[A[A[A[A[A








 83%|████████▎ | 5/6 [00:01<00:00,  2.31it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 6/6 [00:01<00:00,  4.67it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.014375473372638226
top1acc: 99.61209869384766
Completed testing validation dataset for epoch 16, testing activation sparsity











 50%|█████     | 1/2 [00:00<00:00,  1.25it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  2.10it/s][A[A[A[A[A[A[A[A[A







 19%|█▉        | 17/90 [05:46<24:48, 20.40s/it][A[A[A[A[A[A[A[A








  0%|          | 0/51 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

Starting epoch 17











  2%|▏         | 1/51 [00:01<00:53,  1.08s/it][A[A[A[A[A[A[A[A[A








  4%|▍         | 2/51 [00:01<00:41,  1.18it/s][A[A[A[A[A[A[A[A[A








  6%|▌         | 3/51 [00:01<00:33,  1.44it/s][A[A[A[A[A[A[A[A[A








  8%|▊         | 4/51 [00:02<00:28,  1.67it/s][A[A[A[A[A[A[A[A[A








 10%|▉         | 5/51 [00:02<00:23,  1.98it/s][A[A[A[A[A[A[A[A[A








 12%|█▏        | 6/51 [00:02<00:19,  2.26it/s][A[A[A[A[A[A[A[A[A








 14%|█▎        | 7/51 [00:03<00:18,  2.38it/s][A[A[A[A[A[A[A[A[A








 16%|█▌        | 8/51 [00:03<00:16,  2.65it/s][A[A[A[A[A[A[A[A[A








 18%|█▊        | 9/51 [00:03<00:14,  2.84it/s][A[A[A[A[A[A[A[A[A








 20%|█▉        | 10/51 [00:03<00:14,  2.83it/s][A[A[A[A[A[A[A[A[A








 22%|██▏       | 11/51 [00:04<00:13,  2.91it/s][A[A[A[A[A[A[A[A[A








 24%|██▎       | 12/51 [00:04<00:13,  2.84it/s][A[A[A[A[A[A[A[A[A








 25%

Completed training for epoch 17, testing validation dataset











 50%|█████     | 1/2 [00:00<00:00,  1.31it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  1.76it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.06685403734445572
top1acc: 97.80005645751953
Completed testing validation dataset for epoch 17, testing training dataset











 17%|█▋        | 1/6 [00:00<00:03,  1.31it/s][A[A[A[A[A[A[A[A[A








 33%|███▎      | 2/6 [00:00<00:02,  1.77it/s][A[A[A[A[A[A[A[A[A








 50%|█████     | 3/6 [00:01<00:01,  2.29it/s][A[A[A[A[A[A[A[A[A








 83%|████████▎ | 5/6 [00:01<00:00,  2.99it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 6/6 [00:01<00:00,  4.64it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.015089184045791626
top1acc: 99.61209869384766
Completed testing validation dataset for epoch 17, testing activation sparsity











 50%|█████     | 1/2 [00:00<00:00,  1.24it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  1.64it/s][A[A[A[A[A[A[A[A[A







 20%|██        | 18/90 [06:07<24:30, 20.42s/it][A[A[A[A[A[A[A[A








  0%|          | 0/51 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

Starting epoch 18











  2%|▏         | 1/51 [00:01<00:51,  1.03s/it][A[A[A[A[A[A[A[A[A








  4%|▍         | 2/51 [00:01<00:39,  1.23it/s][A[A[A[A[A[A[A[A[A








  6%|▌         | 3/51 [00:01<00:32,  1.46it/s][A[A[A[A[A[A[A[A[A








  8%|▊         | 4/51 [00:01<00:26,  1.80it/s][A[A[A[A[A[A[A[A[A








 10%|▉         | 5/51 [00:02<00:22,  2.07it/s][A[A[A[A[A[A[A[A[A








 12%|█▏        | 6/51 [00:02<00:20,  2.22it/s][A[A[A[A[A[A[A[A[A








 14%|█▎        | 7/51 [00:02<00:17,  2.51it/s][A[A[A[A[A[A[A[A[A








 16%|█▌        | 8/51 [00:03<00:16,  2.66it/s][A[A[A[A[A[A[A[A[A








 18%|█▊        | 9/51 [00:03<00:15,  2.75it/s][A[A[A[A[A[A[A[A[A








 20%|█▉        | 10/51 [00:03<00:14,  2.84it/s][A[A[A[A[A[A[A[A[A








 22%|██▏       | 11/51 [00:04<00:14,  2.78it/s][A[A[A[A[A[A[A[A[A








 24%|██▎       | 12/51 [00:04<00:12,  3.01it/s][A[A[A[A[A[A[A[A[A








 25%

Completed training for epoch 18, testing validation dataset











 50%|█████     | 1/2 [00:00<00:00,  1.33it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  2.20it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.06711196154356003
top1acc: 97.2000503540039
Completed testing validation dataset for epoch 18, testing training dataset











 17%|█▋        | 1/6 [00:00<00:03,  1.43it/s][A[A[A[A[A[A[A[A[A








 50%|█████     | 3/6 [00:00<00:01,  1.91it/s][A[A[A[A[A[A[A[A[A








 67%|██████▋   | 4/6 [00:01<00:00,  2.46it/s][A[A[A[A[A[A[A[A[A








 83%|████████▎ | 5/6 [00:01<00:00,  3.09it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 6/6 [00:01<00:00,  4.66it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.017762506380677223
top1acc: 99.5345230102539
Completed testing validation dataset for epoch 18, testing activation sparsity











 50%|█████     | 1/2 [00:00<00:00,  1.26it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  2.07it/s][A[A[A[A[A[A[A[A[A







 21%|██        | 19/90 [06:27<24:05, 20.36s/it][A[A[A[A[A[A[A[A








  0%|          | 0/51 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

Starting epoch 19











  2%|▏         | 1/51 [00:01<00:55,  1.11s/it][A[A[A[A[A[A[A[A[A








  4%|▍         | 2/51 [00:01<00:43,  1.12it/s][A[A[A[A[A[A[A[A[A








  6%|▌         | 3/51 [00:01<00:33,  1.42it/s][A[A[A[A[A[A[A[A[A








  8%|▊         | 4/51 [00:02<00:27,  1.69it/s][A[A[A[A[A[A[A[A[A








 10%|▉         | 5/51 [00:02<00:24,  1.90it/s][A[A[A[A[A[A[A[A[A








 12%|█▏        | 6/51 [00:02<00:20,  2.17it/s][A[A[A[A[A[A[A[A[A








 14%|█▎        | 7/51 [00:03<00:19,  2.30it/s][A[A[A[A[A[A[A[A[A








 16%|█▌        | 8/51 [00:03<00:16,  2.60it/s][A[A[A[A[A[A[A[A[A








 18%|█▊        | 9/51 [00:03<00:15,  2.72it/s][A[A[A[A[A[A[A[A[A








 20%|█▉        | 10/51 [00:04<00:15,  2.70it/s][A[A[A[A[A[A[A[A[A








 22%|██▏       | 11/51 [00:04<00:13,  2.93it/s][A[A[A[A[A[A[A[A[A








 24%|██▎       | 12/51 [00:04<00:13,  2.95it/s][A[A[A[A[A[A[A[A[A








 25%

Completed training for epoch 19, testing validation dataset











 50%|█████     | 1/2 [00:00<00:00,  1.30it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  1.74it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.05875855311751366
top1acc: 97.60005187988281
Completed testing validation dataset for epoch 19, testing training dataset











 17%|█▋        | 1/6 [00:00<00:03,  1.29it/s][A[A[A[A[A[A[A[A[A








 50%|█████     | 3/6 [00:00<00:01,  1.74it/s][A[A[A[A[A[A[A[A[A








 83%|████████▎ | 5/6 [00:01<00:00,  2.30it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 6/6 [00:01<00:00,  4.56it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.016479136422276497
top1acc: 99.68968200683594
Completed testing validation dataset for epoch 19, testing activation sparsity











 50%|█████     | 1/2 [00:00<00:00,  1.32it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  1.78it/s][A[A[A[A[A[A[A[A[A







 22%|██▏       | 20/90 [06:47<23:47, 20.40s/it][A[A[A[A[A[A[A[A








  0%|          | 0/51 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

Starting epoch 20











  2%|▏         | 1/51 [00:01<00:57,  1.14s/it][A[A[A[A[A[A[A[A[A








  4%|▍         | 2/51 [00:01<00:43,  1.13it/s][A[A[A[A[A[A[A[A[A








  6%|▌         | 3/51 [00:01<00:34,  1.39it/s][A[A[A[A[A[A[A[A[A








  8%|▊         | 4/51 [00:02<00:28,  1.66it/s][A[A[A[A[A[A[A[A[A








 10%|▉         | 5/51 [00:02<00:23,  1.93it/s][A[A[A[A[A[A[A[A[A








 12%|█▏        | 6/51 [00:02<00:21,  2.09it/s][A[A[A[A[A[A[A[A[A








 14%|█▎        | 7/51 [00:03<00:18,  2.40it/s][A[A[A[A[A[A[A[A[A








 16%|█▌        | 8/51 [00:03<00:16,  2.61it/s][A[A[A[A[A[A[A[A[A








 18%|█▊        | 9/51 [00:03<00:15,  2.63it/s][A[A[A[A[A[A[A[A[A








 20%|█▉        | 10/51 [00:04<00:14,  2.87it/s][A[A[A[A[A[A[A[A[A








 22%|██▏       | 11/51 [00:04<00:13,  3.02it/s][A[A[A[A[A[A[A[A[A








 24%|██▎       | 12/51 [00:04<00:13,  2.91it/s][A[A[A[A[A[A[A[A[A








 25%

Completed training for epoch 20, testing validation dataset











 50%|█████     | 1/2 [00:00<00:00,  1.34it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  2.19it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/6 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.05746648460626602
top1acc: 97.60005187988281
Completed testing validation dataset for epoch 20, testing training dataset











 17%|█▋        | 1/6 [00:00<00:03,  1.37it/s][A[A[A[A[A[A[A[A[A








 33%|███▎      | 2/6 [00:00<00:02,  1.82it/s][A[A[A[A[A[A[A[A[A








 50%|█████     | 3/6 [00:00<00:01,  2.39it/s][A[A[A[A[A[A[A[A[A








 67%|██████▋   | 4/6 [00:01<00:00,  3.01it/s][A[A[A[A[A[A[A[A[A








 83%|████████▎ | 5/6 [00:01<00:00,  3.68it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 6/6 [00:01<00:00,  4.51it/s][A[A[A[A[A[A[A[A[A








  0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

loss: 0.016126228496432304
top1acc: 99.5345230102539
Completed testing validation dataset for epoch 20, testing activation sparsity











 50%|█████     | 1/2 [00:00<00:00,  1.26it/s][A[A[A[A[A[A[A[A[A








100%|██████████| 2/2 [00:00<00:00,  1.67it/s][A[A[A[A[A[A[A[A[A







 23%|██▎       | 21/90 [07:08<23:28, 20.41s/it][A[A[A[A[A[A[A[A








  0%|          | 0/51 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A

Starting epoch 21











  2%|▏         | 1/51 [00:01<00:50,  1.01s/it][A[A[A[A[A[A[A[A[A








  4%|▍         | 2/51 [00:01<00:38,  1.26it/s][A[A[A[A[A[A[A[A[A








  6%|▌         | 3/51 [00:01<00:32,  1.50it/s][A[A[A[A[A[A[A[A[A








  8%|▊         | 4/51 [00:01<00:25,  1.82it/s][A[A[A[A[A[A[A[A[A








 10%|▉         | 5/51 [00:02<00:21,  2.12it/s][A[A[A[A[A[A[A[A[A








 12%|█▏        | 6/51 [00:02<00:19,  2.27it/s][A[A[A[A[A[A[A[A[A








 14%|█▎        | 7/51 [00:02<00:17,  2.56it/s][A[A[A[A[A[A[A[A[A








 16%|█▌        | 8/51 [00:03<00:15,  2.76it/s][A[A[A[A[A[A[A[A[A








 18%|█▊        | 9/51 [00:03<00:15,  2.75it/s][A[A[A[A[A[A[A[A[A








 20%|█▉        | 10/51 [00:03<00:14,  2.86it/s][A[A[A[A[A[A[A[A[A








 22%|██▏       | 11/51 [00:04<00:14,  2.81it/s][A[A[A[A[A[A[A[A[A








 24%|██▎       | 12/51 [00:04<00:12,  3.04it/s][A[A[A[A[A[A[A[A[A








 25%

Completed training for epoch 21, testing validation dataset


In [16]:
# Stand-alone call to test_as_values on the validation data loader; the training log
# above prints "testing activation sparsity" before a progress bar of the same
# length, so this is presumably that same pass re-run by hand -- TODO confirm.
# NOTE(review): `epoch` is not assigned in this cell. It presumably holds whatever
# value the (apparently interrupted) training loop last left in the kernel, so this
# cell will not work on a fresh kernel -- confirm and pass an explicit value.
test_as_values(analyzer_model, val_data_loader, device, epoch, writer)

100%|██████████| 2/2 [00:01<00:00,  1.11it/s]


In [17]:
# Show where the model checkpoint path points. print() already converts its
# arguments with str(), so no explicit str() wrapper is needed.
# `save_path` is expected to have been set by an earlier cell.
print(save_path)

/home/dan/oldcode/neuralmagicml-pytorch/notebooks/ResNet-2019.08.23.12.10.30.pth
