[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuromorphs/osn23-huge-tapeout/blob/main/Train_Binarized_SNN.ipynb)

In [1]:
!pip install snntorch

Collecting snntorch
  Downloading snntorch-0.6.4-py2.py3-none-any.whl (107 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/107.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m107.8/107.8 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: snntorch
Successfully installed snntorch-0.6.4


In [2]:
import torch
config = {
        'model' : 'NetFC',

        'exp_name' : 'mnist_tha',
        'num_trials' : 5,
        'num_epochs' : 50, #500,
        'binarize' : True,
        'binarize_input' : True,
        'post_quantize' : True,
        'enable_bias' : True,
        'enable_batch_norm' : True,
        'enable_dropout' : True,
        'enable_threshold' : True,
        'enable_slope' : True,
        'on_spike_reset_to_zero' : False,
        'data_dir' : "~/data/mnist",
        'batch_size' : 128,
        'seed' : 0,
        'num_workers' : 0,

        # final run sweeps
        'save_csv' : True,
        'save_model' : True,
        'early_stopping': True,
        'patience': 100,

        # final params
        'grad_clip' : False,
        'weight_clip' : False,
        'dropout1' : 0.02856,
        'beta' : 0.992187, # was 0.99, 0.9921875 (1/2^7) other possible values = 0.75, 0.875, 0.9375
        'lr' : 9.97e-3,
        'slope': 10.22,

        # threshold annealing. note: thr_final = threshold + thr_final
        'threshold1' : 11.666,
        'alpha_thr1' : 0.024,
        'thr_final1' : 4.317,

        'threshold2' : 14.105,
        'alpha_thr2' : 0.119,
        'thr_final2' : 16.29,

        'threshold3' : 0.6656,
        'alpha_thr3' : 0.0011,
        'thr_final3' : 3.496,

        # fixed params
        'num_steps' : 100,
        'correct_rate': 0.8,
        'incorrect_rate' : 0.2,
        'betas' : (0.9, 0.999),
        't_0' : 4688,
        'eta_min' : 0,
        'df_lr' : True, # return learning rate. Useful for scheduling

        # debug params
        'print_weights' : False
    }

def optim_func(net, config):
    optimizer = torch.optim.Adam(net.parameters(), lr=config["lr"], betas=config['betas'])
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=config['t_0'], eta_min=config['eta_min'], last_epoch=-1)
    return optimizer, scheduler


In [3]:
import torch
import torch.nn as nn
from torch.autograd import Function

class BinarizeF(Function):
    @staticmethod
    def forward(ctx, input):
        output = input.new(input.size())
        output[input >= 0] = 1
        output[input < 0] = -1
        return output

    @staticmethod
    def backward(ctx, grad_output):
        grad_input = grad_output.clone()
        return grad_input

# aliases
binarize = BinarizeF.apply

def binarize_activations(input):
    output = input.new(input.size())
    output[input >= 0.5] = 1
    output[input < 0.5] = 0
    return output

In [4]:
import math

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

class BinaryTanh(nn.Module):
    def __init__(self):
        super(BinaryTanh, self).__init__()
        self.hardtanh = nn.Hardtanh()

    def forward(self, input):
        output = self.hardtanh(input)
        output = binarize(output)
        return output

class BinaryLinear(nn.Linear):
    def forward(self, input):
        binary_weight = binarize(self.weight)
        if self.bias is None:
            return F.linear(input, binary_weight)
        else:
            return F.linear(input, binary_weight, self.bias)

    def reset_parameters(self):
        # Glorot initialization
        in_features, out_features = self.weight.size()
        stdv = math.sqrt(1.5 / (in_features + out_features))
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.zero_()

        self.weight.lr_scale = 1. / stdv

class SparseBinaryLinear(nn.Linear):
    def __init__(self, in_features, out_features, sparsity=0, bias=True, device=None, dtype=None):
        super(SparseBinaryLinear, self).__init__(in_features, out_features, bias, device, dtype)
        self.mask = torch.bernoulli(torch.ones_like(self.weight) * (1-sparsity))
        self.register_buffer('weight_mask_const', self.mask)
        print(self.mask.mean())

    def forward(self, input):
        input = binarize_activations(input)
        binary_weight = binarize(self.weight).mul(Variable(self.weight_mask_const))
        #print(self.weight_mask_const.mean(), binary_weight.mean())
        if self.bias is None:
            return F.linear(input, binary_weight)
        else:
            return F.linear(input, binary_weight, self.bias)

    def reset_parameters(self):
        # Glorot initialization
        in_features, out_features = self.weight.size()
        stdv = math.sqrt(1.5 / (in_features + out_features))
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.zero_()

        self.weight.lr_scale = 1. / stdv

class BinaryConv2d(nn.Conv2d):

    def forward(self, input):
        bw = binarize(self.weight)
        return F.conv2d(input, bw, self.bias, self.stride,
                               self.padding, self.dilation, self.groups)

    def reset_parameters(self):
        # Glorot initialization
        in_features = self.in_channels
        out_features = self.out_channels
        for k in self.kernel_size:
            in_features *= k
            out_features *= k
        stdv = math.sqrt(1.5 / (in_features + out_features))
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.zero_()

        self.weight.lr_scale = 1. / stdv


# class QuantizedBatchNorm1d(nn.BatchNorm1d):
#     def forward(self, input):
#         if self.momentum is None:
#             exponential_average_factor = 0.0
#         else:
#             exponential_average_factor = self.momentum

#         if self.training and self.track_running_stats:
#             # TODO: if statement only here to tell the jit to skip emitting this when it is None
#             if self.num_batches_tracked is not None:  # type: ignore[has-type]
#                 self.num_batches_tracked.add_(1)  # type: ignore[has-type]
#                 if self.momentum is None:  # use cumulative moving average
#                     exponential_average_factor = 1.0 / float(self.num_batches_tracked)
#                 else:  # use exponential moving average
#                     exponential_average_factor = self.momentum

#         if self.training:
#             bn_training = True
#         else:
#             bn_training = (self.running_mean is None) and (self.running_var is None)

#         return F.batch_norm(
#             input,
#             # If buffers are not to be tracked, ensure that they won't be updated
#             self.running_mean
#             if not self.training or self.track_running_stats
#             else None,
#             self.running_var if not self.training or self.track_running_stats else None,
#             quantize(self.weight),
#             self.bias,
#             bn_training,
#             exponential_average_factor,
#             self.eps,
#         )


In [5]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

MNIST_INPUT_RESOLUTION = 16

def load_data(config):
        data_dir = config['data_dir']

        transform = transforms.Compose([
                transforms.Resize((MNIST_INPUT_RESOLUTION, MNIST_INPUT_RESOLUTION)),
                transforms.Grayscale(),
                transforms.ToTensor(),
                transforms.Normalize((0,), (1,))])

        trainset = datasets.MNIST(data_dir, train=True, download=True, transform=transform)
        testset = datasets.MNIST(data_dir, train=False, download=True, transform=transform)

        return trainset, testset

In [6]:
import torch


class EarlyStopping_acc:
    """Early stops the training if test acc doesn't improve after a given patience."""
    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt', trace_func=print):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
            path (str): Path for the checkpoint to be saved to.
                            Default: 'checkpoint.pt'
            trace_func (function): trace print function.
                            Default: print
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.test_loss_min = 0
        self.delta = delta
        self.path = path
        self.trace_func = trace_func
    def __call__(self, test_loss, model):

        score = test_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(test_loss, model)
        elif score <= self.best_score + self.delta:
            self.counter += 1
            self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
                self.counter = 0
        else:
            self.best_score = score
            self.save_checkpoint(test_loss, model)
            self.counter = 0

    def save_checkpoint(self, test_loss, model):
        '''Saves model when test acc increases.'''
        if self.verbose:
            self.trace_func(f'Test acc increased ({self.test_loss_min:.6f} --> {test_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.test_loss_min = test_loss

In [7]:
import torch
import snntorch as snn
from snntorch import functional as SF

def test_accuracy(config, net, testloader, device="cpu"):
    correct = 0
    total = 0
    with torch.no_grad():
        net.eval()
        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)

            outputs, _ = net(images)
            accuracy = SF.accuracy_rate(outputs, labels)

            total += labels.size(0)
            correct += accuracy * labels.size(0)

    return 100 * correct / total

In [8]:
# exp relaxation implementation of THA based on Eq (4)

def thr_annealing(config, network):
    alpha_thr1 = config['alpha_thr1']
    alpha_thr2 = config['alpha_thr2']
    alpha_thr3 = config['alpha_thr3']

    ### to address conditional parameters, s.t. thr_final > threshold
    thr_final1 = config['thr_final1'] + config['threshold1']
    thr_final2 = config['thr_final2'] + config['threshold2']
    thr_final3 = config['thr_final3'] + config['threshold3']

    network.lif1.threshold += (thr_final1 - network.lif1.threshold) * alpha_thr1
    network.lif2.threshold += (thr_final2 - network.lif2.threshold) * alpha_thr2
    network.lif3.threshold += (thr_final3 - network.lif3.threshold) * alpha_thr3

    return

In [9]:
# snntorch
import snntorch as snn
from snntorch import spikegen
from snntorch import surrogate
from snntorch import functional as SF

# torch
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torch.nn.functional as F
from torch.optim.lr_scheduler import StepLR

# misc
import os
import numpy as np
import math
import itertools
import matplotlib.pyplot as plt
import pandas as pd
import shutil
import time
from tqdm import tqdm

def train(config, net, epoch, trainloader, testloader, criterion, optimizer, scheduler, device):
    net.train()
    loss_accum = []
    lr_accum = []

    # TRAIN
    progress_bar = tqdm(trainloader)
    loss_current = None

    #for data, labels in trainloader:
    for data, labels in progress_bar:
        data, labels = data.to(device), labels.to(device)

        spk_rec2, _ = net(data)
        loss = criterion(spk_rec2, labels)
        optimizer.zero_grad()
        loss.backward()
        if loss_current is None:
            loss_current = loss.item()
        else:
            loss_current = 0.9 * loss_current + 0.1 * loss.item()
        progress_bar.set_description(f"loss: {loss_current:.4f}")

        if config['grad_clip']:
            nn.utils.clip_grad_norm_(net.parameters(), 1.0)
        if config['weight_clip']:
            with torch.no_grad():
                for param in net.parameters():
                    param.clamp_(-1, 1)

        optimizer.step()
        scheduler.step()
        thr_annealing(config, net)


        loss_accum.append(loss.item()/config['num_steps'])
        lr_accum.append(optimizer.param_groups[0]["lr"])


    return loss_accum, lr_accum

In [10]:
# snntorch
import snntorch as snn
from snntorch import spikegen
from snntorch import surrogate

# torch
import copy
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

# misc
import numpy as np
import pandas as pd
import time
import logging

def run(config):
    print(config)
    file_name = config['exp_name']

    for trial in range(config['num_trials']):
        # file names
        SAVE_CSV = config['save_csv']
        SAVE_MODEL = config['save_model']
        csv_name = file_name + '_t' + str(trial) + '.csv'
        log_name = file_name + '_t' + str(trial) + '.log'
        model_name = file_name + '_t' + str(trial) + '.pt'
        num_epochs = config['num_epochs']
        torch.manual_seed(config['seed'])

        # dataframes
        df_train_loss = pd.DataFrame()
        df_test_acc = pd.DataFrame(columns=['epoch', 'test_acc', 'train_time'])
        df_lr = pd.DataFrame()

        # initialize network
        net = None
        net_desc = config['model']
        if net_desc in globals():
            klass = globals()[net_desc]
            net = klass(config)
        else:
            net = eval(net_desc)
        if trial == 0:
            print(net)
        device = "cpu"
        #device = "mps"
        if torch.cuda.is_available():
            device = "cuda:0"
            if torch.cuda.device_count() > 1:
                net = nn.DataParallel(net)
        net.to(device)

        # net params
        criterion = SF.mse_count_loss(correct_rate=config['correct_rate'], incorrect_rate=config['incorrect_rate'])
        optimizer, scheduler = optim_func(net, config)

        # early stopping condition
        if config['early_stopping']:
            early_stopping = EarlyStopping_acc(patience=config['patience'], verbose=True, path=model_name)
            early_stopping.early_stop = False
            early_stopping.best_score = None

        # load data
        trainset, testset = load_data(config)
        config['dataset_length'] = len(trainset)
        trainloader = DataLoader(trainset, batch_size=int(config["batch_size"]), shuffle=True)
        testloader = DataLoader(testset, batch_size=int(config["batch_size"]), shuffle=False)

        print(f"=======Trial: {trial}=======")

        for epoch in range(num_epochs):

            # train
            start_time = time.time()
            loss_list, lr_list = train(config, net, epoch, trainloader, testloader, criterion, optimizer, scheduler, device)
            epoch_time = time.time() - start_time

            # test
            test_acc = test_accuracy(config, net, testloader, device)
            print(f'Epoch: {epoch} \tTest Accuracy: {test_acc}')

            if config['df_lr']:
                df_lr = pd.concat([df_lr, pd.DataFrame(lr_list)])
            df_train_loss = pd.concat([df_train_loss, pd.DataFrame(loss_list)])
            test_data = pd.DataFrame([[epoch, test_acc, epoch_time]], columns = ['epoch', 'test_acc', 'train_time'])
            df_test_acc = pd.concat([df_test_acc, test_data])

            if SAVE_CSV:
                df_train_loss.to_csv('loss_' + csv_name, index=False)
                df_test_acc.to_csv('acc_' + csv_name, index=False)
                if config['df_lr']:
                    df_lr.to_csv('lr_' + csv_name, index=False)

            if config['early_stopping']:
                early_stopping(test_acc, net)

                if early_stopping.early_stop:
                    print("Early stopping")
                    early_stopping.early_stop = False
                    early_stopping.best_score = None
                    break

            if SAVE_MODEL and not config['early_stopping']:
                torch.save(net.state_dict(), model_name)

            if config['print_weights']:
                for name, param in net.named_parameters():
                    print(name, param)

            if config['post_quantize']:
                net_quantized = post_quantize(copy.deepcopy(net))
                test_acc = test_accuracy(config, net_quantized, testloader, device)
                print(f'Epoch: {epoch} \tTest Quantized Accuracy: {test_acc}')

        # net.load_state_dict(torch.load(model_name))


In [11]:
# snntorch
import snntorch as snn
from snntorch import surrogate

# torch
import torch
import torch.nn as nn
import torch.nn.functional as F

class NetConv(nn.Module):
    def __init__(self, config):
        super().__init__()

        self.thr1 = config['threshold1']
        self.thr2 = config['threshold2']
        self.thr3 = config['threshold3']
        slope = config['slope']
        beta = config['beta']
        self.num_steps = config['num_steps']
        self.batch_norm = config['enable_batch_norm']
        p1 = config['dropout1']
        self.binarize = config['binarize']
        self.binarize_input = config['binarize_input']
        self.bias = config['enable_bias']
        self.reset_mechanism = 'zero' if config['on_spike_reset_to_zero'] else 'subtract'

        spike_grad = surrogate.fast_sigmoid(slope)
        # Initialize layers with spike operator
        self.bconv1 = BinaryConv2d(1, 16, 5, bias=self.bias)
        self.conv1 = nn.Conv2d(1, 16, 5, bias=self.bias)
        self.conv1_bn = nn.BatchNorm2d(16)

        self.lif1 = snn.Leaky(beta, threshold=self.thr1, reset_mechanism=self.reset_mechanism, spike_grad=spike_grad)
        self.bconv2 = BinaryConv2d(16, 64, 5, bias=self.bias)
        self.conv2 = nn.Conv2d(16, 64, 5, bias=self.bias)
        self.conv2_bn = nn.BatchNorm2d(64)
        self.lif2 = snn.Leaky(beta, threshold=self.thr2, reset_mechanism=self.reset_mechanism, spike_grad=spike_grad)
        self.bfc1 = BinaryLinear(64 * 4 * 4, 10, bias=self.bias)
        self.fc1 = nn.Linear(64 * 4 * 4, 10, bias=self.bias)
        self.lif3 = snn.Leaky(beta, threshold=self.thr3, reset_mechanism=self.reset_mechanism, spike_grad=spike_grad)
        self.dropout = nn.Dropout(p1)

    def forward(self, x):

        # Initialize hidden states and outputs at t=0
        mem1 = self.lif1.init_leaky()
        mem2 = self.lif2.init_leaky()
        mem3 = self.lif3.init_leaky()

        # Record the final layer
        spk3_rec = []
        mem3_rec = []

        # Binarized
        if self.binarize:

            for step in range(self.num_steps):

                if self.binarize_input:
                    x = binarize_activations(x)
                cur1 = F.avg_pool2d(self.bconv1(x), 2)
                if self.batch_norm:
                    cur1 = self.conv1_bn(cur1)
                spk1, mem1 = self.lif1(cur1, mem1)
                cur2 = F.avg_pool2d(self.bconv2(spk1), 2)
                if self.batch_norm:
                    cur2 = self.conv2_bn(cur2)
                spk2, mem2 = self.lif2(cur2, mem2)
                cur3 = self.dropout(self.bfc1(spk2.flatten(1)))
                spk3, mem3 = self.lif3(cur3, mem3)

                spk3_rec.append(spk3)
                mem3_rec.append(mem3)

            return torch.stack(spk3_rec, dim=0), torch.stack(mem3_rec, dim=0)

        # Full Precision
        else:

            for step in range(self.num_steps):

                cur1 = F.avg_pool2d(self.conv1(x), 2)
                if self.batch_norm:
                    cur1 = self.conv1_bn(cur1)
                spk1, mem1 = self.lif1(cur1, mem1)
                cur2 = F.avg_pool2d(self.conv2(spk1), 2)
                if self.batch_norm:
                    cur2 = self.conv2_bn(cur2)
                spk2, mem2 = self.lif2(cur2, mem2)
                cur3 = self.dropout(self.fc1(spk2.flatten(1)))
                spk3, mem3 = self.lif3(cur3, mem3)

                spk3_rec.append(spk3)
                mem3_rec.append(mem3)

            return torch.stack(spk3_rec, dim=0), torch.stack(mem3_rec, dim=0)

class NetFC(nn.Module):
    def __init__(self, config, neurons=[256, 128], sparsity=[0.0, 0.0]):
        super().__init__()

        self.thr1 = config['threshold1']
        self.thr2 = config['threshold2']
        self.thr3 = config['threshold3']
        slope = config['slope']
        beta = config['beta']
        self.num_steps = config['num_steps']
        self.batch_norm = config['enable_batch_norm']
        p1 = config['dropout1']
        self.binarize = config['binarize']
        self.binarize_input = config['binarize_input']
        self.bias = config['enable_bias']
        self.reset_mechanism = 'zero' if config['on_spike_reset_to_zero'] else 'subtract'

        spike_grad = surrogate.fast_sigmoid(slope)
        # Initialize layers with spike operator
        if self.binarize:
            self.bfc1 = SparseBinaryLinear(MNIST_INPUT_RESOLUTION * MNIST_INPUT_RESOLUTION, neurons[0], sparsity[0], bias=(self.bias and not self.batch_norm))
        else:
            self.fc1 = nn.Linear(MNIST_INPUT_RESOLUTION * MNIST_INPUT_RESOLUTION, neurons[0], bias=(self.bias and not self.batch_norm))
        if self.batch_norm:
            self.bn1 = nn.BatchNorm1d(neurons[0])
        self.lif1 = snn.Leaky(beta, threshold=self.thr1, reset_mechanism=self.reset_mechanism, spike_grad=spike_grad)

        if self.binarize:
            self.bfc2 = SparseBinaryLinear(neurons[0], neurons[1], sparsity[1], bias=(self.bias and not self.batch_norm))
        else:
            self.fc2 = nn.Linear(neurons[0], neurons[1], bias=(self.bias and not self.batch_norm))
        if self.batch_norm:
            self.bn2 = nn.BatchNorm1d(neurons[1])
            #self.bn2 = QuantizedBatchNorm1d(neurons[1])
        self.lif2 = snn.Leaky(beta, threshold=self.thr2, reset_mechanism=self.reset_mechanism, spike_grad=spike_grad)

        if self.binarize:
            self.bfc3 = BinaryLinear(neurons[1], 10, bias=self.bias)
        else:
            self.fc3 = nn.Linear(neurons[1], 10, bias=self.bias)
        self.lif3 = snn.Leaky(beta, threshold=self.thr3, reset_mechanism=self.reset_mechanism, spike_grad=spike_grad)
        self.dropout = nn.Dropout(p1)

    def forward(self, x):

        # Initialize hidden states and outputs at t=0
        mem1 = self.lif1.init_leaky()
        mem2 = self.lif2.init_leaky()
        mem3 = self.lif3.init_leaky()

        # Record the final layer
        spk3_rec = []
        mem3_rec = []

        # Binarized
        if self.binarize:
            for step in range(self.num_steps):

                x = x.flatten(1)
                if self.binarize_input:
                    x = binarize_activations(x)
                cur1 = self.bfc1(x)
                if self.batch_norm:
                   cur1 = self.bn1(cur1)
                spk1, mem1 = self.lif1(cur1, mem1)
                cur2 = self.bfc2(spk1)
                if self.batch_norm:
                    cur2 = self.bn2(cur2)
                spk2, mem2 = self.lif2(cur2, mem2)
                cur3 = self.dropout(self.bfc3(spk2))
                spk3, mem3 = self.lif3(cur3, mem3)

                spk3_rec.append(spk3)
                mem3_rec.append(mem3)

            return torch.stack(spk3_rec, dim=0), torch.stack(mem3_rec, dim=0)

        # Full Precision
        else:

            for step in range(self.num_steps):

                cur1 = self.fc1(x.flatten(1))
                if self.batch_norm:
                    cur1 = self.bn1(cur1)
                spk1, mem1 = self.lif1(cur1, mem1)
                cur2 = self.fc2(spk1)
                if self.batch_norm:
                    cur2 = self.bn2(cur2)
                spk2, mem2 = self.lif2(cur2, mem2)
                cur3 = self.dropout(self.fc3(spk2))
                spk3, mem3 = self.lif3(cur3, mem3)

                spk3_rec.append(spk3)
                mem3_rec.append(mem3)

            return torch.stack(spk3_rec, dim=0), torch.stack(mem3_rec, dim=0)

class NetFC_FirstConv(nn.Module):
    def __init__(self, config, neurons=[16, 256]):
        super().__init__()

        self.thr1 = config['threshold1']
        self.thr2 = config['threshold2']
        self.thr3 = config['threshold3']
        slope = config['slope']

        beta = config['beta']
        self.num_steps = config['num_steps']
        self.batch_norm = config['enable_batch_norm']
        p1 = config['dropout1']
        self.binarize = config['binarize']
        self.binarize_input = config['binarize_input']
        self.bias = config['enable_bias']
        self.reset_mechanism = 'zero' if config['on_spike_reset_to_zero'] else 'subtract'

        spike_grad = surrogate.fast_sigmoid(slope)
        # Initialize layers with spike operator
        self.bconv1 = BinaryConv2d(1, neurons[0], 5, bias=self.bias)
        self.conv1 = nn.Conv2d(1, neurons[0], 5, bias=self.bias)
        self.conv1_bn = nn.BatchNorm2d(neurons[0])
        self.lif1 = snn.Leaky(beta, threshold=self.thr1, reset_mechanism=self.reset_mechanism, spike_grad=spike_grad)

        n = ((28-(5//2))/2)**2
        print(n)
        self.bfc2 = BinaryLinear(n, neurons[1], bias=self.bias)
        self.fc2 = nn.Linear(n, neurons[1], bias=self.bias)
        self.bn2 = nn.BatchNorm1d(neurons[1])
        self.lif2 = snn.Leaky(beta, threshold=self.thr2, reset_mechanism=self.reset_mechanism, spike_grad=spike_grad)

        self.bfc3 = BinaryLinear(neurons[1], 10, bias=self.bias)
        self.fc3 = nn.Linear(neurons[1], 10, bias=self.bias)
        self.lif3 = snn.Leaky(beta, threshold=self.thr3, reset_mechanism=self.reset_mechanism, spike_grad=spike_grad)
        self.dropout = nn.Dropout(p1)

    def forward(self, x):

        # Initialize hidden states and outputs at t=0
        mem1 = self.lif1.init_leaky()
        mem2 = self.lif2.init_leaky()
        mem3 = self.lif3.init_leaky()

        # Record the final layer
        spk3_rec = []
        mem3_rec = []

        # Binarized
        if self.binarize:

            for step in range(self.num_steps):

                if self.binarize_input:
                    x = binarize_activations(x)
                cur1 = F.avg_pool2d(self.bconv1(x), 2)
                if self.batch_norm:
                    cur1 = self.conv1_bn(cur1)
                spk1, mem1 = self.lif1(cur1, mem1)
                spk2, mem2 = self.lif2(self.bfc2(spk1.flatten(1)), mem2)
                if self.batch_norm:
                    spk2 = self.bn2(spk2)
                cur3 = self.dropout(self.bfc3(spk2))
                spk3, mem3 = self.lif3(cur3, mem3)

                spk3_rec.append(spk3)
                mem3_rec.append(mem3)

            return torch.stack(spk3_rec, dim=0), torch.stack(mem3_rec, dim=0)

        # Full Precision
        else:

            for step in range(self.num_steps):

                cur1 = F.avg_pool2d(self.conv1(x), 2)
                if self.batch_norm:
                    cur1 = self.conv1_bn(cur1)
                spk1, mem1 = self.lif1(cur1, mem1)
                spk2, mem2 = self.lif2(self.fc2(spk1.flatten(1)), mem2)
                if self.batch_norm:
                    spk2 = self.bn2(spk2)
                cur3 = self.dropout(self.fc3(spk2))
                spk3, mem3 = self.lif3(cur3, mem3)

                spk3_rec.append(spk3)
                mem3_rec.append(mem3)

            return torch.stack(spk3_rec, dim=0), torch.stack(mem3_rec, dim=0)

In [12]:
def quantize(input):
    #0.5, 0.75, 1, 1.5, 2, 3, 4, 6, 8
    output = input.new(input.size())
    output[input < (6.0+8.0)/2] = 8
    output[input < (4.0+6.0)/2] = 6
    output[input < (4.0+3.0)/2] = 4
    output[input < (3.0+4.0)/2] = 3
    output[input < (2.0+3.0)/2] = 2
    output[input < (1.5+2.0)/2] = 1.5
    output[input < (1.0+1.5)/2] = 1.0
    output[input < (0.75+1.0)/2] = 0.75
    output[input < (0.5+0.75)/2] = 0.5
    #output = input
    #print(output)
    return output
    #return input
    #return torch.ones_like(input)
    #return torch.round(torch.maximum(input, torch.ones_like(input)))


class QuantizedBatchNorm1d(nn.BatchNorm1d):
    def forward(self, input):
        if self.momentum is None:
            exponential_average_factor = 0.0
        else:
            exponential_average_factor = self.momentum

        if self.training and self.track_running_stats:
            # TODO: if statement only here to tell the jit to skip emitting this when it is None
            if self.num_batches_tracked is not None:  # type: ignore[has-type]
                self.num_batches_tracked.add_(1)  # type: ignore[has-type]
                if self.momentum is None:  # use cumulative moving average
                    exponential_average_factor = 1.0 / float(self.num_batches_tracked)
                else:  # use exponential moving average
                    exponential_average_factor = self.momentum

        if self.training:
            bn_training = True
        else:
            bn_training = (self.running_mean is None) and (self.running_var is None)

        F.batch_norm(
            input,
            # If buffers are not to be tracked, ensure that they won't be updated
            self.running_mean
            if not self.training or self.track_running_stats
            else None,
            self.running_var if not self.training or self.track_running_stats else None,
            self.weight,
            self.bias,
            bn_training,
            exponential_average_factor,
            self.eps,
        )

        return input.clone() * quantize(self.weight.clone()) + self.bias


In [13]:
def post_quantize(model):
    print("post quantize model")
    with torch.no_grad():
        #model.bfc1.weight = nn.Parameter(binarize(model.bfc1.weight) * model.bfc1.mask)
        for child in model.children():
            if type(child) == SparseBinaryLinear:
                print("sfc", child)
                child.weight = nn.Parameter(binarize(child.weight).to(child.weight.device) * child.mask.to(child.weight.device))
            if type(child) == BinaryLinear:
                print("bfc", child)
                child.weight = nn.Parameter(binarize(child.weight).to(child.weight.device))
            if type(child) == nn.BatchNorm1d:
                print("qbn", child)
                child.weight = nn.Parameter(quantize(child.weight).to(child.weight.device))
    # for n, p in model.named_parameters():
    #     print(n, p)
    return model
#net = NetFC(config)
#post_quantize(net)

In [14]:
cfg = config.copy()
cfg['model'] = 'NetFC'
#cfg['num_steps'] = 100
cfg['enable_batch_norm'] = True
cfg['enable_bias'] = True
#cfg['print_weights'] = True
run(cfg)

{'model': 'NetFC', 'exp_name': 'mnist_tha', 'num_trials': 5, 'num_epochs': 50, 'binarize': True, 'binarize_input': True, 'post_quantize': True, 'enable_bias': True, 'enable_batch_norm': True, 'enable_dropout': True, 'enable_threshold': True, 'enable_slope': True, 'on_spike_reset_to_zero': False, 'data_dir': '~/data/mnist', 'batch_size': 128, 'seed': 0, 'num_workers': 0, 'save_csv': True, 'save_model': True, 'early_stopping': True, 'patience': 100, 'grad_clip': False, 'weight_clip': False, 'dropout1': 0.02856, 'beta': 0.992187, 'lr': 0.00997, 'slope': 10.22, 'threshold1': 11.666, 'alpha_thr1': 0.024, 'thr_final1': 4.317, 'threshold2': 14.105, 'alpha_thr2': 0.119, 'thr_final2': 16.29, 'threshold3': 0.6656, 'alpha_thr3': 0.0011, 'thr_final3': 3.496, 'num_steps': 100, 'correct_rate': 0.8, 'incorrect_rate': 0.2, 'betas': (0.9, 0.999), 't_0': 4688, 'eta_min': 0, 'df_lr': True, 'print_weights': False}
tensor(1.)
tensor(1.)
NetFC(
  (bfc1): SparseBinaryLinear(in_features=256, out_features=256,

loss: 1.2687: 100%|██████████| 469/469 [02:26<00:00,  3.20it/s]


Epoch: 0 	Test Accuracy: 87.33
Test acc increased (0.000000 --> 87.330000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 0 	Test Quantized Accuracy: 87.39


loss: 1.2105: 100%|██████████| 469/469 [02:24<00:00,  3.24it/s]


Epoch: 1 	Test Accuracy: 88.67
Test acc increased (87.330000 --> 88.670000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 1 	Test Quantized Accuracy: 88.57


loss: 1.2066: 100%|██████████| 469/469 [02:26<00:00,  3.21it/s]


Epoch: 2 	Test Accuracy: 88.4
EarlyStopping counter: 1 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 2 	Test Quantized Accuracy: 86.18


loss: 1.2146: 100%|██████████| 469/469 [02:25<00:00,  3.22it/s]


Epoch: 3 	Test Accuracy: 88.34
EarlyStopping counter: 2 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 3 	Test Quantized Accuracy: 87.87


loss: 1.1451: 100%|██████████| 469/469 [02:24<00:00,  3.25it/s]


Epoch: 4 	Test Accuracy: 89.03
Test acc increased (88.670000 --> 89.030000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 4 	Test Quantized Accuracy: 88.14


loss: 1.1446: 100%|██████████| 469/469 [02:21<00:00,  3.32it/s]


Epoch: 5 	Test Accuracy: 89.56
Test acc increased (89.030000 --> 89.560000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 5 	Test Quantized Accuracy: 89.18


loss: 1.1457: 100%|██████████| 469/469 [02:20<00:00,  3.35it/s]


Epoch: 6 	Test Accuracy: 90.3
Test acc increased (89.560000 --> 90.300000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 6 	Test Quantized Accuracy: 89.97


loss: 1.1056: 100%|██████████| 469/469 [02:20<00:00,  3.33it/s]


Epoch: 7 	Test Accuracy: 90.55
Test acc increased (90.300000 --> 90.550000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 7 	Test Quantized Accuracy: 90.35


loss: 1.0908: 100%|██████████| 469/469 [02:20<00:00,  3.34it/s]


Epoch: 8 	Test Accuracy: 90.54
EarlyStopping counter: 1 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 8 	Test Quantized Accuracy: 89.73


loss: 0.9753: 100%|██████████| 469/469 [02:19<00:00,  3.35it/s]


Epoch: 9 	Test Accuracy: 90.8
Test acc increased (90.550000 --> 90.800000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 9 	Test Quantized Accuracy: 90.35


loss: 1.0746: 100%|██████████| 469/469 [02:21<00:00,  3.31it/s]


Epoch: 10 	Test Accuracy: 90.64
EarlyStopping counter: 1 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 10 	Test Quantized Accuracy: 90.2


loss: 1.0933: 100%|██████████| 469/469 [02:20<00:00,  3.33it/s]


Epoch: 11 	Test Accuracy: 90.15
EarlyStopping counter: 2 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 11 	Test Quantized Accuracy: 89.42


loss: 1.1009: 100%|██████████| 469/469 [02:19<00:00,  3.35it/s]


Epoch: 12 	Test Accuracy: 90.42
EarlyStopping counter: 3 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 12 	Test Quantized Accuracy: 89.92


loss: 1.1445: 100%|██████████| 469/469 [02:20<00:00,  3.33it/s]


Epoch: 13 	Test Accuracy: 89.96
EarlyStopping counter: 4 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 13 	Test Quantized Accuracy: 89.48


loss: 1.1570: 100%|██████████| 469/469 [02:20<00:00,  3.33it/s]


Epoch: 14 	Test Accuracy: 89.66
EarlyStopping counter: 5 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 14 	Test Quantized Accuracy: 89.12


loss: 1.1836: 100%|██████████| 469/469 [02:19<00:00,  3.36it/s]


Epoch: 15 	Test Accuracy: 89.8
EarlyStopping counter: 6 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 15 	Test Quantized Accuracy: 88.58


loss: 1.1137: 100%|██████████| 469/469 [02:20<00:00,  3.34it/s]


Epoch: 16 	Test Accuracy: 89.24
EarlyStopping counter: 7 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 16 	Test Quantized Accuracy: 87.53


loss: 1.1437: 100%|██████████| 469/469 [02:19<00:00,  3.35it/s]


Epoch: 17 	Test Accuracy: 90.53
EarlyStopping counter: 8 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 17 	Test Quantized Accuracy: 89.9


loss: 1.1271: 100%|██████████| 469/469 [02:19<00:00,  3.36it/s]


Epoch: 18 	Test Accuracy: 91.27
Test acc increased (90.800000 --> 91.270000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 18 	Test Quantized Accuracy: 88.87


loss: 1.1404: 100%|██████████| 469/469 [02:19<00:00,  3.37it/s]


Epoch: 19 	Test Accuracy: 91.09
EarlyStopping counter: 1 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 19 	Test Quantized Accuracy: 87.09


loss: 1.1291: 100%|██████████| 469/469 [02:21<00:00,  3.32it/s]


Epoch: 20 	Test Accuracy: 90.79
EarlyStopping counter: 2 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 20 	Test Quantized Accuracy: 81.24


loss: 1.0890: 100%|██████████| 469/469 [02:20<00:00,  3.34it/s]


Epoch: 21 	Test Accuracy: 91.0
EarlyStopping counter: 3 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 21 	Test Quantized Accuracy: 68.38


loss: 1.1359: 100%|██████████| 469/469 [02:23<00:00,  3.27it/s]


Epoch: 22 	Test Accuracy: 90.85
EarlyStopping counter: 4 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 22 	Test Quantized Accuracy: 63.93


loss: 1.1031: 100%|██████████| 469/469 [02:27<00:00,  3.17it/s]


Epoch: 23 	Test Accuracy: 90.7
EarlyStopping counter: 5 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 23 	Test Quantized Accuracy: 59.7


loss: 1.1427: 100%|██████████| 469/469 [02:28<00:00,  3.15it/s]


Epoch: 24 	Test Accuracy: 91.27
EarlyStopping counter: 6 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 24 	Test Quantized Accuracy: 55.47


loss: 1.0961: 100%|██████████| 469/469 [02:30<00:00,  3.11it/s]


Epoch: 25 	Test Accuracy: 91.49
Test acc increased (91.270000 --> 91.490000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 25 	Test Quantized Accuracy: 54.96


loss: 1.0844: 100%|██████████| 469/469 [02:29<00:00,  3.14it/s]


Epoch: 26 	Test Accuracy: 91.9
Test acc increased (91.490000 --> 91.900000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 26 	Test Quantized Accuracy: 44.46


loss: 1.0089: 100%|██████████| 469/469 [02:27<00:00,  3.19it/s]


Epoch: 27 	Test Accuracy: 91.94
Test acc increased (91.900000 --> 91.940000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 27 	Test Quantized Accuracy: 44.93


loss: 1.0664: 100%|██████████| 469/469 [02:22<00:00,  3.30it/s]


Epoch: 28 	Test Accuracy: 91.55
EarlyStopping counter: 1 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 28 	Test Quantized Accuracy: 43.16


loss: 0.9506: 100%|██████████| 469/469 [02:18<00:00,  3.38it/s]


Epoch: 29 	Test Accuracy: 91.9
EarlyStopping counter: 2 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 29 	Test Quantized Accuracy: 45.84


loss: 1.0771: 100%|██████████| 469/469 [02:23<00:00,  3.27it/s]


Epoch: 30 	Test Accuracy: 91.48
EarlyStopping counter: 3 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 30 	Test Quantized Accuracy: 60.98


loss: 1.0530: 100%|██████████| 469/469 [02:24<00:00,  3.25it/s]


Epoch: 31 	Test Accuracy: 91.64
EarlyStopping counter: 4 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 31 	Test Quantized Accuracy: 44.08


loss: 1.0921: 100%|██████████| 469/469 [02:24<00:00,  3.24it/s]


Epoch: 32 	Test Accuracy: 91.62
EarlyStopping counter: 5 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 32 	Test Quantized Accuracy: 45.86


loss: 1.1548: 100%|██████████| 469/469 [02:23<00:00,  3.27it/s]


Epoch: 33 	Test Accuracy: 91.1
EarlyStopping counter: 6 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 33 	Test Quantized Accuracy: 40.58


loss: 1.1507: 100%|██████████| 469/469 [02:21<00:00,  3.32it/s]


Epoch: 34 	Test Accuracy: 91.96
Test acc increased (91.940000 --> 91.960000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 34 	Test Quantized Accuracy: 43.58


loss: 1.0757: 100%|██████████| 469/469 [02:20<00:00,  3.33it/s]


Epoch: 35 	Test Accuracy: 91.91
EarlyStopping counter: 1 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 35 	Test Quantized Accuracy: 43.03


loss: 1.1368: 100%|██████████| 469/469 [02:22<00:00,  3.29it/s]


Epoch: 36 	Test Accuracy: 91.77
EarlyStopping counter: 2 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 36 	Test Quantized Accuracy: 46.63


loss: 1.0848: 100%|██████████| 469/469 [02:21<00:00,  3.31it/s]


Epoch: 37 	Test Accuracy: 91.8
EarlyStopping counter: 3 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 37 	Test Quantized Accuracy: 27.07


loss: 1.1265: 100%|██████████| 469/469 [02:22<00:00,  3.30it/s]


Epoch: 38 	Test Accuracy: 91.95
EarlyStopping counter: 4 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 38 	Test Quantized Accuracy: 21.67


loss: 1.1046: 100%|██████████| 469/469 [02:21<00:00,  3.32it/s]


Epoch: 39 	Test Accuracy: 91.97
Test acc increased (91.960000 --> 91.970000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 39 	Test Quantized Accuracy: 38.42


loss: 1.0895: 100%|██████████| 469/469 [02:21<00:00,  3.30it/s]


Epoch: 40 	Test Accuracy: 92.24
Test acc increased (91.970000 --> 92.240000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 40 	Test Quantized Accuracy: 26.86


loss: 1.1452: 100%|██████████| 469/469 [02:28<00:00,  3.15it/s]


Epoch: 41 	Test Accuracy: 91.69
EarlyStopping counter: 1 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 41 	Test Quantized Accuracy: 24.19


loss: 1.1178: 100%|██████████| 469/469 [02:30<00:00,  3.12it/s]


Epoch: 42 	Test Accuracy: 91.78
EarlyStopping counter: 2 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 42 	Test Quantized Accuracy: 18.91


loss: 1.1195: 100%|██████████| 469/469 [02:30<00:00,  3.11it/s]


Epoch: 43 	Test Accuracy: 92.18
EarlyStopping counter: 3 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 43 	Test Quantized Accuracy: 28.25


loss: 1.0643: 100%|██████████| 469/469 [02:30<00:00,  3.12it/s]


Epoch: 44 	Test Accuracy: 92.0
EarlyStopping counter: 4 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 44 	Test Quantized Accuracy: 24.21


loss: 1.1164: 100%|██████████| 469/469 [02:30<00:00,  3.12it/s]


Epoch: 45 	Test Accuracy: 91.9
EarlyStopping counter: 5 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 45 	Test Quantized Accuracy: 26.81


loss: 1.1084: 100%|██████████| 469/469 [02:31<00:00,  3.09it/s]


Epoch: 46 	Test Accuracy: 91.81
EarlyStopping counter: 6 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 46 	Test Quantized Accuracy: 16.98


loss: 1.0956: 100%|██████████| 469/469 [02:30<00:00,  3.11it/s]


Epoch: 47 	Test Accuracy: 92.2
EarlyStopping counter: 7 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 47 	Test Quantized Accuracy: 21.7


loss: 1.0251: 100%|██████████| 469/469 [02:30<00:00,  3.12it/s]


Epoch: 48 	Test Accuracy: 91.68
EarlyStopping counter: 8 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 48 	Test Quantized Accuracy: 21.66


loss: 0.9180: 100%|██████████| 469/469 [02:32<00:00,  3.08it/s]


Epoch: 49 	Test Accuracy: 92.73
Test acc increased (92.240000 --> 92.730000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 49 	Test Quantized Accuracy: 21.72
tensor(1.)
tensor(1.)


loss: 1.2687: 100%|██████████| 469/469 [02:31<00:00,  3.10it/s]


Epoch: 0 	Test Accuracy: 87.33
Test acc increased (0.000000 --> 87.330000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 0 	Test Quantized Accuracy: 87.39


loss: 1.2105: 100%|██████████| 469/469 [02:30<00:00,  3.11it/s]


Epoch: 1 	Test Accuracy: 88.67
Test acc increased (87.330000 --> 88.670000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 1 	Test Quantized Accuracy: 88.57


loss: 1.2066: 100%|██████████| 469/469 [02:32<00:00,  3.08it/s]


Epoch: 2 	Test Accuracy: 88.4
EarlyStopping counter: 1 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 2 	Test Quantized Accuracy: 86.18


loss: 1.2146: 100%|██████████| 469/469 [02:31<00:00,  3.10it/s]


Epoch: 3 	Test Accuracy: 88.34
EarlyStopping counter: 2 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 3 	Test Quantized Accuracy: 87.87


loss: 1.1451: 100%|██████████| 469/469 [02:31<00:00,  3.11it/s]


Epoch: 4 	Test Accuracy: 89.03
Test acc increased (88.670000 --> 89.030000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 4 	Test Quantized Accuracy: 88.14


loss: 1.1446: 100%|██████████| 469/469 [02:31<00:00,  3.10it/s]


Epoch: 5 	Test Accuracy: 89.56
Test acc increased (89.030000 --> 89.560000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 5 	Test Quantized Accuracy: 89.18


loss: 1.1457: 100%|██████████| 469/469 [02:30<00:00,  3.12it/s]


Epoch: 6 	Test Accuracy: 90.3
Test acc increased (89.560000 --> 90.300000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 6 	Test Quantized Accuracy: 89.97


loss: 1.1056: 100%|██████████| 469/469 [02:20<00:00,  3.35it/s]


Epoch: 7 	Test Accuracy: 90.55
Test acc increased (90.300000 --> 90.550000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 7 	Test Quantized Accuracy: 90.35


loss: 1.0908: 100%|██████████| 469/469 [02:20<00:00,  3.34it/s]


Epoch: 8 	Test Accuracy: 90.54
EarlyStopping counter: 1 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 8 	Test Quantized Accuracy: 89.73


loss: 0.9753: 100%|██████████| 469/469 [02:19<00:00,  3.35it/s]


Epoch: 9 	Test Accuracy: 90.8
Test acc increased (90.550000 --> 90.800000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 9 	Test Quantized Accuracy: 90.35


loss: 1.0746: 100%|██████████| 469/469 [02:20<00:00,  3.33it/s]


Epoch: 10 	Test Accuracy: 90.64
EarlyStopping counter: 1 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 10 	Test Quantized Accuracy: 90.2


loss: 1.0933: 100%|██████████| 469/469 [02:20<00:00,  3.34it/s]


Epoch: 11 	Test Accuracy: 90.15
EarlyStopping counter: 2 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 11 	Test Quantized Accuracy: 89.42


loss: 1.1009: 100%|██████████| 469/469 [02:21<00:00,  3.30it/s]


Epoch: 12 	Test Accuracy: 90.42
EarlyStopping counter: 3 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 12 	Test Quantized Accuracy: 89.92


loss: 1.1445: 100%|██████████| 469/469 [02:23<00:00,  3.28it/s]


Epoch: 13 	Test Accuracy: 89.96
EarlyStopping counter: 4 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 13 	Test Quantized Accuracy: 89.48


loss: 1.1570: 100%|██████████| 469/469 [02:21<00:00,  3.31it/s]


Epoch: 14 	Test Accuracy: 89.66
EarlyStopping counter: 5 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 14 	Test Quantized Accuracy: 89.12


loss: 1.1836: 100%|██████████| 469/469 [02:21<00:00,  3.30it/s]


Epoch: 15 	Test Accuracy: 89.8
EarlyStopping counter: 6 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 15 	Test Quantized Accuracy: 88.68


loss: 1.1137: 100%|██████████| 469/469 [02:21<00:00,  3.31it/s]


Epoch: 16 	Test Accuracy: 89.24
EarlyStopping counter: 7 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 16 	Test Quantized Accuracy: 87.53


loss: 1.1437: 100%|██████████| 469/469 [02:21<00:00,  3.32it/s]


Epoch: 17 	Test Accuracy: 90.53
EarlyStopping counter: 8 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 17 	Test Quantized Accuracy: 89.9


loss: 1.1271: 100%|██████████| 469/469 [02:20<00:00,  3.33it/s]


Epoch: 18 	Test Accuracy: 91.27
Test acc increased (90.800000 --> 91.270000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 18 	Test Quantized Accuracy: 88.75


loss: 1.1404: 100%|██████████| 469/469 [02:19<00:00,  3.36it/s]


Epoch: 19 	Test Accuracy: 91.09
EarlyStopping counter: 1 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 19 	Test Quantized Accuracy: 87.09


loss: 1.1291: 100%|██████████| 469/469 [02:19<00:00,  3.36it/s]


Epoch: 20 	Test Accuracy: 90.79
EarlyStopping counter: 2 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 20 	Test Quantized Accuracy: 82.54


loss: 1.0890: 100%|██████████| 469/469 [02:20<00:00,  3.34it/s]


Epoch: 21 	Test Accuracy: 91.0
EarlyStopping counter: 3 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 21 	Test Quantized Accuracy: 68.38


loss: 1.1359: 100%|██████████| 469/469 [02:19<00:00,  3.36it/s]


Epoch: 22 	Test Accuracy: 90.85
EarlyStopping counter: 4 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 22 	Test Quantized Accuracy: 66.73


loss: 1.1031: 100%|██████████| 469/469 [02:19<00:00,  3.36it/s]


Epoch: 23 	Test Accuracy: 90.7
EarlyStopping counter: 5 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 23 	Test Quantized Accuracy: 71.75


loss: 1.1427: 100%|██████████| 469/469 [02:19<00:00,  3.35it/s]


Epoch: 24 	Test Accuracy: 91.27
EarlyStopping counter: 6 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 24 	Test Quantized Accuracy: 52.79


loss: 1.0961: 100%|██████████| 469/469 [02:20<00:00,  3.33it/s]


Epoch: 25 	Test Accuracy: 91.49
Test acc increased (91.270000 --> 91.490000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 25 	Test Quantized Accuracy: 52.18


loss: 1.0844: 100%|██████████| 469/469 [02:20<00:00,  3.34it/s]


Epoch: 26 	Test Accuracy: 91.9
Test acc increased (91.490000 --> 91.900000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 26 	Test Quantized Accuracy: 41.95


loss: 1.0089: 100%|██████████| 469/469 [02:20<00:00,  3.33it/s]


Epoch: 27 	Test Accuracy: 91.94
Test acc increased (91.900000 --> 91.940000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 27 	Test Quantized Accuracy: 44.93


loss: 1.0664: 100%|██████████| 469/469 [02:21<00:00,  3.31it/s]


Epoch: 28 	Test Accuracy: 91.55
EarlyStopping counter: 1 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 28 	Test Quantized Accuracy: 41.02


loss: 0.9506: 100%|██████████| 469/469 [02:20<00:00,  3.34it/s]


Epoch: 29 	Test Accuracy: 91.9
EarlyStopping counter: 2 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 29 	Test Quantized Accuracy: 45.84


loss: 1.0771: 100%|██████████| 469/469 [02:23<00:00,  3.27it/s]


Epoch: 30 	Test Accuracy: 91.48
EarlyStopping counter: 3 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 30 	Test Quantized Accuracy: 48.78


loss: 1.0530: 100%|██████████| 469/469 [02:29<00:00,  3.15it/s]


Epoch: 31 	Test Accuracy: 91.64
EarlyStopping counter: 4 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 31 	Test Quantized Accuracy: 41.29


loss: 1.0921: 100%|██████████| 469/469 [02:28<00:00,  3.15it/s]


Epoch: 32 	Test Accuracy: 91.62
EarlyStopping counter: 5 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 32 	Test Quantized Accuracy: 42.69


loss: 1.1548: 100%|██████████| 469/469 [02:23<00:00,  3.26it/s]


Epoch: 33 	Test Accuracy: 91.1
EarlyStopping counter: 6 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 33 	Test Quantized Accuracy: 38.54


loss: 1.1507: 100%|██████████| 469/469 [02:20<00:00,  3.33it/s]


Epoch: 34 	Test Accuracy: 91.96
Test acc increased (91.940000 --> 91.960000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 34 	Test Quantized Accuracy: 41.15


loss: 1.0757: 100%|██████████| 469/469 [02:20<00:00,  3.33it/s]


Epoch: 35 	Test Accuracy: 91.91
EarlyStopping counter: 1 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 35 	Test Quantized Accuracy: 43.03


loss: 1.1368: 100%|██████████| 469/469 [02:18<00:00,  3.39it/s]


Epoch: 36 	Test Accuracy: 91.77
EarlyStopping counter: 2 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 36 	Test Quantized Accuracy: 29.96


loss: 1.0848: 100%|██████████| 469/469 [02:20<00:00,  3.35it/s]


Epoch: 37 	Test Accuracy: 91.8
EarlyStopping counter: 3 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 37 	Test Quantized Accuracy: 27.07


loss: 1.1265: 100%|██████████| 469/469 [02:22<00:00,  3.30it/s]


Epoch: 38 	Test Accuracy: 91.95
EarlyStopping counter: 4 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 38 	Test Quantized Accuracy: 23.06


loss: 1.1046: 100%|██████████| 469/469 [02:27<00:00,  3.18it/s]


Epoch: 39 	Test Accuracy: 91.97
Test acc increased (91.960000 --> 91.970000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 39 	Test Quantized Accuracy: 38.42


loss: 1.0895: 100%|██████████| 469/469 [02:27<00:00,  3.18it/s]


Epoch: 40 	Test Accuracy: 92.24
Test acc increased (91.970000 --> 92.240000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 40 	Test Quantized Accuracy: 26.86


loss: 1.1452: 100%|██████████| 469/469 [02:27<00:00,  3.17it/s]


Epoch: 41 	Test Accuracy: 91.69
EarlyStopping counter: 1 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 41 	Test Quantized Accuracy: 25.86


loss: 1.1178: 100%|██████████| 469/469 [02:28<00:00,  3.16it/s]


Epoch: 42 	Test Accuracy: 91.78
EarlyStopping counter: 2 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 42 	Test Quantized Accuracy: 27.64


loss: 1.1195: 100%|██████████| 469/469 [02:27<00:00,  3.18it/s]


Epoch: 43 	Test Accuracy: 92.18
EarlyStopping counter: 3 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 43 	Test Quantized Accuracy: 28.25


loss: 1.0643: 100%|██████████| 469/469 [02:27<00:00,  3.17it/s]


Epoch: 44 	Test Accuracy: 92.0
EarlyStopping counter: 4 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 44 	Test Quantized Accuracy: 26.61


loss: 1.1164: 100%|██████████| 469/469 [02:29<00:00,  3.14it/s]


Epoch: 45 	Test Accuracy: 91.9
EarlyStopping counter: 5 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 45 	Test Quantized Accuracy: 26.81


loss: 1.1084: 100%|██████████| 469/469 [02:27<00:00,  3.18it/s]


Epoch: 46 	Test Accuracy: 91.81
EarlyStopping counter: 6 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 46 	Test Quantized Accuracy: 19.72


loss: 1.0956: 100%|██████████| 469/469 [02:27<00:00,  3.18it/s]


Epoch: 47 	Test Accuracy: 92.2
EarlyStopping counter: 7 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 47 	Test Quantized Accuracy: 21.7


loss: 1.0251: 100%|██████████| 469/469 [02:27<00:00,  3.18it/s]


Epoch: 48 	Test Accuracy: 91.68
EarlyStopping counter: 8 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 48 	Test Quantized Accuracy: 21.66


loss: 0.9180: 100%|██████████| 469/469 [02:28<00:00,  3.16it/s]


Epoch: 49 	Test Accuracy: 92.73
Test acc increased (92.240000 --> 92.730000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 49 	Test Quantized Accuracy: 21.72
tensor(1.)
tensor(1.)


loss: 1.2687: 100%|██████████| 469/469 [02:29<00:00,  3.13it/s]


Epoch: 0 	Test Accuracy: 87.33
Test acc increased (0.000000 --> 87.330000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 0 	Test Quantized Accuracy: 87.39


loss: 1.2105: 100%|██████████| 469/469 [02:30<00:00,  3.12it/s]


Epoch: 1 	Test Accuracy: 88.67
Test acc increased (87.330000 --> 88.670000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 1 	Test Quantized Accuracy: 88.57


loss: 1.2066: 100%|██████████| 469/469 [02:30<00:00,  3.12it/s]


Epoch: 2 	Test Accuracy: 88.4
EarlyStopping counter: 1 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 2 	Test Quantized Accuracy: 86.18


loss: 1.2146: 100%|██████████| 469/469 [02:30<00:00,  3.11it/s]


Epoch: 3 	Test Accuracy: 88.34
EarlyStopping counter: 2 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 3 	Test Quantized Accuracy: 87.87


loss: 1.1451: 100%|██████████| 469/469 [02:30<00:00,  3.11it/s]


Epoch: 4 	Test Accuracy: 89.03
Test acc increased (88.670000 --> 89.030000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 4 	Test Quantized Accuracy: 88.14


loss: 1.1446: 100%|██████████| 469/469 [02:31<00:00,  3.10it/s]


Epoch: 5 	Test Accuracy: 89.56
Test acc increased (89.030000 --> 89.560000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 5 	Test Quantized Accuracy: 89.18


loss: 1.1457: 100%|██████████| 469/469 [02:32<00:00,  3.08it/s]


Epoch: 6 	Test Accuracy: 90.3
Test acc increased (89.560000 --> 90.300000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 6 	Test Quantized Accuracy: 89.97


loss: 1.1056: 100%|██████████| 469/469 [02:31<00:00,  3.09it/s]


Epoch: 7 	Test Accuracy: 90.55
Test acc increased (90.300000 --> 90.550000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 7 	Test Quantized Accuracy: 90.35


loss: 1.0908: 100%|██████████| 469/469 [02:30<00:00,  3.12it/s]


Epoch: 8 	Test Accuracy: 90.54
EarlyStopping counter: 1 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 8 	Test Quantized Accuracy: 89.73


loss: 0.9753: 100%|██████████| 469/469 [02:30<00:00,  3.11it/s]


Epoch: 9 	Test Accuracy: 90.8
Test acc increased (90.550000 --> 90.800000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 9 	Test Quantized Accuracy: 90.35


loss: 1.0746: 100%|██████████| 469/469 [02:30<00:00,  3.12it/s]


Epoch: 10 	Test Accuracy: 90.64
EarlyStopping counter: 1 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 10 	Test Quantized Accuracy: 90.2


loss: 1.0933: 100%|██████████| 469/469 [02:26<00:00,  3.20it/s]


Epoch: 11 	Test Accuracy: 90.15
EarlyStopping counter: 2 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 11 	Test Quantized Accuracy: 89.42


loss: 1.1009: 100%|██████████| 469/469 [02:20<00:00,  3.35it/s]


Epoch: 12 	Test Accuracy: 90.42
EarlyStopping counter: 3 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 12 	Test Quantized Accuracy: 89.92


loss: 1.1445: 100%|██████████| 469/469 [02:20<00:00,  3.34it/s]


Epoch: 13 	Test Accuracy: 89.96
EarlyStopping counter: 4 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 13 	Test Quantized Accuracy: 89.48


loss: 1.1570: 100%|██████████| 469/469 [02:19<00:00,  3.37it/s]


Epoch: 14 	Test Accuracy: 89.66
EarlyStopping counter: 5 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 14 	Test Quantized Accuracy: 89.12


loss: 1.1836: 100%|██████████| 469/469 [02:17<00:00,  3.41it/s]


Epoch: 15 	Test Accuracy: 89.8
EarlyStopping counter: 6 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 15 	Test Quantized Accuracy: 88.58


loss: 1.1137: 100%|██████████| 469/469 [02:17<00:00,  3.40it/s]


Epoch: 16 	Test Accuracy: 89.24
EarlyStopping counter: 7 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 16 	Test Quantized Accuracy: 87.53


loss: 1.1437: 100%|██████████| 469/469 [02:19<00:00,  3.36it/s]


Epoch: 17 	Test Accuracy: 90.53
EarlyStopping counter: 8 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 17 	Test Quantized Accuracy: 89.88


loss: 1.1271: 100%|██████████| 469/469 [02:22<00:00,  3.28it/s]


Epoch: 18 	Test Accuracy: 91.27
Test acc increased (90.800000 --> 91.270000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 18 	Test Quantized Accuracy: 88.87


loss: 1.1404: 100%|██████████| 469/469 [02:21<00:00,  3.32it/s]


Epoch: 19 	Test Accuracy: 91.09
EarlyStopping counter: 1 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 19 	Test Quantized Accuracy: 87.09


loss: 1.1291: 100%|██████████| 469/469 [02:21<00:00,  3.31it/s]


Epoch: 20 	Test Accuracy: 90.79
EarlyStopping counter: 2 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 20 	Test Quantized Accuracy: 82.54


loss: 1.0890: 100%|██████████| 469/469 [02:23<00:00,  3.27it/s]


Epoch: 21 	Test Accuracy: 91.0
EarlyStopping counter: 3 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 21 	Test Quantized Accuracy: 68.38


loss: 1.1359: 100%|██████████| 469/469 [02:23<00:00,  3.27it/s]


Epoch: 22 	Test Accuracy: 90.85
EarlyStopping counter: 4 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 22 	Test Quantized Accuracy: 66.73


loss: 1.1031: 100%|██████████| 469/469 [02:24<00:00,  3.25it/s]


Epoch: 23 	Test Accuracy: 90.7
EarlyStopping counter: 5 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 23 	Test Quantized Accuracy: 59.7


loss: 1.1427: 100%|██████████| 469/469 [02:23<00:00,  3.28it/s]


Epoch: 24 	Test Accuracy: 91.27
EarlyStopping counter: 6 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 24 	Test Quantized Accuracy: 52.79


loss: 1.0961: 100%|██████████| 469/469 [02:19<00:00,  3.36it/s]


Epoch: 25 	Test Accuracy: 91.49
Test acc increased (91.270000 --> 91.490000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 25 	Test Quantized Accuracy: 52.18


loss: 1.0844: 100%|██████████| 469/469 [02:18<00:00,  3.38it/s]


Epoch: 26 	Test Accuracy: 91.9
Test acc increased (91.490000 --> 91.900000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 26 	Test Quantized Accuracy: 44.46


loss: 1.0089: 100%|██████████| 469/469 [02:19<00:00,  3.35it/s]


Epoch: 27 	Test Accuracy: 91.94
Test acc increased (91.900000 --> 91.940000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 27 	Test Quantized Accuracy: 44.93


loss: 1.0664: 100%|██████████| 469/469 [02:19<00:00,  3.37it/s]


Epoch: 28 	Test Accuracy: 91.55
EarlyStopping counter: 1 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 28 	Test Quantized Accuracy: 55.25


loss: 0.9506: 100%|██████████| 469/469 [02:20<00:00,  3.33it/s]


Epoch: 29 	Test Accuracy: 91.9
EarlyStopping counter: 2 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 29 	Test Quantized Accuracy: 45.84


loss: 1.0771: 100%|██████████| 469/469 [02:19<00:00,  3.36it/s]


Epoch: 30 	Test Accuracy: 91.48
EarlyStopping counter: 3 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 30 	Test Quantized Accuracy: 60.98


loss: 1.0530: 100%|██████████| 469/469 [02:19<00:00,  3.36it/s]


Epoch: 31 	Test Accuracy: 91.64
EarlyStopping counter: 4 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 31 	Test Quantized Accuracy: 42.29


loss: 1.0921: 100%|██████████| 469/469 [02:19<00:00,  3.36it/s]


Epoch: 32 	Test Accuracy: 91.62
EarlyStopping counter: 5 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 32 	Test Quantized Accuracy: 45.86


loss: 1.1548: 100%|██████████| 469/469 [02:19<00:00,  3.36it/s]


Epoch: 33 	Test Accuracy: 91.1
EarlyStopping counter: 6 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 33 	Test Quantized Accuracy: 38.54


loss: 1.1507: 100%|██████████| 469/469 [02:22<00:00,  3.28it/s]


Epoch: 34 	Test Accuracy: 91.96
Test acc increased (91.940000 --> 91.960000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 34 	Test Quantized Accuracy: 43.58


loss: 1.0757: 100%|██████████| 469/469 [02:20<00:00,  3.33it/s]


Epoch: 35 	Test Accuracy: 91.91
EarlyStopping counter: 1 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 35 	Test Quantized Accuracy: 56.65


loss: 1.1368: 100%|██████████| 469/469 [02:19<00:00,  3.37it/s]


Epoch: 36 	Test Accuracy: 91.77
EarlyStopping counter: 2 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 36 	Test Quantized Accuracy: 30.52


loss: 1.0848: 100%|██████████| 469/469 [02:19<00:00,  3.35it/s]


Epoch: 37 	Test Accuracy: 91.8
EarlyStopping counter: 3 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 37 	Test Quantized Accuracy: 24.69


loss: 1.1265: 100%|██████████| 469/469 [02:20<00:00,  3.34it/s]


Epoch: 38 	Test Accuracy: 91.95
EarlyStopping counter: 4 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 38 	Test Quantized Accuracy: 23.06


loss: 1.1046: 100%|██████████| 469/469 [02:19<00:00,  3.36it/s]


Epoch: 39 	Test Accuracy: 91.97
Test acc increased (91.960000 --> 91.970000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 39 	Test Quantized Accuracy: 38.42


loss: 1.0895: 100%|██████████| 469/469 [02:20<00:00,  3.35it/s]


Epoch: 40 	Test Accuracy: 92.24
Test acc increased (91.970000 --> 92.240000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 40 	Test Quantized Accuracy: 26.13


loss: 1.1452: 100%|██████████| 469/469 [02:19<00:00,  3.37it/s]


Epoch: 41 	Test Accuracy: 91.69
EarlyStopping counter: 1 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 41 	Test Quantized Accuracy: 25.86


loss: 1.1178: 100%|██████████| 469/469 [02:19<00:00,  3.36it/s]


Epoch: 42 	Test Accuracy: 91.78
EarlyStopping counter: 2 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 42 	Test Quantized Accuracy: 18.91


loss: 1.1195: 100%|██████████| 469/469 [02:19<00:00,  3.36it/s]


Epoch: 43 	Test Accuracy: 92.18
EarlyStopping counter: 3 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 43 	Test Quantized Accuracy: 26.46


loss: 1.0643: 100%|██████████| 469/469 [02:19<00:00,  3.36it/s]


Epoch: 44 	Test Accuracy: 92.0
EarlyStopping counter: 4 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 44 	Test Quantized Accuracy: 23.94


loss: 1.1164: 100%|██████████| 469/469 [02:19<00:00,  3.35it/s]


Epoch: 45 	Test Accuracy: 91.9
EarlyStopping counter: 5 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 45 	Test Quantized Accuracy: 26.81


loss: 1.1084: 100%|██████████| 469/469 [02:21<00:00,  3.32it/s]


Epoch: 46 	Test Accuracy: 91.81
EarlyStopping counter: 6 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 46 	Test Quantized Accuracy: 19.72


loss: 1.0956: 100%|██████████| 469/469 [02:19<00:00,  3.36it/s]


Epoch: 47 	Test Accuracy: 92.2
EarlyStopping counter: 7 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 47 	Test Quantized Accuracy: 19.57


loss: 1.0251: 100%|██████████| 469/469 [02:19<00:00,  3.35it/s]


Epoch: 48 	Test Accuracy: 91.68
EarlyStopping counter: 8 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 48 	Test Quantized Accuracy: 18.88


loss: 0.9180: 100%|██████████| 469/469 [02:18<00:00,  3.40it/s]


Epoch: 49 	Test Accuracy: 92.73
Test acc increased (92.240000 --> 92.730000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 49 	Test Quantized Accuracy: 18.35
tensor(1.)
tensor(1.)


loss: 1.2687: 100%|██████████| 469/469 [02:18<00:00,  3.39it/s]


Epoch: 0 	Test Accuracy: 87.33
Test acc increased (0.000000 --> 87.330000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 0 	Test Quantized Accuracy: 87.39


loss: 1.2105: 100%|██████████| 469/469 [02:18<00:00,  3.38it/s]


Epoch: 1 	Test Accuracy: 88.67
Test acc increased (87.330000 --> 88.670000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 1 	Test Quantized Accuracy: 88.57


loss: 1.2066: 100%|██████████| 469/469 [02:18<00:00,  3.38it/s]


Epoch: 2 	Test Accuracy: 88.4
EarlyStopping counter: 1 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 2 	Test Quantized Accuracy: 86.18


loss: 1.2146: 100%|██████████| 469/469 [02:18<00:00,  3.38it/s]


Epoch: 3 	Test Accuracy: 88.34
EarlyStopping counter: 2 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 3 	Test Quantized Accuracy: 87.87


loss: 1.1451: 100%|██████████| 469/469 [02:20<00:00,  3.33it/s]


Epoch: 4 	Test Accuracy: 89.03
Test acc increased (88.670000 --> 89.030000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 4 	Test Quantized Accuracy: 88.14


loss: 1.1446: 100%|██████████| 469/469 [02:22<00:00,  3.29it/s]


Epoch: 5 	Test Accuracy: 89.56
Test acc increased (89.030000 --> 89.560000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 5 	Test Quantized Accuracy: 89.18


loss: 1.1457: 100%|██████████| 469/469 [02:28<00:00,  3.16it/s]


KeyboardInterrupt: ignored

In [16]:
cfg = config.copy()
cfg['model'] = 'NetFC(config,sparsity=[0.25, 0.5])'
#cfg['num_steps'] = 100
cfg['enable_batch_norm'] = True
cfg['enable_bias'] = True
#cfg['print_weights'] = True
run(cfg)

{'model': 'NetFC(config,sparsity=[0.25, 0.5])', 'exp_name': 'mnist_tha', 'num_trials': 5, 'num_epochs': 50, 'binarize': True, 'binarize_input': True, 'post_quantize': True, 'enable_bias': True, 'enable_batch_norm': True, 'enable_dropout': True, 'enable_threshold': True, 'enable_slope': True, 'on_spike_reset_to_zero': False, 'data_dir': '~/data/mnist', 'batch_size': 128, 'seed': 0, 'num_workers': 0, 'save_csv': True, 'save_model': True, 'early_stopping': True, 'patience': 100, 'grad_clip': False, 'weight_clip': False, 'dropout1': 0.02856, 'beta': 0.992187, 'lr': 0.00997, 'slope': 10.22, 'threshold1': 11.666, 'alpha_thr1': 0.024, 'thr_final1': 4.317, 'threshold2': 14.105, 'alpha_thr2': 0.119, 'thr_final2': 16.29, 'threshold3': 0.6656, 'alpha_thr3': 0.0011, 'thr_final3': 3.496, 'num_steps': 100, 'correct_rate': 0.8, 'incorrect_rate': 0.2, 'betas': (0.9, 0.999), 't_0': 4688, 'eta_min': 0, 'df_lr': True, 'print_weights': False}
tensor(0.7486)
tensor(0.4983)
NetFC(
  (bfc1): SparseBinaryLine

loss: 1.5706:  17%|█▋        | 82/469 [00:28<02:12,  2.92it/s]


KeyboardInterrupt: ignored

In [None]:
cfg = config.copy()
cfg['model'] = 'NetFC(config,sparsity=[0.125, 0.5])'
#cfg['num_steps'] = 100
cfg['enable_batch_norm'] = True
cfg['enable_bias'] = True
#cfg['print_weights'] = True
run(cfg)

{'model': 'NetFC(config,sparsity=[0.125, 0.5])', 'exp_name': 'mnist_tha', 'num_trials': 5, 'num_epochs': 50, 'binarize': True, 'binarize_input': True, 'post_quantize': True, 'enable_bias': True, 'enable_batch_norm': True, 'enable_dropout': True, 'enable_threshold': True, 'enable_slope': True, 'on_spike_reset_to_zero': False, 'data_dir': '~/data/mnist', 'batch_size': 128, 'seed': 0, 'num_workers': 0, 'save_csv': True, 'save_model': True, 'early_stopping': True, 'patience': 100, 'grad_clip': False, 'weight_clip': False, 'dropout1': 0.02856, 'beta': 0.992187, 'lr': 0.00997, 'slope': 10.22, 'threshold1': 11.666, 'alpha_thr1': 0.024, 'thr_final1': 4.317, 'threshold2': 14.105, 'alpha_thr2': 0.119, 'thr_final2': 16.29, 'threshold3': 0.6656, 'alpha_thr3': 0.0011, 'thr_final3': 3.496, 'num_steps': 100, 'correct_rate': 0.8, 'incorrect_rate': 0.2, 'betas': (0.9, 0.999), 't_0': 4688, 'eta_min': 0, 'df_lr': True, 'print_weights': False}
tensor(0.8753)
tensor(0.4983)
NetFC(
  (bfc1): SparseBinaryLin

100%|██████████| 9912422/9912422 [00:00<00:00, 95368291.35it/s]


Extracting /root/data/mnist/MNIST/raw/train-images-idx3-ubyte.gz to /root/data/mnist/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to /root/data/mnist/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 27828094.15it/s]

Extracting /root/data/mnist/MNIST/raw/train-labels-idx1-ubyte.gz to /root/data/mnist/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to /root/data/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz



100%|██████████| 1648877/1648877 [00:00<00:00, 25904543.47it/s]


Extracting /root/data/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz to /root/data/mnist/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to /root/data/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 4156781.32it/s]


Extracting /root/data/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz to /root/data/mnist/MNIST/raw



loss: 1.4420: 100%|██████████| 469/469 [02:29<00:00,  3.15it/s]


Epoch: 0 	Test Accuracy: 85.52
Test acc increased (0.000000 --> 85.520000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 0 	Test Quantized Accuracy: 85.45


loss: 1.4387: 100%|██████████| 469/469 [02:26<00:00,  3.20it/s]


Epoch: 1 	Test Accuracy: 86.11
Test acc increased (85.520000 --> 86.110000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 1 	Test Quantized Accuracy: 85.83


loss: 1.4139: 100%|██████████| 469/469 [02:26<00:00,  3.20it/s]


Epoch: 2 	Test Accuracy: 86.49
Test acc increased (86.110000 --> 86.490000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=256, bias=False)
qbn BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 2 	Test Quantized Accuracy: 86.24


loss: 1.3275:  37%|███▋      | 175/469 [00:53<01:28,  3.31it/s]

In [17]:
cfg = config.copy()
cfg['model'] = 'NetFC(config, neurons=[128, 128], sparsity=[0.5, 0.5])'
#cfg['num_steps'] = 100
cfg['enable_batch_norm'] = True
cfg['enable_bias'] = True
#cfg['print_weights'] = True
run(cfg)

{'model': 'NetFC(config, neurons=[128, 128], sparsity=[0.5, 0.5])', 'exp_name': 'mnist_tha', 'num_trials': 5, 'num_epochs': 50, 'binarize': True, 'binarize_input': True, 'post_quantize': True, 'enable_bias': True, 'enable_batch_norm': True, 'enable_dropout': True, 'enable_threshold': True, 'enable_slope': True, 'on_spike_reset_to_zero': False, 'data_dir': '~/data/mnist', 'batch_size': 128, 'seed': 0, 'num_workers': 0, 'save_csv': True, 'save_model': True, 'early_stopping': True, 'patience': 100, 'grad_clip': False, 'weight_clip': False, 'dropout1': 0.02856, 'beta': 0.992187, 'lr': 0.00997, 'slope': 10.22, 'threshold1': 11.666, 'alpha_thr1': 0.024, 'thr_final1': 4.317, 'threshold2': 14.105, 'alpha_thr2': 0.119, 'thr_final2': 16.29, 'threshold3': 0.6656, 'alpha_thr3': 0.0011, 'thr_final3': 3.496, 'num_steps': 100, 'correct_rate': 0.8, 'incorrect_rate': 0.2, 'betas': (0.9, 0.999), 't_0': 4688, 'eta_min': 0, 'df_lr': True, 'print_weights': False}
tensor(0.5037)
tensor(0.4942)
NetFC(
  (bfc

loss: 1.7288: 100%|██████████| 469/469 [02:33<00:00,  3.06it/s]


Epoch: 0 	Test Accuracy: 80.93
Test acc increased (0.000000 --> 80.930000).  Saving model ...
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=128, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 0 	Test Quantized Accuracy: 81.11


loss: 1.6848: 100%|██████████| 469/469 [02:29<00:00,  3.13it/s]


Epoch: 1 	Test Accuracy: 80.82
EarlyStopping counter: 1 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=128, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)
Epoch: 1 	Test Quantized Accuracy: 80.31


loss: 1.6855: 100%|██████████| 469/469 [02:31<00:00,  3.10it/s]


Epoch: 2 	Test Accuracy: 80.76
EarlyStopping counter: 2 out of 100
post quantize model
sfc SparseBinaryLinear(in_features=256, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
sfc SparseBinaryLinear(in_features=128, out_features=128, bias=False)
qbn BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
bfc BinaryLinear(in_features=128, out_features=10, bias=True)


KeyboardInterrupt: ignored

In [None]:
# bn2.weight Parameter containing:
# tensor([1.9990, 2.1707, 2.0983, 1.9927, 1.9176, 1.5810, 1.7940, 2.1587, 1.9185,
#         1.7227, 1.9048, 2.1231, 1.8211, 1.9565, 2.4194, 1.8210, 1.8736, 2.0975,
#         2.1799, 2.1952, 1.8520, 1.7353, 1.8138, 2.1197, 1.8280, 1.9596, 2.3288,
#         2.4837, 1.7084, 1.9816, 1.7141, 2.0004, 2.3567, 1.6929, 1.8008, 1.8295,
#         1.8755, 2.2669, 2.1998, 2.1144, 2.2599, 1.7555, 1.9367, 1.7001, 1.7601,
#         2.3178, 2.0300, 2.2487, 1.9230, 1.9055, 2.1833, 2.0571, 1.9264, 1.8677,
#         2.1385, 1.3140, 1.9290, 1.7938, 2.1273, 2.0558, 1.7818, 1.9466, 1.8962,
#         2.0188, 1.9801, 1.6973, 1.8316, 2.1479, 1.7928, 1.6150, 2.1462, 2.0311,
#         2.0386, 2.0987, 2.0303, 1.9735, 1.8485, 1.7809, 2.1801, 1.8313, 1.8320,
#         1.8230, 2.2195, 2.3736, 2.1923, 2.2304, 2.1159, 1.7465, 2.1706, 1.8011,
#         1.6034, 1.7145, 2.1939, 1.9043, 2.1121, 1.8974, 1.9840, 1.7288, 1.7071,
#         2.1992, 2.0279, 2.1737, 1.8198, 2.2155, 1.6175, 2.3012, 1.8829, 2.0970,
#         2.0961, 2.3487, 1.7499, 1.8511, 2.1443, 1.9805, 1.5925, 1.9896, 1.9556,
#         2.1271, 2.0737, 2.2048, 2.0302, 1.5531, 2.0186, 1.8186, 1.9623, 2.0262,
#         1.6641, 1.9470], device='cuda:0', requires_grad=True)
# bn2.bias Parameter containing:
# tensor([0.6498, 0.3817, 0.2181, 0.2873, 0.4080, 0.1720, 0.0317, 0.5161, 0.5260,
#         0.2901, 0.3405, 0.4849, 0.6289, 0.3044, 0.6084, 0.3359, 0.3509, 0.4086,
#         0.3281, 0.4347, 0.2080, 0.2665, 0.3921, 0.4583, 0.2833, 0.4716, 0.1986,
#         0.8519, 0.2374, 0.4891, 0.3759, 0.5366, 0.2983, 0.1438, 0.4705, 0.2080,
#         0.3375, 0.7551, 0.6978, 0.4066, 0.5279, 0.5771, 0.1845, 0.3678, 0.3259,
#         0.3678, 0.5667, 0.4188, 0.3600, 0.2092, 0.1839, 0.3777, 0.4884, 0.3750,
#         0.2550, 0.1124, 0.5827, 0.3344, 0.5152, 0.2674, 0.3826, 0.2437, 0.2454,
#         0.4080, 0.5506, 0.3877, 0.4804, 0.3189, 0.5685, 0.2249, 0.4660, 0.2988,
#         0.6129, 0.5870, 0.2916, 0.6732, 0.4578, 0.6597, 0.3098, 0.5236, 0.3821,
#         0.4044, 0.2389, 0.5632, 0.5674, 0.6477, 0.6750, 0.2220, 0.5916, 0.5613,
#         0.2781, 0.2813, 0.4886, 0.3720, 0.4638, 0.3268, 0.4190, 0.2757, 0.5542,
#         0.4846, 0.2732, 0.6278, 0.3644, 0.5544, 0.3397, 0.5387, 0.1307, 0.4701,
#         0.3537, 0.6017, 0.6172, 0.4602, 0.5736, 0.2729, 0.3987, 0.1734, 0.3378,
#         0.4532, 0.4848, 0.6180, 0.3999, 0.2453, 0.4765, 0.2346, 0.3375, 0.2310,
#         0.3223, 0.2671], device='cuda:0', requires_grad=True)

 #after some training
# bn2.weight Parameter containing:
# tensor([4.9099, 5.0962, 5.5888, 4.3900, 4.5788, 4.9550, 4.9641, 5.0355, 4.5730,
#         4.9639, 4.4276, 5.3666, 5.2169, 5.1509, 4.9650, 5.1409, 4.9874, 5.3097,
#         4.7464, 5.5738, 5.2571, 4.5298, 4.9318, 5.7625, 4.4019, 5.1249, 5.2300,
#         5.1870, 4.4773, 4.3227, 4.5650, 5.7379, 6.0565, 5.0293, 4.8255, 4.7608,
#         4.9786, 5.3047, 4.7430, 5.6802, 5.2579, 4.1642, 5.4533, 4.4647, 4.4885,
#         5.7688, 5.6993, 5.0135, 4.8656, 5.1775, 5.5288, 4.8154, 4.0566, 4.9560,
#         4.9754, 4.7329, 4.7370, 4.6920, 4.9248, 4.5154, 5.0307, 4.6023, 5.2160,
#         4.7534, 4.2167, 4.3286, 4.5850, 6.0492, 4.4160, 4.6090, 4.8223, 5.0231,
#         4.9384, 5.3263, 5.1866, 4.2420, 4.2443, 4.4652, 5.2192, 4.4080, 4.8573,
#         4.8766, 6.3240, 5.5299, 5.2524, 5.3087, 4.4232, 5.0209, 5.1746, 4.4969,
#         4.3094, 4.1203, 5.6062, 5.0312, 4.7815, 4.6387, 5.1705, 4.7637, 4.6557,
#         5.3890, 5.1286, 4.8427, 4.1010, 5.2434, 4.7017, 5.3186, 5.0355, 4.7969,
#         5.3591, 5.7636, 4.5301, 4.6631, 5.0067, 5.4933, 4.2991, 4.8119, 4.8928,
#         5.4807, 5.7014, 4.8133, 5.0600, 4.0004, 4.8092, 4.6587, 5.4306, 5.6190,
#         4.8491, 5.6172], device='cuda:0', requires_grad=True)
# bn2.bias Parameter containing:
# tensor([ 0.9776,  0.0102, -0.3288, -0.1998,  0.1523,  0.0830,  0.0378,  0.4451,
#          0.4870, -0.1236,  0.0895,  0.3199,  1.1172, -0.0968,  0.3388,  0.6111,
#          0.3126,  0.1159, -0.2493, -0.0373, -0.1575,  0.0478,  0.3408,  0.2455,
#          0.3665,  0.2020, -0.6231,  0.5952, -0.1245,  0.2104,  0.0185,  0.6761,
#         -0.2078,  0.2258,  0.4542, -0.1995,  0.4682,  0.8909,  0.6613,  0.4796,
#          0.2698,  0.5167, -0.0313,  0.7828,  0.2144, -0.0322,  0.9767,  0.2942,
#          0.1549, -0.2499, -0.2597,  0.0301,  0.5320,  0.2145,  0.0200,  0.0041,
#          1.0410, -0.0218,  0.5749, -0.3903,  0.5178, -0.0901,  0.4260, -0.1015,
#          0.2714,  0.6575,  0.5759,  0.4551,  0.4005, -0.0026,  0.3148,  0.1734,
#          0.5360,  0.0206, -0.2983,  0.6451,  0.5158,  1.1592, -0.0688,  0.5039,
#          0.3904, -0.0353, -0.0839,  0.2897,  0.3479,  0.5520,  0.1291,  0.0839,
#          0.1894,  0.1085,  0.3328, -0.0146,  0.3279,  0.0778,  0.2786,  0.1371,
#          0.4661, -0.0742,  1.1181,  0.6645, -0.1441,  0.3702,  0.2503,  0.2494,
#          0.4468,  0.2508, -0.2617, -0.0379,  0.2208,  0.4755,  0.5585,  0.5917,
#          0.5314,  0.2416,  0.3132, -0.1299,  0.1267,  0.3698,  0.6280,  0.7991,
#         -0.1371,  0.1800,  0.2070, -0.0646,  0.2283,  0.1236,  0.2599,  0.3201],
#        device='cuda:0', requires_grad=True)

In [47]:
cfg = config.copy()
cfg['model'] = 'NetFC'
cfg['num_steps'] = 10
cfg['enable_batch_norm'] = False
cfg['enable_bias'] = False
run(cfg)

{'model': 'NetFC', 'exp_name': 'mnist_tha', 'num_trials': 5, 'num_epochs': 10, 'binarize': True, 'binarize_input': True, 'enable_bias': False, 'enable_batchnorm': True, 'enable_dropout': True, 'enable_threshold': True, 'enable_slope': True, 'data_dir': '~/data/mnist', 'batch_size': 128, 'seed': 0, 'num_workers': 0, 'save_csv': True, 'save_model': True, 'early_stopping': True, 'patience': 100, 'grad_clip': False, 'weight_clip': False, 'batch_norm': False, 'dropout1': 0.02856, 'beta': 0.99, 'lr': 0.00997, 'slope': 10.22, 'threshold1': 11.666, 'alpha_thr1': 0.024, 'thr_final1': 4.317, 'threshold2': 14.105, 'alpha_thr2': 0.119, 'thr_final2': 16.29, 'threshold3': 0.6656, 'alpha_thr3': 0.0011, 'thr_final3': 3.496, 'num_steps': 10, 'correct_rate': 0.8, 'incorrect_rate': 0.2, 'betas': (0.9, 0.999), 't_0': 4688, 'eta_min': 0, 'df_lr': True}
tensor(1.)
tensor(1.)
NetFC(
  (bfc1): SparseBinaryLinear(in_features=256, out_features=256, bias=False)
  (fc1): Linear(in_features=256, out_features=256, 

loss: 0.5350: 100%|██████████| 469/469 [00:31<00:00, 14.96it/s]


Epoch: 0 	Test Accuracy: 63.7
Test acc increased (0.000000 --> 63.700000).  Saving model ...


loss: 0.2998: 100%|██████████| 469/469 [00:31<00:00, 14.86it/s]


Epoch: 1 	Test Accuracy: 59.04
EarlyStopping counter: 1 out of 100


loss: 0.3431: 100%|██████████| 469/469 [00:32<00:00, 14.45it/s]


Epoch: 2 	Test Accuracy: 63.63
EarlyStopping counter: 2 out of 100


loss: 0.3111:   4%|▍         | 18/469 [00:01<00:32, 13.88it/s]


KeyboardInterrupt: ignored

In [62]:
cfg = config.copy()
cfg['model'] = 'NetFC'
cfg['num_steps'] = 10
cfg['enable_batch_norm'] = False
cfg['enable_bias'] = True
run(cfg)

{'model': 'NetFC', 'exp_name': 'mnist_tha', 'num_trials': 5, 'num_epochs': 10, 'binarize': True, 'binarize_input': True, 'enable_bias': True, 'enable_batchnorm': True, 'enable_dropout': True, 'enable_threshold': True, 'enable_slope': True, 'data_dir': '~/data/mnist', 'batch_size': 128, 'seed': 0, 'num_workers': 0, 'save_csv': True, 'save_model': True, 'early_stopping': True, 'patience': 100, 'grad_clip': False, 'weight_clip': False, 'dropout1': 0.02856, 'beta': 0.99, 'lr': 0.00997, 'slope': 10.22, 'threshold1': 11.666, 'alpha_thr1': 0.024, 'thr_final1': 4.317, 'threshold2': 14.105, 'alpha_thr2': 0.119, 'thr_final2': 16.29, 'threshold3': 0.6656, 'alpha_thr3': 0.0011, 'thr_final3': 3.496, 'num_steps': 10, 'correct_rate': 0.8, 'incorrect_rate': 0.2, 'betas': (0.9, 0.999), 't_0': 4688, 'eta_min': 0, 'df_lr': True, 'enable_batch_norm': False}
tensor(1.)
tensor(1.)
NetFC(
  (bfc1): SparseBinaryLinear(in_features=256, out_features=256, bias=True)
  (fc1): Linear(in_features=256, out_features=

loss: 0.2818: 100%|██████████| 469/469 [00:31<00:00, 14.69it/s]


Epoch: 0 	Test Accuracy: 56.2
Test acc increased (0.000000 --> 56.200000).  Saving model ...


loss: 0.2309: 100%|██████████| 469/469 [00:32<00:00, 14.60it/s]


Epoch: 1 	Test Accuracy: 68.83
Test acc increased (56.200000 --> 68.830000).  Saving model ...


loss: 0.2095: 100%|██████████| 469/469 [00:33<00:00, 14.07it/s]


Epoch: 2 	Test Accuracy: 72.61
Test acc increased (68.830000 --> 72.610000).  Saving model ...


loss: 0.1916: 100%|██████████| 469/469 [00:32<00:00, 14.49it/s]


Epoch: 3 	Test Accuracy: 74.09
Test acc increased (72.610000 --> 74.090000).  Saving model ...


loss: 0.1828: 100%|██████████| 469/469 [00:32<00:00, 14.47it/s]


Epoch: 4 	Test Accuracy: 76.61
Test acc increased (74.090000 --> 76.610000).  Saving model ...


loss: 0.1858: 100%|██████████| 469/469 [00:32<00:00, 14.42it/s]


Epoch: 5 	Test Accuracy: 80.09
Test acc increased (76.610000 --> 80.090000).  Saving model ...


loss: 0.1777: 100%|██████████| 469/469 [00:32<00:00, 14.32it/s]


Epoch: 6 	Test Accuracy: 81.8
Test acc increased (80.090000 --> 81.800000).  Saving model ...


loss: 0.1673: 100%|██████████| 469/469 [00:32<00:00, 14.65it/s]


Epoch: 7 	Test Accuracy: 82.58
Test acc increased (81.800000 --> 82.580000).  Saving model ...


loss: 0.1676: 100%|██████████| 469/469 [00:31<00:00, 14.99it/s]


Epoch: 8 	Test Accuracy: 83.35
Test acc increased (82.580000 --> 83.350000).  Saving model ...


loss: 0.1630: 100%|██████████| 469/469 [00:31<00:00, 14.87it/s]


Epoch: 9 	Test Accuracy: 83.68
Test acc increased (83.350000 --> 83.680000).  Saving model ...
tensor(1.)
tensor(1.)


loss: 0.2818: 100%|██████████| 469/469 [00:31<00:00, 14.94it/s]


Epoch: 0 	Test Accuracy: 56.2
Test acc increased (0.000000 --> 56.200000).  Saving model ...


loss: 0.2309: 100%|██████████| 469/469 [00:31<00:00, 14.85it/s]


Epoch: 1 	Test Accuracy: 68.83
Test acc increased (56.200000 --> 68.830000).  Saving model ...


loss: 0.2095: 100%|██████████| 469/469 [00:32<00:00, 14.59it/s]


Epoch: 2 	Test Accuracy: 72.61
Test acc increased (68.830000 --> 72.610000).  Saving model ...


loss: 0.1916: 100%|██████████| 469/469 [00:32<00:00, 14.43it/s]


Epoch: 3 	Test Accuracy: 74.09
Test acc increased (72.610000 --> 74.090000).  Saving model ...


loss: 0.1828: 100%|██████████| 469/469 [00:32<00:00, 14.25it/s]


Epoch: 4 	Test Accuracy: 76.61
Test acc increased (74.090000 --> 76.610000).  Saving model ...


loss: 0.1858: 100%|██████████| 469/469 [00:32<00:00, 14.37it/s]


Epoch: 5 	Test Accuracy: 80.09
Test acc increased (76.610000 --> 80.090000).  Saving model ...


loss: 0.1777: 100%|██████████| 469/469 [00:32<00:00, 14.59it/s]


Epoch: 6 	Test Accuracy: 81.8
Test acc increased (80.090000 --> 81.800000).  Saving model ...


loss: 0.1673: 100%|██████████| 469/469 [00:31<00:00, 14.78it/s]


Epoch: 7 	Test Accuracy: 82.58
Test acc increased (81.800000 --> 82.580000).  Saving model ...


loss: 0.1676: 100%|██████████| 469/469 [00:31<00:00, 14.73it/s]


Epoch: 8 	Test Accuracy: 83.35
Test acc increased (82.580000 --> 83.350000).  Saving model ...


loss: 0.1630: 100%|██████████| 469/469 [00:31<00:00, 14.76it/s]


Epoch: 9 	Test Accuracy: 83.68
Test acc increased (83.350000 --> 83.680000).  Saving model ...
tensor(1.)
tensor(1.)


loss: 0.2818: 100%|██████████| 469/469 [00:31<00:00, 14.68it/s]


Epoch: 0 	Test Accuracy: 56.2
Test acc increased (0.000000 --> 56.200000).  Saving model ...


loss: 0.2309: 100%|██████████| 469/469 [00:31<00:00, 14.87it/s]


Epoch: 1 	Test Accuracy: 68.83
Test acc increased (56.200000 --> 68.830000).  Saving model ...


loss: 0.2095: 100%|██████████| 469/469 [00:31<00:00, 14.96it/s]


Epoch: 2 	Test Accuracy: 72.61
Test acc increased (68.830000 --> 72.610000).  Saving model ...


loss: 0.1916: 100%|██████████| 469/469 [00:31<00:00, 15.02it/s]


Epoch: 3 	Test Accuracy: 74.09
Test acc increased (72.610000 --> 74.090000).  Saving model ...


loss: 0.1828: 100%|██████████| 469/469 [00:31<00:00, 15.11it/s]


Epoch: 4 	Test Accuracy: 76.61
Test acc increased (74.090000 --> 76.610000).  Saving model ...


loss: 0.1858: 100%|██████████| 469/469 [00:30<00:00, 15.21it/s]


Epoch: 5 	Test Accuracy: 80.09
Test acc increased (76.610000 --> 80.090000).  Saving model ...


loss: 0.1777: 100%|██████████| 469/469 [00:31<00:00, 15.04it/s]


Epoch: 6 	Test Accuracy: 81.8
Test acc increased (80.090000 --> 81.800000).  Saving model ...


loss: 0.1673: 100%|██████████| 469/469 [00:31<00:00, 14.99it/s]


Epoch: 7 	Test Accuracy: 82.58
Test acc increased (81.800000 --> 82.580000).  Saving model ...


loss: 0.1676: 100%|██████████| 469/469 [00:30<00:00, 15.23it/s]


Epoch: 8 	Test Accuracy: 83.35
Test acc increased (82.580000 --> 83.350000).  Saving model ...


loss: 0.1630: 100%|██████████| 469/469 [00:30<00:00, 15.19it/s]


Epoch: 9 	Test Accuracy: 83.68
Test acc increased (83.350000 --> 83.680000).  Saving model ...
tensor(1.)
tensor(1.)


loss: 0.2818: 100%|██████████| 469/469 [00:30<00:00, 15.27it/s]


Epoch: 0 	Test Accuracy: 56.2
Test acc increased (0.000000 --> 56.200000).  Saving model ...


loss: 0.2309: 100%|██████████| 469/469 [00:30<00:00, 15.23it/s]


Epoch: 1 	Test Accuracy: 68.83
Test acc increased (56.200000 --> 68.830000).  Saving model ...


loss: 0.2095: 100%|██████████| 469/469 [00:30<00:00, 15.17it/s]


Epoch: 2 	Test Accuracy: 72.61
Test acc increased (68.830000 --> 72.610000).  Saving model ...


loss: 0.1916: 100%|██████████| 469/469 [00:30<00:00, 15.24it/s]


Epoch: 3 	Test Accuracy: 74.09
Test acc increased (72.610000 --> 74.090000).  Saving model ...


loss: 0.1828: 100%|██████████| 469/469 [00:31<00:00, 15.12it/s]


Epoch: 4 	Test Accuracy: 76.61
Test acc increased (74.090000 --> 76.610000).  Saving model ...


loss: 0.1858: 100%|██████████| 469/469 [00:30<00:00, 15.29it/s]


Epoch: 5 	Test Accuracy: 80.09
Test acc increased (76.610000 --> 80.090000).  Saving model ...


loss: 0.1777: 100%|██████████| 469/469 [00:31<00:00, 15.12it/s]


Epoch: 6 	Test Accuracy: 81.8
Test acc increased (80.090000 --> 81.800000).  Saving model ...


loss: 0.1673: 100%|██████████| 469/469 [00:31<00:00, 14.87it/s]


Epoch: 7 	Test Accuracy: 82.58
Test acc increased (81.800000 --> 82.580000).  Saving model ...


loss: 0.1676: 100%|██████████| 469/469 [00:31<00:00, 14.69it/s]


Epoch: 8 	Test Accuracy: 83.35
Test acc increased (82.580000 --> 83.350000).  Saving model ...


loss: 0.1630: 100%|██████████| 469/469 [00:31<00:00, 14.77it/s]


KeyboardInterrupt: ignored

In [49]:
cfg = config.copy()
cfg['model'] = 'NetFC'
cfg['num_steps'] = 10
cfg['enable_batch_norm'] = True
cfg['enable_bias'] = False
run(cfg)

{'model': 'NetFC', 'exp_name': 'mnist_tha', 'num_trials': 5, 'num_epochs': 10, 'binarize': True, 'binarize_input': True, 'enable_bias': False, 'enable_batchnorm': True, 'enable_dropout': True, 'enable_threshold': True, 'enable_slope': True, 'data_dir': '~/data/mnist', 'batch_size': 128, 'seed': 0, 'num_workers': 0, 'save_csv': True, 'save_model': True, 'early_stopping': True, 'patience': 100, 'grad_clip': False, 'weight_clip': False, 'batch_norm': True, 'dropout1': 0.02856, 'beta': 0.99, 'lr': 0.00997, 'slope': 10.22, 'threshold1': 11.666, 'alpha_thr1': 0.024, 'thr_final1': 4.317, 'threshold2': 14.105, 'alpha_thr2': 0.119, 'thr_final2': 16.29, 'threshold3': 0.6656, 'alpha_thr3': 0.0011, 'thr_final3': 3.496, 'num_steps': 10, 'correct_rate': 0.8, 'incorrect_rate': 0.2, 'betas': (0.9, 0.999), 't_0': 4688, 'eta_min': 0, 'df_lr': True, 'enable_batch_norm': True}
tensor(1.)
tensor(1.)
NetFC(
  (bfc1): SparseBinaryLinear(in_features=256, out_features=256, bias=False)
  (fc1): Linear(in_featur

loss: 0.4230: 100%|██████████| 469/469 [00:32<00:00, 14.61it/s]


Epoch: 0 	Test Accuracy: 11.37
Test acc increased (0.000000 --> 11.370000).  Saving model ...


loss: 0.3242: 100%|██████████| 469/469 [00:31<00:00, 14.68it/s]


Epoch: 1 	Test Accuracy: 20.87
Test acc increased (11.370000 --> 20.870000).  Saving model ...


loss: 0.3067: 100%|██████████| 469/469 [00:32<00:00, 14.65it/s]


Epoch: 2 	Test Accuracy: 34.23
Test acc increased (20.870000 --> 34.230000).  Saving model ...


loss: 0.2928: 100%|██████████| 469/469 [00:31<00:00, 14.88it/s]


Epoch: 3 	Test Accuracy: 49.65
Test acc increased (34.230000 --> 49.650000).  Saving model ...


loss: 0.2761: 100%|██████████| 469/469 [00:31<00:00, 15.09it/s]


Epoch: 4 	Test Accuracy: 47.5
EarlyStopping counter: 1 out of 100


loss: 0.2742: 100%|██████████| 469/469 [00:31<00:00, 14.94it/s]


Epoch: 5 	Test Accuracy: 45.11
EarlyStopping counter: 2 out of 100


loss: 0.2703: 100%|██████████| 469/469 [00:30<00:00, 15.23it/s]


Epoch: 6 	Test Accuracy: 48.17
EarlyStopping counter: 3 out of 100


loss: 0.2730: 100%|██████████| 469/469 [00:31<00:00, 15.08it/s]


Epoch: 7 	Test Accuracy: 52.54
Test acc increased (49.650000 --> 52.540000).  Saving model ...


loss: 0.2699: 100%|██████████| 469/469 [00:30<00:00, 15.20it/s]


Epoch: 8 	Test Accuracy: 51.32
EarlyStopping counter: 1 out of 100


loss: 0.2684: 100%|██████████| 469/469 [00:31<00:00, 15.11it/s]


Epoch: 9 	Test Accuracy: 52.0
EarlyStopping counter: 2 out of 100
tensor(1.)
tensor(1.)


loss: 0.4230: 100%|██████████| 469/469 [00:31<00:00, 15.06it/s]


Epoch: 0 	Test Accuracy: 11.37
Test acc increased (0.000000 --> 11.370000).  Saving model ...


loss: 0.3242: 100%|██████████| 469/469 [00:31<00:00, 15.07it/s]


Epoch: 1 	Test Accuracy: 20.87
Test acc increased (11.370000 --> 20.870000).  Saving model ...


loss: 0.3067: 100%|██████████| 469/469 [00:31<00:00, 15.03it/s]


Epoch: 2 	Test Accuracy: 34.23
Test acc increased (20.870000 --> 34.230000).  Saving model ...


loss: 0.2928: 100%|██████████| 469/469 [00:30<00:00, 15.18it/s]


Epoch: 3 	Test Accuracy: 49.65
Test acc increased (34.230000 --> 49.650000).  Saving model ...


loss: 0.2761: 100%|██████████| 469/469 [00:31<00:00, 15.09it/s]


Epoch: 4 	Test Accuracy: 47.5
EarlyStopping counter: 1 out of 100


loss: 0.2742: 100%|██████████| 469/469 [00:31<00:00, 14.95it/s]


Epoch: 5 	Test Accuracy: 45.11
EarlyStopping counter: 2 out of 100


loss: 0.2703: 100%|██████████| 469/469 [00:30<00:00, 15.17it/s]


Epoch: 6 	Test Accuracy: 48.17
EarlyStopping counter: 3 out of 100


loss: 0.2730: 100%|██████████| 469/469 [00:30<00:00, 15.14it/s]


Epoch: 7 	Test Accuracy: 52.54
Test acc increased (49.650000 --> 52.540000).  Saving model ...


loss: 0.2699: 100%|██████████| 469/469 [00:30<00:00, 15.17it/s]


Epoch: 8 	Test Accuracy: 51.32
EarlyStopping counter: 1 out of 100


loss: 0.2684: 100%|██████████| 469/469 [00:30<00:00, 15.14it/s]


Epoch: 9 	Test Accuracy: 52.0
EarlyStopping counter: 2 out of 100
tensor(1.)
tensor(1.)


loss: 0.4230: 100%|██████████| 469/469 [00:31<00:00, 14.83it/s]


Epoch: 0 	Test Accuracy: 11.37
Test acc increased (0.000000 --> 11.370000).  Saving model ...


loss: 0.3482:  54%|█████▍    | 254/469 [00:17<00:14, 14.50it/s]


KeyboardInterrupt: ignored

In [13]:
#binarized inputs
cfg = config
cfg['model'] = 'NetFC'
run(cfg)

{'model': 'NetFC', 'exp_name': 'mnist_tha', 'num_trials': 5, 'num_epochs': 500, 'binarize': True, 'data_dir': '~/data/mnist', 'batch_size': 128, 'seed': 0, 'num_workers': 0, 'save_csv': True, 'save_model': True, 'early_stopping': True, 'patience': 100, 'grad_clip': False, 'weight_clip': False, 'batch_norm': True, 'dropout1': 0.02856, 'beta': 0.99, 'lr': 0.00997, 'slope': 10.22, 'threshold1': 11.666, 'alpha_thr1': 0.024, 'thr_final1': 4.317, 'threshold2': 14.105, 'alpha_thr2': 0.119, 'thr_final2': 16.29, 'threshold3': 0.6656, 'alpha_thr3': 0.0011, 'thr_final3': 3.496, 'num_steps': 100, 'correct_rate': 0.8, 'incorrect_rate': 0.2, 'betas': (0.9, 0.999), 't_0': 4688, 'eta_min': 0, 'df_lr': True}
NetFC(
  (bfc1): SparseBinaryLinear(in_features=256, out_features=256, bias=False)
  (fc1): Linear(in_features=256, out_features=256, bias=False)
  (bn1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (lif1): Leaky()
  (bfc2): SparseBinaryLinear(in_features=256,

loss: 0.8944: 100%|██████████| 469/469 [02:55<00:00,  2.67it/s]


Epoch: 0 	Test Accuracy: 91.56
Test acc increased (0.000000 --> 91.560000).  Saving model ...


loss: 0.8133: 100%|██████████| 469/469 [02:55<00:00,  2.67it/s]


Epoch: 1 	Test Accuracy: 93.5
Test acc increased (91.560000 --> 93.500000).  Saving model ...


loss: 0.7441:  27%|██▋       | 125/469 [00:46<02:08,  2.69it/s]


KeyboardInterrupt: ignored

In [14]:
cfg = config
cfg['model'] = 'NetFC(config, sparsity=[0.5, 0.5])'
run(cfg)

{'model': 'NetFC(config, sparsity=0.5)', 'exp_name': 'mnist_tha', 'num_trials': 5, 'num_epochs': 500, 'binarize': True, 'data_dir': '~/data/mnist', 'batch_size': 128, 'seed': 0, 'num_workers': 0, 'save_csv': True, 'save_model': True, 'early_stopping': True, 'patience': 100, 'grad_clip': False, 'weight_clip': False, 'batch_norm': True, 'dropout1': 0.02856, 'beta': 0.99, 'lr': 0.00997, 'slope': 10.22, 'threshold1': 11.666, 'alpha_thr1': 0.024, 'thr_final1': 15.983, 'threshold2': 14.105, 'alpha_thr2': 0.119, 'thr_final2': 30.395, 'threshold3': 0.6656, 'alpha_thr3': 0.0011, 'thr_final3': 4.1616, 'num_steps': 100, 'correct_rate': 0.8, 'incorrect_rate': 0.2, 'betas': (0.9, 0.999), 't_0': 4688, 'eta_min': 0, 'df_lr': True, 'dataset_length': 60000}
NetFC(
  (bfc1): SparseBinaryLinear(in_features=256, out_features=256, bias=False)
  (fc1): Linear(in_features=256, out_features=256, bias=False)
  (bn1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (lif1): Lea

loss: 1.0576: 100%|██████████| 469/469 [02:57<00:00,  2.64it/s]


Epoch: 0 	Test Accuracy: 89.58
Test acc increased (0.000000 --> 89.580000).  Saving model ...


loss: 0.9438: 100%|██████████| 469/469 [02:57<00:00,  2.64it/s]


Epoch: 1 	Test Accuracy: 90.51
Test acc increased (89.580000 --> 90.510000).  Saving model ...


loss: 0.9071: 100%|██████████| 469/469 [02:57<00:00,  2.64it/s]


Epoch: 2 	Test Accuracy: 91.0
Test acc increased (90.510000 --> 91.000000).  Saving model ...


loss: 0.8651: 100%|██████████| 469/469 [02:57<00:00,  2.64it/s]


Epoch: 3 	Test Accuracy: 91.85
Test acc increased (91.000000 --> 91.850000).  Saving model ...


loss: 0.8304: 100%|██████████| 469/469 [02:53<00:00,  2.71it/s]


Epoch: 4 	Test Accuracy: 92.0
Test acc increased (91.850000 --> 92.000000).  Saving model ...


loss: 0.8051: 100%|██████████| 469/469 [02:52<00:00,  2.72it/s]


Epoch: 5 	Test Accuracy: 92.3
Test acc increased (92.000000 --> 92.300000).  Saving model ...


loss: 0.7615: 100%|██████████| 469/469 [02:50<00:00,  2.74it/s]


Epoch: 6 	Test Accuracy: 92.17
EarlyStopping counter: 1 out of 100


loss: 0.7586: 100%|██████████| 469/469 [02:49<00:00,  2.77it/s]


Epoch: 7 	Test Accuracy: 92.73
Test acc increased (92.300000 --> 92.730000).  Saving model ...


loss: 0.7067: 100%|██████████| 469/469 [02:47<00:00,  2.80it/s]


Epoch: 8 	Test Accuracy: 92.95
Test acc increased (92.730000 --> 92.950000).  Saving model ...


loss: 0.6712: 100%|██████████| 469/469 [02:44<00:00,  2.85it/s]


Epoch: 9 	Test Accuracy: 92.91
EarlyStopping counter: 1 out of 100


loss: 0.7154:  30%|███       | 141/469 [00:50<01:58,  2.78it/s]


KeyboardInterrupt: ignored

In [None]:
cfg = config
cfg['model'] = 'NetFC(config, sparsity=[0.9, 0.9])'
run(cfg)

{'model': 'NetFC(config, sparsity=0.9)', 'exp_name': 'mnist_tha', 'num_trials': 5, 'num_epochs': 10, 'binarize': True, 'binarize_input': True, 'data_dir': '~/data/mnist', 'batch_size': 128, 'seed': 0, 'num_workers': 0, 'save_csv': True, 'save_model': True, 'early_stopping': True, 'patience': 100, 'grad_clip': False, 'weight_clip': False, 'batch_norm': True, 'dropout1': 0.02856, 'beta': 0.99, 'lr': 0.00997, 'slope': 10.22, 'threshold1': 11.666, 'alpha_thr1': 0.024, 'thr_final1': 4.317, 'threshold2': 14.105, 'alpha_thr2': 0.119, 'thr_final2': 16.29, 'threshold3': 0.6656, 'alpha_thr3': 0.0011, 'thr_final3': 3.496, 'num_steps': 100, 'correct_rate': 0.8, 'incorrect_rate': 0.2, 'betas': (0.9, 0.999), 't_0': 4688, 'eta_min': 0, 'df_lr': True}
tensor(0.1001)
tensor(0.1024)
NetFC(
  (bfc1): SparseBinaryLinear(in_features=256, out_features=256, bias=False)
  (fc1): Linear(in_features=256, out_features=256, bias=False)
  (bn1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_

loss: 1.6109: 100%|██████████| 469/469 [02:47<00:00,  2.80it/s]


Epoch: 0 	Test Accuracy: 85.25
Test acc increased (0.000000 --> 85.250000).  Saving model ...


loss: 1.4953: 100%|██████████| 469/469 [02:46<00:00,  2.82it/s]


Epoch: 1 	Test Accuracy: 84.37
EarlyStopping counter: 1 out of 100


loss: 1.4063: 100%|██████████| 469/469 [02:48<00:00,  2.79it/s]


Epoch: 2 	Test Accuracy: 85.62
Test acc increased (85.250000 --> 85.620000).  Saving model ...


loss: 1.3945: 100%|██████████| 469/469 [02:48<00:00,  2.78it/s]


Epoch: 3 	Test Accuracy: 86.7
Test acc increased (85.620000 --> 86.700000).  Saving model ...


loss: 1.4162: 100%|██████████| 469/469 [02:46<00:00,  2.82it/s]


Epoch: 4 	Test Accuracy: 87.12
Test acc increased (86.700000 --> 87.120000).  Saving model ...


loss: 1.3976: 100%|██████████| 469/469 [02:46<00:00,  2.82it/s]


Epoch: 5 	Test Accuracy: 87.33
Test acc increased (87.120000 --> 87.330000).  Saving model ...


loss: 1.2937: 100%|██████████| 469/469 [02:48<00:00,  2.79it/s]


Epoch: 6 	Test Accuracy: 87.65
Test acc increased (87.330000 --> 87.650000).  Saving model ...


loss: 1.3561:  95%|█████████▍| 445/469 [02:38<00:08,  2.76it/s]

In [None]:
cfg = config
cfg['model'] = 'NetFC(config, [128, 128])'
run(cfg)