In [1]:
from __future__ import print_function
import numpy as np
import argparse
import torch
import torch.nn.functional as F
from optimizer import PruneAdam
from model import LeNet, AlexNet
from utils import regularized_nll_loss, admm_loss, \
    initialize_Z_and_U, update_X, update_Z, update_Z_l1, update_U, \
    print_convergence, print_prune, apply_prune, apply_l1_prune
from torchvision import datasets, transforms, models
from tqdm import tqdm
from Fed import FedAvg
import copy

from torch.utils.tensorboard import SummaryWriter
%load_ext tensorboard

In [2]:
# Training settings
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
parser.add_argument('--dataset', type=str, default="cifar10", choices=["mnist", "cifar10"],
                    metavar='D', help='training dataset (mnist or cifar10)')
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                    help='input batch size for training (default: 64)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                    help='input batch size for testing (default: 1000)')
parser.add_argument('--percent', type=list, default=[0.8, 0.92, 0.991, 0.93],
                    metavar='P', help='pruning percentage (default: 0.8)')
parser.add_argument('--alpha', type=float, default=5e-4, metavar='L',
                    help='l2 norm weight (default: 5e-4)')
parser.add_argument('--rho', type=float, default=1e-2, metavar='R',
                    help='cardinality weight (default: 1e-2)')
parser.add_argument('--l1', default=False, action='store_true',
                    help='prune weights with l1 regularization instead of cardinality')
parser.add_argument('--l2', default=False, action='store_true',
                    help='apply l2 regularization')
parser.add_argument('--num_pre_epochs', type=int, default=3, metavar='P',
                    help='number of epochs to pretrain (default: 3)')
parser.add_argument('--num_epochs', type=int, default=10, metavar='N',
                    help='number of epochs to train (default: 10)')
parser.add_argument('--num_re_epochs', type=int, default=3, metavar='R',
                    help='number of epochs to retrain (default: 3)')
parser.add_argument('--lr', type=float, default=1e-3, metavar='LR',
                    help='learning rate (default: 1e-2)')
parser.add_argument('--adam_epsilon', type=float, default=1e-8, metavar='E',
                    help='adam epsilon (default: 1e-8)')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                    help='random seed (default: 1)')
parser.add_argument('--save-model', action='store_true', default=False,
                    help='For Saving the current Model')
parser.add_argument('--num_users', action='store_true', default=1,
                    help='Number of users in network')
parser.add_argument('--model', action='store_true', default='resnet',
                    help='Model to train')
args = parser.parse_args(args=[])

In [3]:
classDict = {'plane':0, 'car':1, 'bird':2, 'cat':3, 'deer':4, 'dog':5, 'frog':6, 'horse':7, 'ship':8, 'truck':9}

# Define a function to separate CIFAR classes by class index

def get_class_i(x, y, i):
    """
    x: trainset.train_data or testset.test_data
    y: trainset.train_labels or testset.test_labels
    i: class label, a number between 0 to 9
    return: x_i
    """
    # Convert to a numpy array
    y = np.array(y)
    # Locate position of labels that equal to i
    pos_i = np.argwhere(y == i)
    # Convert the result into a 1-D list
    pos_i = list(pos_i[:,0])
    # Collect all data that match the desired label
#     x_i = [x[j] for j in pos_i]
    
    return pos_i

In [4]:
def create_dict(dataset_train):
    
    frac = int(len(dataset_train.data) * 0.05)
    x_reserve = dataset_train.data[:frac]
    y_reserve = dataset_train.targets[:frac]
    x_train = dataset_train.data[frac:]
    y_train = dataset_train.targets[frac:]
    
    reserved = get_class_i(x_reserve, y_reserve, classDict['plane']) \
               +get_class_i(x_reserve, y_reserve, classDict['car']) \
               +get_class_i(x_reserve, y_reserve, classDict['bird']) \
               +get_class_i(x_reserve, y_reserve, classDict['cat']) \
               +get_class_i(x_reserve, y_reserve, classDict['deer']) \
               +get_class_i(x_reserve, y_reserve, classDict['dog']) \
               +get_class_i(x_reserve, y_reserve, classDict['frog']) \
               +get_class_i(x_reserve, y_reserve, classDict['horse']) \
               +get_class_i(x_reserve, y_reserve, classDict['ship']) \
               +get_class_i(x_reserve, y_reserve, classDict['truck'])

    train1 = get_class_i(x_train, y_train, classDict['plane']) \
             +get_class_i(x_train, y_train, classDict['car']) \
             +get_class_i(x_train, y_train, classDict['bird'])

    train2 = get_class_i(x_train, y_train, classDict['cat']) \
             +get_class_i(x_train, y_train, classDict['deer']) \
             +get_class_i(x_train, y_train, classDict['dog'])
    train3 = get_class_i(x_train, y_train, classDict['frog']) \
             +get_class_i(x_train, y_train, classDict['horse']) \
             +get_class_i(x_train, y_train, classDict['ship']) \
             +get_class_i(x_train, y_train, classDict['truck'])
    
    dict_users = {0: set(reserved+train3), 1:set(train1), 2:set(train2)}
    return dict_users

class DatasetSplit(torch.utils.data.Dataset):
    def __init__(self, dataset, idxs):
        self.dataset = dataset
        self.idxs = list(idxs)

    def __len__(self):
        return len(self.idxs)

    def __getitem__(self, item):
        image, label = self.dataset[self.idxs[item]]
        return image, label

In [5]:
def pretrain(args, model, device, train_loader, test_loader, optimizer):
    for epoch in range(args.num_pre_epochs):
        print('Pre epoch: {}'.format(epoch + 1))
        model.train()
        for batch_idx, (data, target) in enumerate(tqdm(train_loader)):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = regularized_nll_loss(args, model, output, target)
            loss.backward()
            optimizer.step()
        test(args, model, device, test_loader)


def train(args, model, device, train_loader, test_loader, optimizer, Z, U, report=False):
    model.train()
    print('Epoch: {}'.format(epoch + 1))
    for batch_idx, (data, target) in enumerate(tqdm(train_loader)):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = admm_loss(args, device, model, Z, U, output, target)
        loss.backward()
        optimizer.step()
    X = update_X(model)
    Z = update_Z_l1(X, U, args) if args.l1 else update_Z(X, U, args)
    U = update_U(U, X, Z)
    print_convergence(model, X, Z)
    test(args, model, device, test_loader, report)


iter = 0
def test(args, model, device, test_loader, report=False):
    model.eval()
    test_loss = 0
    correct = 0
    global iter
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    
    if report:
        writer.add_scalar('train/loss_gossip_admm4', test_loss, iter)
        writer.add_scalar('valid/accuracy_gossip_admm4', correct / len(test_loader.dataset), iter)
        iter+=1


def retrain(args, model, mask, device, train_loader, test_loader, optimizer):
    for epoch in range(args.num_re_epochs):
        print('Re epoch: {}'.format(epoch + 1))
        model.train()
        for batch_idx, (data, target) in enumerate(tqdm(train_loader)):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.prune_step(mask)

        test(args, model, device, test_loader)

In [6]:
### MAIN

writer = SummaryWriter('../../runs/') 
use_cuda = not args.no_cuda and torch.cuda.is_available()
torch.manual_seed(args.seed)
device = torch.device("cuda" if use_cuda else "cpu")
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

args.percent = [0.8, 0.92, 0.93, 0.94, 0.95, 0.99, 0.99, 0.93]
args.num_pre_epochs = 3
args.num_epochs = 125
args.num_re_epochs = 5
args.num_users = 3
args.dataset = 'cifar10'
args.model = 'alex'
args.l1 = True
args.l2 = False

trainset = datasets.CIFAR10('data', train=True, download=True,
                         transform=transforms.Compose([
                             transforms.ToTensor(),
                             transforms.Normalize((0.49139968, 0.48215827, 0.44653124),
                                                  (0.24703233, 0.24348505, 0.26158768))
                         ]))
dict_users = create_dict(trainset)

test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('data', train=False, download=True,
                     transform=transforms.Compose([
                         transforms.ToTensor(),
                         transforms.Normalize((0.49139968, 0.48215827, 0.44653124),
                                              (0.24703233, 0.24348505, 0.26158768))
                     ])), shuffle=True, batch_size=args.test_batch_size, **kwargs)

Files already downloaded and verified
Files already downloaded and verified


In [7]:
mdlz = dict()
for usr in range(args.num_users):
    train_loader = torch.utils.data.DataLoader(DatasetSplit(trainset, dict_users[usr]), batch_size=args.batch_size, shuffle=True, **kwargs)
    if args.model == 'resnet':
        model = models.resnet18(pretrained=False).to(device)
    else:
        model = LeNet().to(device) if args.dataset == "mnist" else AlexNet().to(device)
    
    optimizer = PruneAdam(model.named_parameters(), lr=args.lr, eps=args.adam_epsilon)
    pretrain(args, model, device, train_loader, test_loader, optimizer)
    Z, U = initialize_Z_and_U(model)
    mdlz[usr] = (model, optimizer, Z, U)
    
for epoch in range(args.num_epochs):
    w_locals=[]
    idxs_users = np.random.choice(range(args.num_users), 2, replace=False)
    for usr in idxs_users:
        report = True if usr == 0 else False
        train_loader = torch.utils.data.DataLoader(DatasetSplit(trainset, dict_users[usr]), batch_size=args.batch_size, shuffle=True, **kwargs)
        train(args, mdlz[usr][0], device, train_loader, test_loader, mdlz[usr][1], mdlz[usr][2], mdlz[usr][3], report=report)
        w = mdlz[usr][0].state_dict()
        w_locals.append(copy.deepcopy(w))

    # update global weights
    w_glob = FedAvg(w_locals)

    for idx in idxs_users:
        # copy weight to net_glob
        mdlz[idx][0].load_state_dict(w_glob)

  0%|          | 0/320 [00:00<?, ?it/s]

Pre epoch: 1


100%|██████████| 320/320 [00:09<00:00, 32.96it/s]
  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.7212, Accuracy: 3303/10000 (33%)

Pre epoch: 2


100%|██████████| 320/320 [00:09<00:00, 34.03it/s]
  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.3760, Accuracy: 4976/10000 (50%)

Pre epoch: 3


100%|██████████| 320/320 [00:09<00:00, 33.87it/s]



Test set: Average loss: 1.2656, Accuracy: 5450/10000 (54%)



  0%|          | 0/223 [00:00<?, ?it/s]

Pre epoch: 1


100%|██████████| 223/223 [00:06<00:00, 34.85it/s]
  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.7352, Accuracy: 3288/10000 (33%)

Pre epoch: 2


100%|██████████| 223/223 [00:06<00:00, 33.40it/s]
  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4950, Accuracy: 4249/10000 (42%)

Pre epoch: 3


100%|██████████| 223/223 [00:06<00:00, 34.21it/s]



Test set: Average loss: 1.3776, Accuracy: 4931/10000 (49%)



  0%|          | 0/223 [00:00<?, ?it/s]

Pre epoch: 1


100%|██████████| 223/223 [00:06<00:00, 34.63it/s]
  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.6896, Accuracy: 3171/10000 (32%)

Pre epoch: 2


100%|██████████| 223/223 [00:06<00:00, 33.88it/s]
  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4810, Accuracy: 4374/10000 (44%)

Pre epoch: 3


100%|██████████| 223/223 [00:06<00:00, 34.18it/s]
  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.3359, Accuracy: 5047/10000 (50%)

Epoch: 1


100%|██████████| 320/320 [00:28<00:00, 11.28it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3968
(features.3.weight): 0.8672
(features.6.weight): 0.8712
(features.8.weight): 0.8799
(features.10.weight): 0.8947
(classifier.1.weight): 0.9052
(classifier.4.weight): 0.9611
(classifier.6.weight): 0.9439


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.2266, Accuracy: 5595/10000 (56%)

Epoch: 1


100%|██████████| 223/223 [00:19<00:00, 11.13it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3991
(features.3.weight): 0.9142
(features.6.weight): 0.9174
(features.8.weight): 0.9124
(features.10.weight): 0.9288
(classifier.1.weight): 0.9339
(classifier.4.weight): 0.9725
(classifier.6.weight): 0.9580


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.2977, Accuracy: 5281/10000 (53%)

Epoch: 2


100%|██████████| 223/223 [00:18<00:00, 11.55it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5125
(features.3.weight): 0.9384
(features.6.weight): 0.9578
(features.8.weight): 0.9650
(features.10.weight): 0.9702
(classifier.1.weight): 0.9573
(classifier.4.weight): 0.9811
(classifier.6.weight): 0.9498


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.5797, Accuracy: 3787/10000 (38%)

Epoch: 2


100%|██████████| 223/223 [00:20<00:00, 11.74it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4060
(features.3.weight): 0.8989
(features.6.weight): 0.9104
(features.8.weight): 0.9142
(features.10.weight): 0.9303
(classifier.1.weight): 0.9350
(classifier.4.weight): 0.9762
(classifier.6.weight): 0.9683


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.3353, Accuracy: 5195/10000 (52%)

Epoch: 3


100%|██████████| 223/223 [00:19<00:00, 11.26it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5762
(features.3.weight): 0.9518
(features.6.weight): 0.9531
(features.8.weight): 0.9619
(features.10.weight): 0.9697
(classifier.1.weight): 0.9642
(classifier.4.weight): 0.9841
(classifier.6.weight): 0.9599


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.6416, Accuracy: 3931/10000 (39%)

Epoch: 3


100%|██████████| 223/223 [00:19<00:00, 11.16it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5810
(features.3.weight): 0.9466
(features.6.weight): 0.9706
(features.8.weight): 0.9744
(features.10.weight): 0.9776
(classifier.1.weight): 0.9667
(classifier.4.weight): 0.9830
(classifier.6.weight): 0.9517


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.7193, Accuracy: 3354/10000 (34%)

Epoch: 4


100%|██████████| 320/320 [00:27<00:00, 11.45it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5127
(features.3.weight): 0.9185
(features.6.weight): 0.9206
(features.8.weight): 0.9384
(features.10.weight): 0.9483
(classifier.1.weight): 0.9374
(classifier.4.weight): 0.9693
(classifier.6.weight): 0.9501


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.3942, Accuracy: 4878/10000 (49%)

Epoch: 4


100%|██████████| 223/223 [00:20<00:00, 11.07it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5726
(features.3.weight): 0.9395
(features.6.weight): 0.9631
(features.8.weight): 0.9645
(features.10.weight): 0.9689
(classifier.1.weight): 0.9611
(classifier.4.weight): 0.9812
(classifier.6.weight): 0.9219


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4828, Accuracy: 4357/10000 (44%)

Epoch: 5


100%|██████████| 223/223 [00:20<00:00, 10.21it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5691
(features.3.weight): 0.9498
(features.6.weight): 0.9641
(features.8.weight): 0.9694
(features.10.weight): 0.9714
(classifier.1.weight): 0.9616
(classifier.4.weight): 0.9842
(classifier.6.weight): 0.9341


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.4319, Accuracy: 4642/10000 (46%)

Epoch: 5


100%|██████████| 320/320 [00:28<00:00, 11.25it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5815
(features.3.weight): 0.9247
(features.6.weight): 0.9440
(features.8.weight): 0.9493
(features.10.weight): 0.9541
(classifier.1.weight): 0.9434
(classifier.4.weight): 0.9705
(classifier.6.weight): 0.9270


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.3378, Accuracy: 5148/10000 (51%)

Epoch: 6


100%|██████████| 223/223 [00:20<00:00, 10.75it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5813
(features.3.weight): 0.9299
(features.6.weight): 0.9551
(features.8.weight): 0.9666
(features.10.weight): 0.9690
(classifier.1.weight): 0.9610
(classifier.4.weight): 0.9848
(classifier.6.weight): 0.9228


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4060, Accuracy: 4809/10000 (48%)

Epoch: 6


100%|██████████| 223/223 [00:19<00:00, 11.76it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5881
(features.3.weight): 0.9439
(features.6.weight): 0.9513
(features.8.weight): 0.9625
(features.10.weight): 0.9676
(classifier.1.weight): 0.9604
(classifier.4.weight): 0.9808
(classifier.6.weight): 0.9314


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4555, Accuracy: 4612/10000 (46%)

Epoch: 7


100%|██████████| 223/223 [00:20<00:00, 11.03it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5816
(features.3.weight): 0.9335
(features.6.weight): 0.9586
(features.8.weight): 0.9665
(features.10.weight): 0.9706
(classifier.1.weight): 0.9657
(classifier.4.weight): 0.9852
(classifier.6.weight): 0.9144


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.2801, Accuracy: 5297/10000 (53%)

Epoch: 7


100%|██████████| 223/223 [00:19<00:00, 11.30it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5806
(features.3.weight): 0.9320
(features.6.weight): 0.9506
(features.8.weight): 0.9626
(features.10.weight): 0.9691
(classifier.1.weight): 0.9633
(classifier.4.weight): 0.9819
(classifier.6.weight): 0.9091


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.3920, Accuracy: 4800/10000 (48%)

Epoch: 8


100%|██████████| 320/320 [00:28<00:00, 11.38it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5781
(features.3.weight): 0.9196
(features.6.weight): 0.9437
(features.8.weight): 0.9520
(features.10.weight): 0.9579
(classifier.1.weight): 0.9452
(classifier.4.weight): 0.9708
(classifier.6.weight): 0.9165


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.2926, Accuracy: 5319/10000 (53%)

Epoch: 8


100%|██████████| 223/223 [00:19<00:00, 11.49it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5704
(features.3.weight): 0.9229
(features.6.weight): 0.9457
(features.8.weight): 0.9579
(features.10.weight): 0.9681
(classifier.1.weight): 0.9639
(classifier.4.weight): 0.9809
(classifier.6.weight): 0.8986


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.2110, Accuracy: 5637/10000 (56%)

Epoch: 9


100%|██████████| 320/320 [00:28<00:00, 11.41it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5614
(features.3.weight): 0.9069
(features.6.weight): 0.9355
(features.8.weight): 0.9462
(features.10.weight): 0.9532
(classifier.1.weight): 0.9451
(classifier.4.weight): 0.9696
(classifier.6.weight): 0.8964


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.1861, Accuracy: 5753/10000 (58%)

Epoch: 9


100%|██████████| 223/223 [00:20<00:00, 11.01it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5654
(features.3.weight): 0.9228
(features.6.weight): 0.9467
(features.8.weight): 0.9582
(features.10.weight): 0.9674
(classifier.1.weight): 0.9614
(classifier.4.weight): 0.9802
(classifier.6.weight): 0.8946


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.1810, Accuracy: 5741/10000 (57%)

Epoch: 10


100%|██████████| 223/223 [00:19<00:00, 11.34it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5702
(features.3.weight): 0.9160
(features.6.weight): 0.9475
(features.8.weight): 0.9602
(features.10.weight): 0.9685
(classifier.1.weight): 0.9654
(classifier.4.weight): 0.9841
(classifier.6.weight): 0.9030


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.2712, Accuracy: 5269/10000 (53%)

Epoch: 10


100%|██████████| 223/223 [00:20<00:00, 10.88it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5546
(features.3.weight): 0.9082
(features.6.weight): 0.9331
(features.8.weight): 0.9491
(features.10.weight): 0.9609
(classifier.1.weight): 0.9611
(classifier.4.weight): 0.9792
(classifier.6.weight): 0.8738


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.1976, Accuracy: 5757/10000 (58%)

Epoch: 11


100%|██████████| 223/223 [00:19<00:00, 11.34it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5534
(features.3.weight): 0.9062
(features.6.weight): 0.9370
(features.8.weight): 0.9511
(features.10.weight): 0.9636
(classifier.1.weight): 0.9621
(classifier.4.weight): 0.9783
(classifier.6.weight): 0.8743


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.1351, Accuracy: 5975/10000 (60%)

Epoch: 11


100%|██████████| 320/320 [00:28<00:00, 11.40it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5514
(features.3.weight): 0.8933
(features.6.weight): 0.9252
(features.8.weight): 0.9389
(features.10.weight): 0.9499
(classifier.1.weight): 0.9415
(classifier.4.weight): 0.9675
(classifier.6.weight): 0.8858


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.1630, Accuracy: 5875/10000 (59%)

Epoch: 12


100%|██████████| 320/320 [00:29<00:00, 10.98it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5415
(features.3.weight): 0.8815
(features.6.weight): 0.9140
(features.8.weight): 0.9316
(features.10.weight): 0.9447
(classifier.1.weight): 0.9401
(classifier.4.weight): 0.9671
(classifier.6.weight): 0.8733


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.1097, Accuracy: 6157/10000 (62%)

Epoch: 12


100%|██████████| 223/223 [00:18<00:00, 11.57it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5542
(features.3.weight): 0.9032
(features.6.weight): 0.9348
(features.8.weight): 0.9521
(features.10.weight): 0.9646
(classifier.1.weight): 0.9648
(classifier.4.weight): 0.9835
(classifier.6.weight): 0.8830


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.1286, Accuracy: 5976/10000 (60%)

Epoch: 13


100%|██████████| 223/223 [00:18<00:00, 10.92it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5446
(features.3.weight): 0.8951
(features.6.weight): 0.9257
(features.8.weight): 0.9430
(features.10.weight): 0.9592
(classifier.1.weight): 0.9605
(classifier.4.weight): 0.9775
(classifier.6.weight): 0.8615


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.1567, Accuracy: 5858/10000 (59%)

Epoch: 13


100%|██████████| 320/320 [00:27<00:00, 11.40it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5373
(features.3.weight): 0.8789
(features.6.weight): 0.9126
(features.8.weight): 0.9291
(features.10.weight): 0.9426
(classifier.1.weight): 0.9385
(classifier.4.weight): 0.9665
(classifier.6.weight): 0.8659


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0740, Accuracy: 6251/10000 (63%)

Epoch: 14


100%|██████████| 223/223 [00:18<00:00, 12.26it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5407
(features.3.weight): 0.8802
(features.6.weight): 0.9153
(features.8.weight): 0.9441
(features.10.weight): 0.9598
(classifier.1.weight): 0.9625
(classifier.4.weight): 0.9824
(classifier.6.weight): 0.8605


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.0669, Accuracy: 6247/10000 (62%)

Epoch: 14


100%|██████████| 320/320 [00:26<00:00, 11.32it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5313
(features.3.weight): 0.8685
(features.6.weight): 0.9054
(features.8.weight): 0.9222
(features.10.weight): 0.9374
(classifier.1.weight): 0.9353
(classifier.4.weight): 0.9650
(classifier.6.weight): 0.8588


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.1003, Accuracy: 6277/10000 (63%)

Epoch: 15


100%|██████████| 223/223 [00:18<00:00, 11.27it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5341
(features.3.weight): 0.8827
(features.6.weight): 0.9131
(features.8.weight): 0.9354
(features.10.weight): 0.9533
(classifier.1.weight): 0.9592
(classifier.4.weight): 0.9769
(classifier.6.weight): 0.8493


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0514, Accuracy: 6388/10000 (64%)

Epoch: 15


100%|██████████| 223/223 [00:18<00:00, 11.96it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5303
(features.3.weight): 0.8663
(features.6.weight): 0.9026
(features.8.weight): 0.9356
(features.10.weight): 0.9553
(classifier.1.weight): 0.9617
(classifier.4.weight): 0.9818
(classifier.6.weight): 0.8460


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0369, Accuracy: 6301/10000 (63%)

Epoch: 16


100%|██████████| 223/223 [00:20<00:00, 10.93it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5259
(features.3.weight): 0.8605
(features.6.weight): 0.8995
(features.8.weight): 0.9316
(features.10.weight): 0.9528
(classifier.1.weight): 0.9618
(classifier.4.weight): 0.9808
(classifier.6.weight): 0.8401


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0665, Accuracy: 6225/10000 (62%)

Epoch: 16


100%|██████████| 223/223 [00:19<00:00, 11.85it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5252
(features.3.weight): 0.8678
(features.6.weight): 0.9002
(features.8.weight): 0.9288
(features.10.weight): 0.9500
(classifier.1.weight): 0.9584
(classifier.4.weight): 0.9758
(classifier.6.weight): 0.8405


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0132, Accuracy: 6492/10000 (65%)

Epoch: 17


100%|██████████| 223/223 [00:19<00:00, 11.15it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5187
(features.3.weight): 0.8493
(features.6.weight): 0.8882
(features.8.weight): 0.9253
(features.10.weight): 0.9503
(classifier.1.weight): 0.9611
(classifier.4.weight): 0.9802
(classifier.6.weight): 0.8318


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.0010, Accuracy: 6537/10000 (65%)

Epoch: 17


100%|██████████| 320/320 [00:27<00:00, 11.16it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5277
(features.3.weight): 0.8634
(features.6.weight): 0.9002
(features.8.weight): 0.9183
(features.10.weight): 0.9342
(classifier.1.weight): 0.9327
(classifier.4.weight): 0.9643
(classifier.6.weight): 0.8576


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0341, Accuracy: 6458/10000 (65%)

Epoch: 18


100%|██████████| 223/223 [00:19<00:00, 11.43it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5197
(features.3.weight): 0.8560
(features.6.weight): 0.8907
(features.8.weight): 0.9235
(features.10.weight): 0.9474
(classifier.1.weight): 0.9552
(classifier.4.weight): 0.9748
(classifier.6.weight): 0.8366


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0259, Accuracy: 6414/10000 (64%)

Epoch: 18


100%|██████████| 223/223 [00:19<00:00, 11.23it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5166
(features.3.weight): 0.8479
(features.6.weight): 0.8885
(features.8.weight): 0.9247
(features.10.weight): 0.9483
(classifier.1.weight): 0.9601
(classifier.4.weight): 0.9800
(classifier.6.weight): 0.8292


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0263, Accuracy: 6465/10000 (65%)

Epoch: 19


100%|██████████| 223/223 [00:19<00:00, 11.19it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5140
(features.3.weight): 0.8453
(features.6.weight): 0.8821
(features.8.weight): 0.9157
(features.10.weight): 0.9411
(classifier.1.weight): 0.9543
(classifier.4.weight): 0.9747
(classifier.6.weight): 0.8248


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.0230, Accuracy: 6477/10000 (65%)

Epoch: 19


100%|██████████| 320/320 [00:28<00:00, 11.11it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5149
(features.3.weight): 0.8407
(features.6.weight): 0.8836
(features.8.weight): 0.9078
(features.10.weight): 0.9283
(classifier.1.weight): 0.9317
(classifier.4.weight): 0.9644
(classifier.6.weight): 0.8465


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 0.9630, Accuracy: 6685/10000 (67%)

Epoch: 20


100%|██████████| 223/223 [00:20<00:00, 10.97it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5125
(features.3.weight): 0.8378
(features.6.weight): 0.8817
(features.8.weight): 0.9204
(features.10.weight): 0.9465
(classifier.1.weight): 0.9591
(classifier.4.weight): 0.9792
(classifier.6.weight): 0.8219


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 0.9983, Accuracy: 6585/10000 (66%)

Epoch: 20


100%|██████████| 223/223 [00:20<00:00, 11.08it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5088
(features.3.weight): 0.8345
(features.6.weight): 0.8763
(features.8.weight): 0.9140
(features.10.weight): 0.9426
(classifier.1.weight): 0.9537
(classifier.4.weight): 0.9742
(classifier.6.weight): 0.8222


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0459, Accuracy: 6415/10000 (64%)

Epoch: 21


100%|██████████| 223/223 [00:18<00:00, 11.75it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5055
(features.3.weight): 0.8274
(features.6.weight): 0.8725
(features.8.weight): 0.9143
(features.10.weight): 0.9436
(classifier.1.weight): 0.9586
(classifier.4.weight): 0.9789
(classifier.6.weight): 0.8123


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.0253, Accuracy: 6522/10000 (65%)

Epoch: 21


100%|██████████| 320/320 [00:26<00:00, 11.34it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5070
(features.3.weight): 0.8290
(features.6.weight): 0.8722
(features.8.weight): 0.9011
(features.10.weight): 0.9249
(classifier.1.weight): 0.9307
(classifier.4.weight): 0.9643
(classifier.6.weight): 0.8391


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 0.9588, Accuracy: 6672/10000 (67%)

Epoch: 22


100%|██████████| 223/223 [00:18<00:00, 11.17it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5055
(features.3.weight): 0.8304
(features.6.weight): 0.8711
(features.8.weight): 0.9101
(features.10.weight): 0.9392
(classifier.1.weight): 0.9520
(classifier.4.weight): 0.9739
(classifier.6.weight): 0.8165


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 0.9820, Accuracy: 6670/10000 (67%)

Epoch: 22


100%|██████████| 223/223 [00:19<00:00, 11.89it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.5013
(features.3.weight): 0.8190
(features.6.weight): 0.8648
(features.8.weight): 0.9100
(features.10.weight): 0.9408
(classifier.1.weight): 0.9563
(classifier.4.weight): 0.9784
(classifier.6.weight): 0.8100


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.0005, Accuracy: 6574/10000 (66%)

Epoch: 23


100%|██████████| 320/320 [00:27<00:00, 11.45it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4997
(features.3.weight): 0.8155
(features.6.weight): 0.8609
(features.8.weight): 0.8950
(features.10.weight): 0.9200
(classifier.1.weight): 0.9292
(classifier.4.weight): 0.9641
(classifier.6.weight): 0.8309


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 0.9520, Accuracy: 6767/10000 (68%)

Epoch: 23


100%|██████████| 223/223 [00:18<00:00, 11.90it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4979
(features.3.weight): 0.8128
(features.6.weight): 0.8603
(features.8.weight): 0.9071
(features.10.weight): 0.9385
(classifier.1.weight): 0.9551
(classifier.4.weight): 0.9777
(classifier.6.weight): 0.8017


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0099, Accuracy: 6688/10000 (67%)

Epoch: 24


100%|██████████| 223/223 [00:20<00:00, 12.12it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4989
(features.3.weight): 0.8154
(features.6.weight): 0.8607
(features.8.weight): 0.9033
(features.10.weight): 0.9350
(classifier.1.weight): 0.9517
(classifier.4.weight): 0.9736
(classifier.6.weight): 0.8093


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 0.9683, Accuracy: 6736/10000 (67%)

Epoch: 24


100%|██████████| 320/320 [00:28<00:00, 11.21it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4926
(features.3.weight): 0.8054
(features.6.weight): 0.8509
(features.8.weight): 0.8874
(features.10.weight): 0.9164
(classifier.1.weight): 0.9271
(classifier.4.weight): 0.9637
(classifier.6.weight): 0.8212


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 0.9843, Accuracy: 6653/10000 (67%)

Epoch: 25


100%|██████████| 320/320 [00:29<00:00, 11.30it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4902
(features.3.weight): 0.8002
(features.6.weight): 0.8471
(features.8.weight): 0.8856
(features.10.weight): 0.9149
(classifier.1.weight): 0.9271
(classifier.4.weight): 0.9638
(classifier.6.weight): 0.8208


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 0.9739, Accuracy: 6750/10000 (68%)

Epoch: 25


100%|██████████| 223/223 [00:19<00:00, 11.53it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4919
(features.3.weight): 0.8046
(features.6.weight): 0.8498
(features.8.weight): 0.8975
(features.10.weight): 0.9329
(classifier.1.weight): 0.9503
(classifier.4.weight): 0.9734
(classifier.6.weight): 0.8028


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 0.9529, Accuracy: 6807/10000 (68%)

Epoch: 26


100%|██████████| 320/320 [00:26<00:00, 11.87it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4842
(features.3.weight): 0.7900
(features.6.weight): 0.8378
(features.8.weight): 0.8777
(features.10.weight): 0.9090
(classifier.1.weight): 0.9251
(classifier.4.weight): 0.9633
(classifier.6.weight): 0.8130


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 0.9740, Accuracy: 6752/10000 (68%)

Epoch: 26


100%|██████████| 223/223 [00:20<00:00, 11.77it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4855
(features.3.weight): 0.7945
(features.6.weight): 0.8411
(features.8.weight): 0.8908
(features.10.weight): 0.9267
(classifier.1.weight): 0.9479
(classifier.4.weight): 0.9730
(classifier.6.weight): 0.8005


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 0.9530, Accuracy: 6837/10000 (68%)

Epoch: 27


100%|██████████| 320/320 [00:28<00:00, 11.17it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4784
(features.3.weight): 0.7812
(features.6.weight): 0.8288
(features.8.weight): 0.8700
(features.10.weight): 0.9040
(classifier.1.weight): 0.9228
(classifier.4.weight): 0.9626
(classifier.6.weight): 0.8066


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 0.9645, Accuracy: 6846/10000 (68%)

Epoch: 27


100%|██████████| 223/223 [00:19<00:00, 11.64it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4934
(features.3.weight): 0.8068
(features.6.weight): 0.8546
(features.8.weight): 0.9041
(features.10.weight): 0.9368
(classifier.1.weight): 0.9529
(classifier.4.weight): 0.9773
(classifier.6.weight): 0.8087


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.0545, Accuracy: 6575/10000 (66%)

Epoch: 28


100%|██████████| 320/320 [00:29<00:00, 11.01it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4794
(features.3.weight): 0.7834
(features.6.weight): 0.8341
(features.8.weight): 0.8759
(features.10.weight): 0.9078
(classifier.1.weight): 0.9234
(classifier.4.weight): 0.9626
(classifier.6.weight): 0.8082


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0068, Accuracy: 6671/10000 (67%)

Epoch: 28


100%|██████████| 223/223 [00:19<00:00, 10.69it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4803
(features.3.weight): 0.7867
(features.6.weight): 0.8315
(features.8.weight): 0.8845
(features.10.weight): 0.9249
(classifier.1.weight): 0.9468
(classifier.4.weight): 0.9727
(classifier.6.weight): 0.7986


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 0.9865, Accuracy: 6731/10000 (67%)

Epoch: 29


100%|██████████| 223/223 [00:20<00:00, 13.22it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4759
(features.3.weight): 0.7785
(features.6.weight): 0.8278
(features.8.weight): 0.8826
(features.10.weight): 0.9241
(classifier.1.weight): 0.9465
(classifier.4.weight): 0.9726
(classifier.6.weight): 0.7930


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 0.9238, Accuracy: 6905/10000 (69%)

Epoch: 29


100%|██████████| 223/223 [00:19<00:00, 11.38it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4814
(features.3.weight): 0.7887
(features.6.weight): 0.8401
(features.8.weight): 0.8963
(features.10.weight): 0.9338
(classifier.1.weight): 0.9537
(classifier.4.weight): 0.9775
(classifier.6.weight): 0.7948


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 0.9795, Accuracy: 6724/10000 (67%)

Epoch: 30


100%|██████████| 320/320 [00:27<00:00, 11.49it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4742
(features.3.weight): 0.7754
(features.6.weight): 0.8235
(features.8.weight): 0.8669
(features.10.weight): 0.9015
(classifier.1.weight): 0.9220
(classifier.4.weight): 0.9626
(classifier.6.weight): 0.8015


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 0.9586, Accuracy: 6947/10000 (69%)

Epoch: 30


100%|██████████| 223/223 [00:20<00:00, 11.52it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4749
(features.3.weight): 0.7751
(features.6.weight): 0.8291
(features.8.weight): 0.8890
(features.10.weight): 0.9305
(classifier.1.weight): 0.9528
(classifier.4.weight): 0.9769
(classifier.6.weight): 0.7839


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 0.9411, Accuracy: 6912/10000 (69%)

Epoch: 31


100%|██████████| 223/223 [00:19<00:00, 11.30it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4706
(features.3.weight): 0.7687
(features.6.weight): 0.8236
(features.8.weight): 0.8844
(features.10.weight): 0.9265
(classifier.1.weight): 0.9520
(classifier.4.weight): 0.9770
(classifier.6.weight): 0.7845


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 0.9575, Accuracy: 6833/10000 (68%)

Epoch: 31


100%|██████████| 223/223 [00:19<00:00, 10.94it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4748
(features.3.weight): 0.7737
(features.6.weight): 0.8269
(features.8.weight): 0.8817
(features.10.weight): 0.9229
(classifier.1.weight): 0.9463
(classifier.4.weight): 0.9726
(classifier.6.weight): 0.7900


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 0.9554, Accuracy: 6864/10000 (69%)

Epoch: 32


100%|██████████| 223/223 [00:20<00:00, 11.05it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4677
(features.3.weight): 0.7633
(features.6.weight): 0.8172
(features.8.weight): 0.8774
(features.10.weight): 0.9230
(classifier.1.weight): 0.9460
(classifier.4.weight): 0.9726
(classifier.6.weight): 0.7851


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.0017, Accuracy: 6855/10000 (69%)

Epoch: 32


100%|██████████| 320/320 [00:28<00:00, 11.18it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4680
(features.3.weight): 0.7656
(features.6.weight): 0.8170
(features.8.weight): 0.8641
(features.10.weight): 0.9004
(classifier.1.weight): 0.9221
(classifier.4.weight): 0.9625
(classifier.6.weight): 0.7963


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 0.9571, Accuracy: 6990/10000 (70%)

Epoch: 33


100%|██████████| 320/320 [00:28<00:00, 11.15it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4624
(features.3.weight): 0.7552
(features.6.weight): 0.8102
(features.8.weight): 0.8587
(features.10.weight): 0.8991
(classifier.1.weight): 0.9225
(classifier.4.weight): 0.9629
(classifier.6.weight): 0.7900


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 0.9775, Accuracy: 6923/10000 (69%)

Epoch: 33


100%|██████████| 223/223 [00:19<00:00, 11.58it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4692
(features.3.weight): 0.7636
(features.6.weight): 0.8203
(features.8.weight): 0.8806
(features.10.weight): 0.9227
(classifier.1.weight): 0.9506
(classifier.4.weight): 0.9766
(classifier.6.weight): 0.7808


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 0.9875, Accuracy: 6830/10000 (68%)

Epoch: 34


100%|██████████| 223/223 [00:18<00:00, 12.12it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4641
(features.3.weight): 0.7571
(features.6.weight): 0.8111
(features.8.weight): 0.8718
(features.10.weight): 0.9179
(classifier.1.weight): 0.9444
(classifier.4.weight): 0.9723
(classifier.6.weight): 0.7823


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 0.9683, Accuracy: 6944/10000 (69%)

Epoch: 34


100%|██████████| 223/223 [00:19<00:00, 11.56it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4620
(features.3.weight): 0.7547
(features.6.weight): 0.8118
(features.8.weight): 0.8764
(features.10.weight): 0.9226
(classifier.1.weight): 0.9495
(classifier.4.weight): 0.9766
(classifier.6.weight): 0.7735


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0268, Accuracy: 6808/10000 (68%)

Epoch: 35


100%|██████████| 223/223 [00:19<00:00, 10.29it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4603
(features.3.weight): 0.7508
(features.6.weight): 0.8087
(features.8.weight): 0.8748
(features.10.weight): 0.9221
(classifier.1.weight): 0.9496
(classifier.4.weight): 0.9764
(classifier.6.weight): 0.7695


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 0.9646, Accuracy: 6912/10000 (69%)

Epoch: 35


100%|██████████| 223/223 [00:19<00:00, 11.00it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4594
(features.3.weight): 0.7488
(features.6.weight): 0.8039
(features.8.weight): 0.8681
(features.10.weight): 0.9163
(classifier.1.weight): 0.9453
(classifier.4.weight): 0.9726
(classifier.6.weight): 0.7756


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 0.9761, Accuracy: 6902/10000 (69%)

Epoch: 36


100%|██████████| 223/223 [00:19<00:00, 11.24it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4548
(features.3.weight): 0.7416
(features.6.weight): 0.8019
(features.8.weight): 0.8687
(features.10.weight): 0.9173
(classifier.1.weight): 0.9478
(classifier.4.weight): 0.9763
(classifier.6.weight): 0.7667


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 0.9996, Accuracy: 6894/10000 (69%)

Epoch: 36


100%|██████████| 223/223 [00:20<00:00, 11.02it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4557
(features.3.weight): 0.7428
(features.6.weight): 0.7975
(features.8.weight): 0.8627
(features.10.weight): 0.9130
(classifier.1.weight): 0.9431
(classifier.4.weight): 0.9721
(classifier.6.weight): 0.7712


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0395, Accuracy: 6882/10000 (69%)

Epoch: 37


100%|██████████| 223/223 [00:19<00:00, 10.82it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4515
(features.3.weight): 0.7350
(features.6.weight): 0.7942
(features.8.weight): 0.8629
(features.10.weight): 0.9141
(classifier.1.weight): 0.9466
(classifier.4.weight): 0.9761
(classifier.6.weight): 0.7582


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.0877, Accuracy: 6858/10000 (69%)

Epoch: 37


100%|██████████| 320/320 [00:28<00:00, 11.51it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4600
(features.3.weight): 0.7532
(features.6.weight): 0.8066
(features.8.weight): 0.8594
(features.10.weight): 0.9001
(classifier.1.weight): 0.9218
(classifier.4.weight): 0.9626
(classifier.6.weight): 0.7928


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 0.9351, Accuracy: 6975/10000 (70%)

Epoch: 38


100%|██████████| 320/320 [00:27<00:00, 11.62it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4510
(features.3.weight): 0.7389
(features.6.weight): 0.7940
(features.8.weight): 0.8504
(features.10.weight): 0.8961
(classifier.1.weight): 0.9229
(classifier.4.weight): 0.9631
(classifier.6.weight): 0.7820


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 0.9459, Accuracy: 7073/10000 (71%)

Epoch: 38


100%|██████████| 223/223 [00:19<00:00, 12.73it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4526
(features.3.weight): 0.7398
(features.6.weight): 0.7991
(features.8.weight): 0.8678
(features.10.weight): 0.9185
(classifier.1.weight): 0.9468
(classifier.4.weight): 0.9763
(classifier.6.weight): 0.7638


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0004, Accuracy: 6880/10000 (69%)

Epoch: 39


100%|██████████| 223/223 [00:19<00:00, 11.28it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4479
(features.3.weight): 0.7326
(features.6.weight): 0.7923
(features.8.weight): 0.8629
(features.10.weight): 0.9147
(classifier.1.weight): 0.9456
(classifier.4.weight): 0.9761
(classifier.6.weight): 0.7622


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0636, Accuracy: 6822/10000 (68%)

Epoch: 39


100%|██████████| 223/223 [00:19<00:00, 12.58it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4517
(features.3.weight): 0.7362
(features.6.weight): 0.7902
(features.8.weight): 0.8574
(features.10.weight): 0.9105
(classifier.1.weight): 0.9430
(classifier.4.weight): 0.9719
(classifier.6.weight): 0.7673


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 0.9888, Accuracy: 6937/10000 (69%)

Epoch: 40


100%|██████████| 320/320 [00:29<00:00, 11.00it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4474
(features.3.weight): 0.7322
(features.6.weight): 0.7877
(features.8.weight): 0.8458
(features.10.weight): 0.8927
(classifier.1.weight): 0.9203
(classifier.4.weight): 0.9625
(classifier.6.weight): 0.7806


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 0.9713, Accuracy: 6936/10000 (69%)

Epoch: 40


100%|██████████| 223/223 [00:19<00:00, 11.68it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4462
(features.3.weight): 0.7285
(features.6.weight): 0.7901
(features.8.weight): 0.8607
(features.10.weight): 0.9127
(classifier.1.weight): 0.9461
(classifier.4.weight): 0.9758
(classifier.6.weight): 0.7523


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0621, Accuracy: 6891/10000 (69%)

Epoch: 41


100%|██████████| 223/223 [00:19<00:00, 11.20it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4435
(features.3.weight): 0.7238
(features.6.weight): 0.7852
(features.8.weight): 0.8584
(features.10.weight): 0.9112
(classifier.1.weight): 0.9452
(classifier.4.weight): 0.9761
(classifier.6.weight): 0.7585


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0203, Accuracy: 6968/10000 (70%)

Epoch: 41


100%|██████████| 223/223 [00:19<00:00, 11.31it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4458
(features.3.weight): 0.7292
(features.6.weight): 0.7860
(features.8.weight): 0.8541
(features.10.weight): 0.9092
(classifier.1.weight): 0.9433
(classifier.4.weight): 0.9720
(classifier.6.weight): 0.7602


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.0233, Accuracy: 6976/10000 (70%)

Epoch: 42


100%|██████████| 320/320 [00:28<00:00, 11.18it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4420
(features.3.weight): 0.7249
(features.6.weight): 0.7820
(features.8.weight): 0.8426
(features.10.weight): 0.8927
(classifier.1.weight): 0.9224
(classifier.4.weight): 0.9628
(classifier.6.weight): 0.7708


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 0.9581, Accuracy: 7064/10000 (71%)

Epoch: 42


100%|██████████| 223/223 [00:19<00:00, 11.60it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4410
(features.3.weight): 0.7217
(features.6.weight): 0.7786
(features.8.weight): 0.8496
(features.10.weight): 0.9085
(classifier.1.weight): 0.9431
(classifier.4.weight): 0.9720
(classifier.6.weight): 0.7574


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.0471, Accuracy: 6946/10000 (69%)

Epoch: 43


100%|██████████| 320/320 [00:28<00:00, 11.08it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4377
(features.3.weight): 0.7170
(features.6.weight): 0.7749
(features.8.weight): 0.8385
(features.10.weight): 0.8915
(classifier.1.weight): 0.9219
(classifier.4.weight): 0.9626
(classifier.6.weight): 0.7697


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0673, Accuracy: 6798/10000 (68%)

Epoch: 43


100%|██████████| 223/223 [00:18<00:00, 13.24it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4410
(features.3.weight): 0.7202
(features.6.weight): 0.7827
(features.8.weight): 0.8542
(features.10.weight): 0.9085
(classifier.1.weight): 0.9449
(classifier.4.weight): 0.9758
(classifier.6.weight): 0.7494


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.0566, Accuracy: 6986/10000 (70%)

Epoch: 44


100%|██████████| 320/320 [00:27<00:00, 11.49it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4353
(features.3.weight): 0.7146
(features.6.weight): 0.7738
(features.8.weight): 0.8374
(features.10.weight): 0.8901
(classifier.1.weight): 0.9220
(classifier.4.weight): 0.9628
(classifier.6.weight): 0.7660


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 0.9978, Accuracy: 6953/10000 (70%)

Epoch: 44


100%|██████████| 223/223 [00:19<00:00, 11.25it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4363
(features.3.weight): 0.7143
(features.6.weight): 0.7771
(features.8.weight): 0.8508
(features.10.weight): 0.9074
(classifier.1.weight): 0.9452
(classifier.4.weight): 0.9758
(classifier.6.weight): 0.7490


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.0953, Accuracy: 6951/10000 (70%)

Epoch: 45


100%|██████████| 320/320 [00:28<00:00, 10.85it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4306
(features.3.weight): 0.7098
(features.6.weight): 0.7671
(features.8.weight): 0.8324
(features.10.weight): 0.8860
(classifier.1.weight): 0.9206
(classifier.4.weight): 0.9621
(classifier.6.weight): 0.7633


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0772, Accuracy: 6894/10000 (69%)

Epoch: 45


100%|██████████| 223/223 [00:19<00:00, 11.57it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4390
(features.3.weight): 0.7192
(features.6.weight): 0.7761
(features.8.weight): 0.8477
(features.10.weight): 0.9070
(classifier.1.weight): 0.9429
(classifier.4.weight): 0.9719
(classifier.6.weight): 0.7568


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.0205, Accuracy: 6997/10000 (70%)

Epoch: 46


100%|██████████| 320/320 [00:28<00:00, 11.20it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4304
(features.3.weight): 0.7068
(features.6.weight): 0.7678
(features.8.weight): 0.8338
(features.10.weight): 0.8884
(classifier.1.weight): 0.9200
(classifier.4.weight): 0.9624
(classifier.6.weight): 0.7654


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 0.9682, Accuracy: 7097/10000 (71%)

Epoch: 46


100%|██████████| 223/223 [00:19<00:00, 11.30it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4319
(features.3.weight): 0.7085
(features.6.weight): 0.7669
(features.8.weight): 0.8410
(features.10.weight): 0.9024
(classifier.1.weight): 0.9423
(classifier.4.weight): 0.9722
(classifier.6.weight): 0.7566


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0129, Accuracy: 6998/10000 (70%)

Epoch: 47


100%|██████████| 223/223 [00:19<00:00, 11.38it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4326
(features.3.weight): 0.7084
(features.6.weight): 0.7707
(features.8.weight): 0.8476
(features.10.weight): 0.9057
(classifier.1.weight): 0.9438
(classifier.4.weight): 0.9759
(classifier.6.weight): 0.7502


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0921, Accuracy: 6995/10000 (70%)

Epoch: 47


100%|██████████| 223/223 [00:19<00:00, 10.86it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4290
(features.3.weight): 0.7039
(features.6.weight): 0.7614
(features.8.weight): 0.8360
(features.10.weight): 0.8991
(classifier.1.weight): 0.9406
(classifier.4.weight): 0.9717
(classifier.6.weight): 0.7561


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0221, Accuracy: 6970/10000 (70%)

Epoch: 48


100%|██████████| 223/223 [00:19<00:00, 11.25it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4276
(features.3.weight): 0.7015
(features.6.weight): 0.7649
(features.8.weight): 0.8434
(features.10.weight): 0.9046
(classifier.1.weight): 0.9447
(classifier.4.weight): 0.9759
(classifier.6.weight): 0.7432


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0690, Accuracy: 6989/10000 (70%)

Epoch: 48


100%|██████████| 223/223 [00:20<00:00, 12.16it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4277
(features.3.weight): 0.7008
(features.6.weight): 0.7601
(features.8.weight): 0.8367
(features.10.weight): 0.9009
(classifier.1.weight): 0.9424
(classifier.4.weight): 0.9723
(classifier.6.weight): 0.7535


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0201, Accuracy: 6934/10000 (69%)

Epoch: 49


100%|██████████| 223/223 [00:19<00:00, 10.75it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4250
(features.3.weight): 0.6958
(features.6.weight): 0.7594
(features.8.weight): 0.8397
(features.10.weight): 0.9017
(classifier.1.weight): 0.9442
(classifier.4.weight): 0.9759
(classifier.6.weight): 0.7404


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.1289, Accuracy: 6997/10000 (70%)

Epoch: 49


100%|██████████| 320/320 [00:28<00:00, 11.18it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4273
(features.3.weight): 0.7023
(features.6.weight): 0.7622
(features.8.weight): 0.8304
(features.10.weight): 0.8861
(classifier.1.weight): 0.9199
(classifier.4.weight): 0.9620
(classifier.6.weight): 0.7574


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.0050, Accuracy: 7019/10000 (70%)

Epoch: 50


100%|██████████| 320/320 [00:28<00:00, 11.41it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4223
(features.3.weight): 0.6953
(features.6.weight): 0.7557
(features.8.weight): 0.8265
(features.10.weight): 0.8843
(classifier.1.weight): 0.9201
(classifier.4.weight): 0.9623
(classifier.6.weight): 0.7552


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0811, Accuracy: 6890/10000 (69%)

Epoch: 50


100%|██████████| 223/223 [00:20<00:00, 11.12it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4244
(features.3.weight): 0.6974
(features.6.weight): 0.7559
(features.8.weight): 0.8336
(features.10.weight): 0.8994
(classifier.1.weight): 0.9418
(classifier.4.weight): 0.9718
(classifier.6.weight): 0.7470


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0897, Accuracy: 6938/10000 (69%)

Epoch: 51


100%|██████████| 223/223 [00:20<00:00, 11.14it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4207
(features.3.weight): 0.6909
(features.6.weight): 0.7510
(features.8.weight): 0.8296
(features.10.weight): 0.8977
(classifier.1.weight): 0.9415
(classifier.4.weight): 0.9719
(classifier.6.weight): 0.7470


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.1058, Accuracy: 6935/10000 (69%)

Epoch: 51


100%|██████████| 223/223 [00:19<00:00, 10.43it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4242
(features.3.weight): 0.6929
(features.6.weight): 0.7583
(features.8.weight): 0.8401
(features.10.weight): 0.9036
(classifier.1.weight): 0.9439
(classifier.4.weight): 0.9759
(classifier.6.weight): 0.7486


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.1022, Accuracy: 6863/10000 (69%)

Epoch: 52


100%|██████████| 223/223 [00:19<00:00, 10.78it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4194
(features.3.weight): 0.6889
(features.6.weight): 0.7488
(features.8.weight): 0.8281
(features.10.weight): 0.8976
(classifier.1.weight): 0.9416
(classifier.4.weight): 0.9720
(classifier.6.weight): 0.7504


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.0513, Accuracy: 7010/10000 (70%)

Epoch: 52


100%|██████████| 320/320 [00:27<00:00, 11.44it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4195
(features.3.weight): 0.6897
(features.6.weight): 0.7518
(features.8.weight): 0.8245
(features.10.weight): 0.8842
(classifier.1.weight): 0.9215
(classifier.4.weight): 0.9625
(classifier.6.weight): 0.7528


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.0130, Accuracy: 7006/10000 (70%)

Epoch: 53


100%|██████████| 320/320 [00:27<00:00, 11.65it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4158
(features.3.weight): 0.6849
(features.6.weight): 0.7468
(features.8.weight): 0.8192
(features.10.weight): 0.8790
(classifier.1.weight): 0.9208
(classifier.4.weight): 0.9623
(classifier.6.weight): 0.7517


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0629, Accuracy: 7009/10000 (70%)

Epoch: 53


100%|██████████| 223/223 [00:19<00:00, 11.19it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4193
(features.3.weight): 0.6872
(features.6.weight): 0.7524
(features.8.weight): 0.8359
(features.10.weight): 0.9013
(classifier.1.weight): 0.9440
(classifier.4.weight): 0.9759
(classifier.6.weight): 0.7381


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.1248, Accuracy: 6873/10000 (69%)

Epoch: 54


100%|██████████| 223/223 [00:20<00:00, 10.75it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4156
(features.3.weight): 0.6803
(features.6.weight): 0.7468
(features.8.weight): 0.8323
(features.10.weight): 0.9006
(classifier.1.weight): 0.9439
(classifier.4.weight): 0.9759
(classifier.6.weight): 0.7379


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.1014, Accuracy: 7033/10000 (70%)

Epoch: 54


100%|██████████| 320/320 [00:28<00:00, 12.14it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4141
(features.3.weight): 0.6843
(features.6.weight): 0.7456
(features.8.weight): 0.8202
(features.10.weight): 0.8819
(classifier.1.weight): 0.9210
(classifier.4.weight): 0.9626
(classifier.6.weight): 0.7534


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0557, Accuracy: 6974/10000 (70%)

Epoch: 55


100%|██████████| 223/223 [00:19<00:00, 11.27it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4166
(features.3.weight): 0.6866
(features.6.weight): 0.7456
(features.8.weight): 0.8259
(features.10.weight): 0.8963
(classifier.1.weight): 0.9411
(classifier.4.weight): 0.9719
(classifier.6.weight): 0.7462


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0601, Accuracy: 7066/10000 (71%)

Epoch: 55


100%|██████████| 223/223 [00:19<00:00, 11.42it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4125
(features.3.weight): 0.6764
(features.6.weight): 0.7427
(features.8.weight): 0.8291
(features.10.weight): 0.8980
(classifier.1.weight): 0.9421
(classifier.4.weight): 0.9756
(classifier.6.weight): 0.7382


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.2176, Accuracy: 6894/10000 (69%)

Epoch: 56


100%|██████████| 223/223 [00:19<00:00, 11.07it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4114
(features.3.weight): 0.6794
(features.6.weight): 0.7387
(features.8.weight): 0.8217
(features.10.weight): 0.8949
(classifier.1.weight): 0.9420
(classifier.4.weight): 0.9718
(classifier.6.weight): 0.7429


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.1209, Accuracy: 6976/10000 (70%)

Epoch: 56


100%|██████████| 320/320 [00:28<00:00, 11.13it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4112
(features.3.weight): 0.6792
(features.6.weight): 0.7408
(features.8.weight): 0.8155
(features.10.weight): 0.8794
(classifier.1.weight): 0.9195
(classifier.4.weight): 0.9621
(classifier.6.weight): 0.7459


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0337, Accuracy: 7015/10000 (70%)

Epoch: 57


100%|██████████| 223/223 [00:19<00:00, 12.00it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4124
(features.3.weight): 0.6766
(features.6.weight): 0.7432
(features.8.weight): 0.8297
(features.10.weight): 0.9010
(classifier.1.weight): 0.9447
(classifier.4.weight): 0.9758
(classifier.6.weight): 0.7322


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0898, Accuracy: 7057/10000 (71%)

Epoch: 57


100%|██████████| 223/223 [00:19<00:00, 11.40it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4090
(features.3.weight): 0.6748
(features.6.weight): 0.7342
(features.8.weight): 0.8180
(features.10.weight): 0.8926
(classifier.1.weight): 0.9402
(classifier.4.weight): 0.9718
(classifier.6.weight): 0.7434


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0770, Accuracy: 7010/10000 (70%)

Epoch: 58


100%|██████████| 223/223 [00:20<00:00, 11.02it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4078
(features.3.weight): 0.6716
(features.6.weight): 0.7384
(features.8.weight): 0.8250
(features.10.weight): 0.8964
(classifier.1.weight): 0.9445
(classifier.4.weight): 0.9759
(classifier.6.weight): 0.7321


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.1055, Accuracy: 7011/10000 (70%)

Epoch: 58


100%|██████████| 223/223 [00:20<00:00, 11.05it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4089
(features.3.weight): 0.6737
(features.6.weight): 0.7333
(features.8.weight): 0.8178
(features.10.weight): 0.8936
(classifier.1.weight): 0.9408
(classifier.4.weight): 0.9716
(classifier.6.weight): 0.7394


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.1449, Accuracy: 6945/10000 (69%)

Epoch: 59


100%|██████████| 320/320 [00:28<00:00, 11.27it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4078
(features.3.weight): 0.6732
(features.6.weight): 0.7369
(features.8.weight): 0.8125
(features.10.weight): 0.8790
(classifier.1.weight): 0.9200
(classifier.4.weight): 0.9621
(classifier.6.weight): 0.7418


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0549, Accuracy: 7089/10000 (71%)

Epoch: 59


100%|██████████| 223/223 [00:19<00:00, 10.89it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4060
(features.3.weight): 0.6719
(features.6.weight): 0.7299
(features.8.weight): 0.8138
(features.10.weight): 0.8908
(classifier.1.weight): 0.9411
(classifier.4.weight): 0.9718
(classifier.6.weight): 0.7337


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.2000, Accuracy: 6947/10000 (69%)

Epoch: 60


100%|██████████| 320/320 [00:28<00:00, 11.35it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4034
(features.3.weight): 0.6666
(features.6.weight): 0.7304
(features.8.weight): 0.8088
(features.10.weight): 0.8772
(classifier.1.weight): 0.9210
(classifier.4.weight): 0.9625
(classifier.6.weight): 0.7401


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.1197, Accuracy: 6896/10000 (69%)

Epoch: 60


100%|██████████| 223/223 [00:20<00:00, 11.12it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4046
(features.3.weight): 0.6689
(features.6.weight): 0.7285
(features.8.weight): 0.8132
(features.10.weight): 0.8904
(classifier.1.weight): 0.9402
(classifier.4.weight): 0.9716
(classifier.6.weight): 0.7370


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.1300, Accuracy: 6953/10000 (70%)

Epoch: 61


100%|██████████| 223/223 [00:19<00:00, 10.86it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4025
(features.3.weight): 0.6663
(features.6.weight): 0.7260
(features.8.weight): 0.8113
(features.10.weight): 0.8899
(classifier.1.weight): 0.9402
(classifier.4.weight): 0.9716
(classifier.6.weight): 0.7329


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.1725, Accuracy: 7044/10000 (70%)

Epoch: 61


100%|██████████| 320/320 [00:29<00:00, 10.94it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4019
(features.3.weight): 0.6638
(features.6.weight): 0.7272
(features.8.weight): 0.8077
(features.10.weight): 0.8772
(classifier.1.weight): 0.9201
(classifier.4.weight): 0.9622
(classifier.6.weight): 0.7388


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.1130, Accuracy: 7018/10000 (70%)

Epoch: 62


100%|██████████| 320/320 [00:28<00:00, 11.95it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3989
(features.3.weight): 0.6590
(features.6.weight): 0.7233
(features.8.weight): 0.8015
(features.10.weight): 0.8715
(classifier.1.weight): 0.9186
(classifier.4.weight): 0.9621
(classifier.6.weight): 0.7355


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.1050, Accuracy: 7092/10000 (71%)

Epoch: 62


100%|██████████| 223/223 [00:20<00:00, 10.78it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3998
(features.3.weight): 0.6620
(features.6.weight): 0.7218
(features.8.weight): 0.8073
(features.10.weight): 0.8884
(classifier.1.weight): 0.9388
(classifier.4.weight): 0.9712
(classifier.6.weight): 0.7381


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.1832, Accuracy: 6987/10000 (70%)

Epoch: 63


100%|██████████| 223/223 [00:19<00:00, 11.60it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.4058
(features.3.weight): 0.6684
(features.6.weight): 0.7340
(features.8.weight): 0.8234
(features.10.weight): 0.8963
(classifier.1.weight): 0.9442
(classifier.4.weight): 0.9760
(classifier.6.weight): 0.7297


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.1619, Accuracy: 6906/10000 (69%)

Epoch: 63


100%|██████████| 223/223 [00:19<00:00, 10.78it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3976
(features.3.weight): 0.6571
(features.6.weight): 0.7150
(features.8.weight): 0.8022
(features.10.weight): 0.8836
(classifier.1.weight): 0.9379
(classifier.4.weight): 0.9713
(classifier.6.weight): 0.7391


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.1500, Accuracy: 6896/10000 (69%)

Epoch: 64


100%|██████████| 320/320 [00:27<00:00, 11.74it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3967
(features.3.weight): 0.6565
(features.6.weight): 0.7187
(features.8.weight): 0.8016
(features.10.weight): 0.8756
(classifier.1.weight): 0.9184
(classifier.4.weight): 0.9616
(classifier.6.weight): 0.7340


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.2653, Accuracy: 6843/10000 (68%)

Epoch: 64


100%|██████████| 223/223 [00:19<00:00, 10.31it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3991
(features.3.weight): 0.6608
(features.6.weight): 0.7213
(features.8.weight): 0.8090
(features.10.weight): 0.8894
(classifier.1.weight): 0.9413
(classifier.4.weight): 0.9719
(classifier.6.weight): 0.7330


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.1868, Accuracy: 6980/10000 (70%)

Epoch: 65


100%|██████████| 320/320 [00:28<00:00, 11.36it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3950
(features.3.weight): 0.6545
(features.6.weight): 0.7183
(features.8.weight): 0.8003
(features.10.weight): 0.8746
(classifier.1.weight): 0.9184
(classifier.4.weight): 0.9618
(classifier.6.weight): 0.7336


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.1775, Accuracy: 6972/10000 (70%)

Epoch: 65


100%|██████████| 223/223 [00:19<00:00, 11.21it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3996
(features.3.weight): 0.6576
(features.6.weight): 0.7253
(features.8.weight): 0.8173
(features.10.weight): 0.8951
(classifier.1.weight): 0.9449
(classifier.4.weight): 0.9762
(classifier.6.weight): 0.7283


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.2384, Accuracy: 6949/10000 (69%)

Epoch: 66


100%|██████████| 223/223 [00:20<00:00, 11.38it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3956
(features.3.weight): 0.6494
(features.6.weight): 0.7168
(features.8.weight): 0.8117
(features.10.weight): 0.8912
(classifier.1.weight): 0.9435
(classifier.4.weight): 0.9760
(classifier.6.weight): 0.7311


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.1372, Accuracy: 7012/10000 (70%)

Epoch: 66


100%|██████████| 320/320 [00:28<00:00, 11.05it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3948
(features.3.weight): 0.6542
(features.6.weight): 0.7182
(features.8.weight): 0.8007
(features.10.weight): 0.8762
(classifier.1.weight): 0.9209
(classifier.4.weight): 0.9622
(classifier.6.weight): 0.7313


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.1551, Accuracy: 6952/10000 (70%)

Epoch: 67


100%|██████████| 223/223 [00:20<00:00, 11.56it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3952
(features.3.weight): 0.6540
(features.6.weight): 0.7153
(features.8.weight): 0.8032
(features.10.weight): 0.8865
(classifier.1.weight): 0.9394
(classifier.4.weight): 0.9714
(classifier.6.weight): 0.7319


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.1803, Accuracy: 6991/10000 (70%)

Epoch: 67


100%|██████████| 320/320 [00:28<00:00, 11.33it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3919
(features.3.weight): 0.6508
(features.6.weight): 0.7148
(features.8.weight): 0.7979
(features.10.weight): 0.8724
(classifier.1.weight): 0.9192
(classifier.4.weight): 0.9617
(classifier.6.weight): 0.7297


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.2038, Accuracy: 7047/10000 (70%)

Epoch: 68


100%|██████████| 223/223 [00:19<00:00, 11.27it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3930
(features.3.weight): 0.6479
(features.6.weight): 0.7142
(features.8.weight): 0.8092
(features.10.weight): 0.8906
(classifier.1.weight): 0.9434
(classifier.4.weight): 0.9758
(classifier.6.weight): 0.7280


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.1635, Accuracy: 7028/10000 (70%)

Epoch: 68


100%|██████████| 320/320 [00:28<00:00, 11.06it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3908
(features.3.weight): 0.6503
(features.6.weight): 0.7144
(features.8.weight): 0.7972
(features.10.weight): 0.8753
(classifier.1.weight): 0.9208
(classifier.4.weight): 0.9620
(classifier.6.weight): 0.7302


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.1795, Accuracy: 6949/10000 (69%)

Epoch: 69


100%|██████████| 223/223 [00:20<00:00, 10.87it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3914
(features.3.weight): 0.6502
(features.6.weight): 0.7103
(features.8.weight): 0.7993
(features.10.weight): 0.8854
(classifier.1.weight): 0.9395
(classifier.4.weight): 0.9714
(classifier.6.weight): 0.7341


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.2159, Accuracy: 6973/10000 (70%)

Epoch: 69


100%|██████████| 320/320 [00:27<00:00, 11.44it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3887
(features.3.weight): 0.6475
(features.6.weight): 0.7115
(features.8.weight): 0.7961
(features.10.weight): 0.8744
(classifier.1.weight): 0.9191
(classifier.4.weight): 0.9618
(classifier.6.weight): 0.7279


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.1999, Accuracy: 7049/10000 (70%)

Epoch: 70


100%|██████████| 223/223 [00:19<00:00, 12.51it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3886
(features.3.weight): 0.6459
(features.6.weight): 0.7053
(features.8.weight): 0.7967
(features.10.weight): 0.8845
(classifier.1.weight): 0.9394
(classifier.4.weight): 0.9714
(classifier.6.weight): 0.7356


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.2161, Accuracy: 7052/10000 (71%)

Epoch: 70


100%|██████████| 320/320 [00:27<00:00, 11.66it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3873
(features.3.weight): 0.6450
(features.6.weight): 0.7094
(features.8.weight): 0.7933
(features.10.weight): 0.8732
(classifier.1.weight): 0.9188
(classifier.4.weight): 0.9616
(classifier.6.weight): 0.7266


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.2582, Accuracy: 6952/10000 (70%)

Epoch: 71


100%|██████████| 223/223 [00:19<00:00, 10.92it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3862
(features.3.weight): 0.6421
(features.6.weight): 0.7007
(features.8.weight): 0.7939
(features.10.weight): 0.8827
(classifier.1.weight): 0.9383
(classifier.4.weight): 0.9713
(classifier.6.weight): 0.7313


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.3209, Accuracy: 6897/10000 (69%)

Epoch: 71


100%|██████████| 320/320 [00:29<00:00, 11.29it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3850
(features.3.weight): 0.6419
(features.6.weight): 0.7053
(features.8.weight): 0.7901
(features.10.weight): 0.8690
(classifier.1.weight): 0.9174
(classifier.4.weight): 0.9616
(classifier.6.weight): 0.7277


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.2490, Accuracy: 6945/10000 (69%)

Epoch: 72


100%|██████████| 223/223 [00:19<00:00, 11.65it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3833
(features.3.weight): 0.6375
(features.6.weight): 0.6967
(features.8.weight): 0.7900
(features.10.weight): 0.8800
(classifier.1.weight): 0.9381
(classifier.4.weight): 0.9715
(classifier.6.weight): 0.7339


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.2808, Accuracy: 6912/10000 (69%)

Epoch: 72


100%|██████████| 320/320 [00:28<00:00, 11.26it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3828
(features.3.weight): 0.6383
(features.6.weight): 0.7033
(features.8.weight): 0.7892
(features.10.weight): 0.8703
(classifier.1.weight): 0.9179
(classifier.4.weight): 0.9614
(classifier.6.weight): 0.7270


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.2626, Accuracy: 7029/10000 (70%)

Epoch: 73


100%|██████████| 223/223 [00:19<00:00, 11.19it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3822
(features.3.weight): 0.6350
(features.6.weight): 0.6941
(features.8.weight): 0.7871
(features.10.weight): 0.8787
(classifier.1.weight): 0.9379
(classifier.4.weight): 0.9713
(classifier.6.weight): 0.7337


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.2869, Accuracy: 6909/10000 (69%)

Epoch: 73


100%|██████████| 223/223 [00:19<00:00, 11.51it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3901
(features.3.weight): 0.6449
(features.6.weight): 0.7112
(features.8.weight): 0.8071
(features.10.weight): 0.8903
(classifier.1.weight): 0.9431
(classifier.4.weight): 0.9755
(classifier.6.weight): 0.7277


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.1995, Accuracy: 6969/10000 (70%)

Epoch: 74


100%|██████████| 223/223 [00:19<00:00, 10.81it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3838
(features.3.weight): 0.6393
(features.6.weight): 0.6986
(features.8.weight): 0.7918
(features.10.weight): 0.8834
(classifier.1.weight): 0.9407
(classifier.4.weight): 0.9716
(classifier.6.weight): 0.7288


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.2663, Accuracy: 6993/10000 (70%)

Epoch: 74


100%|██████████| 320/320 [00:28<00:00, 11.30it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3805
(features.3.weight): 0.6353
(features.6.weight): 0.6996
(features.8.weight): 0.7855
(features.10.weight): 0.8680
(classifier.1.weight): 0.9169
(classifier.4.weight): 0.9616
(classifier.6.weight): 0.7271


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.2335, Accuracy: 6963/10000 (70%)

Epoch: 75


100%|██████████| 223/223 [00:19<00:00, 11.24it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3843
(features.3.weight): 0.6344
(features.6.weight): 0.7033
(features.8.weight): 0.8012
(features.10.weight): 0.8882
(classifier.1.weight): 0.9426
(classifier.4.weight): 0.9757
(classifier.6.weight): 0.7269


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.1307, Accuracy: 7007/10000 (70%)

Epoch: 75


100%|██████████| 223/223 [00:19<00:00, 11.46it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3810
(features.3.weight): 0.6336
(features.6.weight): 0.6930
(features.8.weight): 0.7878
(features.10.weight): 0.8824
(classifier.1.weight): 0.9401
(classifier.4.weight): 0.9715
(classifier.6.weight): 0.7261


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.3637, Accuracy: 6953/10000 (70%)

Epoch: 76


100%|██████████| 223/223 [00:19<00:00, 12.31it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3806
(features.3.weight): 0.6329
(features.6.weight): 0.6930
(features.8.weight): 0.7893
(features.10.weight): 0.8840
(classifier.1.weight): 0.9403
(classifier.4.weight): 0.9716
(classifier.6.weight): 0.7306


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.2559, Accuracy: 6976/10000 (70%)

Epoch: 76


100%|██████████| 320/320 [00:28<00:00, 11.19it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3805
(features.3.weight): 0.6361
(features.6.weight): 0.6996
(features.8.weight): 0.7874
(features.10.weight): 0.8715
(classifier.1.weight): 0.9189
(classifier.4.weight): 0.9618
(classifier.6.weight): 0.7259


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.3047, Accuracy: 6940/10000 (69%)

Epoch: 77


100%|██████████| 223/223 [00:19<00:00, 11.20it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3815
(features.3.weight): 0.6297
(features.6.weight): 0.6983
(features.8.weight): 0.7982
(features.10.weight): 0.8873
(classifier.1.weight): 0.9444
(classifier.4.weight): 0.9760
(classifier.6.weight): 0.7227


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.1938, Accuracy: 6949/10000 (69%)

Epoch: 77


100%|██████████| 320/320 [00:29<00:00, 10.56it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3777
(features.3.weight): 0.6337
(features.6.weight): 0.6982
(features.8.weight): 0.7851
(features.10.weight): 0.8717
(classifier.1.weight): 0.9200
(classifier.4.weight): 0.9620
(classifier.6.weight): 0.7264


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.2174, Accuracy: 6962/10000 (70%)

Epoch: 78


100%|██████████| 223/223 [00:20<00:00, 11.15it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3783
(features.3.weight): 0.6318
(features.6.weight): 0.6911
(features.8.weight): 0.7866
(features.10.weight): 0.8827
(classifier.1.weight): 0.9397
(classifier.4.weight): 0.9715
(classifier.6.weight): 0.7267


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.3054, Accuracy: 6956/10000 (70%)

Epoch: 78


100%|██████████| 320/320 [00:28<00:00, 11.24it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3778
(features.3.weight): 0.6311
(features.6.weight): 0.6956
(features.8.weight): 0.7851
(features.10.weight): 0.8720
(classifier.1.weight): 0.9199
(classifier.4.weight): 0.9618
(classifier.6.weight): 0.7251


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.3724, Accuracy: 6906/10000 (69%)

Epoch: 79


100%|██████████| 320/320 [00:28<00:00, 11.11it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3753
(features.3.weight): 0.6289
(features.6.weight): 0.6941
(features.8.weight): 0.7825
(features.10.weight): 0.8705
(classifier.1.weight): 0.9190
(classifier.4.weight): 0.9620
(classifier.6.weight): 0.7258


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.2118, Accuracy: 7003/10000 (70%)

Epoch: 79


100%|██████████| 223/223 [00:19<00:00, 11.44it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3784
(features.3.weight): 0.6253
(features.6.weight): 0.6930
(features.8.weight): 0.7947
(features.10.weight): 0.8876
(classifier.1.weight): 0.9433
(classifier.4.weight): 0.9757
(classifier.6.weight): 0.7246


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.1800, Accuracy: 6984/10000 (70%)

Epoch: 80


100%|██████████| 320/320 [00:27<00:00, 11.44it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3744
(features.3.weight): 0.6270
(features.6.weight): 0.6928
(features.8.weight): 0.7808
(features.10.weight): 0.8690
(classifier.1.weight): 0.9181
(classifier.4.weight): 0.9614
(classifier.6.weight): 0.7270


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.3516, Accuracy: 6937/10000 (69%)

Epoch: 80


100%|██████████| 223/223 [00:19<00:00, 11.35it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3764
(features.3.weight): 0.6291
(features.6.weight): 0.6873
(features.8.weight): 0.7835
(features.10.weight): 0.8827
(classifier.1.weight): 0.9394
(classifier.4.weight): 0.9716
(classifier.6.weight): 0.7298


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.3191, Accuracy: 6960/10000 (70%)

Epoch: 81


100%|██████████| 223/223 [00:19<00:00, 11.30it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3742
(features.3.weight): 0.6250
(features.6.weight): 0.6839
(features.8.weight): 0.7803
(features.10.weight): 0.8804
(classifier.1.weight): 0.9390
(classifier.4.weight): 0.9715
(classifier.6.weight): 0.7306


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.2601, Accuracy: 6979/10000 (70%)

Epoch: 81


100%|██████████| 223/223 [00:19<00:00, 12.96it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3755
(features.3.weight): 0.6234
(features.6.weight): 0.6915
(features.8.weight): 0.7924
(features.10.weight): 0.8854
(classifier.1.weight): 0.9420
(classifier.4.weight): 0.9755
(classifier.6.weight): 0.7248


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.1613, Accuracy: 7075/10000 (71%)

Epoch: 82


100%|██████████| 223/223 [00:20<00:00, 11.07it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3729
(features.3.weight): 0.6182
(features.6.weight): 0.6868
(features.8.weight): 0.7890
(features.10.weight): 0.8860
(classifier.1.weight): 0.9435
(classifier.4.weight): 0.9760
(classifier.6.weight): 0.7223


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.1651, Accuracy: 7082/10000 (71%)

Epoch: 82


100%|██████████| 320/320 [00:27<00:00, 11.71it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3735
(features.3.weight): 0.6256
(features.6.weight): 0.6898
(features.8.weight): 0.7787
(features.10.weight): 0.8685
(classifier.1.weight): 0.9184
(classifier.4.weight): 0.9617
(classifier.6.weight): 0.7229


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.3988, Accuracy: 6961/10000 (70%)

Epoch: 83


100%|██████████| 223/223 [00:20<00:00, 11.36it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3714
(features.3.weight): 0.6179
(features.6.weight): 0.6851
(features.8.weight): 0.7882
(features.10.weight): 0.8839
(classifier.1.weight): 0.9430
(classifier.4.weight): 0.9758
(classifier.6.weight): 0.7252


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.1965, Accuracy: 7054/10000 (71%)

Epoch: 83


100%|██████████| 320/320 [00:28<00:00, 11.40it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3705
(features.3.weight): 0.6224
(features.6.weight): 0.6870
(features.8.weight): 0.7766
(features.10.weight): 0.8682
(classifier.1.weight): 0.9187
(classifier.4.weight): 0.9616
(classifier.6.weight): 0.7224


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.3816, Accuracy: 6969/10000 (70%)

Epoch: 84


100%|██████████| 320/320 [00:28<00:00, 11.33it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3690
(features.3.weight): 0.6207
(features.6.weight): 0.6857
(features.8.weight): 0.7767
(features.10.weight): 0.8695
(classifier.1.weight): 0.9180
(classifier.4.weight): 0.9616
(classifier.6.weight): 0.7241


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.3515, Accuracy: 6909/10000 (69%)

Epoch: 84


100%|██████████| 223/223 [00:18<00:00, 11.08it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3692
(features.3.weight): 0.6156
(features.6.weight): 0.6837
(features.8.weight): 0.7856
(features.10.weight): 0.8839
(classifier.1.weight): 0.9427
(classifier.4.weight): 0.9757
(classifier.6.weight): 0.7240


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.2753, Accuracy: 6928/10000 (69%)

Epoch: 85


100%|██████████| 223/223 [00:19<00:00, 11.43it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3736
(features.3.weight): 0.6220
(features.6.weight): 0.6812
(features.8.weight): 0.7796
(features.10.weight): 0.8811
(classifier.1.weight): 0.9403
(classifier.4.weight): 0.9716
(classifier.6.weight): 0.7273


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.3150, Accuracy: 7021/10000 (70%)

Epoch: 85


100%|██████████| 320/320 [00:28<00:00, 11.15it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3665
(features.3.weight): 0.6185
(features.6.weight): 0.6828
(features.8.weight): 0.7726
(features.10.weight): 0.8651
(classifier.1.weight): 0.9173
(classifier.4.weight): 0.9615
(classifier.6.weight): 0.7244


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4305, Accuracy: 6795/10000 (68%)

Epoch: 86


100%|██████████| 223/223 [00:19<00:00, 11.54it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3691
(features.3.weight): 0.6170
(features.6.weight): 0.6754
(features.8.weight): 0.7745
(features.10.weight): 0.8795
(classifier.1.weight): 0.9396
(classifier.4.weight): 0.9714
(classifier.6.weight): 0.7315


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.2948, Accuracy: 7005/10000 (70%)

Epoch: 86


100%|██████████| 320/320 [00:28<00:00, 11.33it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3675
(features.3.weight): 0.6184
(features.6.weight): 0.6828
(features.8.weight): 0.7748
(features.10.weight): 0.8698
(classifier.1.weight): 0.9195
(classifier.4.weight): 0.9615
(classifier.6.weight): 0.7156


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4077, Accuracy: 7051/10000 (71%)

Epoch: 87


100%|██████████| 223/223 [00:19<00:00, 11.31it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3671
(features.3.weight): 0.6150
(features.6.weight): 0.6729
(features.8.weight): 0.7732
(features.10.weight): 0.8787
(classifier.1.weight): 0.9388
(classifier.4.weight): 0.9714
(classifier.6.weight): 0.7273


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.3252, Accuracy: 6920/10000 (69%)

Epoch: 87


100%|██████████| 320/320 [00:28<00:00, 11.25it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3661
(features.3.weight): 0.6172
(features.6.weight): 0.6808
(features.8.weight): 0.7723
(features.10.weight): 0.8668
(classifier.1.weight): 0.9173
(classifier.4.weight): 0.9615
(classifier.6.weight): 0.7228


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.3209, Accuracy: 6949/10000 (69%)

Epoch: 88


100%|██████████| 320/320 [00:28<00:00, 11.13it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3642
(features.3.weight): 0.6155
(features.6.weight): 0.6801
(features.8.weight): 0.7711
(features.10.weight): 0.8677
(classifier.1.weight): 0.9178
(classifier.4.weight): 0.9615
(classifier.6.weight): 0.7231


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.3816, Accuracy: 6826/10000 (68%)

Epoch: 88


100%|██████████| 223/223 [00:19<00:00, 11.45it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3674
(features.3.weight): 0.6139
(features.6.weight): 0.6818
(features.8.weight): 0.7840
(features.10.weight): 0.8840
(classifier.1.weight): 0.9417
(classifier.4.weight): 0.9755
(classifier.6.weight): 0.7182


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.2766, Accuracy: 6964/10000 (70%)

Epoch: 89


100%|██████████| 223/223 [00:19<00:00, 11.04it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3655
(features.3.weight): 0.6139
(features.6.weight): 0.6714
(features.8.weight): 0.7715
(features.10.weight): 0.8775
(classifier.1.weight): 0.9394
(classifier.4.weight): 0.9716
(classifier.6.weight): 0.7261


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4355, Accuracy: 6860/10000 (69%)

Epoch: 89


100%|██████████| 223/223 [00:19<00:00, 10.40it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3646
(features.3.weight): 0.6100
(features.6.weight): 0.6766
(features.8.weight): 0.7814
(features.10.weight): 0.8834
(classifier.1.weight): 0.9417
(classifier.4.weight): 0.9756
(classifier.6.weight): 0.7262


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.2521, Accuracy: 6996/10000 (70%)

Epoch: 90


100%|██████████| 320/320 [00:28<00:00, 11.22it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3647
(features.3.weight): 0.6151
(features.6.weight): 0.6784
(features.8.weight): 0.7727
(features.10.weight): 0.8709
(classifier.1.weight): 0.9178
(classifier.4.weight): 0.9617
(classifier.6.weight): 0.7206


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.2860, Accuracy: 6912/10000 (69%)

Epoch: 90


100%|██████████| 223/223 [00:19<00:00, 11.05it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3635
(features.3.weight): 0.6085
(features.6.weight): 0.6671
(features.8.weight): 0.7700
(features.10.weight): 0.8782
(classifier.1.weight): 0.9395
(classifier.4.weight): 0.9715
(classifier.6.weight): 0.7260


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.3742, Accuracy: 6988/10000 (70%)

Epoch: 91


100%|██████████| 320/320 [00:29<00:00, 11.78it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3623
(features.3.weight): 0.6117
(features.6.weight): 0.6749
(features.8.weight): 0.7693
(features.10.weight): 0.8695
(classifier.1.weight): 0.9186
(classifier.4.weight): 0.9618
(classifier.6.weight): 0.7212


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.3283, Accuracy: 7018/10000 (70%)

Epoch: 91


100%|██████████| 223/223 [00:19<00:00, 11.16it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3639
(features.3.weight): 0.6072
(features.6.weight): 0.6740
(features.8.weight): 0.7799
(features.10.weight): 0.8839
(classifier.1.weight): 0.9434
(classifier.4.weight): 0.9758
(classifier.6.weight): 0.7243


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.2794, Accuracy: 6979/10000 (70%)

Epoch: 92


100%|██████████| 223/223 [00:19<00:00, 11.41it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3626
(features.3.weight): 0.6092
(features.6.weight): 0.6665
(features.8.weight): 0.7693
(features.10.weight): 0.8788
(classifier.1.weight): 0.9388
(classifier.4.weight): 0.9713
(classifier.6.weight): 0.7279


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.3360, Accuracy: 6913/10000 (69%)

Epoch: 92


100%|██████████| 320/320 [00:27<00:00, 11.59it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3607
(features.3.weight): 0.6101
(features.6.weight): 0.6742
(features.8.weight): 0.7683
(features.10.weight): 0.8692
(classifier.1.weight): 0.9187
(classifier.4.weight): 0.9615
(classifier.6.weight): 0.7189


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.3916, Accuracy: 6913/10000 (69%)

Epoch: 93


100%|██████████| 223/223 [00:20<00:00, 11.01it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3599
(features.3.weight): 0.6079
(features.6.weight): 0.6651
(features.8.weight): 0.7676
(features.10.weight): 0.8795
(classifier.1.weight): 0.9394
(classifier.4.weight): 0.9714
(classifier.6.weight): 0.7254


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.3815, Accuracy: 7017/10000 (70%)

Epoch: 93


100%|██████████| 223/223 [00:19<00:00, 11.33it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3612
(features.3.weight): 0.6039
(features.6.weight): 0.6708
(features.8.weight): 0.7781
(features.10.weight): 0.8847
(classifier.1.weight): 0.9433
(classifier.4.weight): 0.9758
(classifier.6.weight): 0.7223


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.3214, Accuracy: 6908/10000 (69%)

Epoch: 94


100%|██████████| 320/320 [00:27<00:00, 11.46it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3601
(features.3.weight): 0.6096
(features.6.weight): 0.6739
(features.8.weight): 0.7673
(features.10.weight): 0.8677
(classifier.1.weight): 0.9170
(classifier.4.weight): 0.9615
(classifier.6.weight): 0.7195


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.3822, Accuracy: 6915/10000 (69%)

Epoch: 94


100%|██████████| 223/223 [00:20<00:00, 11.51it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3600
(features.3.weight): 0.6025
(features.6.weight): 0.6689
(features.8.weight): 0.7759
(features.10.weight): 0.8826
(classifier.1.weight): 0.9434
(classifier.4.weight): 0.9758
(classifier.6.weight): 0.7201


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.3534, Accuracy: 6930/10000 (69%)

Epoch: 95


100%|██████████| 320/320 [00:28<00:00, 11.31it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3582
(features.3.weight): 0.6076
(features.6.weight): 0.6707
(features.8.weight): 0.7662
(features.10.weight): 0.8674
(classifier.1.weight): 0.9184
(classifier.4.weight): 0.9618
(classifier.6.weight): 0.7195


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4185, Accuracy: 6911/10000 (69%)

Epoch: 95


100%|██████████| 223/223 [00:19<00:00, 11.42it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3582
(features.3.weight): 0.6014
(features.6.weight): 0.6671
(features.8.weight): 0.7753
(features.10.weight): 0.8855
(classifier.1.weight): 0.9427
(classifier.4.weight): 0.9756
(classifier.6.weight): 0.7165


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4088, Accuracy: 6896/10000 (69%)

Epoch: 96


100%|██████████| 223/223 [00:19<00:00, 11.08it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3593
(features.3.weight): 0.6052
(features.6.weight): 0.6636
(features.8.weight): 0.7659
(features.10.weight): 0.8781
(classifier.1.weight): 0.9404
(classifier.4.weight): 0.9717
(classifier.6.weight): 0.7250


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.3577, Accuracy: 7016/10000 (70%)

Epoch: 96


100%|██████████| 320/320 [00:29<00:00, 11.01it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3568
(features.3.weight): 0.6052
(features.6.weight): 0.6687
(features.8.weight): 0.7645
(features.10.weight): 0.8673
(classifier.1.weight): 0.9180
(classifier.4.weight): 0.9617
(classifier.6.weight): 0.7192


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.3253, Accuracy: 6943/10000 (69%)

Epoch: 97


100%|██████████| 223/223 [00:19<00:00, 11.73it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3562
(features.3.weight): 0.6012
(features.6.weight): 0.6596
(features.8.weight): 0.7635
(features.10.weight): 0.8788
(classifier.1.weight): 0.9400
(classifier.4.weight): 0.9716
(classifier.6.weight): 0.7296


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.3135, Accuracy: 7013/10000 (70%)

Epoch: 97


100%|██████████| 223/223 [00:19<00:00, 11.89it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3572
(features.3.weight): 0.6000
(features.6.weight): 0.6661
(features.8.weight): 0.7730
(features.10.weight): 0.8809
(classifier.1.weight): 0.9418
(classifier.4.weight): 0.9754
(classifier.6.weight): 0.7215


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.2963, Accuracy: 7002/10000 (70%)

Epoch: 98


100%|██████████| 223/223 [00:19<00:00, 13.10it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3553
(features.3.weight): 0.5998
(features.6.weight): 0.6577
(features.8.weight): 0.7638
(features.10.weight): 0.8804
(classifier.1.weight): 0.9413
(classifier.4.weight): 0.9718
(classifier.6.weight): 0.7241


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4111, Accuracy: 6902/10000 (69%)

Epoch: 98


100%|██████████| 223/223 [00:19<00:00, 11.39it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3547
(features.3.weight): 0.5964
(features.6.weight): 0.6639
(features.8.weight): 0.7721
(features.10.weight): 0.8846
(classifier.1.weight): 0.9432
(classifier.4.weight): 0.9756
(classifier.6.weight): 0.7204


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.2806, Accuracy: 6989/10000 (70%)

Epoch: 99


100%|██████████| 223/223 [00:19<00:00, 10.66it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3538
(features.3.weight): 0.6006
(features.6.weight): 0.6583
(features.8.weight): 0.7621
(features.10.weight): 0.8798
(classifier.1.weight): 0.9412
(classifier.4.weight): 0.9718
(classifier.6.weight): 0.7237


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.4704, Accuracy: 7020/10000 (70%)

Epoch: 99


100%|██████████| 320/320 [00:28<00:00, 11.30it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3563
(features.3.weight): 0.6047
(features.6.weight): 0.6697
(features.8.weight): 0.7648
(features.10.weight): 0.8684
(classifier.1.weight): 0.9184
(classifier.4.weight): 0.9616
(classifier.6.weight): 0.7174


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4093, Accuracy: 6980/10000 (70%)

Epoch: 100


100%|██████████| 223/223 [00:19<00:00, 11.48it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3538
(features.3.weight): 0.5996
(features.6.weight): 0.6574
(features.8.weight): 0.7617
(features.10.weight): 0.8809
(classifier.1.weight): 0.9414
(classifier.4.weight): 0.9719
(classifier.6.weight): 0.7263


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.3458, Accuracy: 6936/10000 (69%)

Epoch: 100


100%|██████████| 320/320 [00:29<00:00, 10.07it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3532
(features.3.weight): 0.6030
(features.6.weight): 0.6667
(features.8.weight): 0.7627
(features.10.weight): 0.8686
(classifier.1.weight): 0.9195
(classifier.4.weight): 0.9619
(classifier.6.weight): 0.7166


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.3768, Accuracy: 6926/10000 (69%)

Epoch: 101


100%|██████████| 320/320 [00:28<00:00, 11.35it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3506
(features.3.weight): 0.5999
(features.6.weight): 0.6641
(features.8.weight): 0.7615
(features.10.weight): 0.8675
(classifier.1.weight): 0.9185
(classifier.4.weight): 0.9615
(classifier.6.weight): 0.7155


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4998, Accuracy: 6903/10000 (69%)

Epoch: 101


100%|██████████| 223/223 [00:19<00:00, 11.54it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3521
(features.3.weight): 0.5980
(features.6.weight): 0.6549
(features.8.weight): 0.7594
(features.10.weight): 0.8758
(classifier.1.weight): 0.9383
(classifier.4.weight): 0.9715
(classifier.6.weight): 0.7286


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.3994, Accuracy: 6992/10000 (70%)

Epoch: 102


100%|██████████| 223/223 [00:19<00:00, 11.73it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3500
(features.3.weight): 0.5965
(features.6.weight): 0.6537
(features.8.weight): 0.7592
(features.10.weight): 0.8771
(classifier.1.weight): 0.9389
(classifier.4.weight): 0.9716
(classifier.6.weight): 0.7265


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4240, Accuracy: 6992/10000 (70%)

Epoch: 102


100%|██████████| 223/223 [00:19<00:00, 11.42it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3541
(features.3.weight): 0.5964
(features.6.weight): 0.6623
(features.8.weight): 0.7712
(features.10.weight): 0.8839
(classifier.1.weight): 0.9437
(classifier.4.weight): 0.9759
(classifier.6.weight): 0.7199


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.3859, Accuracy: 6955/10000 (70%)

Epoch: 103


100%|██████████| 320/320 [00:28<00:00, 11.12it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3495
(features.3.weight): 0.5981
(features.6.weight): 0.6629
(features.8.weight): 0.7600
(features.10.weight): 0.8665
(classifier.1.weight): 0.9181
(classifier.4.weight): 0.9616
(classifier.6.weight): 0.7193


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4637, Accuracy: 6949/10000 (69%)

Epoch: 103


100%|██████████| 223/223 [00:20<00:00, 11.10it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3508
(features.3.weight): 0.5932
(features.6.weight): 0.6584
(features.8.weight): 0.7695
(features.10.weight): 0.8842
(classifier.1.weight): 0.9429
(classifier.4.weight): 0.9759
(classifier.6.weight): 0.7213


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.2632, Accuracy: 7007/10000 (70%)

Epoch: 104


100%|██████████| 223/223 [00:19<00:00, 11.39it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3506
(features.3.weight): 0.5954
(features.6.weight): 0.6546
(features.8.weight): 0.7617
(features.10.weight): 0.8806
(classifier.1.weight): 0.9399
(classifier.4.weight): 0.9716
(classifier.6.weight): 0.7235


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.3493, Accuracy: 7006/10000 (70%)

Epoch: 104


100%|██████████| 223/223 [00:19<00:00, 11.23it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3496
(features.3.weight): 0.5905
(features.6.weight): 0.6551
(features.8.weight): 0.7666
(features.10.weight): 0.8819
(classifier.1.weight): 0.9415
(classifier.4.weight): 0.9757
(classifier.6.weight): 0.7195


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.2912, Accuracy: 7027/10000 (70%)

Epoch: 105


100%|██████████| 223/223 [00:18<00:00, 11.47it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3489
(features.3.weight): 0.5925
(features.6.weight): 0.6511
(features.8.weight): 0.7587
(features.10.weight): 0.8812
(classifier.1.weight): 0.9402
(classifier.4.weight): 0.9719
(classifier.6.weight): 0.7257


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.3583, Accuracy: 7013/10000 (70%)

Epoch: 105


100%|██████████| 223/223 [00:19<00:00, 11.38it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3492
(features.3.weight): 0.5908
(features.6.weight): 0.6559
(features.8.weight): 0.7686
(features.10.weight): 0.8836
(classifier.1.weight): 0.9439
(classifier.4.weight): 0.9758
(classifier.6.weight): 0.7152


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.3578, Accuracy: 7024/10000 (70%)

Epoch: 106


100%|██████████| 320/320 [00:28<00:00, 11.42it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3487
(features.3.weight): 0.5955
(features.6.weight): 0.6594
(features.8.weight): 0.7585
(features.10.weight): 0.8662
(classifier.1.weight): 0.9191
(classifier.4.weight): 0.9618
(classifier.6.weight): 0.7197


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4726, Accuracy: 6950/10000 (70%)

Epoch: 106


100%|██████████| 223/223 [00:19<00:00, 10.70it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3480
(features.3.weight): 0.5923
(features.6.weight): 0.6499
(features.8.weight): 0.7578
(features.10.weight): 0.8795
(classifier.1.weight): 0.9403
(classifier.4.weight): 0.9717
(classifier.6.weight): 0.7217


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4632, Accuracy: 6984/10000 (70%)

Epoch: 107


100%|██████████| 223/223 [00:19<00:00, 11.20it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3465
(features.3.weight): 0.5914
(features.6.weight): 0.6479
(features.8.weight): 0.7561
(features.10.weight): 0.8801
(classifier.1.weight): 0.9407
(classifier.4.weight): 0.9718
(classifier.6.weight): 0.7228


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.4570, Accuracy: 6975/10000 (70%)

Epoch: 107


100%|██████████| 320/320 [00:28<00:00, 11.17it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3462
(features.3.weight): 0.5927
(features.6.weight): 0.6569
(features.8.weight): 0.7579
(features.10.weight): 0.8689
(classifier.1.weight): 0.9208
(classifier.4.weight): 0.9620
(classifier.6.weight): 0.7182


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.5419, Accuracy: 6842/10000 (68%)

Epoch: 108


100%|██████████| 320/320 [00:28<00:00, 11.12it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3450
(features.3.weight): 0.5929
(features.6.weight): 0.6567
(features.8.weight): 0.7566
(features.10.weight): 0.8658
(classifier.1.weight): 0.9195
(classifier.4.weight): 0.9618
(classifier.6.weight): 0.7213


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.3594, Accuracy: 7033/10000 (70%)

Epoch: 108


100%|██████████| 223/223 [00:20<00:00, 10.95it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3452
(features.3.weight): 0.5902
(features.6.weight): 0.6475
(features.8.weight): 0.7552
(features.10.weight): 0.8793
(classifier.1.weight): 0.9403
(classifier.4.weight): 0.9717
(classifier.6.weight): 0.7239


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4397, Accuracy: 7002/10000 (70%)

Epoch: 109


100%|██████████| 223/223 [00:20<00:00, 11.02it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3437
(features.3.weight): 0.5884
(features.6.weight): 0.6454
(features.8.weight): 0.7531
(features.10.weight): 0.8774
(classifier.1.weight): 0.9389
(classifier.4.weight): 0.9715
(classifier.6.weight): 0.7282


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4221, Accuracy: 6893/10000 (69%)

Epoch: 109


100%|██████████| 223/223 [00:19<00:00, 11.35it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3475
(features.3.weight): 0.5909
(features.6.weight): 0.6548
(features.8.weight): 0.7674
(features.10.weight): 0.8845
(classifier.1.weight): 0.9439
(classifier.4.weight): 0.9756
(classifier.6.weight): 0.7116


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4681, Accuracy: 6943/10000 (69%)

Epoch: 110


100%|██████████| 223/223 [00:20<00:00, 10.69it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3443
(features.3.weight): 0.5864
(features.6.weight): 0.6504
(features.8.weight): 0.7650
(features.10.weight): 0.8840
(classifier.1.weight): 0.9440
(classifier.4.weight): 0.9758
(classifier.6.weight): 0.7166


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.3692, Accuracy: 6955/10000 (70%)

Epoch: 110


100%|██████████| 223/223 [00:19<00:00, 11.19it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3445
(features.3.weight): 0.5893
(features.6.weight): 0.6463
(features.8.weight): 0.7557
(features.10.weight): 0.8803
(classifier.1.weight): 0.9404
(classifier.4.weight): 0.9717
(classifier.6.weight): 0.7231


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4772, Accuracy: 6806/10000 (68%)

Epoch: 111


100%|██████████| 223/223 [00:20<00:00, 10.98it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3434
(features.3.weight): 0.5879
(features.6.weight): 0.6451
(features.8.weight): 0.7540
(features.10.weight): 0.8792
(classifier.1.weight): 0.9409
(classifier.4.weight): 0.9720
(classifier.6.weight): 0.7215


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4506, Accuracy: 6948/10000 (69%)

Epoch: 111


100%|██████████| 223/223 [00:19<00:00, 11.70it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3430
(features.3.weight): 0.5866
(features.6.weight): 0.6501
(features.8.weight): 0.7640
(features.10.weight): 0.8830
(classifier.1.weight): 0.9451
(classifier.4.weight): 0.9759
(classifier.6.weight): 0.7137


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.3886, Accuracy: 6961/10000 (70%)

Epoch: 112


100%|██████████| 223/223 [00:19<00:00, 11.37it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3416
(features.3.weight): 0.5858
(features.6.weight): 0.6496
(features.8.weight): 0.7621
(features.10.weight): 0.8830
(classifier.1.weight): 0.9440
(classifier.4.weight): 0.9762
(classifier.6.weight): 0.7160


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.3318, Accuracy: 6918/10000 (69%)

Epoch: 112


100%|██████████| 223/223 [00:19<00:00, 11.28it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3420
(features.3.weight): 0.5883
(features.6.weight): 0.6451
(features.8.weight): 0.7549
(features.10.weight): 0.8809
(classifier.1.weight): 0.9415
(classifier.4.weight): 0.9720
(classifier.6.weight): 0.7155


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.5643, Accuracy: 6990/10000 (70%)

Epoch: 113


100%|██████████| 223/223 [00:19<00:00, 11.62it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3409
(features.3.weight): 0.5862
(features.6.weight): 0.6489
(features.8.weight): 0.7624
(features.10.weight): 0.8836
(classifier.1.weight): 0.9448
(classifier.4.weight): 0.9762
(classifier.6.weight): 0.7110


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.4474, Accuracy: 6816/10000 (68%)

Epoch: 113


100%|██████████| 320/320 [00:28<00:00, 11.26it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3435
(features.3.weight): 0.5913
(features.6.weight): 0.6557
(features.8.weight): 0.7557
(features.10.weight): 0.8654
(classifier.1.weight): 0.9198
(classifier.4.weight): 0.9618
(classifier.6.weight): 0.7191


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4392, Accuracy: 6956/10000 (70%)

Epoch: 114


100%|██████████| 223/223 [00:20<00:00, 11.17it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3404
(features.3.weight): 0.5865
(features.6.weight): 0.6436
(features.8.weight): 0.7532
(features.10.weight): 0.8783
(classifier.1.weight): 0.9402
(classifier.4.weight): 0.9718
(classifier.6.weight): 0.7140


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.5152, Accuracy: 6922/10000 (69%)

Epoch: 114


100%|██████████| 320/320 [00:28<00:00, 10.71it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3406
(features.3.weight): 0.5864
(features.6.weight): 0.6505
(features.8.weight): 0.7541
(features.10.weight): 0.8662
(classifier.1.weight): 0.9203
(classifier.4.weight): 0.9623
(classifier.6.weight): 0.7204


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.3925, Accuracy: 6909/10000 (69%)

Epoch: 115


100%|██████████| 320/320 [00:28<00:00, 11.41it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3391
(features.3.weight): 0.5837
(features.6.weight): 0.6476
(features.8.weight): 0.7517
(features.10.weight): 0.8662
(classifier.1.weight): 0.9205
(classifier.4.weight): 0.9623
(classifier.6.weight): 0.7172


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4405, Accuracy: 7004/10000 (70%)

Epoch: 115


100%|██████████| 223/223 [00:19<00:00, 11.38it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3413
(features.3.weight): 0.5851
(features.6.weight): 0.6484
(features.8.weight): 0.7627
(features.10.weight): 0.8841
(classifier.1.weight): 0.9440
(classifier.4.weight): 0.9761
(classifier.6.weight): 0.7166


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.3617, Accuracy: 6963/10000 (70%)

Epoch: 116


100%|██████████| 320/320 [00:27<00:00, 11.46it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3386
(features.3.weight): 0.5838
(features.6.weight): 0.6481
(features.8.weight): 0.7523
(features.10.weight): 0.8696
(classifier.1.weight): 0.9211
(classifier.4.weight): 0.9620
(classifier.6.weight): 0.7196


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4771, Accuracy: 6900/10000 (69%)

Epoch: 116


100%|██████████| 223/223 [00:19<00:00, 11.46it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3387
(features.3.weight): 0.5820
(features.6.weight): 0.6445
(features.8.weight): 0.7600
(features.10.weight): 0.8846
(classifier.1.weight): 0.9436
(classifier.4.weight): 0.9759
(classifier.6.weight): 0.7170


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.3880, Accuracy: 6944/10000 (69%)

Epoch: 117


100%|██████████| 320/320 [00:28<00:00, 11.34it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3382
(features.3.weight): 0.5844
(features.6.weight): 0.6480
(features.8.weight): 0.7508
(features.10.weight): 0.8679
(classifier.1.weight): 0.9208
(classifier.4.weight): 0.9620
(classifier.6.weight): 0.7189


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4096, Accuracy: 6992/10000 (70%)

Epoch: 117


100%|██████████| 223/223 [00:19<00:00, 10.93it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3379
(features.3.weight): 0.5824
(features.6.weight): 0.6441
(features.8.weight): 0.7582
(features.10.weight): 0.8828
(classifier.1.weight): 0.9434
(classifier.4.weight): 0.9758
(classifier.6.weight): 0.7195


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.3665, Accuracy: 6993/10000 (70%)

Epoch: 118


100%|██████████| 320/320 [00:28<00:00, 11.34it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3371
(features.3.weight): 0.5833
(features.6.weight): 0.6470
(features.8.weight): 0.7506
(features.10.weight): 0.8666
(classifier.1.weight): 0.9196
(classifier.4.weight): 0.9619
(classifier.6.weight): 0.7177


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4636, Accuracy: 6931/10000 (69%)

Epoch: 118


100%|██████████| 223/223 [00:20<00:00, 11.32it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3395
(features.3.weight): 0.5843
(features.6.weight): 0.6413
(features.8.weight): 0.7519
(features.10.weight): 0.8798
(classifier.1.weight): 0.9406
(classifier.4.weight): 0.9719
(classifier.6.weight): 0.7194


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.4775, Accuracy: 6950/10000 (70%)

Epoch: 119


100%|██████████| 320/320 [00:29<00:00, 11.57it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3367
(features.3.weight): 0.5834
(features.6.weight): 0.6467
(features.8.weight): 0.7527
(features.10.weight): 0.8688
(classifier.1.weight): 0.9203
(classifier.4.weight): 0.9620
(classifier.6.weight): 0.7155


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4493, Accuracy: 6926/10000 (69%)

Epoch: 119


100%|██████████| 223/223 [00:19<00:00, 11.41it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3369
(features.3.weight): 0.5811
(features.6.weight): 0.6420
(features.8.weight): 0.7569
(features.10.weight): 0.8818
(classifier.1.weight): 0.9415
(classifier.4.weight): 0.9754
(classifier.6.weight): 0.7219


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.3937, Accuracy: 6988/10000 (70%)

Epoch: 120


100%|██████████| 223/223 [00:19<00:00, 11.25it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3369
(features.3.weight): 0.5809
(features.6.weight): 0.6366
(features.8.weight): 0.7482
(features.10.weight): 0.8790
(classifier.1.weight): 0.9401
(classifier.4.weight): 0.9717
(classifier.6.weight): 0.7258


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.4591, Accuracy: 6992/10000 (70%)

Epoch: 120


100%|██████████| 320/320 [00:28<00:00, 11.34it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3355
(features.3.weight): 0.5829
(features.6.weight): 0.6458
(features.8.weight): 0.7502
(features.10.weight): 0.8666
(classifier.1.weight): 0.9192
(classifier.4.weight): 0.9614
(classifier.6.weight): 0.7187


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4980, Accuracy: 6934/10000 (69%)

Epoch: 121


100%|██████████| 223/223 [00:19<00:00, 11.28it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3350
(features.3.weight): 0.5791
(features.6.weight): 0.6344
(features.8.weight): 0.7463
(features.10.weight): 0.8785
(classifier.1.weight): 0.9396
(classifier.4.weight): 0.9714
(classifier.6.weight): 0.7309


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4403, Accuracy: 7014/10000 (70%)

Epoch: 121


100%|██████████| 223/223 [00:19<00:00, 11.35it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3358
(features.3.weight): 0.5806
(features.6.weight): 0.6418
(features.8.weight): 0.7576
(features.10.weight): 0.8839
(classifier.1.weight): 0.9431
(classifier.4.weight): 0.9756
(classifier.6.weight): 0.7196


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.3682, Accuracy: 6936/10000 (69%)

Epoch: 122


100%|██████████| 320/320 [00:28<00:00, 11.15it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3352
(features.3.weight): 0.5816
(features.6.weight): 0.6442
(features.8.weight): 0.7500
(features.10.weight): 0.8687
(classifier.1.weight): 0.9201
(classifier.4.weight): 0.9618
(classifier.6.weight): 0.7164


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.5158, Accuracy: 6903/10000 (69%)

Epoch: 122


100%|██████████| 223/223 [00:19<00:00, 10.85it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3348
(features.3.weight): 0.5772
(features.6.weight): 0.6328
(features.8.weight): 0.7454
(features.10.weight): 0.8789
(classifier.1.weight): 0.9410
(classifier.4.weight): 0.9720
(classifier.6.weight): 0.7252


  0%|          | 0/320 [00:00<?, ?it/s]


Test set: Average loss: 1.4189, Accuracy: 6938/10000 (69%)

Epoch: 123


100%|██████████| 320/320 [00:27<00:00, 11.49it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3337
(features.3.weight): 0.5802
(features.6.weight): 0.6433
(features.8.weight): 0.7489
(features.10.weight): 0.8682
(classifier.1.weight): 0.9206
(classifier.4.weight): 0.9621
(classifier.6.weight): 0.7179


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4040, Accuracy: 6931/10000 (69%)

Epoch: 123


100%|██████████| 223/223 [00:19<00:00, 11.19it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3338
(features.3.weight): 0.5773
(features.6.weight): 0.6324
(features.8.weight): 0.7461
(features.10.weight): 0.8798
(classifier.1.weight): 0.9398
(classifier.4.weight): 0.9717
(classifier.6.weight): 0.7287


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4679, Accuracy: 6867/10000 (69%)

Epoch: 124


100%|██████████| 223/223 [00:19<00:00, 10.39it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3328
(features.3.weight): 0.5770
(features.6.weight): 0.6318
(features.8.weight): 0.7439
(features.10.weight): 0.8790
(classifier.1.weight): 0.9394
(classifier.4.weight): 0.9715
(classifier.6.weight): 0.7277


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4456, Accuracy: 6999/10000 (70%)

Epoch: 124


100%|██████████| 223/223 [00:18<00:00, 12.24it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3347
(features.3.weight): 0.5787
(features.6.weight): 0.6398
(features.8.weight): 0.7563
(features.10.weight): 0.8855
(classifier.1.weight): 0.9448
(classifier.4.weight): 0.9760
(classifier.6.weight): 0.7153


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4590, Accuracy: 7030/10000 (70%)

Epoch: 125


100%|██████████| 223/223 [00:20<00:00, 11.09it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3327
(features.3.weight): 0.5774
(features.6.weight): 0.6327
(features.8.weight): 0.7459
(features.10.weight): 0.8810
(classifier.1.weight): 0.9420
(classifier.4.weight): 0.9720
(classifier.6.weight): 0.7221


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.5226, Accuracy: 7072/10000 (71%)

Epoch: 125


100%|██████████| 223/223 [00:20<00:00, 11.01it/s]


normalized norm of (weight - projection)
(features.0.weight): 0.3323
(features.3.weight): 0.5764
(features.6.weight): 0.6373
(features.8.weight): 0.7539
(features.10.weight): 0.8844
(classifier.1.weight): 0.9436
(classifier.4.weight): 0.9758
(classifier.6.weight): 0.7170

Test set: Average loss: 1.3831, Accuracy: 6956/10000 (70%)



In [10]:
### Apply pruning
for usr in range(args.num_users):
    mask = apply_l1_prune(mdlz[usr][0], device, args) if args.l1 else apply_prune(mdlz[usr][0], device, args)
    print_prune(model)
    test(args, mdlz[usr][0], device, test_loader)
    retrain(args, model, mask, device, train_loader, test_loader, optimizer)

Apply Pruning based on percentile
[at weight features.0.weight]
percentage of pruned: 0.0000%
nonzero parameters after pruning: 1728 / 1728

[at weight features.3.weight]
percentage of pruned: 0.0000%
nonzero parameters after pruning: 110592 / 110592

[at weight features.6.weight]
percentage of pruned: 0.0000%
nonzero parameters after pruning: 663552 / 663552

[at weight features.8.weight]
percentage of pruned: 0.0000%
nonzero parameters after pruning: 884736 / 884736

[at weight features.10.weight]
percentage of pruned: 0.0000%
nonzero parameters after pruning: 589824 / 589824

[at weight classifier.1.weight]
percentage of pruned: 0.0000%
nonzero parameters after pruning: 4194304 / 4194304

[at weight classifier.4.weight]
percentage of pruned: 0.0000%
nonzero parameters after pruning: 16777216 / 16777216

[at weight classifier.6.weight]
percentage of pruned: 0.0000%
nonzero parameters after pruning: 40960 / 40960

total nonzero parameters after pruning: 23272266 / 23272266 (0.0000%)


  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 3.1654, Accuracy: 1747/10000 (17%)

Re epoch: 1


100%|██████████| 223/223 [00:07<00:00, 29.96it/s]
  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.7493, Accuracy: 7136/10000 (71%)

Re epoch: 2


100%|██████████| 223/223 [00:07<00:00, 30.86it/s]
  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.9448, Accuracy: 7114/10000 (71%)

Re epoch: 3


100%|██████████| 223/223 [00:07<00:00, 30.14it/s]
  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 2.0576, Accuracy: 7097/10000 (71%)

Re epoch: 4


100%|██████████| 223/223 [00:07<00:00, 29.90it/s]
  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.9541, Accuracy: 7047/10000 (70%)

Re epoch: 5


100%|██████████| 223/223 [00:07<00:00, 30.22it/s]



Test set: Average loss: 1.9073, Accuracy: 6978/10000 (70%)

Apply Pruning based on percentile
[at weight features.0.weight]
percentage of pruned: 0.0000%
nonzero parameters after pruning: 1728 / 1728

[at weight features.3.weight]
percentage of pruned: 0.0000%
nonzero parameters after pruning: 110592 / 110592

[at weight features.6.weight]
percentage of pruned: 0.0000%
nonzero parameters after pruning: 663552 / 663552

[at weight features.8.weight]
percentage of pruned: 0.0000%
nonzero parameters after pruning: 884736 / 884736

[at weight features.10.weight]
percentage of pruned: 0.0000%
nonzero parameters after pruning: 589824 / 589824

[at weight classifier.1.weight]
percentage of pruned: 0.0000%
nonzero parameters after pruning: 4194304 / 4194304

[at weight classifier.4.weight]
percentage of pruned: 0.0000%
nonzero parameters after pruning: 16777216 / 16777216

[at weight classifier.6.weight]
percentage of pruned: 0.0000%
nonzero parameters after pruning: 40960 / 40960

total nonz

  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 3.4396, Accuracy: 1716/10000 (17%)

Re epoch: 1


100%|██████████| 223/223 [00:07<00:00, 30.71it/s]
  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.7150, Accuracy: 6969/10000 (70%)

Re epoch: 2


100%|██████████| 223/223 [00:07<00:00, 29.98it/s]
  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.9051, Accuracy: 7077/10000 (71%)

Re epoch: 3


100%|██████████| 223/223 [00:07<00:00, 29.90it/s]
  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 2.1301, Accuracy: 7014/10000 (70%)

Re epoch: 4


100%|██████████| 223/223 [00:07<00:00, 29.91it/s]
  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.9211, Accuracy: 7078/10000 (71%)

Re epoch: 5


100%|██████████| 223/223 [00:07<00:00, 30.23it/s]



Test set: Average loss: 2.1536, Accuracy: 7033/10000 (70%)

Apply Pruning based on percentile
[at weight features.0.weight]
percentage of pruned: 79.9769%
nonzero parameters after pruning: 346 / 1728

[at weight features.3.weight]
percentage of pruned: 91.9994%
nonzero parameters after pruning: 8848 / 110592

[at weight features.6.weight]
percentage of pruned: 92.9999%
nonzero parameters after pruning: 46449 / 663552

[at weight features.8.weight]
percentage of pruned: 93.9999%
nonzero parameters after pruning: 53085 / 884736

[at weight features.10.weight]
percentage of pruned: 94.9999%
nonzero parameters after pruning: 29492 / 589824

[at weight classifier.1.weight]
percentage of pruned: 99.0000%
nonzero parameters after pruning: 41944 / 4194304

[at weight classifier.4.weight]
percentage of pruned: 99.0000%
nonzero parameters after pruning: 167773 / 16777216

[at weight classifier.6.weight]
percentage of pruned: 92.9980%
nonzero parameters after pruning: 2868 / 40960

total nonzero

  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 5.1372, Accuracy: 1797/10000 (18%)

Re epoch: 1


100%|██████████| 223/223 [00:07<00:00, 30.85it/s]
  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.0723, Accuracy: 6511/10000 (65%)

Re epoch: 2


100%|██████████| 223/223 [00:07<00:00, 29.97it/s]
  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.1305, Accuracy: 6725/10000 (67%)

Re epoch: 3


100%|██████████| 223/223 [00:07<00:00, 29.82it/s]
  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.2641, Accuracy: 6790/10000 (68%)

Re epoch: 4


100%|██████████| 223/223 [00:07<00:00, 30.01it/s]
  0%|          | 0/223 [00:00<?, ?it/s]


Test set: Average loss: 1.4576, Accuracy: 6741/10000 (67%)

Re epoch: 5


100%|██████████| 223/223 [00:07<00:00, 30.11it/s]



Test set: Average loss: 1.6639, Accuracy: 6680/10000 (67%)



# Testing ground

In [8]:
from torch.utils.data import DataLoader
import torch.nn.functional as F

confusion_matrix = torch.zeros(10, 10)
mdlz[0][0].eval()
# testing
test_loss = 0
correct = 0

l = len(test_loader)
for idx, (data, target) in enumerate(test_loader):
    if use_cuda:
        data, target = data.to(device), target.to(device)
    log_probs = mdlz[0][0](data)
    # sum up batch loss
    test_loss += F.cross_entropy(log_probs, target, reduction='sum').item()
    # get the index of the max log-probability
    y_pred = log_probs.data.max(1, keepdim=True)[1]
    correct += y_pred.eq(target.data.view_as(y_pred)).long().cpu().sum()

    for t, p in zip(target.data, y_pred.view(-1)):
                confusion_matrix[t.long(), p.long()] += 1

In [9]:
print(confusion_matrix)

tensor([[726.,  11.,  76.,  31.,  22.,   4.,   4.,   9.,  91.,  26.],
        [ 18., 841.,   7.,  10.,   2.,   4.,  13.,   1.,  41.,  63.],
        [ 59.,   5., 608.,  70., 104.,  45.,  52.,  21.,  23.,  13.],
        [ 26.,   8.,  94., 514.,  83., 142.,  52.,  39.,  24.,  18.],
        [ 18.,   2.,  81.,  64., 697.,  33.,  40.,  47.,  15.,   3.],
        [ 12.,   4.,  60., 195.,  63., 568.,  21.,  48.,  18.,  11.],
        [  8.,   5.,  53.,  84.,  50.,  14., 769.,   6.,   9.,   2.],
        [ 23.,   3.,  46.,  42.,  88.,  56.,   8., 707.,   7.,  20.],
        [ 38.,  23.,  11.,  16.,   2.,   9.,   3.,   2., 883.,  13.],
        [ 40., 111.,   7.,  21.,   4.,   0.,   4.,  12.,  45., 756.]])


In [8]:
from torch.utils.data import DataLoader
import torch.nn.functional as F

confusion_matrix = torch.zeros(10, 10)
mdlz[0][0].eval()
# testing
test_loss = 0
correct = 0

l = len(test_loader)
for idx, (data, target) in enumerate(test_loader):
    if use_cuda:
        data, target = data.to(device), target.to(device)
    log_probs = mdlz[0][0](data)
    # sum up batch loss
    test_loss += F.cross_entropy(log_probs, target, reduction='sum').item()
    # get the index of the max log-probability
    y_pred = log_probs.data.max(1, keepdim=True)[1]
    correct += y_pred.eq(target.data.view_as(y_pred)).long().cpu().sum()

    for t, p in zip(target.data, y_pred.view(-1)):
                confusion_matrix[t.long(), p.long()] += 1

In [9]:
print(confusion_matrix)

tensor([[784.,  14.,  44.,  29.,  11.,   8.,   4.,  10.,  71.,  25.],
        [ 10., 831.,   4.,  14.,   1.,   1.,   9.,   2.,  43.,  85.],
        [ 80.,   6., 630.,  88.,  59.,  45.,  50.,  24.,  11.,   7.],
        [ 31.,  17.,  65., 535.,  46., 172.,  54.,  37.,  28.,  15.],
        [ 35.,   2.,  87.,  93., 625.,  44.,  38.,  65.,   9.,   2.],
        [ 14.,   7.,  51., 205.,  40., 590.,  17.,  49.,  14.,  13.],
        [  8.,   5.,  47.,  97.,  33.,  34., 764.,   3.,   6.,   3.],
        [ 18.,   4.,  27.,  60.,  47.,  70.,   6., 749.,   5.,  14.],
        [ 75.,  20.,   2.,  21.,   5.,   6.,   5.,   4., 844.,  18.],
        [ 32.,  80.,   9.,  28.,   2.,   8.,   3.,  18.,  25., 795.]])
