In [1]:
from __future__ import print_function
import numpy as np
import argparse
import torch
import torch.nn.functional as F
from optimizer import PruneAdam
from model import LeNet, AlexNet
from utils import regularized_nll_loss, admm_loss, \
    initialize_Z_and_U, update_X, update_Z, update_Z_l1, update_U, \
    print_convergence, print_prune, apply_prune, apply_l1_prune
from torchvision import datasets, transforms, models
from tqdm import tqdm
from Fed import FedAvg
import copy

# from torch.utils.tensorboard import SummaryWriter
# %load_ext tensorboard

In [2]:
# Training settings
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
parser.add_argument('--dataset', type=str, default="cifar10", choices=["mnist", "cifar10"],
                    metavar='D', help='training dataset (mnist or cifar10)')
parser.add_argument('--batch-size', type=int, default=32, metavar='N',
                    help='input batch size for training (default: 64)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                    help='input batch size for testing (default: 1000)')
parser.add_argument('--percent', type=list, default=[0.8, 0.92, 0.991, 0.93],
                    metavar='P', help='pruning percentage (default: 0.8)')
parser.add_argument('--alpha', type=float, default=5e-4, metavar='L',
                    help='l2 norm weight (default: 5e-4)')
parser.add_argument('--rho', type=float, default=1e-2, metavar='R',
                    help='cardinality weight (default: 1e-2)')
parser.add_argument('--l1', default=False, action='store_true',
                    help='prune weights with l1 regularization instead of cardinality')
parser.add_argument('--l2', default=False, action='store_true',
                    help='apply l2 regularization')
parser.add_argument('--num_pre_epochs', type=int, default=3, metavar='P',
                    help='number of epochs to pretrain (default: 3)')
parser.add_argument('--num_epochs', type=int, default=10, metavar='N',
                    help='number of epochs to train (default: 10)')
parser.add_argument('--num_re_epochs', type=int, default=3, metavar='R',
                    help='number of epochs to retrain (default: 3)')
parser.add_argument('--lr', type=float, default=1e-5, metavar='LR',
                    help='learning rate (default: 1e-2)')
parser.add_argument('--adam_epsilon', type=float, default=1e-8, metavar='E',
                    help='adam epsilon (default: 1e-8)')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                    help='random seed (default: 1)')
parser.add_argument('--save-model', action='store_true', default=False,
                    help='For Saving the current Model')
parser.add_argument('--num_users', action='store_true', default=1,
                    help='Number of users in network')
parser.add_argument('--model', action='store_true', default='resnet',
                    help='Model to train')
args = parser.parse_args(args=[])

In [3]:
args

Namespace(adam_epsilon=1e-08, alpha=0.0005, batch_size=32, dataset='cifar10', l1=False, l2=False, lr=1e-05, model='resnet', no_cuda=False, num_epochs=10, num_pre_epochs=3, num_re_epochs=3, num_users=1, percent=[0.8, 0.92, 0.991, 0.93], rho=0.01, save_model=False, seed=1, test_batch_size=1000)

In [4]:
classDict = {'T-shirt/top':0, 'Trouser':1, 'Pullover':2, 'Dress':3, 'Coat':4, 'Sandal':5, 'Shirt':6, 'Sneaker':7, 'Bag':8, 'Ankle boot':9}

# Define a function to separate CIFAR classes by class index

def get_class_i(x, y, i):
    """
    x: trainset.train_data or testset.test_data
    y: trainset.train_labels or testset.test_labels
    i: class label, a number between 0 to 9
    return: x_i
    """
    # Convert to a numpy array
    y = np.array(y)
    # Locate position of labels that equal to i
    pos_i = np.argwhere(y == i)
    # Convert the result into a 1-D list
    pos_i = list(pos_i[:,0])
    # Collect all data that match the desired label
#     x_i = [x[j] for j in pos_i]
    
    return pos_i

In [5]:
def create_dict(dataset_train):
    
    frac = int(len(dataset_train.data) * 0.05)
    x_reserve = dataset_train.data[:frac]
    y_reserve = dataset_train.targets[:frac]
    x_train = dataset_train.data[frac:]
    y_train = dataset_train.targets[frac:]
    
    reserved = get_class_i(x_reserve, y_reserve, classDict['T-shirt/top']) \
               +get_class_i(x_reserve, y_reserve, classDict['Trouser']) \
               +get_class_i(x_reserve, y_reserve, classDict['Pullover']) \
               +get_class_i(x_reserve, y_reserve, classDict['Dress']) \
               +get_class_i(x_reserve, y_reserve, classDict['Coat']) \
               +get_class_i(x_reserve, y_reserve, classDict['Sandal']) \
               +get_class_i(x_reserve, y_reserve, classDict['Shirt']) \
               +get_class_i(x_reserve, y_reserve, classDict['Sneaker']) \
               +get_class_i(x_reserve, y_reserve, classDict['Bag']) \
               +get_class_i(x_reserve, y_reserve, classDict['Ankle boot'])

    train1 = get_class_i(x_train, y_train, classDict['T-shirt/top']) \
             +get_class_i(x_train, y_train, classDict['Trouser']) \
             +get_class_i(x_train, y_train, classDict['Pullover'])

    train2 = get_class_i(x_train, y_train, classDict['Dress']) \
             +get_class_i(x_train, y_train, classDict['Coat']) \
             +get_class_i(x_train, y_train, classDict['Sandal'])
    
    train3 = get_class_i(x_train, y_train, classDict['Shirt']) \
             +get_class_i(x_train, y_train, classDict['Sneaker']) \
             +get_class_i(x_train, y_train, classDict['Bag']) \
             +get_class_i(x_train, y_train, classDict['Ankle boot'])
    
    dict_users = {0: set(reserved+train3), 1:set(train1), 2:set(train2)}
    return dict_users

class DatasetSplit(torch.utils.data.Dataset):
    def __init__(self, dataset, idxs):
        self.dataset = dataset
        self.idxs = list(idxs)

    def __len__(self):
        return len(self.idxs)

    def __getitem__(self, item):
        image, label = self.dataset[self.idxs[item]]
        return image, label

In [6]:
gossip = []

def pretrain(args, model, device, train_loader, test_loader, optimizer):
    for epoch in range(args.num_pre_epochs):
        print('Pre epoch: {}'.format(epoch + 1))
        model.train()
        for batch_idx, (data, target) in enumerate(tqdm(train_loader)):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = regularized_nll_loss(args, model, output, target)
            loss.backward()
            optimizer.step()
        test(args, model, device, test_loader)


def train(args, model, device, train_loader, test_loader, optimizer, Z, U, report=False):
    model.train()
    print('Epoch: {}'.format(epoch + 1))
    for batch_idx, (data, target) in enumerate(tqdm(train_loader)):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = admm_loss(args, device, model, Z, U, output, target)
        loss.backward()
        optimizer.step()
    X = update_X(model)
    Z = update_Z_l1(X, U, args) if args.l1 else update_Z(X, U, args)
    U = update_U(U, X, Z)
    print_convergence(model, X, Z)
    test(args, model, device, test_loader, report)


iter = 0
def test(args, model, device, test_loader, report=False):
    model.eval()
    test_loss = 0
    correct = 0
    global iter
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    gossip.append(correct / len(test_loader.dataset))
    
    if report:
#         writer.add_scalar('train/loss_gossip_admm4', test_loss, iter)
#         writer.add_scalar('valid/accuracy_gossip_admm4', correct / len(test_loader.dataset), iter)
        iter+=1


def retrain(args, model, mask, device, train_loader, test_loader, optimizer):
    for epoch in range(args.num_re_epochs):
        print('Re epoch: {}'.format(epoch + 1))
        model.train()
        for batch_idx, (data, target) in enumerate(tqdm(train_loader)):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.prune_step(mask)

        test(args, model, device, test_loader)

In [7]:
### MAIN

# writer = SummaryWriter('../../runs/') 
what_gpu = 2
torch.manual_seed(args.seed)
device = torch.device('cuda:{}'.format(what_gpu) if torch.cuda.is_available() else 'cpu')
kwargs = {'num_workers': 5, 'pin_memory': True}

args.percent = [0.8, 0.92, 0.93, 0.94, 0.95, 0.99, 0.99, 0.93]
args.num_pre_epochs = 3
args.num_epochs = 60
args.num_re_epochs = 5
args.num_users = 3
args.dataset = 'cifar10'
args.model = 'resnet'
args.l1 = True
args.l2 = False
args.lr=0.000001

trainset = datasets.FashionMNIST('../../../data/fmnist', train=True, download=True,
                         transform=transforms.Compose([
                             transforms.ToTensor(),
                             transforms.Normalize([0.5], [0.5])
                         ]))
dict_users = create_dict(trainset)

test_loader = torch.utils.data.DataLoader(
    datasets.FashionMNIST('../../../data/fmnist', train=False, download=True,
                     transform=transforms.Compose([
                         transforms.ToTensor(),
                         transforms.Normalize([0.5], [0.5])
                     ])), shuffle=True, batch_size=args.test_batch_size, **kwargs)

In [8]:
mdlz = dict()
for usr in range(args.num_users):
    train_loader = torch.utils.data.DataLoader(DatasetSplit
                                               (trainset, dict_users[usr]), batch_size=args.batch_size, shuffle=True, **kwargs)
    if args.model == 'resnet':
        model = models.resnet50(pretrained=True)
        model.conv1 = torch.nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        model.to(device)
    else:
        model = LeNet().to(device) if args.dataset == "mnist" else AlexNet().to(device)
    print(model)
    
    
    optimizer = PruneAdam(model.named_parameters(), lr=args.lr, eps=args.adam_epsilon)
    pretrain(args, model, device, train_loader, test_loader, optimizer)
    Z, U = initialize_Z_and_U(model)
    mdlz[usr] = (model, optimizer, Z, U)
    
for epoch in range(args.num_epochs):
    w_locals=[]
    idxs_users = np.random.choice(range(args.num_users), 2, replace=False)
    for usr in idxs_users:
        report = True if usr == 0 else False
        train_loader = torch.utils.data.DataLoader(DatasetSplit(trainset, dict_users[usr]), batch_size=args.batch_size, shuffle=True, **kwargs)
        train(args, mdlz[usr][0], device, train_loader, test_loader, mdlz[usr][1], mdlz[usr][2], mdlz[usr][3], report=report)
        w = mdlz[usr][0].state_dict()
        w_locals.append(copy.deepcopy(w))

    # update global weights
    w_glob = FedAvg(w_locals)

    for idx in idxs_users:
        # copy weight to net_glob
        mdlz[idx][0].load_state_dict(w_glob)

  0%|          | 0/769 [00:00<?, ?it/s]

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

100%|██████████| 769/769 [00:33<00:00, 22.83it/s]
  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -4.4186, Accuracy: 929/10000 (9%)

Pre epoch: 2


100%|██████████| 769/769 [00:38<00:00, 19.99it/s]
  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -9.3190, Accuracy: 2907/10000 (29%)

Pre epoch: 3


100%|██████████| 769/769 [00:38<00:00, 20.06it/s]



Test set: Average loss: -14.8684, Accuracy: 4910/10000 (49%)



  0%|          | 0/535 [00:00<?, ?it/s]

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

100%|██████████| 535/535 [00:26<00:00, 20.00it/s]
  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -2.1130, Accuracy: 129/10000 (1%)

Pre epoch: 2


100%|██████████| 535/535 [00:20<00:00, 25.57it/s]
  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -6.0943, Accuracy: 1436/10000 (14%)

Pre epoch: 3


100%|██████████| 535/535 [00:20<00:00, 26.15it/s]



Test set: Average loss: -10.9022, Accuracy: 2995/10000 (30%)



  0%|          | 0/534 [00:00<?, ?it/s]

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

100%|██████████| 534/534 [00:18<00:00, 28.30it/s]
  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -2.2652, Accuracy: 195/10000 (2%)

Pre epoch: 2


100%|██████████| 534/534 [00:26<00:00, 19.99it/s]
  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -5.3181, Accuracy: 1317/10000 (13%)

Pre epoch: 3


100%|██████████| 534/534 [00:26<00:00, 20.07it/s]
  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -8.5245, Accuracy: 2846/10000 (28%)

Epoch: 1


100%|██████████| 535/535 [01:04<00:00,  8.33it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.5281
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7230
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6949
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4996
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7396
(layer1.1.bn3.weight): 0.3979
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7291
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2390
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2984
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2957
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -14.6246, Accuracy: 4224/10000 (42%)

Epoch: 1


100%|██████████| 769/769 [01:32<00:00,  8.32it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.5325
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7230
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6949
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7396
(layer1.1.bn3.weight): 0.3978
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7291
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2390
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2983
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2957
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -21.6537, Accuracy: 6052/10000 (61%)

Epoch: 2


100%|██████████| 535/535 [00:54<00:00,  9.83it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.6660
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7230
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6949
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7396
(layer1.1.bn3.weight): 0.3978
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7291
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2390
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2984
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2957
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -15.6051, Accuracy: 4262/10000 (43%)

Epoch: 2


100%|██████████| 534/534 [01:04<00:00,  8.24it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.5294
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2304
(layer1.0.conv2.weight): 0.7230
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6949
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4996
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7965
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8113
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7396
(layer1.1.bn3.weight): 0.3979
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7291
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2390
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2983
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -12.2789, Accuracy: 4158/10000 (42%)

Epoch: 3


100%|██████████| 769/769 [01:24<00:00,  9.05it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.6659
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7230
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6949
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7396
(layer1.1.bn3.weight): 0.3978
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7291
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2390
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2984
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2957
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -17.9463, Accuracy: 4940/10000 (49%)

Epoch: 3


100%|██████████| 535/535 [01:04<00:00,  8.29it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7443
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7230
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6949
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4996
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7396
(layer1.1.bn3.weight): 0.3978
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7291
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2390
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2984
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -14.3082, Accuracy: 4424/10000 (44%)

Epoch: 4


100%|██████████| 534/534 [01:05<00:00,  8.20it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7442
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7230
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6949
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4996
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7396
(layer1.1.bn3.weight): 0.3978
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7291
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2390
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2984
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2957
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -13.5903, Accuracy: 4359/10000 (44%)

Epoch: 4


100%|██████████| 769/769 [01:32<00:00,  8.34it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7585
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7230
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6949
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7396
(layer1.1.bn3.weight): 0.3978
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7291
(layer1.2.bn3.weight): 0.3184
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2390
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2984
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2957
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -19.7732, Accuracy: 6003/10000 (60%)

Epoch: 5


100%|██████████| 535/535 [01:04<00:00,  8.27it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7586
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7230
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6949
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7396
(layer1.1.bn3.weight): 0.3978
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7291
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2390
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2984
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2957
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -18.0721, Accuracy: 5550/10000 (56%)

Epoch: 5


100%|██████████| 534/534 [01:04<00:00,  8.25it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7598
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7230
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6949
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7396
(layer1.1.bn3.weight): 0.3978
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7291
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2390
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2984
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -20.8296, Accuracy: 6215/10000 (62%)

Epoch: 6


100%|██████████| 769/769 [01:30<00:00,  8.47it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7597
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4423
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7230
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6949
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7396
(layer1.1.bn3.weight): 0.3978
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7291
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2390
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2984
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -23.6677, Accuracy: 6444/10000 (64%)

Epoch: 6


100%|██████████| 534/534 [01:04<00:00,  8.28it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7626
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7230
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7396
(layer1.1.bn3.weight): 0.3978
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7291
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2390
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2984
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -23.0266, Accuracy: 6441/10000 (64%)

Epoch: 7


100%|██████████| 534/534 [01:04<00:00,  8.26it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7618
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4423
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7230
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7396
(layer1.1.bn3.weight): 0.3978
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2390
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2984
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -28.2782, Accuracy: 6904/10000 (69%)

Epoch: 7


100%|██████████| 769/769 [01:31<00:00,  8.37it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7618
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4423
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7230
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7396
(layer1.1.bn3.weight): 0.3978
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7291
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2390
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2984
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -29.2144, Accuracy: 7036/10000 (70%)

Epoch: 8


100%|██████████| 769/769 [01:32<00:00,  8.33it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7618
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4423
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7230
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7396
(layer1.1.bn3.weight): 0.3978
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2390
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2984
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -32.9975, Accuracy: 7191/10000 (72%)

Epoch: 8


100%|██████████| 535/535 [01:04<00:00,  8.26it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7626
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7230
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7396
(layer1.1.bn3.weight): 0.3978
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7291
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2390
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2984
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -23.2817, Accuracy: 6509/10000 (65%)

Epoch: 9


100%|██████████| 769/769 [01:30<00:00,  8.52it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4423
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7230
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7396
(layer1.1.bn3.weight): 0.3978
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2390
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2984
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -34.0814, Accuracy: 7237/10000 (72%)

Epoch: 9


100%|██████████| 534/534 [01:04<00:00,  8.23it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7618
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4423
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7230
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7396
(layer1.1.bn3.weight): 0.3978
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2390
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2984
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -31.4328, Accuracy: 7134/10000 (71%)

Epoch: 10


100%|██████████| 769/769 [01:32<00:00,  8.29it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7621
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4423
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7230
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7396
(layer1.1.bn3.weight): 0.3978
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2984
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -36.0827, Accuracy: 7330/10000 (73%)

Epoch: 10


100%|██████████| 535/535 [01:03<00:00,  8.40it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7624
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4423
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7230
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7396
(layer1.1.bn3.weight): 0.3978
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2390
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2984
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -31.4182, Accuracy: 7133/10000 (71%)

Epoch: 11


100%|██████████| 535/535 [01:04<00:00,  8.26it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4423
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7230
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7396
(layer1.1.bn3.weight): 0.3978
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2984
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -35.6121, Accuracy: 7377/10000 (74%)

Epoch: 11


100%|██████████| 534/534 [01:04<00:00,  8.26it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7621
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7230
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7396
(layer1.1.bn3.weight): 0.3978
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2390
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2984
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -33.9199, Accuracy: 7354/10000 (74%)

Epoch: 12


100%|██████████| 535/535 [00:56<00:00,  9.53it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7396
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2984
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -36.0531, Accuracy: 7455/10000 (75%)

Epoch: 12


100%|██████████| 534/534 [01:04<00:00,  8.27it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7622
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4423
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7396
(layer1.1.bn3.weight): 0.3978
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2984
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -37.2772, Accuracy: 7428/10000 (74%)

Epoch: 13


100%|██████████| 534/534 [00:59<00:00,  8.92it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7622
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7396
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -39.4759, Accuracy: 7524/10000 (75%)

Epoch: 13


100%|██████████| 769/769 [01:32<00:00,  8.29it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7622
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4423
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7230
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3978
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2984
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -37.3677, Accuracy: 7455/10000 (75%)

Epoch: 14


100%|██████████| 534/534 [01:04<00:00,  8.29it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7622
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4423
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -39.7532, Accuracy: 7531/10000 (75%)

Epoch: 14


100%|██████████| 769/769 [01:29<00:00,  8.57it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7622
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4423
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7230
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -43.0535, Accuracy: 7558/10000 (76%)

Epoch: 15


100%|██████████| 535/535 [01:04<00:00,  8.29it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7396
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -40.2691, Accuracy: 7474/10000 (75%)

Epoch: 15


100%|██████████| 534/534 [01:04<00:00,  8.30it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7622
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -44.7780, Accuracy: 7573/10000 (76%)

Epoch: 16


100%|██████████| 535/535 [01:03<00:00,  8.39it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -45.5660, Accuracy: 7585/10000 (76%)

Epoch: 16


100%|██████████| 769/769 [01:32<00:00,  8.33it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7622
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4423
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7230
(layer1.0.bn2.weight): 0.2871
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -43.1774, Accuracy: 7636/10000 (76%)

Epoch: 17


100%|██████████| 769/769 [01:32<00:00,  8.27it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7622
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4423
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -48.6062, Accuracy: 7676/10000 (77%)

Epoch: 17


100%|██████████| 535/535 [01:04<00:00,  8.30it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -47.0819, Accuracy: 7635/10000 (76%)

Epoch: 18


100%|██████████| 535/535 [01:04<00:00,  8.24it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -50.0364, Accuracy: 7664/10000 (77%)

Epoch: 18


100%|██████████| 534/534 [01:04<00:00,  8.28it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2305
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7577
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -43.6996, Accuracy: 7597/10000 (76%)

Epoch: 19


100%|██████████| 534/534 [01:03<00:00,  8.40it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -49.2648, Accuracy: 7672/10000 (77%)

Epoch: 19


100%|██████████| 769/769 [01:33<00:00,  8.27it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7622
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -48.6911, Accuracy: 7724/10000 (77%)

Epoch: 20


100%|██████████| 535/535 [01:02<00:00,  8.55it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -48.2132, Accuracy: 7680/10000 (77%)

Epoch: 20


100%|██████████| 534/534 [01:04<00:00,  8.27it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7622
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -50.5966, Accuracy: 7766/10000 (78%)

Epoch: 21


100%|██████████| 769/769 [01:32<00:00,  8.31it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7622
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -51.1813, Accuracy: 7769/10000 (78%)

Epoch: 21


100%|██████████| 535/535 [01:04<00:00,  8.30it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -50.9523, Accuracy: 7709/10000 (77%)

Epoch: 22


100%|██████████| 535/535 [01:05<00:00,  8.21it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7964
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -52.3897, Accuracy: 7772/10000 (78%)

Epoch: 22


100%|██████████| 769/769 [01:33<00:00,  8.24it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7622
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -54.0260, Accuracy: 7832/10000 (78%)

Epoch: 23


100%|██████████| 534/534 [01:04<00:00,  8.24it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -51.3199, Accuracy: 7727/10000 (77%)

Epoch: 23


100%|██████████| 535/535 [01:04<00:00,  8.26it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -53.6706, Accuracy: 7823/10000 (78%)

Epoch: 24


100%|██████████| 535/535 [01:04<00:00,  8.30it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -55.7156, Accuracy: 7828/10000 (78%)

Epoch: 24


100%|██████████| 534/534 [01:04<00:00,  8.29it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -54.7263, Accuracy: 7836/10000 (78%)

Epoch: 25


100%|██████████| 534/534 [01:04<00:00,  8.27it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -55.7374, Accuracy: 7874/10000 (79%)

Epoch: 25


100%|██████████| 535/535 [01:04<00:00,  8.25it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -54.9442, Accuracy: 7865/10000 (79%)

Epoch: 26


100%|██████████| 535/535 [01:03<00:00,  8.36it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -55.7768, Accuracy: 7903/10000 (79%)

Epoch: 26


100%|██████████| 769/769 [01:29<00:00,  8.59it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -55.1047, Accuracy: 7883/10000 (79%)

Epoch: 27


100%|██████████| 534/534 [00:59<00:00,  8.91it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -57.8657, Accuracy: 7919/10000 (79%)

Epoch: 27


100%|██████████| 535/535 [01:04<00:00,  8.30it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -57.6834, Accuracy: 7952/10000 (80%)

Epoch: 28


100%|██████████| 535/535 [01:05<00:00,  8.20it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -57.8524, Accuracy: 7974/10000 (80%)

Epoch: 28


100%|██████████| 534/534 [01:04<00:00,  8.25it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -59.2677, Accuracy: 8055/10000 (81%)

Epoch: 29


100%|██████████| 769/769 [01:32<00:00,  8.27it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8407
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -57.9181, Accuracy: 7957/10000 (80%)

Epoch: 29


100%|██████████| 534/534 [00:59<00:00,  8.99it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -60.6060, Accuracy: 8013/10000 (80%)

Epoch: 30


100%|██████████| 535/535 [01:04<00:00,  8.34it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -60.4908, Accuracy: 8078/10000 (81%)

Epoch: 30


100%|██████████| 534/534 [01:04<00:00,  8.30it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -59.9686, Accuracy: 8064/10000 (81%)

Epoch: 31


100%|██████████| 534/534 [01:04<00:00,  8.22it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -60.2938, Accuracy: 8112/10000 (81%)

Epoch: 31


100%|██████████| 769/769 [01:32<00:00,  8.31it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -61.1938, Accuracy: 8025/10000 (80%)

Epoch: 32


100%|██████████| 534/534 [01:04<00:00,  8.28it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -61.1341, Accuracy: 8113/10000 (81%)

Epoch: 32


100%|██████████| 535/535 [01:04<00:00,  8.27it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2985
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -60.4714, Accuracy: 8102/10000 (81%)

Epoch: 33


100%|██████████| 769/769 [01:32<00:00,  8.30it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -64.0230, Accuracy: 8123/10000 (81%)

Epoch: 33


100%|██████████| 534/534 [01:04<00:00,  8.22it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -62.3975, Accuracy: 8144/10000 (81%)

Epoch: 34


100%|██████████| 769/769 [01:32<00:00,  8.28it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -63.3655, Accuracy: 8180/10000 (82%)

Epoch: 34


100%|██████████| 534/534 [01:04<00:00,  8.26it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -63.7591, Accuracy: 8175/10000 (82%)

Epoch: 35


100%|██████████| 769/769 [01:33<00:00,  8.26it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -66.2146, Accuracy: 8205/10000 (82%)

Epoch: 35


100%|██████████| 534/534 [01:03<00:00,  8.34it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -65.2472, Accuracy: 8179/10000 (82%)

Epoch: 36


100%|██████████| 535/535 [01:04<00:00,  8.24it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3977
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7292
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -62.3174, Accuracy: 8128/10000 (81%)

Epoch: 36


100%|██████████| 534/534 [01:03<00:00,  8.37it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -66.7910, Accuracy: 8235/10000 (82%)

Epoch: 37


100%|██████████| 534/534 [01:04<00:00,  8.27it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -67.0955, Accuracy: 8209/10000 (82%)

Epoch: 37


100%|██████████| 769/769 [01:32<00:00,  8.33it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -66.9135, Accuracy: 8218/10000 (82%)

Epoch: 38


100%|██████████| 535/535 [01:04<00:00,  8.30it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -66.7350, Accuracy: 8196/10000 (82%)

Epoch: 38


100%|██████████| 534/534 [01:04<00:00,  8.33it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -69.1552, Accuracy: 8227/10000 (82%)

Epoch: 39


100%|██████████| 534/534 [00:51<00:00, 10.41it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -66.8872, Accuracy: 8221/10000 (82%)

Epoch: 39


100%|██████████| 535/535 [01:04<00:00,  8.30it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -68.4376, Accuracy: 8232/10000 (82%)

Epoch: 40


100%|██████████| 769/769 [01:32<00:00,  8.34it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7681
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -67.2461, Accuracy: 8265/10000 (83%)

Epoch: 40


100%|██████████| 535/535 [00:54<00:00,  9.91it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7624
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -68.9980, Accuracy: 8271/10000 (83%)

Epoch: 41


100%|██████████| 535/535 [01:03<00:00,  8.41it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -69.7083, Accuracy: 8256/10000 (83%)

Epoch: 41


100%|██████████| 769/769 [01:32<00:00,  8.34it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -68.7812, Accuracy: 8263/10000 (83%)

Epoch: 42


100%|██████████| 535/535 [01:04<00:00,  8.33it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -70.9834, Accuracy: 8289/10000 (83%)

Epoch: 42


100%|██████████| 769/769 [01:32<00:00,  8.31it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -71.3269, Accuracy: 8277/10000 (83%)

Epoch: 43


100%|██████████| 534/534 [01:04<00:00,  8.34it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1949
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -70.9792, Accuracy: 8257/10000 (83%)

Epoch: 43


100%|██████████| 535/535 [01:04<00:00,  8.31it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -71.4025, Accuracy: 8274/10000 (83%)

Epoch: 44


100%|██████████| 769/769 [01:32<00:00,  8.31it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7231
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -71.9672, Accuracy: 8299/10000 (83%)

Epoch: 44


100%|██████████| 534/534 [01:04<00:00,  8.27it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -72.1018, Accuracy: 8295/10000 (83%)

Epoch: 45


100%|██████████| 769/769 [01:31<00:00,  8.36it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -72.4301, Accuracy: 8328/10000 (83%)

Epoch: 45


100%|██████████| 535/535 [01:03<00:00,  8.37it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -70.5873, Accuracy: 8260/10000 (83%)

Epoch: 46


100%|██████████| 535/535 [01:04<00:00,  8.31it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -75.8172, Accuracy: 8302/10000 (83%)

Epoch: 46


100%|██████████| 769/769 [01:32<00:00,  8.31it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7962
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -73.9735, Accuracy: 8318/10000 (83%)

Epoch: 47


100%|██████████| 535/535 [00:54<00:00,  9.89it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -74.8990, Accuracy: 8326/10000 (83%)

Epoch: 47


100%|██████████| 534/534 [01:03<00:00,  8.35it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -71.2840, Accuracy: 8330/10000 (83%)

Epoch: 48


100%|██████████| 534/534 [01:03<00:00,  8.41it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -75.9783, Accuracy: 8339/10000 (83%)

Epoch: 48


100%|██████████| 535/535 [01:04<00:00,  8.30it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7963
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3184
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9238
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -74.7264, Accuracy: 8340/10000 (83%)

Epoch: 49


100%|██████████| 534/534 [01:04<00:00,  8.26it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7962
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3184
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9238
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -75.5300, Accuracy: 8344/10000 (83%)

Epoch: 49


100%|██████████| 535/535 [01:03<00:00,  8.39it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7962
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3184
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9238
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -77.1564, Accuracy: 8349/10000 (83%)

Epoch: 50


100%|██████████| 534/534 [01:03<00:00,  8.37it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7962
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3184
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9238
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -75.7390, Accuracy: 8353/10000 (84%)

Epoch: 50


100%|██████████| 535/535 [01:04<00:00,  8.26it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7962
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9238
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -77.0166, Accuracy: 8336/10000 (83%)

Epoch: 51


100%|██████████| 535/535 [01:03<00:00,  8.38it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7624
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7962
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9238
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -79.2351, Accuracy: 8358/10000 (84%)

Epoch: 51


100%|██████████| 769/769 [01:32<00:00,  8.30it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7962
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8114
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9237
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -75.7326, Accuracy: 8358/10000 (84%)

Epoch: 52


100%|██████████| 534/534 [01:03<00:00,  8.35it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7962
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8115
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3184
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9238
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -76.5254, Accuracy: 8372/10000 (84%)

Epoch: 52


100%|██████████| 769/769 [01:31<00:00,  8.37it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7962
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8115
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9238
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -77.0349, Accuracy: 8355/10000 (84%)

Epoch: 53


100%|██████████| 534/534 [01:04<00:00,  8.31it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7962
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8115
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3184
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9238
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -78.4978, Accuracy: 8374/10000 (84%)

Epoch: 53


100%|██████████| 535/535 [01:04<00:00,  8.30it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7962
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8115
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9238
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -76.8830, Accuracy: 8360/10000 (84%)

Epoch: 54


100%|██████████| 769/769 [01:32<00:00,  8.30it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7962
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8115
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3184
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9238
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -79.1798, Accuracy: 8400/10000 (84%)

Epoch: 54


100%|██████████| 534/534 [01:04<00:00,  8.29it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7962
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8115
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3184
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9238
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -78.5140, Accuracy: 8388/10000 (84%)

Epoch: 55


100%|██████████| 534/534 [01:04<00:00,  8.31it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7962
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8115
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3184
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9238
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -80.1788, Accuracy: 8384/10000 (84%)

Epoch: 55


100%|██████████| 535/535 [01:03<00:00,  8.36it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7624
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7962
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8115
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9238
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -79.0009, Accuracy: 8382/10000 (84%)

Epoch: 56


100%|██████████| 535/535 [01:04<00:00,  8.30it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7624
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7962
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8115
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3184
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9238
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -81.6797, Accuracy: 8400/10000 (84%)

Epoch: 56


100%|██████████| 534/534 [01:04<00:00,  8.26it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7962
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8115
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3184
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9238
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -81.0275, Accuracy: 8395/10000 (84%)

Epoch: 57


100%|██████████| 535/535 [01:04<00:00,  8.29it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7624
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7962
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8115
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3975
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3184
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9238
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -83.1422, Accuracy: 8392/10000 (84%)

Epoch: 57


100%|██████████| 534/534 [01:04<00:00,  8.31it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4425
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7962
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8115
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3975
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3184
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9238
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -82.2865, Accuracy: 8404/10000 (84%)

Epoch: 58


100%|██████████| 535/535 [01:03<00:00,  8.47it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7624
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4425
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7962
(layer1.1.bn1.weight): 0.2469
(layer1.1.conv2.weight): 0.8115
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3975
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9238
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -82.6228, Accuracy: 8398/10000 (84%)

Epoch: 58


100%|██████████| 534/534 [01:04<00:00,  8.30it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4425
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7962
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8115
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3975
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3184
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9238
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -83.3149, Accuracy: 8441/10000 (84%)

Epoch: 59


100%|██████████| 769/769 [01:32<00:00,  8.31it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4424
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7962
(layer1.1.bn1.weight): 0.2468
(layer1.1.conv2.weight): 0.8115
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3976
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3184
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9238
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/535 [00:00<?, ?it/s]


Test set: Average loss: -80.8656, Accuracy: 8394/10000 (84%)

Epoch: 59


100%|██████████| 535/535 [01:04<00:00,  8.32it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7624
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4425
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7962
(layer1.1.bn1.weight): 0.2469
(layer1.1.conv2.weight): 0.8115
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3975
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9238
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/534 [00:00<?, ?it/s]


Test set: Average loss: -82.7721, Accuracy: 8424/10000 (84%)

Epoch: 60


100%|██████████| 534/534 [01:04<00:00,  8.31it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7624
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4425
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7962
(layer1.1.bn1.weight): 0.2469
(layer1.1.conv2.weight): 0.8115
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3975
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3184
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9238
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

  0%|          | 0/769 [00:00<?, ?it/s]


Test set: Average loss: -83.7763, Accuracy: 8438/10000 (84%)

Epoch: 60


100%|██████████| 769/769 [01:30<00:00,  8.45it/s]


normalized norm of (weight - projection)
(conv1.weight): 0.7623
(bn1.weight): 0.1829
(layer1.0.conv1.weight): 0.4425
(layer1.0.bn1.weight): 0.2306
(layer1.0.conv2.weight): 0.7232
(layer1.0.bn2.weight): 0.2870
(layer1.0.conv3.weight): 0.6950
(layer1.0.bn3.weight): 0.2689
(layer1.0.downsample.0.weight): 0.4995
(layer1.0.downsample.1.weight): 0.1950
(layer1.1.conv1.weight): 0.7962
(layer1.1.bn1.weight): 0.2469
(layer1.1.conv2.weight): 0.8115
(layer1.1.bn2.weight): 0.2666
(layer1.1.conv3.weight): 0.7397
(layer1.1.bn3.weight): 0.3975
(layer1.2.conv1.weight): 0.8406
(layer1.2.bn1.weight): 0.2705
(layer1.2.conv2.weight): 0.8410
(layer1.2.bn2.weight): 0.2239
(layer1.2.conv3.weight): 0.7293
(layer1.2.bn3.weight): 0.3183
(layer2.0.conv1.weight): 0.7578
(layer2.0.bn1.weight): 0.2389
(layer2.0.conv2.weight): 0.9238
(layer2.0.bn2.weight): 0.2451
(layer2.0.conv3.weight): 0.7766
(layer2.0.bn3.weight): 0.2986
(layer2.0.downsample.0.weight): 0.7682
(layer2.0.downsample.1.weight): 0.2958
(layer2.1.conv1

In [11]:
gossip

[0.0929,
 0.2907,
 0.491,
 0.0129,
 0.1436,
 0.2995,
 0.0195,
 0.1317,
 0.2846,
 0.4224,
 0.6052,
 0.4262,
 0.4158,
 0.494,
 0.4424,
 0.4359,
 0.6003,
 0.555,
 0.6215,
 0.6444,
 0.6441,
 0.6904,
 0.7036,
 0.7191,
 0.6509,
 0.7237,
 0.7134,
 0.733,
 0.7133,
 0.7377,
 0.7354,
 0.7455,
 0.7428,
 0.7524,
 0.7455,
 0.7531,
 0.7558,
 0.7474,
 0.7573,
 0.7585,
 0.7636,
 0.7676,
 0.7635,
 0.7664,
 0.7597,
 0.7672,
 0.7724,
 0.768,
 0.7766,
 0.7769,
 0.7709,
 0.7772,
 0.7832,
 0.7727,
 0.7823,
 0.7828,
 0.7836,
 0.7874,
 0.7865,
 0.7903,
 0.7883,
 0.7919,
 0.7952,
 0.7974,
 0.8055,
 0.7957,
 0.8013,
 0.8078,
 0.8064,
 0.8112,
 0.8025,
 0.8113,
 0.8102,
 0.8123,
 0.8144,
 0.818,
 0.8175,
 0.8205,
 0.8179,
 0.8128,
 0.8235,
 0.8209,
 0.8218,
 0.8196,
 0.8227,
 0.8221,
 0.8232,
 0.8265,
 0.8271,
 0.8256,
 0.8263,
 0.8289,
 0.8277,
 0.8257,
 0.8274,
 0.8299,
 0.8295,
 0.8328,
 0.826,
 0.8302,
 0.8318,
 0.8326,
 0.833,
 0.8339,
 0.834,
 0.8344,
 0.8349,
 0.8353,
 0.8336,
 0.8358,
 0.8358,
 0.8372,
 

In [None]:
### Apply pruning
for usr in range(args.num_users):
    mask = apply_l1_prune(mdlz[usr][0], device, args) if args.l1 else apply_prune(mdlz[usr][0], device, args)
    print_prune(model)
    test(args, mdlz[usr][0], device, test_loader)
    retrain(args, model, mask, device, train_loader, test_loader, optimizer)

# Testing ground

In [12]:
from torch.utils.data import DataLoader
import torch.nn.functional as F

confusion_matrix = torch.zeros(10, 10)
mdlz[0][0].eval()
# testing
test_loss = 0
correct = 0

l = len(test_loader)
for idx, (data, target) in enumerate(test_loader):
    data, target = data.to(device), target.to(device)
    log_probs = mdlz[0][0](data)
    # sum up batch loss
    test_loss += F.cross_entropy(log_probs, target, reduction='sum').item()
    # get the index of the max log-probability
    y_pred = log_probs.data.max(1, keepdim=True)[1]
    correct += y_pred.eq(target.data.view_as(y_pred)).long().cpu().sum()

    for t, p in zip(target.data, y_pred.view(-1)):
                confusion_matrix[t.long(), p.long()] += 1

In [10]:
print(confusion_matrix)

tensor([[860.,   3.,  20.,  43.,   7.,   3.,  50.,   0.,  14.,   0.],
        [  2., 962.,   3.,  22.,   4.,   0.,   5.,   0.,   2.,   0.],
        [ 14.,   1., 785.,  13.,  97.,   0.,  84.,   0.,   6.,   0.],
        [ 35.,  10.,  26., 865.,  31.,   0.,  29.,   1.,   2.,   1.],
        [  1.,   0., 124.,  50., 722.,   0.,  96.,   0.,   7.,   0.],
        [  0.,   0.,   0.,   1.,   0., 944.,   0.,  40.,   3.,  12.],
        [240.,   3., 140.,  43.,  87.,   0., 460.,   0.,  27.,   0.],
        [  0.,   0.,   0.,   0.,   0.,  23.,   0., 944.,   0.,  33.],
        [  4.,   0.,   6.,   4.,   6.,   1.,   7.,   5., 966.,   1.],
        [  0.,   0.,   0.,   0.,   0.,  10.,   1.,  49.,   0., 940.]])


In [None]:
gossip

In [10]:
gossip

[0.0932,
 0.295,
 0.4966,
 0.0125,
 0.1432,
 0.301,
 0.0198,
 0.1304,
 0.2857,
 0.4215,
 0.4188,
 0.3846,
 0.3813,
 0.4863,
 0.6131,
 0.5198,
 0.6044,
 0.6679,
 0.65,
 0.7022,
 0.6875,
 0.7274,
 0.4848,
 0.6652,
 0.6929,
 0.7151,
 0.7121,
 0.7002,
 0.7335,
 0.7398,
 0.7255,
 0.7315,
 0.7441,
 0.75,
 0.7431,
 0.7566,
 0.7392,
 0.7572,
 0.7532,
 0.7602,
 0.764,
 0.7622,
 0.7666,
 0.7704,
 0.7613,
 0.7665,
 0.7698,
 0.7753,
 0.7722,
 0.7781,
 0.781,
 0.7833,
 0.7825,
 0.7905,
 0.772,
 0.7873,
 0.7896,
 0.7882,
 0.7839,
 0.7961,
 0.7862,
 0.7978,
 0.7997,
 0.8043,
 0.8007,
 0.8064,
 0.8105,
 0.8019,
 0.8099,
 0.8079,
 0.8136,
 0.8074,
 0.8174,
 0.8161,
 0.8125,
 0.8216,
 0.8183,
 0.8221,
 0.8197,
 0.8231,
 0.8249,
 0.8276,
 0.8272,
 0.8289,
 0.8165,
 0.8251,
 0.8255,
 0.8272,
 0.8248,
 0.8264,
 0.8306,
 0.8303,
 0.829,
 0.8325,
 0.8307,
 0.8334,
 0.8322,
 0.8347,
 0.8333,
 0.8342,
 0.8373,
 0.8361,
 0.8375,
 0.84,
 0.8391,
 0.8419,
 0.8394,
 0.8397,
 0.8299,
 0.8382,
 0.8354,
 0.8392,
 0.8