In [1]:
import os
import sys
sys.path.insert(0, '../src_adv/')

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
mnist_root = '../data/'
# Training dataset: reshuffled every epoch; download=True fetches MNIST into
# mnist_root when it is not already there.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST(root=mnist_root, train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                   ])), batch_size=64, shuffle=True, num_workers=8)
# Test dataset: evaluated in a fixed order (shuffle=False). Some norm layers in
# this notebook normalize with statistics of the current batch even at eval
# time, so a shuffled eval order would make the validation loss/accuracy (and
# therefore the checkpoint selection) vary from run to run.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST(root=mnist_root, train=False, transform=transforms.Compose([
        transforms.ToTensor(),
    ])), batch_size=64, shuffle=False, num_workers=4)

# One-image-at-a-time loader over the test split (batch_size=1, shuffled).
# NOTE(review): presumably meant for per-sample FGSM attacks; it is not used
# in the visible part of the notebook.
fgsm_loader = torch.utils.data.DataLoader(
    datasets.MNIST(root=mnist_root, train=False, transform=transforms.Compose([
        transforms.ToTensor(),
    ])), batch_size=1, shuffle=True, num_workers=4)

# Number of batches per epoch for the train / test loaders.
print(len(train_loader), len(test_loader))

from utils import train, test, train_pgd, train_pgd_plus
# from utils_fix import train, test, train_pgd
from functools import partial
# Bind the loaders and attack hyper-parameters once so later cells only pass
# the model / optimizer / criterion.
train_mnist_adv = partial(train_pgd, train_loader=train_loader, eps=0.1, niter=10, alpha=0.004)
test_mnist = partial(test, data_loader=test_loader)

938 157


In [3]:
def Pipeline(sdir, model, optimizer, name='', nepoch=50, bep=1, patience=15,
             tloss=0.1, tacc=99, crit=nn.CrossEntropyLoss()):
    """Train `model` for `nepoch` epochs, checkpointing whenever it improves.

    Each epoch runs the module-level `train_mnist_adv` step, evaluates with the
    module-level `test_mnist`, and advances a StepLR schedule that decays the
    learning rate every `patience` epochs (15 by default).

    Args:
        sdir: directory for checkpoints; created if it does not exist.
        model: network to train; its `state_dict()` is what gets saved.
        optimizer: optimizer driving `model`; also wrapped by the LR scheduler.
        name: prefix used in the checkpoint file names.
        nepoch: number of epochs to run.
        bep: number of the first epoch (epochs run from `bep` to
            `bep + nepoch - 1`).
        patience: `step_size` of the StepLR scheduler.
        tloss: initial loss threshold; a checkpoint is saved when the
            validation loss drops below the best loss seen so far.
        tacc: initial accuracy threshold; a checkpoint is saved when the
            validation accuracy exceeds the best accuracy seen so far.
        crit: loss criterion forwarded to the train and test helpers.

    Returns:
        Tuple `(best_loss, best_acc)`: the lowest validation loss and highest
        validation accuracy reached (never worse than the initial thresholds).
    """
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(sdir, exist_ok=True)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=patience)
    for epoch in range(bep, nepoch + bep):
        train_mnist_adv(model, epoch=epoch, optimizer=optimizer, crit=crit)
        vloss, vacc = test_mnist(model, test_crit=crit)
        scheduler.step()
        if tloss > vloss or vacc > tacc:    # save best model
            torch.save(model.state_dict(), '%s/%s_ep-%02d_vloss-%.4f_vacc-%.2f.pt' % (sdir, name, epoch, vloss, vacc))
            # Track each metric's best value independently: an epoch that only
            # improves one metric must not overwrite the other with a worse
            # value, otherwise later, weaker epochs would be saved as "best".
            tloss = min(tloss, vloss)
            tacc = max(tacc, vacc)
    return tloss, tacc

In [4]:
try:
    from torch._jit_internal import weak_module, weak_script_method, List
except ImportError:
    # These are private TorchScript helpers that newer PyTorch releases no
    # longer provide. Fall back to no-op decorators so the cell still runs;
    # the layer below behaves the same with or without them in eager mode.
    from typing import List

    def weak_module(cls):
        return cls

    def weak_script_method(fn):
        return fn


@weak_module
class condNorm(nn.Module):
    """Parameter-free normalization with a lower bound on the divisor.

    For every sample and channel, the input is centered over its spatial
    dimensions and divided by max(rms, minl), where rms is the root mean
    square of the centered values (with `eps` added under the square root).
    Feature maps whose rms is below `minl` are therefore only centered, not
    amplified.

    Args:
        planes: number of channels; stored but not used by `forward`.
        std: scale applied to the normalized output.
        minl: lower bound on the divisor.
        eps: constant added to the mean square before the square root.
    """

    def __init__(self, planes=64, std=1.0, minl=1.0, eps=1e-5):
        super().__init__()
        self.planes = planes
        self.std = std
        self.minl = minl
        self.eps = eps

    def forward(self, x):
        """Normalize `x` (N x C x H x W) over dims 2 and 3; same shape out."""
        x = x - x.mean([2, 3], keepdim=True)
        norm = torch.sqrt(torch.mean(x ** 2, dim=[2, 3], keepdim=True) + self.eps)
        # Element-wise max against minl keeps the divisor from dropping below minl.
        x = self.std * x / torch.max(norm, self.minl * torch.ones_like(norm))
        return x
    
from torch.autograd import Variable
import torch.optim as optim
%matplotlib inline

def convert_np(inp, group=8):
    """Tile a batch of single-channel images into one 2-D numpy grid.

    Images are stacked vertically in columns of `group` images; the columns
    are then placed side by side. Values are clipped to [0, 1].

    Args:
        inp: tensor that squeezes to (n, h, w), e.g. (n, 1, h, w). A single
            image that squeezes to (h, w) is treated as a batch of one.
        group: number of images per column.

    Returns:
        numpy array of shape (h * group, w * ceil(n / group)). If `n` is not
        a multiple of `group`, the last column is padded with zeros.
    """
    imgs = inp.detach().cpu().squeeze()
    if imgs.dim() == 2:
        # squeeze() also removed the batch axis of a one-image batch; restore it.
        imgs = imgs.unsqueeze(0)
    n, h, w = imgs.shape
    columns = []
    for start in range(0, n, group):
        chunk = imgs[start:start + group]
        missing = group - chunk.shape[0]
        if missing:
            # Short final chunk: pad with blank images so every column has
            # the same height and can be concatenated.
            chunk = torch.cat([chunk, chunk.new_zeros(missing, h, w)], 0)
        columns.append(chunk.reshape(h * group, w))
    grid = torch.cat(columns, -1)
    return np.clip(grid.numpy(), 0, 1)


def show_batch(inp1, inp2, dpi=130):
    """Show two image batches side by side as grayscale grids.

    Both batches are first tiled into 2-D arrays with `convert_np`; the first
    is drawn in the left axes and the second in the right axes of a 1x2 figure
    created at the given `dpi`.
    """
    grids = [convert_np(batch) for batch in (inp1, inp2)]
    fig, axes = plt.subplots(1, 2, dpi=dpi)
    for axis, grid in zip(axes, grids):
        axis.imshow(grid, cmap='gray')
    plt.show()
    

def advs_pgd(model, X, y, epsilon, niters=100, alpha=0.01):
    """Craft L-infinity PGD adversarial examples for `model`.

    Starting from `X`, takes `niters` signed-gradient ascent steps of size
    `alpha` on the cross-entropy loss and, after every step, clamps the
    perturbation to [-epsilon, epsilon] around `X`. The result is not clamped
    to any pixel range.

    Prints the error rate on the clean and on the adversarial batch.

    Args:
        model: classifier mapping a batch to logits.
        X: clean input batch (left unmodified).
        y: integer class labels for `X`.
        epsilon: maximum absolute perturbation per element.
        niters: number of PGD steps.
        alpha: step size of each signed-gradient step.

    Returns:
        The adversarial batch, a leaf tensor with `requires_grad=True`
        (kept for compatibility with the previous return value).
    """
    crit = nn.CrossEntropyLoss()
    X_clean = X.detach()

    with torch.no_grad():
        err = (model(X_clean).max(1)[1] != y).float().sum() / X.size(0)

    X_pgd = X_clean.clone().requires_grad_(True)
    for _ in range(niters):
        loss = crit(model(X_pgd), y)
        # autograd.grad returns the input gradient without accumulating into
        # the model parameters' .grad, so calling this between training steps
        # does not pollute the next optimizer update.
        grad, = torch.autograd.grad(loss, X_pgd)
        with torch.no_grad():
            stepped = X_pgd + alpha * grad.sign()
            # adjust to be within [-epsilon, epsilon]
            eta = torch.clamp(stepped - X_clean, -epsilon, epsilon)
        X_pgd = (X_clean + eta).requires_grad_(True)

    with torch.no_grad():
        err_pgd = (model(X_pgd).max(1)[1] != y).float().sum() / X.size(0)
    print("err: %.2f%% %.2f%%" % (err*100, err_pgd*100))
    return X_pgd

# Test Time

## Pure conv (no normalization layers)

In [5]:
from sim_model.res_free import ResNet18

model = ResNet18().to(device)
print('Number of Parameters: ', sum([p.numel() for p in model.parameters()]))
print('Training Parameters: ', sum([p.numel() for p in model.parameters() if p.requires_grad]))
model

Number of Parameters:  11163210
Training Parameters:  11163210


In [None]:
sdir = './runs/tmp_time_test'
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
tloss, tacc = Pipeline(sdir, model, optimizer, 'res18_free', 45)

                                                   
Test set: Avg loss: 0.0528 |  Accuracy: 98.46 | 

                                                   
Test set: Avg loss: 0.0503 |  Accuracy: 98.75 | 

step: 0026/0938 loss: 0.0746 |

In [None]:
sdir = './runs/tmp_time_test'
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
tloss, tacc = Pipeline(sdir, model, optimizer, 'res18_free', 45)

                                                   
Test set: Avg loss: 0.0449 |  Accuracy: 98.58 | 

                                                   
Test set: Avg loss: 0.0311 |  Accuracy: 99.06 | 

step: 0031/0938 loss: 0.0226 |

## + BatchNorm (bn)

In [5]:
from sim_model.res_new import ResNet18

model = ResNet18().to(device)  # BN default
print('Number of Parameters: ', sum([p.numel() for p in model.parameters()]))
print('Training Parameters: ', sum([p.numel() for p in model.parameters() if p.requires_grad]))
model

Number of Parameters:  11172682
Training Parameters:  11172682


ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
  )
  (layer2): Sequ

In [None]:
sdir = './runs/tmp_time_test'
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
tloss, tacc = Pipeline(sdir, model, optimizer, 'res18_bn', 45)

                                                   
Test set: Avg loss: 0.0525 |  Accuracy: 98.32 | 

                                                   
Test set: Avg loss: 0.0370 |  Accuracy: 98.86 | 

                                                   
Test set: Avg loss: 0.0430 |  Accuracy: 98.77 | 

                                                   
Test set: Avg loss: 0.0480 |  Accuracy: 98.56 | 

                                                   
Test set: Avg loss: 0.0245 |  Accuracy: 99.27 | 

step: 0160/0938 loss: 0.0846 |

In [None]:
sdir = './runs/tmp_time_test'
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
tloss, tacc = Pipeline(sdir, model, optimizer, 'res18_bn', 45)

                                                   
Test set: Avg loss: 0.0525 |  Accuracy: 98.32 | 

                                                   
Test set: Avg loss: 0.0370 |  Accuracy: 98.86 | 

                                                   
Test set: Avg loss: 0.0430 |  Accuracy: 98.77 | 

                                                   
Test set: Avg loss: 0.0480 |  Accuracy: 98.56 | 

                                                   
Test set: Avg loss: 0.0245 |  Accuracy: 99.27 | 

step: 0160/0938 loss: 0.0846 |

## + condNorm (cn)

In [5]:
from sim_model.res_new import ResNet18

# @weak_module
class condNorm(nn.Module):
    """Batch-statistics normalization with a lower bound on the divisor.

    Each channel is centered using its mean over the batch and spatial
    dimensions, then divided by max(rms, minl) and scaled by `std`. The layer
    has no learnable parameters; `planes` is stored but unused in `forward`.
    """

    def __init__(self, planes=64, std=1.0, minl=1.0, eps=1e-5):
        super().__init__()
        self.planes, self.std = planes, std
        self.minl, self.eps = minl, eps

    def forward(self, x):
        # x: N x C x H x W,  4D Tensor
        reduce_dims = [0, 2, 3]
        centered = x - x.mean(reduce_dims, keepdim=True)
        mean_sq = centered.pow(2).mean(dim=reduce_dims, keepdim=True)
        rms = (mean_sq + self.eps).sqrt()
        # Divisor never drops below minl, so weak activations are not amplified.
        floor = torch.full_like(rms, self.minl)
        return self.std * centered / torch.max(rms, floor)
    
# Build ResNet18 with the condNorm class defined above as its norm layer and
# report the total / trainable parameter counts.
model = ResNet18(normlayer=condNorm).to(device)  # condNorm overrides the BN default
print('Number of Parameters: ', sum([p.numel() for p in model.parameters()]))
print('Training Parameters: ', sum([p.numel() for p in model.parameters() if p.requires_grad]))
model

Number of Parameters:  11165002
Training Parameters:  11165002


ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): condNorm()
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): condNorm()
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): condNorm()
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): condNorm()
      (shortcut): Sequential()
    )
  )
  (layer2): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): condNorm()
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): condNorm()
      (sho

In [None]:
sdir = './runs/tmp_time_test'
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
tloss, tacc = Pipeline(sdir, model, optimizer, 'res18_bcn', 45)

                                                   
Test set: Avg loss: 0.0607 |  Accuracy: 98.15 | 

                                                   
Test set: Avg loss: 0.0364 |  Accuracy: 98.99 | 

                                                   
Test set: Avg loss: 0.0377 |  Accuracy: 98.73 | 

                                                   
Test set: Avg loss: 0.0402 |  Accuracy: 98.73 | 

step: 0058/0938 loss: 0.0139 |

# fix inplace +bcn(cuda update) 

In [5]:
from sim_model.res_new_fix_inplace import ResNet18
from bcn import TCondNorm

# Build ResNet18 with TCondNorm (imported from `bcn`) as its norm layer and
# report the total / trainable parameter counts.
model = ResNet18(normlayer=TCondNorm).to(device)  # TCondNorm overrides the BN default
print('Number of Parameters: ', sum([p.numel() for p in model.parameters()]))
print('Training Parameters: ', sum([p.numel() for p in model.parameters() if p.requires_grad]))
model

Number of Parameters:  11172682
Training Parameters:  11172682


ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): TCondNorm(planes=64, minl=1.0, epsilon=1e-05)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): TCondNorm(planes=64, minl=1.0, epsilon=1e-05)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): TCondNorm(planes=64, minl=1.0, epsilon=1e-05)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): TCondNorm(planes=64, minl=1.0, epsilon=1e-05)
      (shortcut): Sequential()
    )
  )
  (layer2): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): 

In [6]:
sdir = './runs/tmp_time_test'
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
tloss, tacc = Pipeline(sdir, model, optimizer, 'res18_bcn_ext1', 4)

                                                   
Test set: Avg loss: 0.0492 |  Accuracy: 98.47 | 

                                                   
Test set: Avg loss: 0.0415 |  Accuracy: 98.66 | 

                                                   
Test set: Avg loss: 0.0377 |  Accuracy: 98.91 | 

                                                   
Test set: Avg loss: 0.0312 |  Accuracy: 99.13 | 



In [6]:
# before: 253-256
sdir = './runs/tmp_time_test'
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
tloss, tacc = Pipeline(sdir, model, optimizer, 'res18_bcn_ext1', 20)

                                                   
Test set: Avg loss: 0.0586 |  Accuracy: 98.23 | 

                                                   
Test set: Avg loss: 0.0464 |  Accuracy: 98.59 | 

                                                   
Test set: Avg loss: 0.0622 |  Accuracy: 98.32 | 

                                                   
Test set: Avg loss: 0.0327 |  Accuracy: 99.00 | 

                                                   
Test set: Avg loss: 0.0475 |  Accuracy: 98.52 | 

                                                   
Test set: Avg loss: 0.0261 |  Accuracy: 99.31 | 

                                                   
Test set: Avg loss: 0.0270 |  Accuracy: 99.19 | 

                                                   
Test set: Avg loss: 0.0331 |  Accuracy: 99.07 | 

                                                   
Test set: Avg loss: 0.0255 |  Accuracy: 99.16 | 

                                                   
Test set: Avg loss: 0.0214 |  

# More layers (ResNet34)

In [5]:
# resnet34
from sim_model.res_free import ResNet34
from bcn import TCondNorm

model = ResNet34().to(device)
print('Number of Parameters: ', sum([p.numel() for p in model.parameters()]))
print('Training Parameters: ', sum([p.numel() for p in model.parameters() if p.requires_grad]))
model

Number of Parameters:  21263946
Training Parameters:  21263946


ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (shortcut): Sequential()
    )
    (2): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (shortcut): Sequential()
    )
  )
  (layer2): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
     

In [None]:
sdir = './runs/tmp_time_test'
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
tloss, tacc = Pipeline(sdir, model, optimizer, 'res34_pure', 45)

                                                   
Test set: Avg loss: 0.0564 |  Accuracy: 98.56 | 

                                                   
Test set: Avg loss: 0.0514 |  Accuracy: 98.72 | 

step: 0010/0938 loss: 0.0206 |

In [None]:
# bn
from sim_model.res_new_fix_inplace import ResNet34
from bcn import TCondNorm

model = ResNet34().to(device)
print('Number of Parameters: ', sum([p.numel() for p in model.parameters()]))
print(model)
sdir = './runs/tmp_time_test'
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
tloss, tacc = Pipeline(sdir, model, optimizer, 'res34_bn', 4)

Number of Parameters:  21280842
ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequent

                                                   
Test set: Avg loss: 0.0577 |  Accuracy: 98.25 | 

step: 0107/0938 loss: 0.0161 |

In [5]:
# bcn
from sim_model.res_new_fix_inplace import ResNet34
from bcn import TCondNorm

model = ResNet34(normlayer=TCondNorm).to(device)
print('Number of Parameters: ', sum([p.numel() for p in model.parameters()]))
print(model)
sdir = './runs/tmp_time_test'
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
tloss, tacc = Pipeline(sdir, model, optimizer, 'res34_bcn_ext1', 20)

Number of Parameters:  21280842
ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): TCondNorm(planes=64, minl=1.0, epsilon=1e-05)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): TCondNorm(planes=64, minl=1.0, epsilon=1e-05)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): TCondNorm(planes=64, minl=1.0, epsilon=1e-05)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): TCondNorm(planes=64, minl=1.0, epsilon=1e-05)
      (shortcut): Sequential()
    )
    (2): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1

                                                   
Test set: Avg loss: 0.0336 |  Accuracy: 98.96 | 

                                                   
Test set: Avg loss: 0.0267 |  Accuracy: 99.15 | 

                                                   
Test set: Avg loss: 0.0227 |  Accuracy: 99.25 | 

                                                   
Test set: Avg loss: 0.0208 |  Accuracy: 99.40 | 

                                                   
Test set: Avg loss: 0.0241 |  Accuracy: 99.30 | 

                                                   
Test set: Avg loss: 0.0199 |  Accuracy: 99.39 | 

                                                   
Test set: Avg loss: 0.0258 |  Accuracy: 99.17 | 

                                                   
Test set: Avg loss: 0.0204 |  Accuracy: 99.35 | 

                                                   
Test set: Avg loss: 0.0259 |  Accuracy: 99.31 | 

                                                   
Test set: Avg loss: 0.0158 |  

In [5]:
# bcn torch
from sim_model.res_new_fix_inplace import ResNet34

# @weak_module
class condNorm(nn.Module):
    """Batch-statistics normalization with a lower bound on the divisor.

    Each channel is centered using its mean over the batch and spatial
    dimensions and divided by max(rms, minl). There is no output scale and no
    learnable parameter; `planes` is stored but unused in `forward`.
    """

    def __init__(self, planes=64, minl=1.0, eps=1e-5):
        super().__init__()
        self.planes, self.minl, self.eps = planes, minl, eps

    def forward(self, x):
        # x: N x C x H x W,  4D Tensor
        reduce_dims = [0, 2, 3]
        centered = x - x.mean(reduce_dims, keepdim=True)
        mean_sq = centered.pow(2).mean(dim=reduce_dims, keepdim=True)
        rms = (mean_sq + self.eps).sqrt()
        # Divisor never drops below minl, so weak activations are not amplified.
        floor = torch.full_like(rms, self.minl)
        return centered / torch.max(rms, floor)
    
model = ResNet34(normlayer=condNorm).to(device)
print('Number of Parameters: ', sum([p.numel() for p in model.parameters()]))
print(model)
sdir = './runs/tmp_time_test'
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
tloss, tacc = Pipeline(sdir, model, optimizer, 'res34_bcn_th', 4)

Number of Parameters:  21265738
ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): condNorm()
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): condNorm()
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): condNorm()
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): condNorm()
      (shortcut): Sequential()
    )
    (2): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): condNorm()
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): condNorm()
      (sh

# Remaining experiments

## res18 cn2 alpha
cn1 remove maxpool in backbone  
replace all bn by cn, eg:  
backbone bn, conv-~~cn~~-relu, - BasicBlock(+skip cn)  
minl = 1.0  

In [5]:
from sim_model.res_direct_alpha import resnet18

model = resnet18(num_classes=10, norm_layer=condNorm, nchannel=1).to(device)
print('Number of Parameters: ', sum([p.numel() for p in model.parameters()]))
print('Training Parameters: ', sum([p.numel() for p in model.parameters() if p.requires_grad]))

Number of Parameters:  11165770
Training Parameters:  11165770


In [6]:
sdir = './runs/res18_cond2_alpha'
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
tloss, tacc = Pipeline(sdir, model, optimizer, 'res18', 45)

                                                   
Test set: Avg loss: 0.0749 |  Accuracy: 98.08 | 

                                                   
Test set: Avg loss: 0.0437 |  Accuracy: 98.83 | 

                                                   
Test set: Avg loss: 0.0299 |  Accuracy: 99.03 | 

                                                   
Test set: Avg loss: 0.0678 |  Accuracy: 98.18 | 

                                                   
Test set: Avg loss: 0.1218 |  Accuracy: 96.52 | 

                                                   
Test set: Avg loss: 0.0317 |  Accuracy: 99.13 | 

                                                   
Test set: Avg loss: 0.0537 |  Accuracy: 98.69 | 

                                                   
Test set: Avg loss: 0.0291 |  Accuracy: 99.19 | 

                                                   
Test set: Avg loss: 0.0261 |  Accuracy: 99.29 | 

                                                   
Test set: Avg loss: 0.0274 |  

                                                   
Test set: Avg loss: 0.0169 |  Accuracy: 99.57 | 

                                                   
Test set: Avg loss: 0.0178 |  Accuracy: 99.57 | 

                                                   
Test set: Avg loss: 0.0171 |  Accuracy: 99.56 | 

                                                   
Test set: Avg loss: 0.0178 |  Accuracy: 99.56 | 

                                                   
Test set: Avg loss: 0.0183 |  Accuracy: 99.58 | 

                                                   
Test set: Avg loss: 0.0183 |  Accuracy: 99.58 | 

                                                   
Test set: Avg loss: 0.0189 |  Accuracy: 99.57 | 

                                                   
Test set: Avg loss: 0.0196 |  Accuracy: 99.57 | 

                                                   
Test set: Avg loss: 0.0188 |  Accuracy: 99.57 | 

                                                   
Test set: Avg loss: 0.0191 |  

## res18 cn5 (new)

In [20]:
from sim_model.res_new import ResNet18

model = ResNet18(normlayer=condNorm).to(device)
print('Number of Parameters: ', sum([p.numel() for p in model.parameters()]))
print('Training Parameters: ', sum([p.numel() for p in model.parameters() if p.requires_grad]))
model

Number of Parameters:  11165002
Training Parameters:  11165002


ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): condNorm()
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): condNorm()
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): condNorm()
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): condNorm()
      (shortcut): Sequential()
    )
  )
  (layer2): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): condNorm()
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): condNorm()
      (sho

In [21]:
sdir = './runs/res18_cond5'
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
tloss, tacc = Pipeline(sdir, model, optimizer, 'res18', 45)

                                                   
Test set: Avg loss: 0.0482 |  Accuracy: 98.55 | 

                                                   
Test set: Avg loss: 0.0637 |  Accuracy: 98.10 | 

                                                   
Test set: Avg loss: 0.0361 |  Accuracy: 98.85 | 

                                                   
Test set: Avg loss: 0.0320 |  Accuracy: 98.99 | 

                                                   
Test set: Avg loss: 0.0242 |  Accuracy: 99.21 | 

                                                   
Test set: Avg loss: 0.0362 |  Accuracy: 98.99 | 

                                                   
Test set: Avg loss: 0.0421 |  Accuracy: 98.65 | 

                                                   
Test set: Avg loss: 0.0264 |  Accuracy: 99.14 | 

                                                   
Test set: Avg loss: 0.0242 |  Accuracy: 99.28 | 

                                                   
Test set: Avg loss: 0.0240 |  

KeyboardInterrupt: 