
AlexNet code adapted from https://github.com/pytorch/vision/blob/master/torchvision/models/alexnet.py

In [None]:

import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import os
import time
import torch.nn as nn
import matplotlib.pyplot as plt

import csv
import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

import math

import numpy as np
import torch
import torch.nn as nn
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
import time
import shutil
import os
import torch.nn.functional as F

# Params

In [2]:
# Root of the ImageNet tree; the dataset cell joins 'train' and 'val' onto it.
# The IMAGENET_DIR environment variable, when set and non-empty, overrides the
# original hard-coded location so the notebook can run on other machines
# without being edited. (An earlier alternative location was '../../../ILSVRC'.)
DATA_DIR = os.environ.get('IMAGENET_DIR') or '/home/actlab/releq/data.imagenet'
# Use the first CUDA device when one is available, otherwise run on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 100  # originally 256
WORKERS = 1       # passed as num_workers to both DataLoaders
PRINT_FREQ = 5    # train/validate print a progress line every PRINT_FREQ batches
EPOCH = 40        # upper bound on the number of epochs in the training loop
LR = 0.001        # base learning rate consumed by adjust_learning_rate


# Dataset

In [3]:
# Sub-folders of the dataset root that hold the training and validation images.
traindir, valdir = (os.path.join(DATA_DIR, split) for split in ('train', 'val'))

# Per-channel normalisation applied once an image has been turned into a tensor.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Training images: random 224-pixel resized crop and random horizontal flip,
# followed by tensor conversion and normalisation.
train_dataset = datasets.ImageFolder(
    traindir,
    transform=transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ]),
)


In [4]:
# Training batches: shuffled, no custom sampler.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    sampler=None,
    num_workers=WORKERS,
    pin_memory=True,
)

# Validation batches: deterministic order, resize to 256 then centre-crop to 224.
val_loader = DataLoader(
    datasets.ImageFolder(
        valdir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ]),
    ),
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=WORKERS,
    pin_memory=True,
)

# Utils

In [5]:
class AverageMeter:
    """Keeps the most recent reading of a metric and its weighted running mean."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Set the latest value, the mean, the weighted sum and the count to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Store ``val`` as the latest reading and fold it into the mean with weight ``n``."""
        self.val = val
        self.sum = self.sum + val * n
        self.count = self.count + n
        self.avg = self.sum / self.count


In [6]:
def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k.

    Args:
        output: score tensor; the top-k search runs along dimension 1, and
            dimension 0 is matched element-wise against ``target``.
        target: tensor of true class indices; its first dimension is taken as
            the batch size.
        topk: iterable of ``k`` values to evaluate.

    Returns:
        A list with one single-element tensor per entry of ``topk``, each
        holding the percentage of samples whose target is among the ``k``
        highest-scoring classes.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # Indices of the maxk best classes per sample, transposed so that
        # row r holds every sample's r-th best guess.
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # `correct` inherits the transposed (non-contiguous) layout of
            # `pred`, so a multi-row slice cannot be flattened with view();
            # reshape() copies when necessary and works for every k.
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

In [7]:
def adjust_learning_rate(optimizer, epoch):
    """Write the step-decayed rate into every parameter group of ``optimizer``.

    The rate is the module-level ``LR`` multiplied by 0.1 once for each
    completed block of 30 epochs.
    """
    completed_steps = epoch // 30
    scheduled_lr = LR * (0.1 ** completed_steps)
    for group in optimizer.param_groups:
        group['lr'] = scheduled_lr

In [8]:
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    """Save ``state`` to ``filename`` and, if ``is_best``, copy it to 'model_best.pth.tar'."""
    torch.save(state, filename)
    if not is_best:
        return
    # Keep a separate copy of the best checkpoint next to the rolling one.
    shutil.copyfile(filename, 'model_best.pth.tar')

# Validate

In [9]:
def validate(val_loader, model, model_original, criterion):
    """Evaluate ``model`` on ``val_loader`` and print top-1 / top-5 accuracy.

    Args:
        val_loader: iterable yielding ``(input, target)`` batches.
        model: network under evaluation; it must also expose the
            ``model_pt1`` and ``intermed`` sub-modules called below.
        model_original: reference network that is run on every batch.
        criterion: loss callable applied to ``(output, target)``.

    Returns:
        ``(top1.avg, mean_snr)`` where ``mean_snr`` is the mean of the
        collected SNR values, or NaN when none were collected.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()
    # NOTE(review): nothing in this function appends to SNR, so the mean is
    # currently always NaN. The extra forward passes at the end of the loop
    # look like the inputs of a removed SNR computation -- confirm and restore.
    SNR = []
    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            input, target = input.to(device), target.to(device)
            # compute output
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(acc1[0], input.size(0))
            top5.update(acc5[0], input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            # Reference output and noisy intermediate activation; computed
            # after the timing so they do not count towards batch_time.
            output_original = model_original(input)
            output = model.model_pt1(input)
            output = model.intermed(output)

            if i % PRINT_FREQ == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                       i, len(val_loader), batch_time=batch_time, loss=losses,
                       top1=top1, top5=top5))

        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
              .format(top1=top1, top5=top5))

    # An empty SNR list used to raise ZeroDivisionError here, which made the
    # function unusable; report NaN instead so the accuracy is still returned.
    mean_snr = sum(SNR) / len(SNR) if SNR else float('nan')
    return top1.avg, mean_snr


# Train

In [10]:
def train(train_loader, model, model_original, criterion, optimizer, epoch):
    """Run one pass over ``train_loader``, stepping ``optimizer`` after every batch.

    A progress line is printed every ``PRINT_FREQ`` batches; on those batches
    the pass also stops early once the latest top-1 accuracy exceeds 47.0.

    Args:
        train_loader: iterable yielding ``(input, target)`` batches.
        model: network being trained; it must also expose the ``model_pt1``
            and ``intermed`` sub-modules called below.
        model_original: reference network that is run on every batch.
        criterion: loss callable applied to ``(output, target)``.
        optimizer: optimizer whose gradients are zeroed and stepped per batch.
        epoch: epoch number, used only in the progress line.
    """
    batch_time, data_time = AverageMeter(), AverageMeter()
    losses, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter()

    # Put the network into training mode before the first batch.
    model.train()

    progress_line = ('Epoch: [{0}][{1}/{2}]\t'
                     'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                     'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                     'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                     'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                     'Acc@5 {top5.val:.3f} ({top5.avg:.3f})')

    tick = time.time()
    for step, (images, labels) in enumerate(train_loader):
        # Time spent waiting for the loader.
        data_time.update(time.time() - tick)
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass and loss.
        logits = model(images)
        loss = criterion(logits, labels)

        # Book-keeping for loss and accuracy.
        n_samples = images.size(0)
        acc1, acc5 = accuracy(logits, labels, topk=(1, 5))
        losses.update(loss.item(), n_samples)
        top1.update(acc1[0], n_samples)
        top5.update(acc5[0], n_samples)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Time for the whole batch, loader wait included.
        batch_time.update(time.time() - tick)
        tick = time.time()

        # Reference output and noisy intermediate activation; the results are
        # not used further inside this function.
        output_original = model_original(images)
        output_syn = model.intermed(model.model_pt1(images))

        if step % PRINT_FREQ != 0:
            continue

        print(progress_line.format(
            epoch, step, len(train_loader), batch_time=batch_time,
            data_time=data_time, loss=losses, top1=top1, top5=top5))
        # The early-exit check only happens on batches that are printed.
        if top1.val > 47.0:
            break


# Extracting Conv layer numbers


In [11]:
# Pretrained torchvision AlexNet, placed on the selected device.
model_original = models.alexnet(pretrained=True).to(device)
# Cross-entropy loss, also placed on the selected device.
criterion = nn.CrossEntropyLoss().to(device)
# Print the layer listing so the indices inside `features` can be read off.
print(model_original)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace)
    (3): Dropout(p

In [12]:
# Indices into model_original.features of every Conv2d except the one at
# index 0, followed by the total number of feature layers.
# `!= 0` replaces the original `is not 0`, which compared object identity
# against an int literal (a SyntaxWarning on recent Python versions).
conv_layers = [
    idx
    for idx, layer in enumerate(model_original.features)
    if isinstance(layer, nn.Conv2d) and idx != 0
]
conv_layers.append(len(model_original.features))

# Indices into model_original.classifier of every Linear layer.
fc_layers = [
    idx
    for idx, layer in enumerate(model_original.classifier)
    if isinstance(layer, nn.Linear)
]
        


# extract activation size

In [13]:
# For each entry i of conv_layers, the shape (batch dimension dropped) of the
# output of features[0:i], measured on the first validation batch only.
conv_shapes = []
with torch.no_grad():  # only shapes are needed, so skip autograd bookkeeping
    for data, target in val_loader:
        # Move the batch to the device once rather than once per cut point.
        data, target = data.to(device), target.to(device)
        for i in conv_layers:
            newmodel_original = torch.nn.Sequential(*(list(model_original.features)[0:i]))
            output_original = newmodel_original(data)
            conv_shapes.append(output_original.shape[1:])
        break  # a single batch is enough to read off the shapes
    



# Build new model



In [14]:
class NoisyActivation(nn.Module):
    """Adds a learnable noise tensor to the activation passed through it."""

    def __init__(self, activation_size):
        super().__init__()
        # Initial noise: a Laplace(loc=0.6, scale=1.2) sample of shape
        # `activation_size`, registered as a parameter so it can be trained.
        laplace = torch.distributions.Laplace(loc=0.6, scale=1.2)
        initial_noise = laplace.rsample(activation_size)
        self.noise = nn.Parameter(initial_noise)

    def forward(self, input):
        """Return ``input`` with the noise parameter added."""
        noisy = input + self.noise
        return noisy

In [15]:
class alexnet_syn(nn.Module):
    """Feature stack split at one cut point with a NoisyActivation in between.

    The feature layers before ``conv_layers[index]`` form ``model_pt1``, the
    remaining ones ``model_pt2``, and ``model_classifier`` becomes
    ``model_pt3``. The parameters of these three parts are frozen; the
    ``intermed`` module is left untouched.
    """

    def __init__(self, model_features, model_classifier, conv_layers, conv_shapes, index):
        super().__init__()

        cut = conv_layers[index]
        feature_layers = list(model_features)

        self.model_pt1 = torch.nn.Sequential(*feature_layers[:cut])
        self.intermed = NoisyActivation(conv_shapes[index])
        self.model_pt2 = torch.nn.Sequential(*feature_layers[cut:])
        self.model_pt3 = model_classifier

        # Freeze everything that came from the wrapped network.
        for frozen_part in (self.model_pt1, self.model_pt2, self.model_pt3):
            for param in frozen_part.parameters():
                param.requires_grad = False

    def forward(self, img):
        """Run ``img`` through part 1, the noise layer, part 2 and the classifier."""
        hidden = self.intermed(self.model_pt1(img))
        hidden = self.model_pt2(hidden)
        # Flatten everything but the batch dimension before the classifier.
        flat = hidden.view(hidden.size(0), -1)
        return self.model_pt3(flat)
    



# Test new model

In [16]:
# Load a fresh pretrained AlexNet onto the selected device.
model_original = models.alexnet(pretrained=True).to(device)

# Reference sub-networks: the feature layers before cut point conv_layers[4],
# and a copy of the classifier layer list.
model_syn_original = torch.nn.Sequential(*list(model_original.features)[:conv_layers[4]])
model_syn_original_rest = torch.nn.Sequential(*model_original.classifier)

# Both references are used for inference only.
model_syn_original.eval()
model_syn_original_rest.eval()


Sequential(
  (0): Dropout(p=0.5)
  (1): Linear(in_features=9216, out_features=4096, bias=True)
  (2): ReLU(inplace)
  (3): Dropout(p=0.5)
  (4): Linear(in_features=4096, out_features=4096, bias=True)
  (5): ReLU(inplace)
  (6): Linear(in_features=4096, out_features=1000, bias=True)
)

In [18]:
# Noisy network cut at conv_layers[4], placed on the selected device.
model_syn = alexnet_syn(
    model_original.features,
    model_original.classifier,
    conv_layers,
    conv_shapes,
    4,
).to(device)
criterion = nn.CrossEntropyLoss().to(device)
# Snapshot of the initial noise with a new leading axis, so that later
# snapshots can be concatenated along axis 0.
weights_noise = model_syn.intermed.noise.detach().cpu().numpy()[np.newaxis, ...]

In [19]:
# Adam over the parameters that still require gradients.
# NOTE(review): the lr given here is overwritten by adjust_learning_rate at
# the start of every epoch, and the weight decay is negative -- presumably
# deliberate, to push the noise to grow; confirm, and check that the
# installed PyTorch accepts a negative weight_decay.
optimizer = optim.Adam(
    (p for p in model_syn.parameters() if p.requires_grad),
    lr=1e-4,
    weight_decay=-1e-5,
)


In [20]:
# Train for up to EPOCH epochs; stop as soon as the validation top-1 accuracy
# exceeds 54.5, after logging the result and saving the learned noise.
for epoch in range(EPOCH):
    adjust_learning_rate(optimizer, epoch)

    # train for one epoch
    train(train_loader, model_syn, model_syn_original, criterion, optimizer, epoch)

    # evaluate on validation set
    acc1, SNR = validate(val_loader, model_syn, model_syn_original, criterion)

    if acc1 > 54.5:
        # Append one row per successful run. The original wrote the undefined
        # name `acc` (NameError); the accuracy is `acc1`, converted to a plain
        # float so the CSV holds a number rather than a tensor repr.
        # newline='' is what the csv module expects for files it writes.
        with open('activation-4-alexnet-test.csv', 'a', newline='') as fd:
            writer = csv.writer(fd)
            writer.writerow([SNR, float(acc1), epoch])
        # Stack the trained noise under the earlier snapshot(s) and save.
        trained_noise = np.expand_dims(model_syn.intermed.noise.cpu().detach().numpy(), axis=0)
        weights_noise = np.concatenate((weights_noise, trained_noise), axis=0)
        np.save("activations-4-noise-samples-alexnet-test", weights_noise)
        break



Epoch: [0][0/12812]	Time 1.296 (1.296)	Data 1.274 (1.274)	Loss 2.8094 (2.8094)	Acc@1 40.000 (40.000)	Acc@5 66.000 (66.000)
Epoch: [0][5/12812]	Time 1.252 (1.288)	Data 1.234 (1.269)	Loss 2.5751 (3.0683)	Acc@1 44.000 (36.833)	Acc@5 66.000 (60.000)
Epoch: [0][10/12812]	Time 1.577 (1.335)	Data 1.559 (1.316)	Loss 3.1819 (3.0328)	Acc@1 36.000 (37.818)	Acc@5 61.000 (62.455)
Epoch: [0][15/12812]	Time 1.188 (1.296)	Data 1.169 (1.277)	Loss 2.7690 (2.9808)	Acc@1 44.000 (38.812)	Acc@5 66.000 (63.000)
Epoch: [0][20/12812]	Time 1.138 (1.291)	Data 1.120 (1.272)	Loss 2.8336 (2.9517)	Acc@1 42.000 (38.810)	Acc@5 70.000 (64.048)
Epoch: [0][25/12812]	Time 1.048 (1.281)	Data 1.029 (1.262)	Loss 3.3255 (2.9539)	Acc@1 31.000 (38.654)	Acc@5 55.000 (64.192)
Epoch: [0][30/12812]	Time 1.107 (1.262)	Data 1.089 (1.243)	Loss 2.7699 (2.9367)	Acc@1 37.000 (38.645)	Acc@5 64.000 (64.097)
Epoch: [0][35/12812]	Time 1.110 (1.266)	Data 1.091 (1.247)	Loss 2.6138 (2.9279)	Acc@1 46.000 (38.750)	Acc@5 69.000 (64.111)
Epoch: [0]

KeyboardInterrupt: 