In [1]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

import math
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms

from pruning.layers import MaskedLinear, MaskedConv2d 
from pruning.methods import filter_prune
from pruning.utils import to_var, prune_rate

import numpy as np

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
class polynom_act(nn.Module):

    def __init__(self, alpha=None, beta=None, c=None):
        super(polynom_act, self).__init__()
        
        #self.alpha = nn.Parameter(torch.randn(1), requires_grad=True)
        #self.beta = nn.Parameter(torch.randn(1), requires_grad=True)
        #self.c = nn.Parameter(torch.randn(1), requires_grad=True)
        
        self.alpha = nn.Parameter(torch.FloatTensor([0]), requires_grad=True)
        self.beta = nn.Parameter(torch.FloatTensor([0]), requires_grad=True)
        self.c = nn.Parameter(torch.FloatTensor([0]), requires_grad=True)

    def forward(self, x):
        return (self.alpha * (x ** 2) + self.beta * x + self.c)

In [42]:
class VGG5(nn.Module):
    def __init__(self,kernel_size=3,dropout=0.2):
        super(VGG5, self).__init__()
        
        self.kernel_size    = kernel_size
        self.dropout        = dropout
        
        activation=nn.ReLU(inplace=True)
        #activation=polynom_act()

        self.conv1 = MaskedConv2d(3, 64, kernel_size=self.kernel_size, padding=(self.kernel_size-1)//2, stride=1, bias=False)
        #nn.init.xavier_uniform(self.conv1.weight)
        
        self.relu1 = activation
        self.dropout1=nn.Dropout(self.dropout)
        self.avgpool1=nn.AvgPool2d(kernel_size=2,stride=2)

        self.conv2 = MaskedConv2d(64, 128, kernel_size=self.kernel_size,padding=(self.kernel_size-1)//2, stride=1, groups=8, bias=False)
        #nn.init.xavier_uniform(self.conv2.weight)

        self.relu2= activation
        self.dropout2=nn.Dropout(self.dropout)
        
        #self.avgpool2=nn.AvgPool2d(kernel_size=2,stride=2)

        self.conv3 = MaskedConv2d(128, 128, kernel_size=self.kernel_size, padding=(self.kernel_size-1)//2, stride=1,groups=8, bias=False)
        #nn.init.xavier_uniform(self.conv3.weight)
        
        self.relu3= activation
        self.dropout3=nn.Dropout(self.dropout)
        
        self.avgpool3=nn.AvgPool2d(kernel_size=2, stride=2)
        
        self.linear1 = nn.Linear(128*8*8,4096,bias=False)
        #self.linear1 = nn.Linear(256,128,bias=False)
        self.relu4= activation
        self.dropout4=nn.Dropout(0.5)
        
        self.linear2 = nn.Linear(4096,4096,bias=False)
        #self.linear2 = nn.Linear(128,128,bias=False)
        self.relu5= activation
        self.dropout5=nn.Dropout(0.5)
        
        self.linear3 = nn.Linear(4096,10,bias=False)
        #self.linear3 = nn.Linear(128,10,bias=False)
        
        for m in self.modules():
            
            if isinstance(m, MaskedConv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
                    
            elif isinstance(m, nn.Linear):
                n = m.weight.size(1)
                m.weight.data.normal_(0, 0.01)
                if m.bias is not None:
                    m.bias.data.zero_()
        
    def forward(self, x):
        out = self.dropout1(self.relu1(self.conv1(x)))
        out = self.avgpool1(out)
        
        out = self.dropout2(self.relu2(self.conv2(out)))
        #out = self.avgpool2(out)
        
        out = self.dropout3(self.relu2(self.conv3(out)))
        out = self.avgpool3(out)

        out = out.view(out.size(0), -1)
        
        out = self.dropout4(self.relu4(self.linear1(out)))
        out = self.dropout5(self.relu5(self.linear2(out)))
        out = self.linear3(out)
        
        return out

    def set_masks(self, masks):
        
        self.conv1.set_mask(torch.from_numpy(masks[0]))
        self.conv2.set_mask(torch.from_numpy(masks[1]))
        self.conv3.set_mask(torch.from_numpy(masks[2]))
        self.linear1.set_mask(torch.from_numpy(masks[3]))
        self.linear2.set_mask(torch.from_numpy(masks[4]))

In [43]:
param = {
    'pruning_perc': 50.,
    'batch_size': 128, 
    'test_batch_size': 100,
    'num_epochs': 100,
    'learning_rate': 1e-1,
    'weight_decay': 5e-4,
    'momentum':0.9,
    'amsgrad':True,
}

In [44]:
# Data loaders

transform_train = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ]
)

transform_test = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ]
)

train_dataset = datasets.CIFAR10(root='../data/', train=True, download=True,transform=transform_train)
loader_train = torch.utils.data.DataLoader(train_dataset, batch_size=param['batch_size'], shuffle=True)


test_dataset = datasets.CIFAR10(root='../data/', train=False, transform=transform_test)
loader_test = torch.utils.data.DataLoader(test_dataset, batch_size=param['test_batch_size'], shuffle=False)

Files already downloaded and verified


In [45]:
def train(model, loss_fn, optimizer, param, loader_train, loader_val=None):

    model.train()
    for epoch in range(param['num_epochs']):
        print('Starting epoch %d / %d' % (epoch + 1, param['num_epochs']))

        for t, (x, y) in enumerate(loader_train):
            
            x_var, y_var = to_var(x), to_var(y.long())

            scores = model(x_var)
            loss = loss_fn(scores, y_var)

            if (t + 1) % 100 == 0:

                print('t = %d, loss = %.8f' % (t + 1, loss.item()))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        
        if (epoch+1) % 10 ==0:
            
            torch.save(model.state_dict(), 'models/vgg5_pretrained'+str(epoch+1)+'.pkl')

In [46]:
def test(model, loader):

    model.eval()
    num_correct, num_samples = 0, len(loader.dataset)
    for x, y in loader:
        x_var = to_var(x, volatile=True)
        scores = model(x_var)
        _, preds = scores.data.cpu().max(1)
        num_correct += (preds == y).sum()

    acc = float(num_correct) / num_samples

    print('Test accuracy: {:.2f}% ({}/{})'.format(
        100.*acc,
        num_correct,
        num_samples,
        ))
    
    return acc

In [47]:
net=VGG5()

In [48]:
net.to(device)  #level 11

VGG5(
  (conv1): MaskedConv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (relu1): ReLU(inplace=True)
  (dropout1): Dropout(p=0.2, inplace=False)
  (avgpool1): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (conv2): MaskedConv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=8, bias=False)
  (relu2): ReLU(inplace=True)
  (dropout2): Dropout(p=0.2, inplace=False)
  (conv3): MaskedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=8, bias=False)
  (relu3): ReLU(inplace=True)
  (dropout3): Dropout(p=0.2, inplace=False)
  (avgpool3): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (linear1): Linear(in_features=8192, out_features=4096, bias=False)
  (relu4): ReLU(inplace=True)
  (dropout4): Dropout(p=0.5, inplace=False)
  (linear2): Linear(in_features=4096, out_features=4096, bias=False)
  (relu5): ReLU(inplace=True)
  (dropout5): Dropout(p=0.5, inplace=False)
  (linear3): Linear(in_features=4096, out_features=10, bias

In [10]:
pretrained_dict=torch.load('./models/vgg5_pretrained30.pkl')
model_dict=net.state_dict()
pretrained_dict={k: v for k, v in pretrained_dict.items() if k in model_dict}
model_dict.update(pretrained_dict)
net.load_state_dict(model_dict)

<All keys matched successfully>

In [49]:
criterion = nn.CrossEntropyLoss()

optimizer = torch.optim.SGD(net.parameters(), lr=param['learning_rate'], weight_decay=param['weight_decay'], momentum=param['momentum'])

#optimizer = torch.optim.Adam(net.parameters(), lr=param['learning_rate'], weight_decay=param['weight_decay'], amsgrad=param['amsgrad'])

train(net, criterion, optimizer, param, loader_train)

Starting epoch 1 / 100
t = 100, loss = 1.98441970
t = 200, loss = 1.92638016
t = 300, loss = 1.85138297
Starting epoch 2 / 100
t = 100, loss = 1.80311656
t = 200, loss = 1.68488371
t = 300, loss = 1.63388205


KeyboardInterrupt: 

In [9]:
pretrained_dict=torch.load('./models/vgg5_pretrained10.pkl')
model_dict=net.state_dict()
pretrained_dict={k: v for k, v in pretrained_dict.items() if k in model_dict}
model_dict.update(pretrained_dict)
net.load_state_dict(model_dict)

<All keys matched successfully>

In [19]:
test(net, loader_test)

Test accuracy: 84.07% (8407/10000)


0.8407

In [18]:
net.load_state_dict(torch.load('./models/vgg5_pretrained10.pkl'))

<All keys matched successfully>