In [1]:
import os
# GPU selection via environment variables; these are assigned before torch is
# imported further down in this cell.
# CUDA_DEVICE_ORDER=PCI_BUS_ID: enumerate GPUs in PCI bus order.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
# CUDA_VISIBLE_DEVICES='0': restrict this process to the first GPU in that order.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import torch
import torch.nn as nn

import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms

from pruning.layers import MaskedLinear, MaskedConv2d 
from pruning.methods import filter_prune
from pruning.utils import to_var, prune_rate

import numpy as np


# Target device for the model: the GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class polynom_act(nn.Module):
    """Learnable second-degree polynomial activation.

    Computes ``alpha * x**2 + beta * x + c`` element-wise, where ``alpha``,
    ``beta`` and ``c`` are trainable one-element parameters.

    Args:
        alpha: initial quadratic coefficient (number or one-element tensor).
            ``None`` draws the initial value from a standard normal.
        beta: initial linear coefficient; same conventions as ``alpha``.
        c: initial constant term; same conventions as ``alpha``.
    """

    def __init__(self, alpha=None, beta=None, c=None):
        super(polynom_act, self).__init__()
        # Build in alpha, beta, c order so the default (all None) path consumes
        # the random number generator exactly as three consecutive randn(1) calls.
        self.alpha = self._make_coefficient(alpha)
        self.beta = self._make_coefficient(beta)
        self.c = self._make_coefficient(c)

    @staticmethod
    def _make_coefficient(value):
        """Return a trainable shape-(1,) parameter holding ``value`` (random if None)."""
        if value is None:
            initial = torch.randn(1)
        else:
            # Copy so the parameter never aliases a tensor owned by the caller.
            initial = torch.as_tensor(value, dtype=torch.float32).detach().clone().reshape(1)
        return nn.Parameter(initial, requires_grad=True)

    def forward(self, x):
        """Apply the polynomial element-wise to ``x``."""
        return (self.alpha * (x ** 2) + self.beta * x + self.c)

class LeNet5(nn.Module):
    """LeNet-style CNN with maskable convolutions and polynomial activations.

    Layer order: conv1 -> polynom_act -> 2x2 average pool -> grouped conv2 ->
    polynom_act -> 10x10 average pool -> linear1 -> polynom_act -> linear2,
    producing 10 scores per sample.

    ``linear1`` takes 32 features, i.e. the 32 conv2 channels reduced to 1x1 by
    the 10x10 pool; that works out for single-channel 28x28 inputs
    (28 -> 28 after the padded conv1, 14 after pooling, 10 after conv2).
    """

    def __init__(self):
        super(LeNet5, self).__init__()

        self.conv1 = MaskedConv2d(1, 64, kernel_size=5, padding=2, stride=1)
        # xavier_uniform_ is the in-place initialiser; the un-suffixed name is deprecated.
        nn.init.xavier_uniform_(self.conv1.weight)

        # The activation attributes keep their "relu*" names (these names are the
        # state_dict keys) although they hold polynom_act modules.
        self.relu1 = polynom_act()
        self.avgpool1=nn.AvgPool2d(kernel_size=(2,2),stride=(2,2))

        self.conv2 = MaskedConv2d(64, 32, kernel_size=5, stride=1,groups=4)
        nn.init.xavier_uniform_(self.conv2.weight)

        self.relu2= polynom_act()
        self.avgpool2=nn.AvgPool2d(kernel_size=10,stride=1)

        self.linear1 = nn.Linear(32,32)
        self.relu4= polynom_act()
        self.linear2 = nn.Linear(32,10)

    def forward(self, x):
        """Return the 10 class scores for a batch of inputs ``x``."""
        out = self.avgpool1(self.relu1(self.conv1(x)))

        out = self.avgpool2(self.relu2(self.conv2(out)))

        # Flatten everything except the batch dimension for the linear layers.
        out = out.reshape(out.shape[0],-1)

        out = self.relu4(self.linear1(out))

        out = self.linear2(out)
        return out

    def set_masks(self, masks):
        """Install pruning masks on the layers, in model order.

        ``masks[i]`` is applied to the i-th layer of
        (conv1, conv2, linear1, linear2). Passing fewer masks than layers is
        allowed and leaves the remaining layers untouched, so a two-element
        list masks only the convolutions.

        Args:
            masks: sequence of numpy arrays (torch tensors are accepted as-is).

        Raises:
            ValueError: if more masks than layers are supplied.
            TypeError: if a mask targets a layer without a ``set_mask`` method.
                In this model ``linear1`` and ``linear2`` are plain ``nn.Linear``
                layers, so masks for them are rejected.
        """
        names = ('conv1', 'conv2', 'linear1', 'linear2')
        layers = (self.conv1, self.conv2, self.linear1, self.linear2)
        if len(masks) > len(layers):
            raise ValueError('expected at most %d masks, got %d'
                             % (len(layers), len(masks)))

        targets = list(zip(names, layers, masks))

        # Validate every target first so a bad call never leaves the model
        # with only some of its masks replaced.
        for name, layer, _ in targets:
            if not hasattr(layer, 'set_mask'):
                raise TypeError('layer %s (%s) does not support masks'
                                % (name, type(layer).__name__))

        for _, layer, mask in targets:
            if not torch.is_tensor(mask):
                mask = torch.from_numpy(mask)
            layer.set_mask(mask)

In [2]:
# Hyperparameters read by the data-loading and training cells.
param = dict(
    pruning_perc=50.,        # presumably the percentage to prune; not referenced in the cells shown here
    batch_size=128,          # training mini-batch size
    test_batch_size=100,     # evaluation mini-batch size
    num_epochs=80,           # passes over the training set
    learning_rate=3e-4,      # Adam step size
    weight_decay=5e-4,       # weight decay passed to Adam
)

In [3]:
# Data loaders
# MNIST is downloaded into ../data/ when it is not already there; images are
# converted to tensors by ToTensor().
train_dataset = datasets.MNIST(root='../data/',train=True, download=True, 
    transform=transforms.ToTensor())
# Training batches are reshuffled every epoch.
loader_train = torch.utils.data.DataLoader(train_dataset, 
    batch_size=param['batch_size'], shuffle=True)

test_dataset = datasets.MNIST(root='../data/', train=False, download=True, 
    transform=transforms.ToTensor())
# Accuracy does not depend on sample order, so the evaluation set is left
# unshuffled: batches come out in a fixed, reproducible order.
loader_test = torch.utils.data.DataLoader(test_dataset, 
    batch_size=param['test_batch_size'], shuffle=False)


In [4]:
# Build the network with freshly initialised weights. The checkpoint load
# below is commented out, so nothing pretrained is restored in this cell.
net = LeNet5()

# Load the pretrained model (disabled)
#net.load_state_dict(torch.load('models/convnet_pretrained.pkl'))



In [5]:
# Move the model to the selected device. As the last expression of the cell,
# the returned module is also displayed as the cell output.
# NOTE(review): the meaning of "level 9" is not evident from this notebook.
net.to(device) # level 9

LeNet5(
  (conv1): MaskedConv2d(1, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (relu1): polynom_act()
  (avgpool1): AvgPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0)
  (conv2): MaskedConv2d(64, 32, kernel_size=(5, 5), stride=(1, 1), groups=4)
  (relu2): polynom_act()
  (avgpool2): AvgPool2d(kernel_size=10, stride=1, padding=0)
  (linear1): Linear(in_features=32, out_features=32, bias=True)
  (relu4): polynom_act()
  (linear2): Linear(in_features=32, out_features=10, bias=True)
)

In [6]:
def train(model, loss_fn, optimizer, param, loader_train, loader_val=None,
          save_path='models/lenet_pretrained.pkl'):
    """Train ``model`` for ``param['num_epochs']`` epochs, then save its weights.

    Args:
        model: network to optimise; put into training mode before the loop.
        loss_fn: callable taking ``(scores, targets)`` and returning the loss.
        optimizer: optimizer stepping the parameters of ``model``.
        param: hyperparameter dict; only ``param['num_epochs']`` is read here.
        loader_train: iterable yielding ``(inputs, labels)`` batches.
        loader_val: accepted for interface compatibility; not used.
        save_path: file that receives ``model.state_dict()`` after the last
            epoch. Missing parent directories are created first.
    """
    num_epochs = param['num_epochs']

    model.train()
    for epoch in range(num_epochs):
        print('Starting epoch %d / %d' % (epoch + 1, num_epochs))

        for t, (x, y) in enumerate(loader_train):
            # to_var is the helper imported from pruning.utils; labels are cast
            # to int64 before being handed to the loss.
            x_var, y_var = to_var(x), to_var(y.long())

            scores = model(x_var)
            loss = loss_fn(scores, y_var)

            # Report the loss of every 100th mini-batch.
            if (t + 1) % 100 == 0:
                print('t = %d, loss = %.8f' % (t + 1, loss.item()))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Create the target directory up front: without it torch.save would fail
    # only after the whole training run had finished, losing the result.
    save_dir = os.path.dirname(save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    torch.save(model.state_dict(), save_path)

In [7]:
def test(model, loader):

    model.eval()

    num_correct, num_samples = 0, len(loader.dataset)
    for x, y in loader:
        x_var = to_var(x, volatile=True)
        scores = model(x_var)
        _, preds = scores.data.cpu().max(1)
        num_correct += (preds == y).sum()

    acc = float(num_correct) / num_samples

    print('Test accuracy: {:.2f}% ({}/{})'.format(
        100.*acc,
        num_correct,
        num_samples,
        ))
    
    return acc

In [8]:
# Cross-entropy loss over the 10 class scores, optimised with Adam using the
# learning rate and weight decay from the hyperparameter dict.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    net.parameters(),
    lr=param['learning_rate'],
    weight_decay=param['weight_decay'],
)

# Run the full training loop on the training loader.
train(
    model=net,
    loss_fn=criterion,
    optimizer=optimizer,
    param=param,
    loader_train=loader_train,
)

Starting epoch 1 / 80
t = 100, loss = 2.29180002
t = 200, loss = 2.03327465
t = 300, loss = 1.50349295
t = 400, loss = 1.17553723
Starting epoch 2 / 80
t = 100, loss = 0.99831265
t = 200, loss = 1.13252389
t = 300, loss = 0.64255744
t = 400, loss = 0.54402000
Starting epoch 3 / 80
t = 100, loss = 0.46036699
t = 200, loss = 0.55836380
t = 300, loss = 0.44845489
t = 400, loss = 0.45396307
Starting epoch 4 / 80
t = 100, loss = 0.50006825
t = 200, loss = 0.41457841
t = 300, loss = 0.41137362
t = 400, loss = 0.54552788
Starting epoch 5 / 80
t = 100, loss = 0.47838643
t = 200, loss = 0.44950673
t = 300, loss = 0.28056955
t = 400, loss = 0.35922059
Starting epoch 6 / 80
t = 100, loss = 0.15570737
t = 200, loss = 0.38969526
t = 300, loss = 0.32153919
t = 400, loss = 0.54759794
Starting epoch 7 / 80
t = 100, loss = 0.33928177
t = 200, loss = 0.24040742
t = 300, loss = 0.19568352
t = 400, loss = 0.25796962
Starting epoch 8 / 80
t = 100, loss = 0.44279936
t = 200, loss = 0.19115177
t = 300, loss 

t = 400, loss = 0.01821338
Starting epoch 64 / 80
t = 100, loss = 0.02266996
t = 200, loss = 0.03756992
t = 300, loss = 0.03218032
t = 400, loss = 0.01958508
Starting epoch 65 / 80
t = 100, loss = 0.04024110
t = 200, loss = 0.06464313
t = 300, loss = 0.01070078
t = 400, loss = 0.02153328
Starting epoch 66 / 80
t = 100, loss = 0.01210746
t = 200, loss = 0.01451194
t = 300, loss = 0.08940686
t = 400, loss = 0.03579009
Starting epoch 67 / 80
t = 100, loss = 0.02704545
t = 200, loss = 0.03648291
t = 300, loss = 0.00855329
t = 400, loss = 0.02647634
Starting epoch 68 / 80
t = 100, loss = 0.02475363
t = 200, loss = 0.02709866
t = 300, loss = 0.01295702
t = 400, loss = 0.01902932
Starting epoch 69 / 80
t = 100, loss = 0.05978847
t = 200, loss = 0.02551782
t = 300, loss = 0.09170306
t = 400, loss = 0.00980793
Starting epoch 70 / 80
t = 100, loss = 0.02474120
t = 200, loss = 0.03156541
t = 300, loss = 0.03274680
t = 400, loss = 0.07724077
Starting epoch 71 / 80
t = 100, loss = 0.05480339
t = 20

In [9]:
# Reload the weights saved at the end of training. map_location remaps the
# stored tensors onto the device chosen for this session, so a checkpoint
# written on a GPU can also be restored when only the CPU is available.
net.load_state_dict(torch.load('./models/lenet_pretrained.pkl', map_location=device))

<All keys matched successfully>

In [10]:
# Evaluate the reloaded model on the MNIST test loader; test() prints the
# accuracy and its return value is shown as the cell output.
test(net, loader_test)

  return Variable(x, requires_grad=requires_grad, volatile=volatile)


Test accuracy: 98.69% (9869/10000)


0.9869