In [None]:
# Import Libraries
from models_to_prune import *
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

import numpy as np

import torchvision.transforms as tf
import torchvision.datasets as ds
import torch.utils.data as data

import os
import time
import torch

from torch.utils.data import TensorDataset, DataLoader

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
# See layer shapes
cnn = BasicCNN()
for name, layer in cnn.named_modules():
    if 'conv' in name:
        filters = layer.weight.data.clone()
        print(name,':',filters.size())
        # reduce last dim of 3x3 to 1x1 then squeeze
        pooled_filter = torch.squeeze(F.avg_pool2d(filters,
                                                   filters.size()[-1])) 
        pooled_filter = pooled_filter*1000 # scaling up the magnitudes 
        print("pooled :",pooled_filter.size())

# interpet 4d tensors as set of 3d blocks.  
#print(pooled_filter[0])
print(pooled_filter.cpu().mean())

conv1 : torch.Size([64, 3, 3, 3])
pooled : torch.Size([64, 3])
conv2 : torch.Size([128, 64, 3, 3])
pooled : torch.Size([128, 64])
conv3 : torch.Size([256, 128, 3, 3])
pooled : torch.Size([256, 128])
conv4 : torch.Size([512, 256, 3, 3])
pooled : torch.Size([512, 256])
tensor(-0.0003)


In [None]:
# Create "dataset" of pooled layers
# Convert set of 3d blocks to set of flat 2d maps. 

# Create pad-tensor container, same size as biggest layer
filter_repeats = 10000 # each filter layer will be repeated this many times
feat_size = 16  # size of 2d maps
magnitude_scaler = 100
state_rep = torch.zeros([filter_repeats, 512, feat_size,feat_size]) # set of N padded [512,16,16] 
                                                        # tensors for each of the 4 layers  
for i in range(filter_repeats):
    cnn = BasicCNN()
    for name, layer in cnn.named_modules():
        if 'conv' in name:
            filters = layer.weight.data.clone()
            pooled_filter = torch.squeeze(F.avg_pool2d(filters,
                                                       filters.size()[-1]))
            pooled_filter = pooled_filter*magnitude_scaler # scaling up the magnitudes 
            conv_layer_num = int(name[-1])
            size = pooled_filter.size()
            #if conv_layer_num == 1:
            #    pads = (feat_size//2) - size[-1]//2
            #    state_rep[i, :size[0], feat_size//2, pads-1 :-pads] = pooled_filter  # copy in center
            #elif conv_layer_num == 2:
            #    pads = (feat_size//2) - 4
            #    state_rep[i+filter_repeats, :size[0], pads:-pads, pads:-pads] = pooled_filter.view(size[0],8,8)
            #elif conv_layer_num == 3:
            #    pads_r = (feat_size//2) - 4
            #    pads_c = (feat_size//2) - 8
            #    state_rep[i+filter_repeats*2, :size[0], :8, :16] = pooled_filter.view(size[0],8,16)
            if conv_layer_num == 4:
                state_rep[i] = pooled_filter.view(size[0],16,16) # same size as init state_rep
                #state_rep[i+filter_repeats*3] = pooled_filter.view(size[0],16,16) # same size as init state_rep
                #print(state_rep[i+filter_repeats*3][0])

val_rep = filter_repeats//10
validation = torch.zeros([val_rep*4, 512, feat_size,feat_size]) # set of N padded [512,16,16] 
                                                        # tensors for each of the 4 layers  
for i in range(val_rep):
    cnn = BasicCNN()
    for name, layer in cnn.named_modules():
        if 'conv' in name:
            filters = layer.weight.data.clone()
            pooled_filter = torch.squeeze(F.avg_pool2d(filters,
                                                       filters.size()[-1]))
            pooled_filter = pooled_filter*magnitude_scaler # scaling up the magnitudes 
            conv_layer_num = int(name[-1])
            size = pooled_filter.size()
            if conv_layer_num == 1:
                pads = (feat_size//2) - size[-1]//2
                validation[i, :size[0], feat_size//2, pads-1 :-pads] = pooled_filter  # copy in center
            elif conv_layer_num == 2:
                pads = (feat_size//2) - 4
                validation[i+val_rep, :size[0], pads:-pads, pads:-pads] = pooled_filter.view(size[0],8,8)
            elif conv_layer_num == 3:
                pads_r = (feat_size//2) - 4
                pads_c = (feat_size//2) - 8
                validation[i+val_rep*2, :size[0], :8, :16] = pooled_filter.view(size[0],8,16)
            elif conv_layer_num == 4:
                validation[i+val_rep*3] = pooled_filter.view(size[0],16,16) # same size as init state_rep

In [26]:
# Build Autoencoder Class, modified from https://github.com/L1aoXingyu

class autoencoder(nn.Module):
    def __init__(self):
        super(autoencoder, self).__init__()
        self.encoding_dim = 64
        self.encoder = nn.Sequential( # input size is [512,16,16]
            nn.Conv2d(512, 256, 3),  # b, 256, 14, 14
            nn.Sigmoid(),
            nn.MaxPool2d(4, stride=1),  # b, 256, 11, 11
            nn.Conv2d(256, 128, 3),  # b, 128, 9, 9
            
            nn.Sigmoid(),
            nn.MaxPool2d(3, stride=1),  # b, 128, 7, 7
            nn.Conv2d(128, 64, 3),  # b, 64, 5, 5
            nn.Sigmoid(),
            nn.MaxPool2d(2, stride=1),# b, 64, 4, 4

            nn.Flatten(), #from dim=1 to -1
            nn.Linear(64*4*4,self.encoding_dim)
        )
        
        self.latent_to_map = nn.Linear(self.encoding_dim, 64*4*4)
        self.decoder = nn.Sequential(    
            nn.ConvTranspose2d(64, 128, 4, stride=1),  # b, 64,7,7
            nn.Sigmoid(),
            nn.ConvTranspose2d(128, 256, 5, stride=1),  # b, 256, 11, 11
            nn.Sigmoid(),
            nn.ConvTranspose2d(256, 512, 6),  # b, 512, 16, 16
          #  nn.Tanh()
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.latent_to_map(x).view(-1,64,4,4) 
        x = self.decoder(x)
        return x

model = autoencoder().cuda()
print(model)

autoencoder(
  (encoder): Sequential(
    (0): Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1))
    (1): Sigmoid()
    (2): MaxPool2d(kernel_size=4, stride=1, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1))
    (4): Sigmoid()
    (5): MaxPool2d(kernel_size=3, stride=1, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1))
    (7): Sigmoid()
    (8): MaxPool2d(kernel_size=2, stride=1, padding=0, dilation=1, ceil_mode=False)
    (9): Flatten()
    (10): Linear(in_features=1024, out_features=64, bias=True)
  )
  (latent_to_map): Linear(in_features=64, out_features=1024, bias=True)
  (decoder): Sequential(
    (0): ConvTranspose2d(64, 128, kernel_size=(4, 4), stride=(1, 1))
    (1): Sigmoid()
    (2): ConvTranspose2d(128, 256, kernel_size=(5, 5), stride=(1, 1))
    (3): Sigmoid()
    (4): ConvTranspose2d(256, 512, kernel_size=(6, 6), stride=(1, 1))
  )
)


In [29]:
num_epochs = 10
batch_size = 16
learning_rate = 1e-3

state_rep = state_rep.to(device)
train_dl = DataLoader(state_rep, batch_size=batch_size, shuffle=True)
valid_dl = DataLoader(validation, batch_size=batch_size, shuffle=True)

model = autoencoder().cuda()
MSE_criterion = nn.MSELoss()
#optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) 
                                                # removed weight_decay=1e-5
#sample = iter(train_dl).next()
#print(sample.cpu().mean())
#raise KeyboardInterrupt

# Training Loop
for epoch in range(num_epochs):
    for i, data in enumerate(train_dl):
        model.train()
        data = Variable(data).cuda()
        # ===================forward=====================
        output = model(data)
        #loss = torch.sum(torch.log(torch.cosh(data-output)))
        loss = torch.mean(torch.abs(data-output)) # MAE criterion
        rand_loss = torch.mean(torch.abs(data-torch.rand_like(output))) # sanity check
        #loss = MSE_criterion(output, data)
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
         # ===================log========================
        if i % 100 == 0:
            model.eval()
            val_loss = 0
            num_batches = 0
            ave_val_loss = 0
            with torch.no_grad():
                for val in valid_dl:
                    val = Variable(val).cuda()
                    val_output = model(val)
                    #val_loss += torch.sum(torch.log(torch.cosh(val-val_output)))
                    val_loss += torch.mean(torch.abs(val - val_output))
                    #val_loss += MSE_criterion(val_output,val)
                    num_batches += 1
                ave_val_loss = val_loss/num_batches


            print('epoch [{}/{}], loss:{}, val_loss:{}, rand_loss:{}'
                .format(epoch+1, num_epochs, loss.item(), ave_val_loss, rand_loss.item()))
            torch.save(model.state_dict(), './conv_autoencoder.pth')


epoch [1/10], loss:0.3417483866214752, val_loss:1.3761440515518188, rand_loss:0.5805090665817261
epoch [1/10], loss:0.33293068408966064, val_loss:0.16735203564167023, rand_loss:0.5801882743835449
epoch [1/10], loss:0.3233155310153961, val_loss:0.13830313086509705, rand_loss:0.5806146264076233
epoch [1/10], loss:0.3228972852230072, val_loss:0.13621152937412262, rand_loss:0.5800780653953552
epoch [1/10], loss:0.3225247263908386, val_loss:0.13382185995578766, rand_loss:0.5803004503250122
epoch [1/10], loss:0.3219667673110962, val_loss:0.13134582340717316, rand_loss:0.580004870891571
epoch [1/10], loss:0.32168716192245483, val_loss:0.1291329562664032, rand_loss:0.5801371932029724
epoch [1/10], loss:0.32225286960601807, val_loss:0.12731145322322845, rand_loss:0.5809592008590698
epoch [1/10], loss:0.32205507159233093, val_loss:0.1259191930294037, rand_loss:0.5805268883705139
epoch [1/10], loss:0.32172659039497375, val_loss:0.12491466850042343, rand_loss:0.5807982087135315
epoch [1/10], loss:

KeyboardInterrupt: ignored

In [30]:
loaded_model = autoencoder().cuda()
checkpoint_dict = torch.load('conv_autoencoder.pth')
loaded_model.load_state_dict(checkpoint_dict)

encoded_states = loaded_model.encoder(data)
print(encoded_states.size())
#print(encoded_states[1])
output_ = loaded_model(data)
MAE_loss = torch.mean(torch.abs(data-output))
MSE_loss = MSE_criterion(output, data)
print(MAE_loss.item())
print(MSE_loss.item())
print(output[0][0])


torch.Size([16, 64])
0.32188737392425537
0.1608814299106598
tensor([[ 4.7312e-04, -3.1684e-03, -7.4903e-03, -1.1314e-02, -9.6496e-03,
         -1.9540e-03,  7.4072e-04,  1.4077e-03,  1.4146e-03,  6.4284e-04,
         -1.5436e-03, -4.9544e-03, -1.0017e-02, -1.0930e-02, -7.3620e-03,
          8.1505e-04],
        [ 4.0795e-03, -6.8054e-04, -3.8856e-03, -2.0285e-03, -3.2532e-03,
          6.8349e-05,  1.7160e-03,  1.7578e-03,  1.9269e-03,  2.1727e-03,
          2.3942e-03, -2.9430e-03, -3.2713e-03, -2.5106e-03, -3.0127e-03,
          8.1690e-04],
        [ 2.2640e-03, -1.9204e-03,  1.5189e-03,  3.6689e-03,  5.8935e-04,
         -8.4727e-03, -6.0117e-04,  1.1800e-03,  1.4967e-03,  1.2836e-03,
          1.1956e-03, -1.1914e-03,  4.4918e-03,  2.1104e-03, -1.2721e-03,
         -7.8899e-03],
        [-5.4507e-03,  4.9092e-03,  3.1393e-03, -3.2788e-03,  5.4446e-03,
         -2.4567e-03,  1.1063e-03,  1.5781e-03,  1.3962e-03,  1.4433e-04,
         -4.5205e-03,  6.4411e-03,  1.4911e-03, -1.0902e-

In [None]:
4000/16

250.0

In [None]:
print(validation.size())


torch.Size([400, 512, 16, 16])


In [18]:
np.tanh(0.9)

0.7162978701990245