In [0]:
# Import Libraries
from models_to_prune import *
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

import numpy as np

import torchvision.transforms as tf
import torchvision.datasets as ds
import torch.utils.data as data

import os
import time
import torch

from torch.utils.data import TensorDataset, DataLoader

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [2]:
# See layer shapes
cnn = BasicCNN()
for name, layer in cnn.named_modules():
    if 'conv' in name:
        filters = layer.weight.data.clone()
        print(name,':',filters.size())
        # reduce last dim of 3x3 to 1x1 then squeeze
        pooled_filter = torch.squeeze(F.avg_pool2d(filters,
                                                   filters.size()[-1])) 
        pooled_filter = pooled_filter*100 # scaling up the magnitudes 
        print("pooled :",pooled_filter.size())

# interpet 4d tensors as set of 3d blocks.  
#print(pooled_filter[0])

conv1 : torch.Size([64, 3, 3, 3])
pooled : torch.Size([64, 3])
conv2 : torch.Size([128, 64, 3, 3])
pooled : torch.Size([128, 64])
conv3 : torch.Size([256, 128, 3, 3])
pooled : torch.Size([256, 128])
conv4 : torch.Size([512, 256, 3, 3])
pooled : torch.Size([512, 256])


In [0]:
# Create "dataset" of pooled layers
# Convert set of 3d blocks to set of flat 2d maps. 

# Create pad-tensor container, same size as biggest layer
filter_repeats = 10000 # each filter layer will be repeated this many times
feat_size = 16  # size of 2d maps
state_rep = torch.zeros([filter_repeats, 512, feat_size,feat_size]) # set of N padded [512,16,16] 
                                                        # tensors for each of the 4 layers  
for i in range(filter_repeats):
    cnn = BasicCNN()
    for name, layer in cnn.named_modules():
        if 'conv' in name:
            filters = layer.weight.data.clone()
            pooled_filter = torch.squeeze(F.avg_pool2d(filters,
                                                       filters.size()[-1]))
            pooled_filter = pooled_filter*100 # scaling up the magnitudes 
            conv_layer_num = int(name[-1])
            size = pooled_filter.size()
            #if conv_layer_num == 1:
            #    pads = (feat_size//2) - size[-1]//2
            #    state_rep[i, :size[0], feat_size//2, pads-1 :-pads] = pooled_filter  # copy in center
            #elif conv_layer_num == 2:
            #    pads = (feat_size//2) - 4
            #    state_rep[i+filter_repeats, :size[0], pads:-pads, pads:-pads] = pooled_filter.view(size[0],8,8)
            #elif conv_layer_num == 3:
            #    pads_r = (feat_size//2) - 4
            #    pads_c = (feat_size//2) - 8
            #    state_rep[i+filter_repeats*2, :size[0], :8, :16] = pooled_filter.view(size[0],8,16)
            if conv_layer_num == 4:
                state_rep[i] = pooled_filter.view(size[0],16,16) # same size as init state_rep
                #state_rep[i+filter_repeats*3] = pooled_filter.view(size[0],16,16) # same size as init state_rep
                #print(state_rep[i+filter_repeats*3][0])

val_rep = filter_repeats//10
validation = torch.zeros([val_rep*4, 512, feat_size,feat_size]) # set of N padded [512,16,16] 
                                                        # tensors for each of the 4 layers  
for i in range(val_rep):
    cnn = BasicCNN()
    for name, layer in cnn.named_modules():
        if 'conv' in name:
            filters = layer.weight.data.clone()
            pooled_filter = torch.squeeze(F.avg_pool2d(filters,
                                                       filters.size()[-1]))
            pooled_filter = pooled_filter*100 # scaling up the magnitudes 
            conv_layer_num = int(name[-1])
            size = pooled_filter.size()
            if conv_layer_num == 1:
                pads = (feat_size//2) - size[-1]//2
                validation[i, :size[0], feat_size//2, pads-1 :-pads] = pooled_filter  # copy in center
            elif conv_layer_num == 2:
                pads = (feat_size//2) - 4
                validation[i+val_rep, :size[0], pads:-pads, pads:-pads] = pooled_filter.view(size[0],8,8)
            elif conv_layer_num == 3:
                pads_r = (feat_size//2) - 4
                pads_c = (feat_size//2) - 8
                validation[i+val_rep*2, :size[0], :8, :16] = pooled_filter.view(size[0],8,16)
            elif conv_layer_num == 4:
                validation[i+val_rep*3] = pooled_filter.view(size[0],16,16) # same size as init state_rep

In [0]:
# Build Autoencoder Class, modified from https://github.com/L1aoXingyu

class autoencoder(nn.Module):
    def __init__(self):
        super(autoencoder, self).__init__()
        self.encoder = nn.Sequential( # input size is [512,16,16]
            nn.Conv2d(512, 64, 3),  # b, 64, 14, 14
            nn.Sigmoid(),
            #nn.ReLU(True),
            nn.MaxPool2d(4, stride=2),  # b, 64, 6, 6
            nn.Conv2d(64, 16, 3),  # b, 16, 4, 4
            #nn.ReLU(True),
            nn.Sigmoid(),
            nn.MaxPool2d(2, stride=1),  # b, 16, 3, 3
            nn.Flatten(), #from dim=1 to -1
            nn.Linear(16*3*3,100)
        )
        
        self.latent_to_map = nn.Linear(100, 16*3*3)
        self.decoder = nn.Sequential(    
            nn.ConvTranspose2d(16, 64, 3, stride=1),  # b, 64,4,4 
            nn.Sigmoid(),
            #nn.ReLU(True),
            nn.ConvTranspose2d(64, 256, 5, stride=2),  # b, 256, 10, 10
            nn.Sigmoid(),
            #nn.ReLU(True),
            nn.ConvTranspose2d(256, 512, 4),  # b, 512, 16, 16
          #  nn.Tanh()
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.latent_to_map(x).view(-1,16,3,3) 
        x = self.decoder(x)
        return x


In [25]:
num_epochs = 100
batch_size = 64
learning_rate = 1e-3

state_rep = state_rep.to(device)
train_dl = DataLoader(state_rep, batch_size=batch_size, shuffle=True)
valid_dl = DataLoader(validation, batch_size=batch_size, shuffle=True)

model = autoencoder().cuda()
MSE_criterion = nn.MSELoss()
#optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)

for epoch in range(num_epochs):
    for i, data in enumerate(train_dl):
        model.train()
        data = Variable(data).cuda()
        # ===================forward=====================
        output = model(data)
        #loss = torch.sum(torch.log(torch.cosh(data-output)))
        loss = torch.mean(torch.abs(data-output)) # MAE criterion
        #loss = MSE_criterion(output, data)
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
         # ===================log========================
        if i % 100 == 0:
            model.eval()
            val_loss = 0
            num_batches = 0
            ave_val_loss = 0
            with torch.no_grad():
                for val in valid_dl:
                    val = Variable(val).cuda()
                    val_output = model(val)
                    #val_loss += torch.sum(torch.log(torch.cosh(val-val_output)))
                    val_loss += torch.mean(torch.abs(val - val_output))
                    #val_loss += MSE_criterion(val_output,val)
                    num_batches += 1
                ave_val_loss = val_loss/num_batches


            print('epoch [{}/{}], loss:{}, val_loss:{}'
                .format(epoch+1, num_epochs, loss.item(), ave_val_loss))
            torch.save(model.state_dict(), './conv_autoencoder.pth')

epoch [1/100], loss:0.34705641865730286, val_loss:1.1203267574310303
epoch [1/100], loss:0.32168132066726685, val_loss:0.12323831021785736
epoch [2/100], loss:0.32164719700813293, val_loss:0.12233477085828781
epoch [2/100], loss:0.3215946555137634, val_loss:0.1222018450498581
epoch [3/100], loss:0.3217010498046875, val_loss:0.12224549800157547
epoch [3/100], loss:0.3217606544494629, val_loss:0.12233451753854752
epoch [4/100], loss:0.32168132066726685, val_loss:0.12319160252809525
epoch [4/100], loss:0.321719765663147, val_loss:0.12320737540721893
epoch [5/100], loss:0.321804016828537, val_loss:0.12415535002946854
epoch [5/100], loss:0.32189255952835083, val_loss:0.12433242052793503
epoch [6/100], loss:0.3218381404876709, val_loss:0.12538471817970276
epoch [6/100], loss:0.32175806164741516, val_loss:0.12546184659004211
epoch [7/100], loss:0.3216565251350403, val_loss:0.12665827572345734


KeyboardInterrupt: ignored

In [26]:
loaded_model = autoencoder().cuda()
checkpoint_dict = torch.load('conv_autoencoder.pth')
loaded_model.load_state_dict(checkpoint_dict)

encoded_states = loaded_model.encoder(data)
print(encoded_states.size())
print(encoded_states[1])
output_ = loaded_model(data)
MAE_loss = torch.mean(torch.abs(data-output))
MSE_loss = MSE_criterion(output, data)
print(MAE_loss.item())
print(MSE_loss.item())
print(output[0][0])


torch.Size([64, 100])
tensor([ 2.1983, -2.4517, -2.0884,  0.4074, -2.0277,  0.4489,  2.8894, -3.8463,
         3.2029,  2.0145, -2.3532, -3.0322, -1.7299, -1.6727,  2.0970,  2.5279,
         2.3845,  2.5020, -1.9817, -1.3141,  2.1490, -0.7359, -2.3999, -2.7993,
         3.4808, -0.2314,  2.7411, -2.1603,  1.3329,  2.5883,  1.3455,  2.1759,
        -1.0607,  2.4408,  2.6574,  2.0142, -1.8354, -1.4377, -1.9669, -1.4518,
         2.3125, -2.1434,  2.9136,  2.4547,  1.3745, -2.0245,  2.9080,  2.1267,
        -2.7334,  2.1076, -2.4617,  0.9319, -2.0164, -1.9049, -0.0190,  0.6383,
         2.6601, -2.4824,  0.3906, -1.8532, -2.6333,  1.2519, -0.8801,  0.8934,
        -2.4112,  2.7612, -0.6366, -1.5770,  2.1413, -2.4599, -2.1904, -1.2095,
        -2.4878, -0.7620,  2.5534,  2.7037,  2.4145, -2.3148, -0.8629, -1.7082,
        -2.2258,  3.2913,  1.2394, -1.2462, -1.2017, -2.5013,  2.3004,  2.2218,
        -0.7199, -0.9381,  2.9387, -2.3558, -1.7926,  1.8543, -1.8983, -0.7422,
         1.6102,  

In [0]:
4000/16

250.0

In [0]:
print(validation.size())


torch.Size([400, 512, 16, 16])


In [18]:
np.tanh(0.9)

0.7162978701990245