In [1]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import sampler
import math
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "3"
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np

In [2]:
#torch.cuda.set_device(0)
# specify dtype
use_cuda = torch.cuda.is_available()
if use_cuda:
    dtype = torch.cuda.FloatTensor
else:
    dtype = torch.FloatTensor
print(torch.cuda.device_count())
print(use_cuda)

1
True


In [3]:
# Hyperparameters
batch_size = 32
learning_rate = 4e-3
momentum = 0.9
num_epoch = 15

In [4]:
# Data
class ChunkSampler(sampler.Sampler):
    """Samples elements sequentially from some offset. 
    Arguments:
        num_samples: # of desired datapoints
        start: offset where we should start selecting from
    """
    def __init__(self, num_samples, start = 0):
        self.num_samples = num_samples
        self.start = start

    def __iter__(self):
        return iter(range(self.start, self.start + self.num_samples))

    def __len__(self):
        return self.num_samples

data_path = '/home/put_data/frank840925/IDP/data'
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])

trainset = torchvision.datasets.CIFAR10(root=data_path, 
                                        train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, 
                                          num_workers=4)

valset = torchvision.datasets.CIFAR10(root=data_path, 
                                       train=False, download=True, transform=transform)
valloader = torch.utils.data.DataLoader(valset, batch_size=batch_size, shuffle=False, sampler=ChunkSampler(5000,0), 
                                        num_workers=4)

testset = torchvision.datasets.CIFAR10(root=data_path, 
                                       train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, sampler=ChunkSampler(5000,5000),
                                        num_workers=4)

print(trainset.__len__())
print(valset.__len__())
print(testset.__len__())

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
50000
10000
10000


In [5]:
# Helper functions
# functions to show an image
def imshow(img):
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))

def plot_losses(loss_history1=None, loss_history2=None):
    plt.clf()
    fig = plt.figure()
    ax1 = fig.add_subplot(111)
    if loss_history1:
        ax1.plot(loss_history1, color="blue", label="train")
    if loss_history2:
        ax1.plot(loss_history2, color="green", label="val")
    #ax2 = ax1.twinx()
    #ax2.set_yscale('log')
    plt.xlabel("epoch") 
    plt.ylabel("loss") 
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.title("Cross-entropy loss")
    #plt.savefig('output_losses.png')

def plot_accuracy(accuracy1=None, accuracy2=None):
    plt.clf()
    fig2 = plt.figure()
    ax1 = fig2.add_subplot(111)
    if accuracy1:
        ax1.plot(accuracy1, color="red", label="train")
    if accuracy2:
        ax1.plot(accuracy2, color="black", label="val")
    plt.xlabel("epoch") 
    plt.ylabel("accuracy")
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.title("Train/Val accuracy") 
    #plt.savefig('accuracy.png')

# get some random training images
#dataiter = iter(trainloader)
#images, labels = dataiter.next()
#print(images.shape)

In [72]:
# self-defined modules(layers)
class Flatten(nn.Module):
    def forward(self, x):
        # read in N, C, H, W
        N, C, H, W = x.size()
        # flatten the C * H * W values into a single vector per image
        return x.view(N, -1)  
    
class idp_tensor(nn.Module):
    def __init__(self, idp):
        super(idp_tensor, self).__init__()
        self.idp = idp
    def forward(self, c):
        #input tensor c, size N*C*H*W, output with the same size, some channels zeroed according to idp
        N, C, H, W = c.size()
        non_zero_channel = int(C*(self.idp))
        zero_channels = C-non_zero_channel
        if zero_channels > 0:
            #zeros = Variable(torch.zeros(N, zero_channels, H, W)).type(dtype)
            zeros = Variable(torch.zeros(zero_channels).view(zero_channels,1)).type(dtype) #C
            zeros = zeros.expand(zero_channels, H) #C*H
            zeros = torch.stack([zeros]*W,1) #C*H*W
            zeros = torch.stack([zeros]*N) #N*C*H*W
            c = torch.cat([c[:, :non_zero_channel, :, :].clone(), zeros], 1).type(dtype)
            #c[:, non_zero_channel:, :, :] = zeros
            return c
        else:
            return c
        
class first_idp_tensor_3(nn.Module):
    def __init__(self, idp1=0.35, idp2=0.7, idp3=1):
        super(first_idp_tensor_3, self).__init__()
        self.idp1 = idp1
        self.idp2 = idp2
        self.idp3 = idp3
    def forward(self, c):
        #input the first conv-Relu-Linear output, N*C*H*W, replicate and apply idp and concat in first dim (N)
        N, C, H, W = c.size()
        non_zero_channel_1 = int(C*(self.idp1))
        non_zero_channel_2 = int(C*(self.idp2))
        non_zero_channel_3 = int(C*(self.idp3))
        if C-non_zero_channel_1 > 0:
            #zeros = Variable(torch.zeros(N, C-non_zero_channel_1, H, W)).type(dtype)
            zeros = Variable(torch.zeros(C-non_zero_channel_1).view(C-non_zero_channel_1,1)).type(dtype) #C
            zeros = zeros.expand(C-non_zero_channel_1, H) #C*H
            zeros = torch.stack([zeros]*W,1) #C*H*W
            zeros = torch.stack([zeros]*N) #N*C*H*W
            c1 = torch.cat([c[:, :non_zero_channel_1, :, :].clone(), zeros], 1).type(dtype)
        else:
            c1 = c
        if C-non_zero_channel_2 > 0:
            #zeros = Variable(torch.zeros(N, C-non_zero_channel_2, H, W)).type(dtype)
            zeros = Variable(torch.zeros(C-non_zero_channel_2).view(C-non_zero_channel_2,1)).type(dtype) #C
            zeros = zeros.expand(C-non_zero_channel_2, H) #C*H
            zeros = torch.stack([zeros]*W,1) #C*H*W
            zeros = torch.stack([zeros]*N) #N*C*H*W
            c2 = torch.cat([c[:, :non_zero_channel_2, :, :].clone(), zeros], 1).type(dtype)
        else:
            c2 = c
        if C-non_zero_channel_3 > 0:
            #zeros = Variable(torch.zeros(N, C-non_zero_channel_3, H, W)).type(dtype)
            zeros = Variable(torch.zeros(C-non_zero_channel_3).view(C-non_zero_channel_3,1)).type(dtype) #C
            zeros = zeros.expand(C-non_zero_channel_3, H) #C*H
            zeros = torch.stack([zeros]*W,1) #C*H*W
            zeros = torch.stack([zeros]*N) #N*C*H*W
            c3 = torch.cat([c[:, :non_zero_channel_3, :, :].clone(), zeros], 1).type(dtype)
        else:
            c3 = c
        out = torch.cat([c1, c2, c3], 0)
        return out
        
class middle_idp_tensor_3(nn.Module):
    def __init__(self, idp1=0.35, idp2=0.7, idp3=1):
        super(middle_idp_tensor_3, self).__init__()
        self.idp1 = idp1
        self.idp2 = idp2
        self.idp3 = idp3
    def forward(self, c):
        #input a middle conv-Relu-Linear output, (3*N)*C*H*W, apply IDP1, IDP2, IDP3 to each
        NN, C, H, W = c.size()
        if NN>=3:
            N = int(NN/3)
            non_zero_channel_1 = int(C*(self.idp1))
            non_zero_channel_2 = int(C*(self.idp2))
            non_zero_channel_3 = int(C*(self.idp3))
            c1 = c[:N,:,:,:]
            c2 = c[N:2*N,:,:,:]
            c3 = c[2*N:,:,:,:]
            if C-non_zero_channel_1 > 0:
                #zeros = Variable(torch.zeros(N, C-non_zero_channel_1, H, W)).type(dtype)
                zeros = Variable(torch.zeros(C-non_zero_channel_1).view(C-non_zero_channel_1,1)).type(dtype) #C
                zeros = zeros.expand(C-non_zero_channel_1, H) #C*H
                zeros = torch.stack([zeros]*W,1) #C*H*W
                zeros = torch.stack([zeros]*N) #N*C*H*W
                c1 = torch.cat([c1[:, :non_zero_channel_1, :, :].clone(), zeros], 1).type(dtype)
            else:
                c1 = c1
            if C-non_zero_channel_2 > 0:
                #zeros = Variable(torch.zeros(N, C-non_zero_channel_2, H, W)).type(dtype)
                zeros = Variable(torch.zeros(C-non_zero_channel_2).view(C-non_zero_channel_2,1)).type(dtype) #C
                zeros = zeros.expand(C-non_zero_channel_2, H) #C*H
                zeros = torch.stack([zeros]*W,1) #C*H*W
                zeros = torch.stack([zeros]*N) #N*C*H*W
                c2 = torch.cat([c2[:, :non_zero_channel_2, :, :].clone(), zeros], 1).type(dtype)
            else:
                c2 = c2
            if C-non_zero_channel_3 > 0:
                #zeros = Variable(torch.zeros(N, C-non_zero_channel_3, H, W)).type(dtype)
                zeros = Variable(torch.zeros(C-non_zero_channel_3).view(C-non_zero_channel_3,1)).type(dtype) #C
                zeros = zeros.expand(C-non_zero_channel_3, H) #C*H
                zeros = torch.stack([zeros]*W,1) #C*H*W
                zeros = torch.stack([zeros]*N) #N*C*H*W
                c3 = torch.cat([c3[:, :non_zero_channel_3, :, :].clone(), zeros], 1).type(dtype)
            else:
                c3 = c3
            out = torch.cat([c1, c2, c3],0)
            return out
        else:
            return c
        
class func_allone(nn.Module):
    def forward(self, x):
        #x is an input tensor, size N*C*H*W
        #for cnn, functions are applied to each filter
        N, C, H, W = x.size()
        x = torch.mul(x.clone(), Variable(torch.ones(N, C, H, W), requires_grad=False).type(dtype))
        return x
    
class func_allone_trainable(nn.Module):
    def channel_coeff(self, N, C, H, W):
        coeff_tensor = nn.Parameter(torch.ones(C).view(C, 1), requires_grad=True).type(dtype) #C
        coeff_tensor = coeff_tensor.expand(C, H) #C*H
        coeff_tensor = torch.stack([coeff_tensor]*W,1) #C*H*W
        coeff_tensor = torch.stack(([coeff_tensor]*N)) #N*C*H*W
        return coeff_tensor

    def forward(self, x):
        N, C, H, W = x.size()
        x = torch.mul(x.clone(), self.channel_coeff(N, C, H, W))
        return x

class func_linear(nn.Module):
    def __init__(self, k=1):
        super(func_linear, self).__init__()
        self.k = k
    def channel_coeff(self, N, C, H, W):
        coeff_list = list(map(lambda a: 1-(a/(C+1)), range(0,C)))
        coeff_list = [[c] for c in coeff_list]
        coeff = Variable(torch.Tensor(coeff_list)).type(dtype) #C
        coeff_tensor = coeff.expand(C,H) #C*H
        coeff_tensor = torch.stack([coeff_tensor]*W,1) #C*H*W
        coeff_tensor = torch.stack(([coeff_tensor]*N)) #N*C*H*W
        return coeff_tensor

    def forward(self, x):
        N, C, H, W = x.size()
        x = torch.mul(x.clone(), self.channel_coeff(N, C, H, W))
        return x
        
class func_harmonic(nn.Module):
    #perform element-wise multiplication to channels in x with coefficient k/n, n is channel index
    def __init__(self, k=1):
        super(func_harmonic, self).__init__()
        self.k = k
    def channel_coeff(self, N, C, H, W):
        #C is channels, return a list with corresponding index: [k, k/2,...]
        #returns a tensor with size N*C*H*W
        coeff = list(map(lambda a: self.k/a, range(1,C+1)))
        tensor_list = []
        for c in coeff:
            coeff_tensor = torch.ones(H, W)
            coeff_tensor = torch.mul(coeff_tensor, c)
            tensor_list.append(coeff_tensor)
        ct = torch.stack(tensor_list, 0)
        ct = torch.stack(([ct]*N))
        return ct
    
    def forward(self, x):
        #x is an input tensor, size N*C*H*W
        #for cnn, functions are applied to each filter
        N, C, H, W = x.size()
        x = torch.mul(x.clone(), Variable(self.channel_coeff(N, C, H, W), requires_grad=False).type(dtype))
        return x

In [73]:
class tesla_coef_idp_VGG_3branch(nn.Module):
    def __init__(self, idp_layers, model=models.vgg16(pretrained=True).type(dtype), idp1=0.35, idp2=0.7, idp3=1):
        #idp is 0~1, idp_layers is a set, specify which layer in features should apply idp
        #now idp_layers is 3,6,8,11,13,15,18,20,22,25,27,29
        super(tesla_coef_idp_VGG_3branch, self).__init__()
        self.idp1 = idp1
        self.idp2 = idp2
        self.idp3 = idp3
        self.idp_layers = idp_layers
        self.features = nn.Sequential(*(self.new_features_list(model)))
        print(len(list(self.features.parameters())))
        #self.features = self.new_features_list(model)
        self.classifier = nn.Sequential(nn.Linear(512, 512),
                              nn.ReLU(inplace=True),
                              nn.Dropout(0.5),
                              nn.Linear(512, 10))
        print(len(list(self.classifier.parameters())))
    
    def new_features_list(self, model):
        new_layers = nn.ModuleList()
        #create from pre-trained resnet
        for i, layer in enumerate(list(model.features.children())):
            if i ==1:
                new_layers.append(layer)
                new_layers.append(func_allone_trainable())
                new_layers.append(first_idp_tensor_3(self.idp1, self.idp2, self.idp3))
            elif i not in self.idp_layers:
                new_layers.append(layer)                
            else:
                new_layers.append(layer)
                new_layers.append(func_allone_trainable())
                new_layers.append(middle_idp_tensor_3(self.idp1, self.idp2, self.idp3))
        new_layers.append(Flatten())
        return new_layers        
    
    def forward(self, x):
        f = self.features(x)
        NN, M = f.size()
        N = int(NN/3)
        o1 = f[:N, :]
        o2 = f[N:2*N, :]
        o3 = f[2*N:,:]
        o1 = self.classifier(o1)
        o2 = self.classifier(o2)
        o3 = self.classifier(o3)
        return o1, o2, o3

In [74]:
# test the model by forward pass, output size
idp_layers = {3,6,8,11,13,15,18,20,22,25,27,29}
net = tesla_coef_idp_VGG_3branch(idp_layers, idp1=0.1, idp2=0.35, idp3=1).type(dtype)

x = Variable(torch.randn(32, 3, 32, 32)).type(dtype)
out1, out2, out3 = net(x)
print(out1.size())
print(out2.size())
print(out3.size())

26
4
torch.Size([32, 10])
torch.Size([32, 10])
torch.Size([32, 10])


In [75]:
for idx, m in enumerate(net.named_modules()):
     print(idx, '->', m)

0 -> ('', tesla_coef_idp_VGG_3branch (
  (features): Sequential (
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU (inplace)
    (2): func_allone_trainable (
    )
    (3): first_idp_tensor_3 (
    )
    (4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU (inplace)
    (6): func_allone_trainable (
    )
    (7): middle_idp_tensor_3 (
    )
    (8): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (9): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (10): ReLU (inplace)
    (11): func_allone_trainable (
    )
    (12): middle_idp_tensor_3 (
    )
    (13): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU (inplace)
    (15): func_allone_trainable (
    )
    (16): middle_idp_tensor_3 (
    )
    (17): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (18): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (19): ReLU (i

In [56]:
print(len(list(net.parameters())))
for param in net.parameters():
    print(type(param.data), param.size())

30
<class 'torch.cuda.FloatTensor'> torch.Size([64, 3, 3, 3])
<class 'torch.cuda.FloatTensor'> torch.Size([64])
<class 'torch.cuda.FloatTensor'> torch.Size([64, 64, 3, 3])
<class 'torch.cuda.FloatTensor'> torch.Size([64])
<class 'torch.cuda.FloatTensor'> torch.Size([128, 64, 3, 3])
<class 'torch.cuda.FloatTensor'> torch.Size([128])
<class 'torch.cuda.FloatTensor'> torch.Size([128, 128, 3, 3])
<class 'torch.cuda.FloatTensor'> torch.Size([128])
<class 'torch.cuda.FloatTensor'> torch.Size([256, 128, 3, 3])
<class 'torch.cuda.FloatTensor'> torch.Size([256])
<class 'torch.cuda.FloatTensor'> torch.Size([256, 256, 3, 3])
<class 'torch.cuda.FloatTensor'> torch.Size([256])
<class 'torch.cuda.FloatTensor'> torch.Size([256, 256, 3, 3])
<class 'torch.cuda.FloatTensor'> torch.Size([256])
<class 'torch.cuda.FloatTensor'> torch.Size([512, 256, 3, 3])
<class 'torch.cuda.FloatTensor'> torch.Size([512])
<class 'torch.cuda.FloatTensor'> torch.Size([512, 512, 3, 3])
<class 'torch.cuda.FloatTensor'> torch.