In [1]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
import math
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "7"
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np

In [2]:
#torch.cuda.set_device(0)
# specify dtype
use_cuda = torch.cuda.is_available()
if use_cuda:
    dtype = torch.cuda.FloatTensor
else:
    dtype = torch.FloatTensor
print(torch.cuda.device_count())
print(use_cuda)

1
True


In [3]:
# Hyperparameters
batch_size = 32
learning_rate = 1e-5
momentum = 0.9
num_epoch = 10
show_every = 2000

In [4]:
# Data
print('==> Preparing data..')
data_path = '/data/put_data/frank840925/IDP/data'
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])

trainset = torchvision.datasets.CIFAR10(root=data_path, 
                                        train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)

testset = torchvision.datasets.CIFAR10(root=data_path, 
                                       train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False)

print(trainset.__len__())
print(testset.__len__())

==> Preparing data..
Files already downloaded and verified
Files already downloaded and verified
50000
10000


In [5]:
# functions to show an image
def imshow(img):
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))

def plot_losses(loss_history1=None, loss_history2=None):
    plt.clf()
    fig = plt.figure()
    ax1 = fig.add_subplot(111)
    if loss_history1:
        ax1.plot(loss_history1, color="blue", label="train")
    if loss_history2:
        ax1.plot(loss_history2, color="green", label="test")
    #ax2 = ax1.twinx()
    #ax2.set_yscale('log')
    plt.xlabel("epoch") 
    plt.ylabel("loss") 
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.title("Cross-entropy loss")
    plt.savefig('output_losses.png')

def plot_accuracy(accuracy1=None, accuracy2=None):
    plt.clf()
    fig2 = plt.figure()
    ax1 = fig2.add_subplot(111)
    if accuracy1:
        ax1.plot(accuracy1, color="red", label="train")
    if accuracy2:
        ax1.plot(accuracy2, color="black", label="test")
    plt.xlabel("epoch") 
    plt.ylabel("accuracy")
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.title("Train/Test accuracy") 
    plt.savefig('accuracy.png')

# get some random training images
dataiter = iter(trainloader)
images, labels = dataiter.next()
print(images.shape)

torch.Size([32, 3, 32, 32])


In [7]:
def set_zeros(c, idp_low, idp_high):
    N, C, H, W = c.size()
    if int(C*idp_low)!=0 and int(C*idp_high)!=C:
        print('case1')
        non_zeros = c[:,int(C*idp_low):int(C*idp_high), :,:].clone()
        low = Variable(torch.zeros(N, int(C*idp_low), H, W)).type(dtype)
        high = Variable(torch.zeros(N, C-int(C*idp_high), H, W)).type(dtype)
        c = torch.cat([low, non_zeros, high],1).type(dtype)
        return c
    elif int(C*idp_high)!=C:
        print('case2')
        non_zeros = c[:,int(C*idp_low):int(C*idp_high), :,:].clone()
        high = Variable(torch.zeros(N, C-int(C*idp_high), H, W)).type(dtype)
        c = torch.cat([non_zeros, high],1).type(dtype)
        return c
    elif int(C*idp_low)!=0:
        print('case3')
        non_zeros = c[:,int(C*idp_low):int(C*idp_high), :,:].clone()
        low = Variable(torch.zeros(N, int(C*idp_low), H, W)).type(dtype)
        c = torch.cat([low, non_zeros],1).type(dtype)
        return c
    else:
        return c

x = Variable(torch.randn(1,5,3,3)).type(dtype)
out = set_zeros(x, 0.5, 1)
print(out.size())


case3
torch.Size([1, 5, 3, 3])


In [8]:
class Flatten(nn.Module):
    def forward(self, x):
        # read in N, C, H, W
        N, C, H, W = x.size()
        # flatten the C * H * W values into a single vector per image
        return x.view(N, -1)  
    
class idp_tensor(nn.Module):
    def __init__(self, idp):
        super(idp_tensor, self).__init__()
        self.idp = idp
    def forward(self, c):
        #c is an input tensor, size N*C*H*W
        N, C, H, W = c.size()
        non_zero_channel = int(C*(self.idp))
        if C-non_zero_channel > 0:
            zeros = Variable(torch.zeros(N, C-non_zero_channel, H, W)).type(dtype)
            c = torch.cat([c[:, :non_zero_channel, :, :].clone(), zeros], 1).type(dtype)
            #c[:, non_zero_channel:, :, :] = zeros
            return c
        else:
            #idp is 1
            return c

class range_idp_tensor(nn.Module):
    #idp is 0~1
    def __init__(self, idp_low, idp_high):
        super(range_idp_tensor, self).__init__()
        self.idp_low = idp_low
        self.idp_high = idp_high
    
    def forward(self, c):
        #c is an input tensor, size N*C*H*W
        #set range C*idp_low:C*idp_high as nonzero
        N, C, H, W = c.size()
        if int(C*self.idp_low)!=0 and int(C*self.idp_high)!=C:
            non_zeros = c[:,int(C*self.idp_low):int(C*self.idp_high), :,:].clone()
            low = Variable(torch.zeros(N, int(C*self.idp_low), H, W)).type(dtype)
            high = Variable(torch.zeros(N, C-int(C*self.idp_high), H, W)).type(dtype)
            c = torch.cat([low, non_zeros, high],1).type(dtype)
            return c
        elif int(C*self.idp_high)!=C:
            non_zeros = c[:,int(C*self.idp_low):int(C*self.idp_high), :,:].clone()
            high = Variable(torch.zeros(N, C-int(C*self.idp_high), H, W)).type(dtype)
            c = torch.cat([non_zeros, high],1).type(dtype)
            return c
        elif int(C*self.idp_low)!=0:
            non_zeros = c[:,int(C*self.idp_low):int(C*self.idp_high), :,:].clone()
            low = Variable(torch.zeros(N, int(C*self.idp_low), H, W)).type(dtype)
            c = torch.cat([low, non_zeros],1).type(dtype)
            return c
        else:
            return c
        
class func_allone(nn.Module):
    def forward(self, x):
        #x is an input tensor, size N*C*H*W
        #for cnn, functions are applied to each filter
        N, C, H, W = x.size()
        x = torch.mul(x.clone(), Variable(torch.ones(N, C, H, W), requires_grad=False).type(dtype))
        return x

class func_linear(nn.Module):
    def __init__(self, k=1):
        super(func_linear, self).__init__()
        self.k = k
    def channel_coeff(self, N, C, H, W):
        #C is channels, return a list with corresponding index: [k, k/2,...]
        #returns a tensor with size N*C*H*W
        coeff = list(map(lambda a: 1-(a/(C+1)), range(0,C)))
        tensor_list = []
        for c in coeff:
            coeff_tensor = torch.ones(H, W)
            coeff_tensor = torch.mul(coeff_tensor, c)
            tensor_list.append(coeff_tensor)
        ct = torch.stack(tensor_list, 0)
        ct = torch.stack(([ct]*N))
        return ct
    
    def forward(self, x):
        N, C, H, W = x.size()
        x = torch.mul(x.clone(), Variable(self.channel_coeff(N, C, H, W), requires_grad=False).type(dtype))
        return x
        
class func_harmonic(nn.Module):
    #perform element-wise multiplication to channels in x with coefficient k/n, n is channel index
    def __init__(self, k=1):
        super(func_harmonic, self).__init__()
        self.k = k
    def channel_coeff(self, N, C, H, W):
        #C is channels, return a list with corresponding index: [k, k/2,...]
        #returns a tensor with size N*C*H*W
        coeff = list(map(lambda a: self.k/a, range(1,C+1)))
        tensor_list = []
        for c in coeff:
            coeff_tensor = torch.ones(H, W)
            coeff_tensor = torch.mul(coeff_tensor, c)
            tensor_list.append(coeff_tensor)
        ct = torch.stack(tensor_list, 0)
        ct = torch.stack(([ct]*N))
        return ct
    
    def forward(self, x):
        #x is an input tensor, size N*C*H*W
        #for cnn, functions are applied to each filter
        N, C, H, W = x.size()
        x = torch.mul(x.clone(), Variable(self.channel_coeff(N, C, H, W), requires_grad=False).type(dtype))
        return x

In [9]:
class tesla_coef_idp_VGG(nn.Module):
    def __init__(self, idp_low, idp_high, idp_layers, model):
        #idp is 0~1, idp_layers is a set, specify which layer in features should apply idp
        #now idp_layers is 1,3,6,8,11,13,15,18,20,22,25,27,29
        super(tesla_coef_idp_VGG, self).__init__()
        self.idp_low = idp_low
        self.idp_high = idp_high
        self.idp_layers = idp_layers
        self.model = model.type(dtype)
        self.features = nn.Sequential(*(self.new_features_list()))
        self.classifier = nn.Sequential(nn.Linear(512, 512),
                              nn.ReLU(inplace=True),
                              nn.Dropout(0.5),
                              nn.Linear(512, 10))
    
    def new_features_list(self):
        new_layers = []
        for i, layer in enumerate(list(self.model.features.children())):
            if i not in self.idp_layers:
                new_layers.append(layer)
            else:
                new_layers.append(layer)
                new_layers.append(func_linear())
                new_layers.append(range_idp_tensor(self.idp_low, self.idp_high))
        new_layers.append(Flatten())
        return new_layers        
    
    def forward(self, x):
        out = self.features(x)
        out = self.classifier(out)
        return out

In [10]:
#net = VGG16(idp=1).type(dtype)
idp = 1
idp_layers = {1,3,6,8,11,13,15,18,20,22,25,27,29}
net = tesla_coef_idp_VGG(0, 1,idp_layers, models.vgg16(pretrained=True)).type(dtype)

x = Variable(torch.randn(32, 3, 32, 32)).type(dtype)
out = net(x)
print(out.size())

torch.Size([32, 10])


In [11]:
# loss and optimizer
criterion = nn.CrossEntropyLoss()
#optimizer = optim.SGD([{'params': net.features.parameters()},
#                       {'params': net.classifier.parameters(), 'lr': 1e-3}], lr=3e-4, momentum=momentum)
optimizer = optim.SGD(net.parameters(), lr=4e-3, momentum=momentum)

In [12]:
train_loss = []
test_loss = []
train_acc = []
test_acc = []

# start training
for epoch in range(num_epoch):  # loop over the dataset multiple times
    running_loss1 = []
    running_loss2 = []
    running_correct1=[]
    running_correct2=[]
    for i, data in enumerate(trainloader, 0):
        correct = 0
        net.train(True)
        # get the inputs
        inputs, labels = data
        # wrap them in Variable
        inputs, labels = Variable(inputs).type(dtype), Variable(labels).type(torch.cuda.LongTensor)
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # print statistics
        # loss
        running_loss1.append(loss.data[0])
        # accuracy
        _, predicted = torch.max(outputs.data, 1)
        correct += (predicted == labels.data).sum()
        correct = correct/labels.size(0)*100
        running_correct1.append(correct)
        
    for i, tdata in enumerate(testloader, 0):
        test_correct = 0
        net.train(False)
        tinputs, tlabels = tdata
        tinputs, tlabels = Variable(tinputs).type(dtype), Variable(tlabels).type(torch.cuda.LongTensor)
        toutput = net(tinputs)
        tloss = criterion(toutput, tlabels)
        running_loss2.append(tloss.data[0])
        _, tpredicted = torch.max(toutput.data, 1)
        test_correct += (tpredicted == tlabels.data).sum()
        test_correct = test_correct/tlabels.size(0)*100
        running_correct2.append(test_correct)
    
    train_loss.append(np.mean(running_loss1))
    test_loss.append(np.mean(running_loss2))
    train_acc.append(np.mean(running_correct1))
    test_acc.append(np.mean(running_correct2))
    
    #statistics
    print('Epoch [%d/%d], Loss: %.4f, Train Accuracy: %r percent' 
                %(epoch+1, num_epoch, train_loss[-1], train_acc[-1]))

print('Finished Training')

Epoch [1/10], Loss: 2.0975, Train Accuracy: 17.072536788227769 percent
Epoch [2/10], Loss: 1.2767, Train Accuracy: 50.195937300063981 percent
Epoch [3/10], Loss: 0.7714, Train Accuracy: 73.394513755598211 percent
Epoch [4/10], Loss: 0.5702, Train Accuracy: 81.02007357645553 percent
Epoch [5/10], Loss: 0.4422, Train Accuracy: 85.408669225847731 percent
Epoch [6/10], Loss: 0.3626, Train Accuracy: 88.099808061420347 percent
Epoch [7/10], Loss: 0.2939, Train Accuracy: 90.319097888675628 percent
Epoch [8/10], Loss: 0.2385, Train Accuracy: 92.130518234165066 percent
Epoch [9/10], Loss: 0.2095, Train Accuracy: 93.102207293666027 percent
Epoch [10/10], Loss: 0.1743, Train Accuracy: 94.241842610364685 percent
Finished Training


In [None]:
# Save the Model
torch.save(net.state_dict(), 'tesla_linear_idp_model.pkl')

In [None]:
plot_losses(train_loss, test_loss)

In [None]:
plot_accuracy(train_acc, test_acc)

In [None]:
# Load the trained model
net.load_state_dict(torch.load('tesla_linear_idp_model.pkl'))
# Set net in inference mode
net.eval()

In [None]:
def test_acccuracy(testloader, model):
    correct = 0
    total = 0
    for data in testloader:
        images, labels = data
        outputs = model(Variable(images, volatile=True).type(dtype))
        labels = Variable(labels, volatile=True).type(torch.cuda.LongTensor)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels.data).sum()
    acc = 100*correct/total
    print('Accuracy of the network on the 10000 test images: %d %%' % (acc))
    return acc

test_acccuracy(testloader, net)