In [9]:
from __future__ import print_function
import pickle 
import numpy as np
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
#from torchvision import datasets, transforms
from torch.autograd import Variable

In [2]:
trainset_labeled = pickle.load(open("train_labeled.p", "rb")) 
train_loader  = torch.utils.data.DataLoader(trainset_labeled, batch_size=64, shuffle=True, num_workers=2)

validset = pickle.load(open("validation.p", "rb"))
valid_loader = torch.utils.data.DataLoader(validset, batch_size=64, shuffle=True, num_workers=2)

Files already downloaded
3000
750


In [3]:
#utility functions
def rand(x, level = 1e-3):
    return x + torch.randn(x.size()) * level

def norm_weights_2d(size):
    return BatchNorm2d(nn.Linear(size))
def norm_weights_1d(size):
    return BatchNorm2d(nn.Linear(size))

def reluN(x, level = 1e-3):
    y = x + torch.randn(x.size()) * level

In [4]:
def norm(size, dim = 1):
    if dim == 1:
        return torch.nn.BatchNorm1d(size)
    else:
        return torch.nn.BatchNorm2d(size)
def linear(indim, outdim):
    return nn.Linear(indim, outdim)
def var(tens):
    # The whole torch.nn module uses Parameter objects, which are essentially wrappers
    # of autograd Variables.
    return torch.nn.Parameter(tens)
def randn(size):
    return torch.randn(size)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.layers = [1000, 600, 300, 10]
        layers = self.layers
        self.encs = [28*28, layers[0]]
        self.lats = []
        self.encnorms = []
        self.decs = []
        self.decnorms = []
        
        def _add_module_to_list(name, module, module_list):
            self.__setattr__(name, module)
            module_list.append(module)

        for idx, l in enumerate(layers[:-1]):
            # When setting any attribute in a Module, PyTorch will try to figure out
            # what kind of value it is: if it is a Parameter or a Module, PyTorch
            # will automatically add the Parameter or Module into the parameter/module
            # list.  I'm not sure if this is a good pattern in Python because adding
            # modules invisibly like this would cause problems if one wants to maintain
            # modules in list attributes for dynamicity (like here).  I would rather
            # require the developers to always explicitly add modules with "add_module()"
            # (I'm more convinced to avoid deriving from Module in general).
            _add_module_to_list('_lat%d' % idx, linear(l, l), self.lats)
            _add_module_to_list('_encnorm%d' % idx, norm(l), self.encnorms)
            _add_module_to_list('_enc%d' % idx, linear(l, layers[idx+1]), self.encs)
            _add_module_to_list('_dec%d' % idx, linear(layers[idx+1], l), self.decs)
            _add_module_to_list('_decnorm%d' % idx, norm(l), self.decnorms)
        self.batch_size = 64
        self.weights = [self.encs, self.lats, self.decs]
        

    def forward(self, x, v = 0, noise=1e-3):
        self.eps = noise
        self.batch_size = x.size()[0]
        bs = self.batch_size
        #enc= F.relu(self.enc4(F.relu(self.enc3(F.relu(self.enc2(F.relu(self.enc1(x))))))))
        corrupted = []
        corruptedout = []
        for idx, l in enumerate(self.layers):
            if idx == 0:
                corrupted.append(self.encnorms[l](self.encs[l](x)))
            else:
                corrupted.append(self.encnorms[l](self.encs[l](corruptedout[-1])))
            corruptedout.append(F.relu(self.encs[l] + self.encnorms[l].weight.unsqueeze(0).expand(
                    bs, l) * var(randn(corrupted[-1].size()) * self.eps) +
                    self.encnorms[0].bias.unsqueeze(0).expand(bs, l)))
            
        encout = F.softmax(corruptedout[-1])
        
        clean = [x]
        for norm, enc in zip(self.encnorms, self.encs):
            clean.append(F.relu(norm(enc(clean[-1]))))
        
        decout = [encout]
        decin = []
        for idx in range(len(self.layers) - 1, -1, -1):
            dec = self.decs[idx]
            lat = self.lats[idx]
            decnorm = self.decnorms[idx+1]
            decin.append(dec(decout[-1]) + lat(corruptedout[idx]))
            decout.append(F.relu(decin[-1] + decnorm.weight.unsqueeze(0).expand(bs, self.layers[idx]) *
                    var(randn(decin[-1].size()) * self.eps) + \
                    decnorm.bias.unsqueeze(0).expend(bs, self.layers[idx])))
        weight_reg = 0
        for w_list in self.weights:
            for w in w_list:
                weight_reg += (w.weight**2).mean()/100
        
        yhat = F.log_softmax(corruptedout[-1])
        
        encode_err = 0
        enc_weight = 5
        enc_decay = .2
        for c, d in zip(clean[-2::-1], decin):
            encode_err += enc_weight * ((c - d)**2).mean()
        return yhat, weight_reg + encode_err
        

model = Net()
params = list(model.parameters())

if 1:
    print(model)
    print('Models has {} learnable paramater:'.format(len(params)))
    [print('parameter {} has a size of {}'.format(i+1, params[i].size())) for i in range(len(params))]

Net (
  (_lat0): Linear (1000 -> 1000)
  (_encnorm0): BatchNorm1d(1000, eps=1e-05, momentum=0.1, affine=True)
  (_enc0): Linear (1000 -> 600)
  (_dec0): Linear (600 -> 1000)
  (_decnorm0): BatchNorm1d(1000, eps=1e-05, momentum=0.1, affine=True)
  (_lat1): Linear (600 -> 600)
  (_encnorm1): BatchNorm1d(600, eps=1e-05, momentum=0.1, affine=True)
  (_enc1): Linear (600 -> 300)
  (_dec1): Linear (300 -> 600)
  (_decnorm1): BatchNorm1d(600, eps=1e-05, momentum=0.1, affine=True)
  (_lat2): Linear (300 -> 300)
  (_encnorm2): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True)
  (_enc2): Linear (300 -> 10)
  (_dec2): Linear (10 -> 300)
  (_decnorm2): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True)
)
Models has 30 learnable paramater:
parameter 1 has a size of torch.Size([1000, 1000])
parameter 2 has a size of torch.Size([1000])
parameter 3 has a size of torch.Size([1000])
parameter 4 has a size of torch.Size([1000])
parameter 5 has a size of torch.Size([600, 1000])
parameter 6 has a 

In [5]:
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

In [10]:
def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = Variable(data), Variable(target) # Wrap them in Variable 
        optimizer.zero_grad() # Zero the parameter gradients
        data = data.view(data.size()[0], 28*28)
        outputs = model(data,noise=1e-2) # Forward 
        output = outputs[0]
        US = outputs[1]
        loss = F.nll_loss(output, target) + US*.1
        loss.backward() 
        optimizer.step()
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.data[0]))

In [11]:
def test(epoch, valid_loader):
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in valid_loader:

        data, target = Variable(data, volatile=True), Variable(target)
        data = data.view(data.size()[0], 28*28)
        outputs = model(data, noise = 0)
        output = outputs[0]
        test_loss += F.nll_loss(output, target).data[0]
        pred = output.data.max(1)[1] # get the index of the max log-probability
        correct += pred.eq(target.data).cpu().sum()

    test_loss /= len(valid_loader) # loss function already averages over batch size
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(valid_loader.dataset),
        100. * correct / len(valid_loader.dataset)))

In [12]:
for epoch in range(1, 30):
    train(epoch)
    test(epoch, valid_loader)

IndexError: list index out of range