In [42]:
from __future__ import print_function
import pickle 
import numpy as np
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from torch.autograd import Function
from torch.legacy.nn import ParallelCriterion
import copy

In [47]:
import pickle
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    the object has been read.
    """
    # NOTE(review): pickle.load can execute arbitrary code while unpickling;
    # only point this at files whose origin is trusted.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Mini-batch size shared by the three loaders below.
BATCH_SIZE = 64

trainset_labeled = _load_pickle("train_labeled.p")
validset = _load_pickle("validation.p")
trainset_unlabeled = _load_pickle("train_unlabeled.p")

train_loader = torch.utils.data.DataLoader(trainset_labeled, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = torch.utils.data.DataLoader(validset, batch_size=BATCH_SIZE, shuffle=True)
unlabeled_loader = torch.utils.data.DataLoader(trainset_unlabeled, batch_size=BATCH_SIZE, shuffle=True)

In [60]:
class Denoise(nn.Module):
    """Element-wise denoising function with ten learnable scalar coefficients.

    Given a noisy activation ``input_n`` and a same-shaped signal ``input_u``,
    ``forward`` computes::

        mu  = a1 * sigmoid(a2 * u + a3) + a4 * u + a5
        nu  = a6 * sigmoid(a7 * u + a8) + a9 * u + a10
        out = (input_n - mu) * nu + mu

    where ``a1 .. a10`` are the ten entries of ``self.weight`` and are shared
    by every element of the input.
    """

    def __init__(self):
        super(Denoise, self).__init__()
        # One row of ten scalars: weight[0, 0:5] drive mu, weight[0, 5:10] drive nu.
        self.weight = nn.Parameter(torch.Tensor(1, 10))
        self.reset_parameter()

    def forward(self, input_n, input_u):
        """Return the denoised estimate for ``input_n`` given ``input_u``.

        Args:
            input_n: noisy activation; its size decides the output size.
            input_u: signal the coefficients are applied to; combined
                element-wise with ``input_n``.

        Returns:
            ``(input_n - mu) * nu + mu`` with ``mu`` and ``nu`` as described
            in the class docstring.
        """
        dims = input_n.size()
        # Expand every scalar coefficient to the activation's size in one go.
        a1, a2, a3, a4, a5, a6, a7, a8, a9, a10 = [
            self.expand_var(dims, self.weight[0, i]) for i in range(10)
        ]

        mu = a1 * torch.sigmoid(a2 * input_u + a3) + a4 * input_u + a5
        nu = a6 * torch.sigmoid(a7 * input_u + a8) + a9 * input_u + a10
        return (input_n - mu) * nu + mu

    def reset_parameter(self):
        """Re-initialise the coefficients uniformly in ``[-0.1, 0.1]``."""
        stdv = 0.1
        self.weight.data.uniform_(-stdv, stdv)

    def expand_var(self, dims, weight_i):
        """Expand the single-element ``weight_i`` to size ``dims``.

        Args:
            dims: target size (rank >= 1), e.g. ``input_n.size()``.
            weight_i: one coefficient taken from ``self.weight``.

        Returns:
            A view of ``weight_i`` expanded to ``dims``.
        """
        # Reshape to one singleton axis per target axis, then expand. This
        # works for any rank >= 1 and does not depend on whether indexing
        # the parameter produced a 0-dim or a 1-element tensor.
        singleton_shape = [1] * len(dims)
        return weight_i.contiguous().view(*singleton_shape).expand(dims)

        
class normalize(Function):
    """Standardise one tensor with the batch statistics of another.

    NOTE(review): ``forward`` is an instance method and no ``backward`` is
    defined; the only references to this class in the notebook are
    commented out.
    """

    def forward(self, z_pre, input_z):
        """Return ``(input_z - mean(z_pre)) / (std(z_pre) + 1e-5)``.

        Args:
            z_pre: tensor whose mean and standard deviation are taken along
                dimension 0.
            input_z: tensor to standardise; combined element-wise with the
                statistics after they are expanded to ``z_pre.size()``.

        Returns:
            The standardised tensor.
        """
        m = torch.mean(z_pre, 0)
        # The small constant keeps the division finite for constant features.
        s = torch.std(z_pre, 0) + 1e-5

        dims = z_pre.size()
        # Subtract the mean (not the standard deviation) before scaling.
        z_m = m.expand(dims)
        z_s = s.expand(dims)

        return (input_z - z_m) / z_s

        
class Net(nn.Module):
    """Two-conv / two-linear classifier with a clean encoder pass, a
    noise-corrupted encoder pass and a top-down denoising decoder.

    The layout appears to follow the Ladder Network recipe (clean path,
    noisy path, per-layer denoising cost) -- NOTE(review): confirm against
    the intended reference.

    ``forward`` returns ``(C_d, C_u)``:

    * ``C_d`` -- log-softmax class scores taken from the noisy path when
      ``labeled`` is true, otherwise a one-element zero placeholder.
    * ``C_u`` -- weighted sum of the per-layer reconstruction costs between
      the clean activations and the batch-normalised decoder outputs.
    """

    def __init__(self):
        super(Net, self).__init__()
        # ---- Encoder. The ``*_n`` batch norms are used on the noisy pass so
        # the clean and noisy passes keep separate running statistics.
        # Pre layer
        self.bn0 = nn.BatchNorm2d(1, affine = False)

        # First layer
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.bn1 = nn.BatchNorm2d(10, affine = False)
        self.bn1_n = nn.BatchNorm2d(10, affine = False)
        # Second layer
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.bn2 = nn.BatchNorm2d(20, affine = False)
        self.bn2_n = nn.BatchNorm2d(20, affine = False)
        # Third layer
        self.fc1 = nn.Linear(320, 50)
        self.bn3 = nn.BatchNorm1d(50, affine = False)
        self.bn3_n = nn.BatchNorm1d(50, affine = False)
        # Fourth layer
        self.fc2 = nn.Linear(50, 10)
        self.bn4 = nn.BatchNorm1d(10, affine = False)
        self.bn4_n = nn.BatchNorm1d(10, affine = False)

        # ---- Decoder / denoising path. For each level, ``*_b1`` normalises
        # the top-down signal and ``*_b2`` normalises the denoised estimate
        # before it enters the reconstruction cost.
        # Fourth layer
        self.denoise4 = Denoise()
        self.fc2_b = nn.Linear(10, 50)
        self.bn4_b1 = nn.BatchNorm1d(10, affine = False)
        self.bn4_b2 = nn.BatchNorm1d(10, affine = False)
        # Third layer
        self.denoise3 = Denoise()
        self.fc1_b = nn.Linear(50, 320)
        self.bn3_b1 = nn.BatchNorm1d(50, affine = False)
        self.bn3_b2 = nn.BatchNorm1d(50, affine = False)
        # Second layer
        self.denoise2 = Denoise()
        self.conv2t = nn.ConvTranspose2d(20, 10, kernel_size = 5)
        self.bn2_b1 = nn.BatchNorm2d(20, affine = False)
        self.bn2_b2 = nn.BatchNorm2d(20, affine = False)
        # First layer
        self.denoise1 = Denoise()
        self.conv1t = nn.ConvTranspose2d(10, 1, kernel_size = 5)
        self.bn1_b1 = nn.BatchNorm2d(10, affine = False)
        self.bn1_b2 = nn.BatchNorm2d(10, affine = False)
        # Input layer
        self.denoise0 = Denoise()
        self.bn0_b1 = nn.BatchNorm2d(1, affine = False)
        self.bn0_b2 = nn.BatchNorm2d(1, affine = False)

    @staticmethod
    def _batch_ave_norm(z_c, z_h, batch_size):
        """Sum the per-sample L2 distances between ``z_c`` and ``z_h`` and
        divide by ``batch_size * layer_width``.

        ``layer_width`` is the number of elements in one sample, i.e. the
        product of every dimension after the first.

        Returns:
            A one-element tensor holding the averaged cost.
        """
        # The width depends only on the shape, so compute it once up front
        # in plain Python; this also covers ranks other than 2, 3 and 4.
        layer_width = 1
        for extent in z_c.size()[1:]:
            layer_width *= extent

        total = Variable(torch.zeros(1))
        for i in range(0, batch_size):
            total += torch.dist(z_c[i], z_h[i])
        return total / (batch_size * layer_width)

    @staticmethod
    def _noise(x):
        """Return Gaussian noise shaped like ``x`` with standard deviation 0.1."""
        return torch.randn(x.size()) * 0.1

    def forward(self, x, labeled = True):
        """Run the clean pass, the noisy pass and the decoder.

        Args:
            x: input batch; fed to ``conv1``, which expects one input channel.
                ``view(-1, 320)`` after the second pooling requires
                20 x 4 x 4 features per sample, which corresponds to 28x28
                inputs.
            labeled: when true, class scores are computed for a supervised
                loss; otherwise a zero placeholder is returned in their place.

        Returns:
            ``(C_d, C_u)`` as described in the class docstring.
        """
        batch_size = x.size()[0]

        ###### clean path ######
        ### Level 0
        h_c0 = self.bn0(x)
        ### Level 1
        x_c1 = F.max_pool2d(self.conv1(x), 2)
        z_c1 = self.bn1(x_c1)
        h_c1 = F.relu(z_c1)
        ### Level 2
        x_c2 = F.max_pool2d(self.conv2(h_c1), 2)
        z_c2 = self.bn2(x_c2)
        h_c2 = F.relu(z_c2)
        ### Level 3
        x_c3 = h_c2.view(-1, 320)
        x_c3 = self.fc1(x_c3)
        z_c3 = self.bn3(x_c3)
        h_c3 = F.relu(z_c3)
        ### Level 4
        x_c4 = self.fc2(h_c3)
        z_c4 = self.bn4(x_c4)

        ###### noise path ######
        # Noise is added through ``.data`` so the additions themselves are not
        # recorded by autograd.
        ### Level 0
        h_n0 = copy.deepcopy(x)
        h_n0.data += self._noise(h_n0.data)
        ### Level 1
        # Pooling indices are kept so the decoder can unpool to the same places.
        x_n1, i_n1 = F.max_pool2d(self.conv1(h_n0), 2, return_indices = True)
        z_n1 = self.bn1_n(x_n1)
        z_n1.data += self._noise(x_n1.data)
        h_n1 = F.relu(z_n1)
        ### Level 2
        x_n2, i_n2 = F.max_pool2d(self.conv2(h_n1), 2, return_indices = True)
        z_n2 = self.bn2_n(x_n2)
        z_n2.data += self._noise(x_n2.data)
        h_n2 = F.relu(z_n2)
        ### Level 3
        x_n3 = h_n2.view(-1, 320)
        x_n3 = self.fc1(x_n3)
        z_n3 = self.bn3_n(x_n3)
        z_n3.data += self._noise(x_n3.data)
        h_n3 = F.relu(z_n3)
        ### Level 4
        x_n4 = self.fc2(h_n3)
        z_n4 = self.bn4_n(x_n4)
        z_n4.data += self._noise(x_n4.data)
        h_n4 = F.relu(z_n4)

        ###### Decoder and denoising ######
        # Each level denoises its noisy activation using the signal coming
        # from the level above, then batch-normalises the estimate for the cost.
        ### Level 4
        u_4 = self.bn4_b1(h_n4)
        z_d4 = self.denoise4(z_n4, u_4)
        z_dn4 = self.bn4_b2(z_d4)
        ### Level 3
        u_3 = self.bn3_b1(self.fc2_b(z_d4))
        z_d3 = self.denoise3(z_n3, u_3)
        z_dn3 = self.bn3_b2(z_d3)
        ### Level 2
        u_2 = self.fc1_b(z_d3)
        # Reshape the flat 320-vector back to the conv feature-map layout.
        u_2 = self.bn2_b1(u_2.view(z_c2.size()))
        z_d2 = self.denoise2(z_n2, u_2)
        z_dn2 = self.bn2_b2(z_d2)
        ### Level 1
        u_1 = self.bn1_b1(self.conv2t(F.max_unpool2d(z_d2, i_n2, kernel_size = 2, stride = 2)))
        z_d1 = self.denoise1(z_n1, u_1)
        z_dn1 = self.bn1_b2(z_d1)
        ### Level 0
        u_0 = self.bn0_b1(self.conv1t(F.max_unpool2d(z_d1, i_n1, kernel_size = 2, stride = 2)))
        z_d0 = self.denoise0(h_n0, u_0)
        z_dn0 = self.bn0_b2(z_d0)

        ### Unsupervised Loss
        C0 = self._batch_ave_norm(h_c0, z_dn0, batch_size)
        C1 = self._batch_ave_norm(z_c1, z_dn1, batch_size)
        C2 = self._batch_ave_norm(z_c2, z_dn2, batch_size)
        C3 = self._batch_ave_norm(z_c3, z_dn3, batch_size)
        C4 = self._batch_ave_norm(z_c4, z_dn4, batch_size)
        # Per-level weights of the reconstruction cost.
        l0 = 10
        l1 = 5
        l2 = 5
        l3 = 5
        l4 = 10
        C_u = C0*l0 + C1*l1 + C2*l2 + C3*l3 + C4*l4

        if labeled:
            # NOTE(review): the class scores come from the noisy path (after
            # a ReLU), including when the module is in eval mode -- confirm
            # this is intended.
            C_d = F.log_softmax(h_n4)
        else:
            # Placeholder so callers can always unpack two values.
            C_d = Variable(torch.zeros(1))

        return C_d, C_u
        
# Build the network and a plain SGD-with-momentum optimiser over all of its
# parameters.
model = Net()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

In [61]:
def _log_progress(epoch, batch_idx, batch_len, loader, loss):
    """Print one progress line for batch ``batch_idx`` of ``loader``."""
    print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
        epoch, batch_idx * batch_len, len(loader.dataset),
        100. * batch_idx / len(loader), loss.data[0]))


def train(epoch, max_unlabeled_batches=100):
    """Run one training epoch: a labeled pass, then a capped unlabeled pass.

    Args:
        epoch: epoch number, used only in the progress messages.
        max_unlabeled_batches: upper bound on how many batches are drawn from
            ``unlabeled_loader``.
    """
    model.train()

    # Labeled pass: supervised NLL loss plus the model's unsupervised cost.
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()

        output, loss_u = model(data, labeled = True)
        loss_s = F.nll_loss(output, target)
        loss = loss_s + loss_u

        loss.backward()
        optimizer.step()

        if batch_idx % 10 == 0:
            _log_progress(epoch, batch_idx, len(data), train_loader, loss)

    print("Train with unlabled data...")
    # Unlabeled pass: only the unsupervised cost is optimised; the class
    # scores returned by the model are discarded.
    for batch_idx, (data, _) in enumerate(unlabeled_loader):
        # Stop as soon as the cap is reached instead of loading and
        # discarding every remaining batch.
        if batch_idx >= max_unlabeled_batches:
            break

        data = Variable(data)
        optimizer.zero_grad()

        _, loss = model(data, labeled = True)
        loss.backward()
        optimizer.step()

        if batch_idx % 10 == 0:
            _log_progress(epoch, batch_idx, len(data), unlabeled_loader, loss)
            
def test(epoch, valid_loader):
    """Evaluate the global ``model`` on ``valid_loader`` and print a summary.

    For each batch, the supervised NLL loss and the model's unsupervised cost
    are added to a running total; the total is then divided by the number of
    batches. Accuracy is the count of arg-max predictions equal to the target.

    Args:
        epoch: accepted for symmetry with ``train``; not used in the body.
        valid_loader: iterable yielding ``(data, target)`` batches and
            exposing ``.dataset``.
    """
    model.eval()

    running_loss = 0
    hits = 0
    for images, labels in valid_loader:
        images = Variable(images, volatile = True)
        labels = Variable(labels)

        log_probs, recon_cost = model(images)

        supervised = F.nll_loss(log_probs, labels).data[0]
        unsupervised = float(recon_cost.data.numpy()[0])
        running_loss = running_loss + supervised + unsupervised

        # Index of the largest log-probability is the predicted class.
        predicted = log_probs.data.max(1)[1]
        hits += predicted.eq(labels.data).cpu().sum()

    # Each per-batch loss is already a batch average, so divide by the
    # number of batches rather than the number of samples.
    n_batches = len(valid_loader)
    running_loss /= n_batches

    n_samples = len(valid_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        running_loss, hits, n_samples,
        100. * hits / n_samples))
        

In [65]:
# Alternate one training epoch with one validation pass, for epochs 1..10.
N_EPOCHS = 10
for epoch in range(1, N_EPOCHS + 1):
    train(epoch)
    test(epoch, valid_loader)

Train with unlabled data...

Test set: Average loss: 0.8962, Accuracy: 9709/10000 (97%)

Train with unlabled data...

Test set: Average loss: 0.9124, Accuracy: 9708/10000 (97%)

Train with unlabled data...

Test set: Average loss: 0.9021, Accuracy: 9711/10000 (97%)

Train with unlabled data...

Test set: Average loss: 0.8985, Accuracy: 9712/10000 (97%)

Train with unlabled data...

Test set: Average loss: 0.9035, Accuracy: 9723/10000 (97%)

Train with unlabled data...

Test set: Average loss: 0.9033, Accuracy: 9714/10000 (97%)

Train with unlabled data...

Test set: Average loss: 0.9143, Accuracy: 9718/10000 (97%)

Train with unlabled data...

Test set: Average loss: 0.9007, Accuracy: 9720/10000 (97%)

Train with unlabled data...

Test set: Average loss: 0.9000, Accuracy: 9706/10000 (97%)

Train with unlabled data...

Test set: Average loss: 0.8948, Accuracy: 9721/10000 (97%)

