In [1]:
import torch
import numpy as np
import sys

import torch.nn as nn
import torch.nn.functional as F

from torch.nn import init

from torch.autograd import Variable

sys.path.append("../semi-supervised")

from functools import reduce
from operator import __or__
from torch.utils import data
from torch.utils.data.sampler import SubsetRandomSampler

from torchvision import datasets, transforms

from utils import onehot

In [2]:
torch.manual_seed(1337)
np.random.seed(1337)

cuda = torch.cuda.is_available()
print("CUDA: {}".format(cuda))

def binary_cross_entropy(r, x):
    "Drop in replacement until PyTorch adds `reduce` keyword."
    return -torch.sum(x * torch.log(r + 1e-8) + (1 - x) * torch.log(1 - r + 1e-8), dim=-1)

n_labels = 1000

CUDA: True


In [3]:
# data
transformations = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225),
                         inplace=True)
])

dataset1 = datasets.ImageFolder('/scratch/apo249/ssl_data_96/supervised/train/',
                                transform=transformations)

dataset2 = datasets.ImageFolder('/scratch/apo249/ssl_data_96/unsupervised/',
                                transform=transformations)

dataset3 = datasets.ImageFolder('/scratch/apo249/ssl_data_96/supervised/val/',
                                transform=transformations)


#dataset = data.ConcatDataset((dataset1, dataset2))

#train_ratio = 0.9
#train_set_size = int(train_ratio * len(dataset))
#val_set_size = len(dataset) - train_set_size

#train_data, val_data = data.random_split(dataset,
#                                         (train_set_size, val_set_size))

train_lab_loader = data.DataLoader(dataset1, batch_size=128, shuffle=True)
train_unlab_loader = data.DataLoader(dataset2, batch_size=128, shuffle=True)
val_loader = data.DataLoader(dataset3, batch_size=128, shuffle=False)


In [4]:
print(len(train_lab_loader))
print(len(train_unlab_loader))
print(len(val_loader))

500
4000
500


In [5]:
from layers import GaussianSample, GaussianMerge, GumbelSoftmax
from inference import log_gaussian, log_standard_gaussian

class Encoder(nn.Module):
    def __init__(self, dims, sample_layer=GaussianSample):
        """
        Inference network

        Attempts to infer the probability distribution
        p(z|x) from the data by fitting a variational
        distribution q_φ(z|x). Returns the two parameters
        of the distribution (µ, log σ²).

        :param dims: dimensions of the networks
           given by the number of neurons on the form
           [input_dim, [hidden_dims], latent_dim].
        """
        super(Encoder, self).__init__()

        [x_dim, h_dim, z_dim] = dims
        neurons = [x_dim, *h_dim]
        linear_layers = [nn.Linear(neurons[i-1], neurons[i]) for i in range(1, len(neurons))]

        self.hidden = nn.ModuleList(linear_layers)
        self.sample = sample_layer(h_dim[-1], z_dim)

    def forward(self, x):
        for layer in self.hidden:
            x = F.relu(layer(x))
        return self.sample(x)


class Decoder(nn.Module):
    def __init__(self, dims):
        """
        Generative network

        Generates samples from the original distribution
        p(x) by transforming a latent representation, e.g.
        by finding p_θ(x|z).

        :param dims: dimensions of the networks
            given by the number of neurons on the form
            [latent_dim, [hidden_dims], input_dim].
        """
        super(Decoder, self).__init__()

        [z_dim, h_dim, x_dim] = dims

        neurons = [z_dim, *h_dim]
        linear_layers = [nn.Linear(neurons[i-1], neurons[i]) for i in range(1, len(neurons))]
        self.hidden = nn.ModuleList(linear_layers)

        self.reconstruction = nn.Linear(h_dim[-1], x_dim)

        self.output_activation = nn.Sigmoid()

    def forward(self, x):
        for layer in self.hidden:
            x = F.relu(layer(x))
        return self.output_activation(self.reconstruction(x))


class Classifier(nn.Module):
    def __init__(self, dims):
        """
        Single hidden layer classifier
        with softmax output.
        """
        super(Classifier, self).__init__()
        [x_dim, h_dim, y_dim] = dims
        self.dense = nn.Linear(x_dim, h_dim)
        self.logits = nn.Linear(h_dim, y_dim)

    def forward(self, x):
        x = F.relu(self.dense(x))
        x = F.softmax(self.logits(x), dim=-1)
        return x

In [6]:
class DeepGenerativeModel(VariationalAutoencoder):
    def __init__(self, dims):
        """
        M2 code replication from the paper
        'Semi-Supervised Learning with Deep Generative Models'
        (Kingma 2014) in PyTorch.

        The "Generative semi-supervised model" is a probabilistic
        model that incorporates label information in both
        inference and generation.

        Initialise a new generative model
        :param dims: dimensions of x, y, z and hidden layers.
        """
        [x_dim, self.y_dim, z_dim, h_dim] = dims
        super(DeepGenerativeModel, self).__init__([x_dim, z_dim, h_dim])

        self.encoder = Encoder([x_dim + self.y_dim, h_dim, z_dim])
        self.decoder = Decoder([z_dim + self.y_dim, list(reversed(h_dim)), x_dim])
        self.classifier = Classifier([x_dim, h_dim[0], self.y_dim])

        for m in self.modules():
            if isinstance(m, nn.Linear):
                init.xavier_normal(m.weight.data)
                if m.bias is not None:
                    m.bias.data.zero_()

    def forward(self, x, y):
        # Add label and data and generate latent variable
        z, z_mu, z_log_var = self.encoder(torch.cat([x, y], dim=1))

        self.kl_divergence = self._kld(z, (z_mu, z_log_var))

        # Reconstruct data point from latent data and label
        x_mu = self.decoder(torch.cat([z, y], dim=1))

        return x_mu

    def classify(self, x):
        logits = self.classifier(x)
        return logits

    def sample(self, z, y):
        """
        Samples from the Decoder to generate an x.
        :param z: latent normal variable
        :param y: label (one-hot encoded)
        :return: x
        """
        y = y.float()
        x = self.decoder(torch.cat([z, y], dim=1))
        return x

NameError: name 'VariationalAutoencoder' is not defined

In [14]:
if __name__ == "__main__":
    from itertools import repeat, cycle
    from torch.autograd import Variable
    from inference import SVI, DeterministicWarmup, ImportanceWeightedSampler

    #labelled, unlabelled, validation = get_imgnet(location="./", batch_size=100, labels_per_class=10)
    labelled = train_lab_loader
    unlabelled = train_unlab_loader
    validation = val_loader
    
    alpha = 0.1 * len(unlabelled) / len(labelled)

    models = []

    # Kingma 2014, M2 model. Reported: 88%, achieved: ??%
    from models import DeepGenerativeModel
    models += [DeepGenerativeModel([96*96, n_labels, 50, [600, 600]])]

    # Maaløe 2016, ADGM model. Reported: 99.4%, achieved: ??%
    # from models import AuxiliaryDeepGenerativeModel
    # models += [AuxiliaryDeepGenerativeModel([784, n_labels, 100, 100, [500, 500]])]

    # from models import LadderDeepGenerativeModel
    # models += [LadderDeepGenerativeModel([784, n_labels, [32, 16, 8], [128, 128, 128]])]

    for model in models:
        if cuda: model = model.cuda()

        beta = DeterministicWarmup(n=4*len(unlabelled)*100)
        sampler = ImportanceWeightedSampler(mc=1, iw=1)

        elbo = SVI(model, likelihood=binary_cross_entropy, beta=beta, sampler=sampler)
        optimizer = torch.optim.Adam(model.parameters(), lr=3e-4, betas=(0.9, 0.999))

        epochs = 251
        best = 0.0

        file = open(model.__class__.__name__ + ".log", 'w+')

        for epoch in range(epochs):
            model.train()
            total_loss, labelled_loss, unlabelled_loss, accuracy = (0, 0, 0, 0)
            for (x, y), (u, _) in zip(cycle(labelled), unlabelled):
                # Wrap in variables
                x, y, u = Variable(x), Variable(y), Variable(u)

                if cuda:
                    # They need to be on the same device and be synchronized.
                    x, y = x.cuda(device=0), y.cuda(device=0)
                    u = u.cuda(device=0)

                L = -elbo(x, y)
                U = -elbo(u)

                # Add auxiliary classification loss q(y|x)
                logits = model.classify(x)
                classication_loss = torch.sum(y * torch.log(logits + 1e-8), dim=1).mean()

                J_alpha = L - alpha * classication_loss + U

                J_alpha.backward()
                optimizer.step()
                optimizer.zero_grad()

                total_loss += J_alpha.data.item()
                labelled_loss += L.data.item()
                unlabelled_loss += U.data.item()

                _, pred_idx = torch.max(logits, 1)
                _, lab_idx = torch.max(y, 1)
                accuracy += torch.mean((pred_idx.data == lab_idx.data).float())

            m = len(unlabelled)
            print(*(total_loss / m, labelled_loss / m, unlabelled_loss / m, accuracy / m), sep="\t", file=file)

            if epoch % 1 == 0:
                model.eval()
                print("Epoch: {}".format(epoch))
                print("[Train]\t\t J_a: {:.2f}, L: {:.2f}, U: {:.2f}, accuracy: {:.2f}".format(total_loss / m,
                                                                                              labelled_loss / m,
                                                                                              unlabelled_loss / m,
                                                                                              accuracy / m))

                total_loss, labelled_loss, unlabelled_loss, accuracy = (0, 0, 0, 0)
                for x, y in validation:
                    x, y = Variable(x), Variable(y)

                    if cuda:
                        x, y = x.cuda(device=0), y.cuda(device=0)

                    L = -elbo(x, y)
                    U = -elbo(x)

                    logits = model.classify(x)
                    classication_loss = -torch.sum(y * torch.log(logits + 1e-8), dim=1).mean()

                    J_alpha = L + alpha * classication_loss + U

                    total_loss += J_alpha.data.item()
                    labelled_loss += L.data.item()
                    unlabelled_loss += U.data.item()

                    _, pred_idx = torch.max(logits, 1)
                    _, lab_idx = torch.max(y, 1)
                    accuracy += torch.mean((pred_idx.data == lab_idx.data).float())

                m = len(validation)
                print(*(total_loss / m, labelled_loss / m, unlabelled_loss / m, accuracy / m), sep="\t", file=file)
                print("[Validation]\t J_a: {:.2f}, L: {:.2f}, U: {:.2f}, accuracy: {:.2f}".format(total_loss / m,
                                                                                              labelled_loss / m,
                                                                                              unlabelled_loss / m,
                                                                                              accuracy / m))

            if accuracy > best:
                best = accuracy
                torch.save(model, '{}.pt'.format(model.__class__.__name__))

  init.xavier_normal(m.weight.data)
  init.xavier_normal(m.weight.data)


RuntimeError: Number of dimensions of repeat dims can not be smaller than number of dimensions of tensor