Why semi-supervised learning?

https://stats.stackexchange.com/questions/321841/what-are-variational-autoencoders-and-to-what-learning-tasks-are-they-used

Ideal parameters:
$$ \min_{\boldsymbol{\lambda}}\mathcal{D}[q(\mathbf{z}\vert \mathbf{x},\boldsymbol{\lambda})\vert\vert p(\mathbf{z}\vert \mathbf{x})] $$

The model should also minimize the reconstruction loss (plus optional regularization terms, typically L1 or L2).

VAE objective (the ELBO, which is maximized; the training loss is its negative):
$$ELBO(\boldsymbol{\lambda})= E_{q(\boldsymbol{z}\vert \mathbf{x},\boldsymbol{\lambda})}[\log p(\mathbf{x}\vert\boldsymbol{z})]-\mathcal{D}[q(\boldsymbol{z}\vert \mathbf{x},\boldsymbol{\lambda})\vert\vert p(\boldsymbol{z})]$$

$$ q(\mathbf{z}\vert \mathbf{x},\boldsymbol{\lambda}) = \mathcal{N}(\mathbf{z}\vert\boldsymbol{\mu}(\mathbf{x}), \boldsymbol{\sigma}^2(\mathbf{x})I) $$

conditional distribution:
$$ p_{\boldsymbol{\phi}}(\mathbf{x}\vert\mathbf{z}) = \mathcal{N}(\mathbf{x}|\boldsymbol{\mu}(\mathbf{z}; \boldsymbol{\phi}), \boldsymbol{\sigma}(\mathbf{z}; \boldsymbol{\phi})^2I)$$


$$ ELBO(\boldsymbol{\theta},\boldsymbol{\phi})= \sum_i E_{q_{\boldsymbol{\theta}}(\boldsymbol{z}\vert \mathbf{x}_i)}[\log p_{\boldsymbol{\phi}}(\mathbf{x}_i\vert\boldsymbol{z})]-\mathcal{D}[q_{\boldsymbol{\theta}}(\boldsymbol{z}\vert \mathbf{x}_i)\vert\vert p(\boldsymbol{z})] $$

In [1]:
# Import packages
import os
# Move the working directory two levels up before the project-local imports below.
# NOTE(review): presumably this makes `data_preparation`, `dimension_reduction`, `models`
# and `utils` importable from the repository root -- confirm. The path is relative, so
# re-running this cell moves up two more levels each time.
os.chdir("../..")
from data_preparation.GeoParser import GeoParser
from dimension_reduction.ordination import ordination2d
from sklearn.decomposition import PCA
from IPython.display import Image
import pandas as pd
import numpy as np


Import models

In [2]:
from models.semi_supervised.deep_generative_models.models.auxiliary_dgm import AuxiliaryDeepGenerativeModel
#from models.semi_supervised.deep_generative_models.models.ladder_dgm import LadderDeepGenerativeModel
from models.semi_supervised.deep_generative_models.models.dgm import DeepGenerativeModel
from utils.utils import dict_of_int_highest_elements, plot_evaluation



In [3]:
# Files destinations: root folder, project folder and the sub-folders used for
# data, results and metadata.
# home_path keeps its trailing slash because it is also handed to the model's
# set_configs() unchanged.
home_path = "/home/simon/"
destination_folder = "annleukemia"
data_folder = "data"
results_folder = "results"
meta_destination_folder = "pandas_meta_df"

# Build the plots folder with os.path.join rather than "/".join: home_path already
# ends with a separator, so a plain string join produced
# "/home/simon//annleukemia/results/plots/" (doubled slash).
plots_folder_path = os.path.join(home_path, destination_folder, results_folder, "plots/")


In [4]:
# --- Dataset -----------------------------------------------------------------
# Alternative dataset name kept for reference:
#dataset_name = "gse33000_and_GSE24335_GSE44768_GSE44771_GSE44770"
dataset_name = "mnist"
labels_per_class = 1000
gt_input = 10000  # forwarded to the model constructors as `gt_input`

# --- Model flavour -----------------------------------------------------------
vae_flavour = "o-sylvester"
activation = "relu"  # not referenced by the cells visible in this notebook

# --- Training schedule -------------------------------------------------------
n_epochs = 1000
warmup = 100
early_stopping = 200
betas = (0.9, 0.999)  # not referenced by the cells visible in this notebook
#nrep = 3

# Original author's note, which talks about the ladder VAE although it sits above
# `translate` (NOTE(review): looks misplaced -- confirm which setting it belongs to):
# if ladder is yes builds a ladder vae. Do not combine with auxiliary (yet; might be
# possible and relatively not too hard to implement, but might be overkill. Might be
# interesting too)
translate = "n"


In [5]:
# Which kind of deep generative model to build. The flags are checked in the
# order auxiliary -> ladder -> plain DGM by the model-construction cell.

# A-VAE: Auxiliary Variational Auto-Encoder
auxiliary = True

# L-VAE: Ladder Variational Auto-Encoder
ladder = False

# Convolutional variant (convolutional VAE and convolutional classifier).
# Not applicable if the input is not a sequence (images, videos, sentences, DNA...)
use_conv = False


In [6]:
# When True, a previously trained VAE (unsupervised learning) is loaded
# before training instead of starting from scratch.
load_vae = False



Definition of the hyper-parameters

In [7]:
# Optimisation and regularisation
lr = 0.001
l1 = 0.0  # passed to dgm.run() as lambda1
l2 = 0.0  # passed to dgm.run() as lambda2
batch_size = 128

# Sampling counts handed to dgm.run().
# Values > 1 seem to cause a problem for display only; results seem good.
mc = 1
iw = 1

# Layer sizes
a_dim = 50                 # `a_dim` of the auxiliary model
h_dims = [128, 64]         # hidden layers given to the model constructors
h_dims_classifier = [256]  # hidden layers given to set_adgm_layers()
z_dims = [50]              # latent layers given to the model constructors

# Flows
number_of_flows = 4
num_elements = 2
n_combinations = 20  # could be just 1 with number_of_flows?



In [8]:
# Files destinations and loading switches.
# The path settings repeat the values assigned earlier in the notebook.
load_from_disk = True
load_merge = False
# home_path keeps its trailing slash because it is also handed to the model's
# set_configs() unchanged.
home_path = "/home/simon/"
destination_folder = "annleukemia"
data_folder = "data"
results_folder = "results"
meta_destination_folder = "pandas_meta_df"
# os.path.join instead of "/".join: home_path already ends with a separator, so a
# plain string join produced "/home/simon//annleukemia/results/plots/" (doubled slash).
plots_folder_path = os.path.join(home_path, destination_folder,
                                 results_folder, "plots/")

In [9]:
# Build the deep generative model selected by the `auxiliary` / `ladder` flags
# (auxiliary takes precedence, then ladder, otherwise the plain DGM).
if auxiliary:
    # NOTE(review): use_conv is hard-coded to False here even though a `use_conv`
    # flag exists -- confirm whether the flag should be forwarded.
    dgm = AuxiliaryDeepGenerativeModel(vae_flavour, z_dims, h_dims, n_flows=number_of_flows, a_dim=a_dim,
                                       num_elements=num_elements, use_conv=False, is_hebb_layers=True,
                                       gt_input=gt_input)
elif ladder:
    # The notebook-level import of LadderDeepGenerativeModel is commented out, so this
    # branch used to fail with a NameError. Import it lazily, only when it is needed.
    from models.semi_supervised.deep_generative_models.models.ladder_dgm import LadderDeepGenerativeModel
    dgm = LadderDeepGenerativeModel(vae_flavour, z_dims, h_dims, n_flows=number_of_flows, auxiliary=False,
                                    is_hebb_layers=True, gt_input=gt_input)
else:
    dgm = DeepGenerativeModel(vae_flavour, z_dims, h_dims, n_flows=number_of_flows, a_dim=0, auxiliary=False,
                              num_elements=num_elements, is_hebb_layers=False, gt_input=gt_input)

# The configuration call was identical in all three branches, so it is issued once.
# NOTE(review): meta_destination_folder is passed as the literal "meta_pandas_dataframes",
# not the `meta_destination_folder` variable ("pandas_meta_df") -- confirm which is intended.
dgm.set_configs(home_path=home_path, results_folder=results_folder, data_folder=data_folder,
                destination_folder=destination_folder, dataset_name=dataset_name, lr=lr,
                meta_destination_folder="meta_pandas_dataframes", csv_filename="csv_loggers",
                is_unlabelled=True)

a_dim (making sure it stays ok for ssl_vae) 0
self.a_dim 50




In [10]:
# Load the example dataset into the model.
# NOTE(review): 0.1307 / 0.3081 are the values commonly quoted as the MNIST mean and
# standard deviation; the second one is passed as `var` -- confirm how it is used.
dgm.load_example_dataset(dataset="mnist", batch_size=batch_size,
                         labels_per_class=labels_per_class,
                         extra_class=True, unlabelled_train_ds=True, normalize=True, mu=0.1307, var=0.3081)

is_example = True
# GET ordination from this!
# Walk dgm.x_train a single time and keep each (input, target) pair together.
# Two separate passes only stay aligned when the iteration order is deterministic,
# and they traverse the whole training set twice.
samples = [(x[0].data.numpy(), x[1].data.numpy()) for x in dgm.x_train]
train = np.vstack([features for features, _ in samples])
#unlabelled_train = np.vstack([x[0].data.numpy() for x in dgm.unlabelled_x_train])

targets = np.vstack([target for _, target in samples])
# Each target row is searched for the position of its first entry equal to 1
# (presumably a one-hot encoding -- confirm); that position becomes the label.
labels = [x.tolist().index(1) for x in targets]


self.train_loader 468
self.train_loader 128
self.train_loader <torch.utils.data.sampler.RandomSampler object at 0x7f87a562bb70>


In [11]:
# Transposed training matrix as a DataFrame: one column per sample, with the
# sample's label as the column name.
meta_df = pd.DataFrame(data=train.T, columns=labels)
#unlabeled_meta_df = pd.DataFrame(unlabelled_train.transpose())


In [12]:
#ordination2d(meta_df, epoch="pre", dataset_name=dataset_name, ord_type="pca",
#             images_folder_path=plots_folder_path)

In [13]:
#ordination2d(meta_df, epoch="pre", dataset_name=dataset_name, ord_type="lda",
#             images_folder_path=plots_folder_path)

In [14]:
# Same layout as the full training frame, restricted to the first 1000 samples.
subset = pd.DataFrame(data=train[:1000].T, columns=labels[:1000])

In [15]:
# tSNE
#ordination2d(subset, epoch="pre", dataset_name=dataset_name, ord_type="tsne", 
#             images_folder_path=plots_folder_path)

In [16]:
# Report the shape of the training frame; the recorded output is (784, 60000).
print("train data shape", meta_df.shape)
#print("unlabelled meta_df shape", unlabeled_meta_df.shape)

train data shape (784, 60000)


In [17]:
#if meta_df is not None:
#    dgm.import_dataframe(meta_df, batch_size, labelled=True)
    #dgm.import_dataframe(unlabelled_meta_df, batch_size, labelled=False)

In [18]:
# Hand the early-stopping, warm-up and flavour settings to the model.
dgm.define_configurations(early_stopping=early_stopping, warmup=warmup, flavour=vae_flavour)
# Uses the same labels_per_class / extra_class values as the example-dataset load.
# NOTE(review): is_example is passed as a literal although an `is_example` variable
# is assigned after the dataset is loaded -- confirm whether it should be used here.
dgm.set_data(labels_per_class=labels_per_class, is_example=True, extra_class=True)

In [19]:
# Choose the layer-building routine that matches the model flags
# (auxiliary first, then ladder, otherwise the plain DGM; each with an optional
# convolutional variant except the ladder model).
if auxiliary and use_conv:
    dgm.set_conv_adgm_layers()
elif auxiliary:
    dgm.set_adgm_layers(h_dims=h_dims_classifier, input_shape=[1, 28, 28])
elif ladder:
    dgm.set_ldgm_layers(hebb_layers=True, n_channels=1)
elif use_conv:
    dgm.set_conv_dgm_layers(hebb_layers=True)
else:
    print("MAIN DGM NS")
    dgm.set_dgm_layers(hebb_layers=True)

# Optionally import the M1 part of the M1+M2 model (Kingma et al, 2014).
# The original author was not sure this path still works.
if load_vae:
    print("Importing the model: ", dgm.model_file_name)
    restore = dgm.import_cvae if use_conv else dgm.load_model
    restore()
    #dgm.set_dgm_layers_pretrained()

dgm.cuda()
# dgm.vae.generate_random(False, batch_size, z1_size, [1, 28, 28])

# NOTE(review): the output recorded for this cell ends with
# "RuntimeError: size mismatch, m1: [128 x 784], m2: [795 x 128]"; the cause is not
# visible from this notebook.
dgm.run(
    n_epochs, auxiliary, mc, iw,
    lambda1=l1,
    lambda2=l2,
    verbose=2,
    show_progress=10,
    show_pca_train=10,
    show_lda_train=10,
    show_pca_generated=10,
    clip_grad=0,
    is_input_pruning=True,
    start_pruning=3,
    show_lda_generated=10,
)


isotropic gaussian
self.input_size 784
self.input_size 784
Warmup on:  187200
Log file created:  logs/AuxiliaryDeepGenerativeModel_parameters.log
Log file created:  logs/AuxiliaryDeepGenerativeModel_involvment.log
Log file created:  logs/AuxiliaryDeepGenerativeModel.log
Labeled shape 468
Unlabeled shape 468
epoch 0
Progress: 0.21%

RuntimeError: size mismatch, m1: [128 x 784], m2: [795 x 128] at /pytorch/aten/src/THC/generic/THCTensorMathBlas.cu:249

References
Auto-Encoding Variational Bayes https://arxiv.org/abs/1312.6114
Semi-Supervised Learning with Deep Generative Models https://arxiv.org/abs/1406.5298
Ladder Variational Autoencoders https://arxiv.org/abs/1602.02282
Auxiliary Deep Generative Models    https://arxiv.org/abs/1602.05473
Sylvester Normalizing Flows for Variational Inference  https://arxiv.org/abs/1803.05649
Improving Variational Auto-Encoders using Householder Flow https://arxiv.org/abs/1611.09630
Variational Inference with Normalizing Flows https://arxiv.org/abs/1505.05770
Convex combination linear IAF and the Householder Flow, J.M. Tomczak & M. Welling https://jmtomczak.github.io/deebmed.html

GitHub Resources:
    https://github.com/wohlert/semi-supervised-pytorch
    https://github.com/jmtomczak/vae_vpflows
    https://github.com/jmtomczak/vae_householder_flow