In [1]:
import sys
assert sys.version_info >= (3, 5)
import tensorflow as tf
from tensorflow import keras
assert tf.__version__ >= "2.0"
import numpy as np
import time
K = keras.backend
import pandas as pd
import math
import os
from sklearn.metrics import r2_score
from scipy.stats import uniform,randint
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats
from scipy.stats import norm
from sklearn.metrics import silhouette_score
from scipy.cluster.hierarchy import linkage, cophenet


# Seed TensorFlow's and NumPy's global random generators so repeated runs draw the same numbers.
tf.random.set_seed(42)
np.random.seed(42)
# Seed value kept in a name so it can be handed to APIs that take an explicit random_state.
random_state=42

In [2]:
# Directory holding the prepared inputs; every relative read below resolves against it.
save_path = 'save_path'
os.chdir(save_path)

# Column labels kept after feature selection.
# NOTE: allow_pickle=True unpickles objects on load -- only point this at trusted files.
features = np.load("feature_selection.npy", allow_pickle=True)

# Omics matrices (first CSV column is the row index), restricted to the selected features.
X_train_omics_unlabelled = pd.read_csv("X_train_omics_unlabelled.csv", index_col=0)[features]
X_train_omics_labelled = pd.read_csv("X_train_omics_labelled.csv", index_col=0)[features]
X_test_omics = pd.read_csv("X_test_omics.csv", index_col=0)[features]
X_valid_omics = pd.read_csv("X_valid_omics.csv", index_col=0)[features]

# Targets for the labelled splits.
train_set_labelled_y = pd.read_csv("train_set_labelled_y.csv", index_col=0)
test_set_labelled_y = pd.read_csv("test_set_labelled_y.csv", index_col=0)
valid_set_labelled_y = pd.read_csv("valid_set_labelled_y.csv", index_col=0)

# The "_c" tables that accompany each split.
train_set_labelled_c = pd.read_csv("train_set_labelled_c.csv", index_col=0)
train_set_unlabelled_c = pd.read_csv("train_set_unlabelled_c.csv", index_col=0)
test_set_labelled_c = pd.read_csv("test_set_labelled_c.csv", index_col=0)
valid_set_labelled_c = pd.read_csv("valid_set_labelled_c.csv", index_col=0)

# Bin y into 10 equal-width bins, one-hot encoded.
# The bin edges are fitted on the training targets only and reused for validation and test.
from sklearn.preprocessing import KBinsDiscretizer
binner = KBinsDiscretizer(n_bins=10, encode="onehot-dense", strategy="uniform")
train_set_labelled_y = binner.fit_transform(train_set_labelled_y)
valid_set_labelled_y = binner.transform(valid_set_labelled_y)
test_set_labelled_y = binner.transform(test_set_labelled_y)

In [3]:
# Switch the working directory so the relative "*.h5" model files saved and loaded by the
# training functions below land in this folder.
save_model_path = 'save_model_path'
os.chdir(save_model_path)


In [4]:
# Number of feature columns per sample (after feature selection); bound as the default
# `input_shape` argument of build_model_mmd.
input_shape = X_train_omics_labelled.shape[1]

# Custom parts # 

## Helpful functions ##

In [5]:
def validation_log_lik_sampling(y_val,x_val,variational_decoder,codings_size,samples=200):
    """
    Monte Carlo estimate of something proportional to the log-likelihood log p(x | y).

    For each of `samples` draws, a latent z ~ N(0, I) is sampled for every row, decoded
    together with y_val, and exp(-||x - x_pred||^2) is averaged over the draws. The log of
    that average is then averaged over the rows.

    More samples of z give a better MC approximation but take longer to compute.
    This is how evaluation is done on the validation (and test) set: it measures the
    ability to generate x given y.

    The average over draws is evaluated in log space (log-sum-exp). Exponentiating first
    underflows to 0 when the squared error is large, which turns the log into -inf.

    Parameters
    ----------
    y_val : labels passed to the decoder alongside z, one row per sample.
    x_val : observed inputs, one row per sample (must support len()).
    variational_decoder : callable taking [z, y] and returning reconstructions of x.
    codings_size : int, dimensionality of the latent z.
    samples : int, number of Monte Carlo draws of z.

    Returns
    -------
    Scalar tensor: mean over rows of log(mean over draws of exp(-||x - x_pred||^2)).
    """

    x_val_len = len(x_val)
    neg_sq_errors = []
    for _ in range(samples):
        z = np.random.normal(loc=0,scale=1,size=codings_size*x_val_len).reshape(x_val_len,codings_size)
        x_pred = variational_decoder([z,y_val])
        # log of the unnormalised density for this draw, one value per row
        neg_sq_errors.append(-K.sum((x_val-x_pred)**2,axis=-1))

    # shape (samples, rows); log(mean(exp(a))) == logsumexp(a) - log(samples)
    stacked = tf.stack(neg_sq_errors,axis=0)
    log_mean = tf.reduce_logsumexp(stacked,axis=0) - tf.math.log(tf.cast(samples,stacked.dtype))
    return K.mean(log_mean)


def create_batch(x_label, y_label, x_unlabel, batch_s=32):
    '''
    Draw one mixed mini-batch of labelled and unlabelled rows.

    The batch holds batch_s rows in total. The labelled share mirrors the labelled share
    of the whole training data (rounded up); the rest of the batch is unlabelled.
    Rows are sampled with replacement via np.random.choice.
    Thanks to Omer Nivron for help with this.

    x_label / x_unlabel are indexed with .iloc (DataFrames); y_label is indexed as a 2-D array.

    Returns (labelled x rows, labelled y rows, unlabelled x rows).
    '''
    n_labelled = x_label.shape[0]
    n_unlabelled = x_unlabel.shape[0]

    # How many of the batch_s slots go to labelled data.
    labelled_fraction = n_labelled / (n_labelled + n_unlabelled)
    n_la = int(np.ceil(labelled_fraction * batch_s))
    n_un = batch_s - n_la

    # Labelled indices are drawn first, unlabelled second (the order fixes the random stream).
    rows_la = np.random.choice(list(range(n_labelled)), n_la)
    batch_x_la = x_label.iloc[rows_la, :]
    batch_y_la = y_label[rows_la, :]

    rows_un = np.random.choice(list(range(n_unlabelled)), n_un)
    batch_x_un = x_unlabel.iloc[rows_un, :]

    return batch_x_la, batch_y_la, batch_x_un


def progress_bar(iteration, total, size=30):
    """Render a text progress bar, e.g. ' 5/10 [=====>....]', `size` characters wide inside the brackets."""
    # ">" marks the moving head while running; a finished bar is all "=".
    head = ">" if iteration < total else "="
    filled = (size - 1) * iteration // total
    bar = "=" * filled + head + "." * (size - filled - 1)
    # Pad the counter to the width of `total` so the bar does not shift as it grows.
    width = len(str(total))
    return "{:-{w}d}/{} [{}]".format(iteration, total, bar, w=width)

def print_status_bar(iteration, total, loss, metrics=None, size=30):
    """Status bar for training: redraw the in-epoch progress line with the running batch loss.

    iteration, total : progress counters forwarded to progress_bar.
    loss : value shown as "Loss for batch" (must support the ".4f" format).
    metrics : accepted for call compatibility; not used.
    size : bar width, forwarded to progress_bar (previously ignored).

    The line is rewritten in place with a carriage return and only terminated by a newline
    once iteration reaches total.
    """
    status = "Loss for batch: {:.4f}".format(loss)
    end = "" if iteration < total else "\n"
    print("\r{} - {}".format(progress_bar(iteration, total, size=size), status), end=end)
    
def print_status_bar_epoch(iteration, total, training_loss_for_epoch,val_loss, metrics=None, size=30):
    """Status bar for training (end of epoch): show the epoch's training and validation loss.

    iteration, total : progress counters forwarded to progress_bar.
    training_loss_for_epoch, val_loss : values shown (must support the ".4f" format).
    metrics : accepted for call compatibility; not used.
    size : bar width, forwarded to progress_bar (previously ignored).
    """
    status = "trainLoss: {:.4f}  Val_loss: {:.4f} ".format(training_loss_for_epoch, val_loss)
    end = "" if iteration < total else "\n"
    print("\r{} - {}".format(progress_bar(iteration, total, size=size), status), end=end)
    
    
def list_average(list_of_loss):
    """Arithmetic mean of the values in list_of_loss (raises ZeroDivisionError when it is empty)."""
    total = sum(list_of_loss)
    count = len(list_of_loss)
    return total / count


def y_pred_loss(y_in):
    """Empirical distribution over y and the cross-entropy of the data against it.

    The distribution is the column-wise mean of y_in. The loss is the mean categorical
    cross-entropy between each row of y_in and that distribution.

    Unlike the model, which works batch by batch, this function can be given the whole
    dataset at once, so the distribution the model learns through mini-batch gradient
    descent will probably not match this one exactly.

    Returns (loss, y_distribution).
    """
    n_rows = len(y_in)
    y_distribution = (K.sum(y_in, axis=0) / n_rows)
    per_row_xent = keras.losses.categorical_crossentropy(y_in, y_distribution)
    return tf.reduce_mean(per_row_xent), y_distribution


def rounded_accuracy(y_true,y_pred):
    """
    Accuracy of class-probability predictions after hardening them to one-hot rows.

    In each row of y_pred the largest entry becomes 1 and every other entry 0; the
    result is scored against y_true with sklearn's accuracy_score.
    """
    from sklearn.metrics import accuracy_score
    winners = y_pred.argmax(1)
    one_hot = np.zeros_like(y_pred)
    one_hot[np.arange(len(y_pred)), winners] = 1
    return accuracy_score(y_true, one_hot)

## Model ##

### Custom components ###

In [6]:
class Sampling(keras.layers.Layer):
    """Reparameterization trick: z = mean + exp(log_var / 2) * eps, with eps drawn from a standard normal."""
    def call(self, inputs):
        mean, log_var = inputs
        eps = K.random_normal(tf.shape(log_var))
        std = K.exp(log_var/2)
        return eps * std + mean
    
    
    
    
class y_dist(keras.layers.Layer):
    """
    Layer that learns p, the parameters of a categorical distribution over y.

    Nine free logits are stored in the weight "q"; a tenth logit fixed at 0 is appended
    and the ten are normalised (exp / sum) into the probability row p.

    call() returns (loss, p). The loss is the categorical cross-entropy between every
    input row and p, averaged over the batch, and is what training minimises.

    Shapes are hard-wired for a 10-category y; change them to use another number of categories.
    """
    def __init__(self,**kwargs):
        super().__init__(**kwargs)

    def build(self,batch_input_shape):
        # One row of 9 trainable logits; the 10th is pinned to 0 in call().
        self.q = self.add_weight(name="q",shape=[1,9],initializer="uniform",trainable=True)
        super().build(batch_input_shape)

    def call(self,X):
        pinned_logit = tf.zeros([1,1],dtype="float32")
        logits = tf.concat([self.q,pinned_logit],axis=-1)
        unnormalised = K.exp(logits)
        p = tf.math.divide(unnormalised,K.sum(unnormalised))
        per_row_xent = keras.losses.categorical_crossentropy(X,p)
        loss = tf.reduce_mean(per_row_xent)
        return loss,p

    def compute_output_shape(self,batch_input_shape):
        return tf.TensorShape(10)
    
    
    
class FullModel_MMD(keras.models.Model):
    """
    Full semi-supervised model (MMD variant), used for training only.

    It wraps four already-built component models -- encoder, decoder, classifier and the
    y-distribution model (see build_model_mmd) -- and its call() returns a single scalar:
    the negative log-likelihood style loss summed over the labelled and unlabelled parts
    of a batch. That loss is minimised by gradient descent with respect to the parameters
    of all four components.

    At test time use the component models directly; calling this model only makes sense
    for computing the training loss.

    N_parameter is passed to labelled_cls_loss as N, and beta weights the MMD term.
    """
    def __init__(self,N_parameter,beta,variational_encoder,variational_decoder,classifier,y_distribution,
                 codings_size,**kwargs):
        super().__init__(**kwargs)
        # Component models (assignment order is kept stable; Keras tracks them as attributes).
        self.encoder = variational_encoder
        self.decoder = variational_decoder
        self.classifier = classifier
        self.y_distribution = y_distribution
        # Scalars used by the loss terms.
        self.codings_size = codings_size
        self.N = N_parameter
        self.beta = beta

    def call(self,inputs):
        """inputs is a list: [labelled X, labelled y, unlabelled X]. Returns the total loss."""

        X_labelled, y_labelled, X_unlabelled = inputs[0], inputs[1], inputs[2]

        # ---- labelled part: encode with the true y, classify x, decode with the true y ----
        _, _, z_labelled = self.encoder([X_labelled,y_labelled])
        y_pred_label = self.classifier(X_labelled)
        recon_labelled = self.decoder([z_labelled,y_labelled])

        recon_term = labelled_loss_reconstruction_mmd(codings=z_labelled,x=X_labelled,
                                                      x_decoded_mean=recon_labelled,
                                                      batch_size=32,codings_size=self.codings_size,
                                                      beta=self.beta)
        cls_term = labelled_cls_loss(y_labelled,y_pred_label,self.N)
        prior_term_labelled = self.y_distribution(y_labelled)[0]
        labelled_loss = recon_term + cls_term + prior_term_labelled

        # ---- unlabelled part: the classifier's prediction stands in for the missing y ----
        y_pred_unlabel = self.classifier(X_unlabelled)
        _, _, z_unlabelled = self.encoder([X_unlabelled,y_pred_unlabel])
        recon_unlabelled = self.decoder([z_unlabelled,y_pred_unlabel])

        unlabelled_recon_term = unlabelled_loss_reconstruction_mmd(codings=z_unlabelled,
                                                                   y_pred=y_pred_unlabel,
                                                                   x=X_unlabelled,
                                                                   x_decoded_mean=recon_unlabelled,
                                                                   beta=self.beta,
                                                                   codings_size=self.codings_size,
                                                                   batch_size=32)
        prior_term_unlabelled = self.y_distribution(y_pred_unlabel)[0]
        unlabelled_loss = unlabelled_recon_term + prior_term_unlabelled

        # ---- total ----
        return labelled_loss + unlabelled_loss

    

def build_model_mmd(n_hidden=1, n_neurons=723,input_shape=input_shape,beta=1,n_hidden_classifier=1,
              n_neurons_classifier=300,N=30,codings_size=50):

    """
    Build the deep generative model and its four component models.

    The encoder and decoder share one architecture (n_hidden layers of n_neurons units),
    chosen for simplicity; the classifier has its own (n_hidden_classifier layers of
    n_neurons_classifier units). Every hidden layer is Dense(elu, he_normal) followed by
    Dropout(0.3). y is taken to be 10-dimensional.

    Returns (variational_encoder, variational_decoder, classifier, y_distribution, model),
    all of which can be used downstream, e.g.

        variational_encoder, variational_decoder, classifier, y_distribution, model = \
            build_model_mmd(n_hidden=1, n_neurons=723, input_shape=input_shape, beta=1,
                            n_hidden_classifier=1, n_neurons_classifier=300, N=30, codings_size=50)
    """

    def hidden_stack(tensor, n_layers, width):
        # n_layers repetitions of Dense -> Dropout applied to `tensor`.
        for _ in range(n_layers):
            tensor = keras.layers.Dense(width, activation="elu", kernel_initializer="he_normal")(tensor)
            tensor = keras.layers.Dropout(0.3)(tensor)
        return tensor

    # ---------- encoder: (x, y) -> codings mean, log-variance and a sampled coding ----------
    x_in = keras.layers.Input(shape=[input_shape])
    y_in = keras.layers.Input(shape=[10])
    encoder_hidden = hidden_stack(keras.layers.concatenate([x_in, y_in]), n_hidden, n_neurons)

    codings_mean = keras.layers.Dense(codings_size)(encoder_hidden)
    codings_log_var = keras.layers.Dense(codings_size)(encoder_hidden)
    codings = Sampling()([codings_mean, codings_log_var])
    variational_encoder = keras.models.Model(
        inputs=[x_in, y_in], outputs=[codings_mean, codings_log_var, codings])

    # ---------- decoder: (z, y) -> reconstructed x, squashed into (0, 1) by a sigmoid ----------
    latent = keras.layers.Input(shape=[codings_size])
    decoder_hidden = hidden_stack(keras.layers.concatenate([latent, y_in]), n_hidden, n_neurons)
    x_out = keras.layers.Dense(input_shape, activation="sigmoid")(decoder_hidden)
    variational_decoder = keras.models.Model(inputs=[latent, y_in], outputs=[x_out])

    # ---------- classifier: x -> softmax over the 10 y categories ----------
    classifier_hidden = hidden_stack(x_in, n_hidden_classifier, n_neurons_classifier)
    y_pred = keras.layers.Dense(10, activation="softmax")(classifier_hidden)
    classifier = keras.models.Model(inputs=[x_in], outputs=[y_pred])

    # ---------- learned distribution over y ----------
    loss, p = y_dist()(y_in)
    y_distribution = keras.models.Model(inputs=[y_in], outputs=[loss, p])

    # ---------- training wrapper tying the four components together ----------
    model = FullModel_MMD(N_parameter=N, beta=beta, variational_encoder=variational_encoder,
                          variational_decoder=variational_decoder, classifier=classifier,
                          y_distribution=y_distribution, codings_size=codings_size)

    return variational_encoder, variational_decoder, classifier, y_distribution, model

### Loss functions ###

In [7]:
def custom_mse(x,x_decoded_mean):
    """Half the summed squared error per sample: one value for each row of x."""
    squared_error = (x-x_decoded_mean)**2
    per_sample = K.sum(squared_error,axis=-1)
    return per_sample / 2

def compute_kernel(x, y):
    """Gaussian-style kernel matrix between the rows of x and the rows of y.

    Entry (i, j) is exp(-mean_k (x[i, k] - y[j, k])^2 / dim), where dim is the number of
    columns of x. The result has one row per row of x and one column per row of y.
    """
    n_x = tf.shape(x)[0]
    n_y = tf.shape(y)[0]
    dim = tf.shape(x)[1]

    # Repeat x along a new middle axis and y along a new leading axis so both are (n_x, n_y, dim).
    x_column = tf.reshape(x, tf.stack([n_x, 1, dim]))
    y_row = tf.reshape(y, tf.stack([1, n_y, dim]))
    x_grid = tf.tile(x_column, tf.stack([1, n_y, 1]))
    y_grid = tf.tile(y_row, tf.stack([n_x, 1, 1]))

    mean_sq_dist = tf.reduce_mean(tf.square(x_grid - y_grid), axis=2)
    return tf.exp(-mean_sq_dist / tf.cast(dim, tf.float32))

def compute_mmd(x, y, sigma_sqr=1.0):
    """Maximum mean discrepancy between samples x and y under compute_kernel.

    Returns mean k(x, x) + mean k(y, y) - 2 * mean k(x, y).
    sigma_sqr is accepted but not used by the computation.
    """
    # Background reading for the calculation:
    #   https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.RBF.html
    #   https://stats.stackexchange.com/questions/239008/rbf-kernel-algorithm-python
    #   https://blogs.rstudio.com/ai/posts/2018-10-22-mmd-vae/
    #   https://ermongroup.github.io/blog/a-tutorial-on-mmd-variational-autoencoders/
    mean_k_xx = tf.reduce_mean(compute_kernel(x, x))
    mean_k_yy = tf.reduce_mean(compute_kernel(y, y))
    mean_k_xy = tf.reduce_mean(compute_kernel(x, y))
    return mean_k_xx + mean_k_yy - 2 * mean_k_xy

def labelled_loss_reconstruction_mmd(codings,x,x_decoded_mean,batch_size=32,codings_size=50,beta=1):
    """Labelled-data loss: mean reconstruction error plus beta times the MMD penalty.

    The MMD term compares the encoder's codings with batch_size fresh draws from a
    standard Gaussian of width codings_size.
    """
    per_sample_recon = custom_mse(x,x_decoded_mean)
    prior_draws = tf.random.normal(tf.stack([batch_size, codings_size]))
    mmd_penalty = compute_mmd(prior_draws, codings)
    return K.mean(per_sample_recon) + beta*mmd_penalty

def unlabelled_loss_reconstruction_mmd(codings,y_pred,x,x_decoded_mean,beta=1,codings_size=50,batch_size=32):
    """Unlabelled-data loss: reconstruction + beta * MMD, weighted by y_pred, minus the entropy of y_pred.

    The entropy is obtained as the categorical cross-entropy of y_pred with itself and is
    subtracted (note the sign).
    """
    per_sample_recon = custom_mse(x,x_decoded_mean)
    prior_draws = tf.random.normal(tf.stack([batch_size, codings_size]))
    mmd_penalty = compute_mmd(prior_draws, codings)
    entropy = keras.losses.categorical_crossentropy(y_pred,y_pred)
    batch_loss = K.mean(per_sample_recon) + beta*mmd_penalty
    # Needs checking: the sum over y treats the loss term as independent of y, which is
    # not the case. Should be updated for a better model, see
    # https://github.com/bjlkeng/sandbox/issues/3
    expected_loss = K.mean(K.sum(y_pred*batch_loss,axis=-1))
    return expected_loss - K.mean(entropy)

def dummy_loss(y,ypred):
    """Placeholder loss that ignores both arguments and always returns 0.0.

    Keras wants one loss per model output; this fills that slot for an output whose
    real loss is computed elsewhere, so it contributes nothing to training.
    """
    zero = 0.0
    return zero

def labelled_cls_loss(y, y_pred,N=383):
    """Classification loss for labelled data: mean categorical cross-entropy scaled by alpha = 0.1 * N."""
    alpha = 0.1*N
    mean_xent = K.mean(keras.losses.categorical_crossentropy(y, y_pred))
    return alpha*mean_xent

### Training functions ###

In [8]:
@tf.function
def train_step(inputs):
    """Decorated train_step function which applies one gradient update to the parameters.

    `model` and `optimizer` are looked up as globals when the function runs. The fitting
    functions below define their own local train_step with the same body.
    Returns the loss for the batch (model output plus any model.losses).
    """
    with tf.GradientTape() as tape:
        total_loss = tf.add_n([model(inputs,training=True)] + model.losses)
    trainable = model.trainable_variables
    grads = tape.gradient(total_loss, trainable)
    optimizer.apply_gradients(zip(grads, trainable))
    return total_loss

def fit_model(X_train_la, y_train_la, X_train_un,epochs,X_valid_la, y_valid_la,
              patience,variational_encoder,variational_decoder,
             classifier,y_distribution,model,Sampling=Sampling,y_dist=y_dist,batch_size=32,learning_rate=0.001,codings_size=50,
             valid_set=True):

    """
    Fits a single model with Adam and a custom training loop, showing progress bars.

    With valid_set=True the validation loss (negative sampled log-likelihood) is computed
    after every epoch. The four component models are written to "variational_encoder.h5",
    "variational_decoder.h5", "classifier.h5" and "y_distribution.h5" whenever the
    validation loss strictly improves on the best value seen so far, so the files on disk
    always hold the best epoch. Training stops early once every one of the last
    `patience - 1` validation losses is worse than the one just before them.

    With valid_set=False no validation is run and the models as they stand after the last
    epoch are saved. Dummy validation sets must still be passed in.

    Parameters
    ----------
    X_train_la, X_train_un : DataFrames of labelled / unlabelled training inputs.
    y_train_la : 2-D array of labelled training targets.
    epochs : maximum number of epochs.
    X_valid_la, y_valid_la : validation inputs (DataFrame) and targets.
    patience : window length used by the early-stopping rule.
    variational_encoder, variational_decoder, classifier, y_distribution : component models to save.
    model : the full training model whose call returns the loss.
    Sampling, y_dist : custom layer classes needed to reload the saved encoder / y-distribution.
    batch_size, learning_rate, codings_size : training hyperparameters.
    valid_set : True or False depending on whether a validation set exists.

    Returns
    -------
    valid_set=True  -> (history, min(validation_loss)), history being the per-epoch training loss.
    valid_set=False -> history only.

    The models passed in are NOT rolled back to the best epoch; reload the .h5 files to
    get the checkpointed weights.

    e.g. usage fit_model(X_train_omics_labelled, train_set_labelled_y, X_train_omics_unlabelled,50,X_valid_omics, valid_set_labelled_y,
              10,variational_encoder=variational_encoder,variational_decoder=variational_decoder,
             classifier=classifier,y_distribution=y_distribution,model=model,
          Sampling=Sampling,y_dist=y_dist,batch_size=32,learning_rate=0.001,codings_size=50,valid_set=True)
    """
    start = time.time()
    history = []
    K.clear_session()

    optimizer=keras.optimizers.Adam(learning_rate=learning_rate)

    @tf.function
    def train_step(inputs):
        with tf.GradientTape() as tape:
            loss = model(inputs,training=True)
            loss = tf.add_n([loss] + model.losses)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        return loss

    def save_components():
        # Write the four component models to the current working directory.
        variational_encoder.save("variational_encoder.h5")
        variational_decoder.save("variational_decoder.h5")
        classifier.save("classifier.h5")
        y_distribution.save("y_distribution.h5")

    def reload_components():
        # Reads the checkpoints back, which confirms the files load with the custom layers.
        # The loaded models are not returned to the caller.
        keras.models.load_model("variational_encoder.h5", custom_objects={
           "Sampling": Sampling
        })
        keras.models.load_model("variational_decoder.h5")
        keras.models.load_model("classifier.h5")
        keras.models.load_model("y_distribution.h5", custom_objects={
           "y_dist": y_dist
        })

    n_train = X_train_la.shape[0] + X_train_un.shape[0]
    batches_per_epoch = int(np.floor(n_train/batch_size))

    def run_epoch(epoch):
        # Train for one epoch and return the mean batch loss.
        print("Epoch {}/{}".format(epoch,epochs))
        running_total = 0
        for i in range(batches_per_epoch):
            batch_x_la, batch_y_la, batch_x_un= create_batch(
                X_train_la, y_train_la, X_train_un,batch_size)
            inputs = [batch_x_la.to_numpy(),batch_y_la,batch_x_un.to_numpy()]
            # A running sum gives the same average as re-summing the list, in O(1) per batch.
            running_total = running_total + train_step(inputs)
            print_status_bar(i*batch_size,n_train,running_total/(i + 1))
        return running_total/batches_per_epoch

    training_loss_for_epoch = None

    if valid_set is True:

        validation_loss = []
        best_val_loss = None

        for epoch in range(epochs):

            training_loss_for_epoch = run_epoch(epoch)
            history.append(training_loss_for_epoch)
            val_loss = -validation_log_lik_sampling(y_valid_la,X_valid_la.to_numpy(),variational_decoder=variational_decoder,codings_size=codings_size)

            validation_loss.append(val_loss)
            print_status_bar_epoch(n_train,n_train,training_loss_for_epoch,val_loss )

            # Checkpoint on strict improvement over the best loss seen in ANY earlier epoch.
            # Epoch 0 always saves, so there is always a file to load. Comparing against a
            # recent window only would let a worse model overwrite the best one.
            current = float(val_loss)
            if best_val_loss is None or math.isnan(best_val_loss) or current < best_val_loss:
                best_val_loss = current
                save_components()

            # Early stopping: every loss in the window after its first entry is worse than that entry.
            if epoch > patience - 1:
                latest_val_loss = validation_loss[-patience:]
                if all(i>latest_val_loss[0] for i in latest_val_loss[1:]) is True:
                    break

        #load best model#
        reload_components()

        done = time.time()
        elapsed = done-start
        print("Elapsed/s: ",elapsed)
        print("Final training loss: ",training_loss_for_epoch)
        print("best val loss: ", min(validation_loss))

        return history, min(validation_loss)

    else:

        val_loss = 0  # no validation set: the status bar shows a placeholder

        for epoch in range(epochs):

            training_loss_for_epoch = run_epoch(epoch)
            history.append(training_loss_for_epoch)
            print_status_bar_epoch(n_train,n_train,training_loss_for_epoch,val_loss )

        # Without a validation signal the model at the end of training is the one kept.
        save_components()
        reload_components()

        done = time.time()
        elapsed = done-start
        print("Elapsed/s: ",elapsed)
        print("Final training loss: ",training_loss_for_epoch)

        return history


def fit_model_search(X_train_la, y_train_la, X_train_un,epochs,X_valid_la, y_valid_la,
              patience,variational_encoder,variational_decoder,
             classifier,y_distribution,model,Sampling=Sampling,y_dist=y_dist,batch_size=32,learning_rate=0.001,
                    codings_size=50):

    """
    Use for hyperparameter search.

    Fits the model with Adam and a custom training loop, computing the validation loss
    (negative sampled log-likelihood) after every epoch and showing progress bars.

    The four component models are written to "*_intermediate.h5" files whenever the
    validation loss strictly improves on the best value seen so far, so the files on disk
    always hold the best epoch. Training stops early once every one of the last
    `patience - 1` validation losses is worse than the one just before them.

    Parameters
    ----------
    X_train_la, X_train_un : DataFrames of labelled / unlabelled training inputs.
    y_train_la : 2-D array of labelled training targets.
    epochs : maximum number of epochs.
    X_valid_la, y_valid_la : validation inputs (DataFrame) and targets.
    patience : window length used by the early-stopping rule.
    variational_encoder, variational_decoder, classifier, y_distribution : component models to save.
    model : the full training model whose call returns the loss.
    Sampling, y_dist : custom layer classes needed to reload the saved encoder / y-distribution.
    batch_size, learning_rate, codings_size : training hyperparameters.

    Returns
    -------
    (history, min(validation_loss)), history being the per-epoch training loss.

    The models passed in are NOT rolled back to the best epoch; reload the
    "*_intermediate.h5" files to get the checkpointed weights.
    """

    start = time.time()
    history = []

    optimizer=keras.optimizers.Adam(learning_rate=learning_rate)

    @tf.function
    def train_step(inputs):
        with tf.GradientTape() as tape:
            loss = model(inputs,training=True)
            loss = tf.add_n([loss] + model.losses)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        return loss

    def save_components():
        # Write the four component models to the current working directory.
        variational_encoder.save("variational_encoder_intermediate.h5")
        variational_decoder.save("variational_decoder_intermediate.h5")
        classifier.save("classifier_intermediate.h5")
        y_distribution.save("y_distribution_intermediate.h5")

    validation_loss = []
    best_val_loss = None
    training_loss_for_epoch = None
    n_train = X_train_la.shape[0] + X_train_un.shape[0]
    batches_per_epoch = int(np.floor(n_train/batch_size))

    for epoch in range(epochs):

        print("Epoch {}/{}".format(epoch,epochs))

        running_total = 0
        for i in range(batches_per_epoch):

            batch_x_la, batch_y_la, batch_x_un= create_batch(
                X_train_la, y_train_la, X_train_un,batch_size)

            inputs = [batch_x_la.to_numpy(),batch_y_la,batch_x_un.to_numpy()]
            # A running sum gives the same average as re-summing the list, in O(1) per batch.
            running_total = running_total + train_step(inputs)
            print_status_bar(i*batch_size,n_train,running_total/(i + 1))

        training_loss_for_epoch = running_total/batches_per_epoch
        history.append(training_loss_for_epoch)
        val_loss = -validation_log_lik_sampling(y_valid_la,X_valid_la.to_numpy(),variational_decoder=variational_decoder,codings_size=codings_size)

        validation_loss.append(val_loss)
        print_status_bar_epoch(n_train,n_train,training_loss_for_epoch,val_loss )

        # Checkpoint on strict improvement over the best loss seen in ANY earlier epoch.
        # Epoch 0 always saves, so there is always a file to load. Comparing against a
        # recent window only would let a worse model overwrite the best one.
        current = float(val_loss)
        if best_val_loss is None or math.isnan(best_val_loss) or current < best_val_loss:
            best_val_loss = current
            save_components()

        # Early stopping: every loss in the window after its first entry is worse than that entry.
        if epoch > patience - 1:
            latest_val_loss = validation_loss[-patience:]
            if all(i>latest_val_loss[0] for i in latest_val_loss[1:]) is True:
                break

    #load best model#
    # Reads the checkpoints back, which confirms the files load with the custom layers.
    # The loaded models are not returned to the caller.
    keras.models.load_model("variational_encoder_intermediate.h5", custom_objects={
       "Sampling": Sampling
    })
    keras.models.load_model("variational_decoder_intermediate.h5")
    keras.models.load_model("classifier_intermediate.h5")
    keras.models.load_model("y_distribution_intermediate.h5", custom_objects={
       "y_dist": y_dist
    })

    done = time.time()
    elapsed = done-start
    print("Elapsed/s: ",elapsed)
    print("Final training loss: ",training_loss_for_epoch)
    print("best val loss: ", min(validation_loss))

    return history, min(validation_loss)

def hyperparameter_search_mmd(param_distribs,epochs,patience,n_iter,X_train_la=X_train_omics_labelled, 
                          y_train_la=train_set_labelled_y, X_train_un=X_train_omics_unlabelled,
                          X_valid_la=X_valid_omics, y_valid_la=valid_set_labelled_y):
    """
    Random hyperparameter search, scored by the loss on the validation set.

    For each of the ``n_iter`` trials one value per hyperparameter is drawn with
    ``np.random.choice`` from the candidate list in ``param_distribs``, a model is
    built with ``build_model_mmd`` and trained with ``fit_model_search`` (batch size
    fixed at 32). Whenever a trial's validation loss is the lowest so far, its
    ``*_intermediate.h5`` files (expected to have been written by
    ``fit_model_search`` in the current working directory) are promoted to
    ``variational_encoder.h5``, ``variational_decoder.h5``, ``classifier.h5`` and
    ``y_distribution.h5``, overwriting any previous best.

    Parameters
    ----------
    param_distribs : dict
        Maps each hyperparameter name to a list of candidate values. Every one of
        the following keys must be present; to fix a value, give a one-element list:

            param_distribs = {
                "n_hidden": [1],
                "n_hidden_classifier": [1],
                "beta": [1],
                "n_neurons": randint.rvs(50,1000-49,size=20,random_state=random_state).tolist(),
                "n_neurons_classifier": randint.rvs(49,1000-49,size=20,random_state=random_state).tolist(),
                "codings_size": randint.rvs(50,290-50,size=30,random_state=random_state).tolist(),
                "N": [0.1, 1, 10],
                "learning_rate": [0.001, 0.0005],
            }
    epochs : int
        Maximum number of training epochs per trial (forwarded to ``fit_model_search``).
    patience : int
        Early-stopping patience (forwarded to ``fit_model_search``). Must be less
        than ``epochs``.
    n_iter : int
        Number of random trials; must be at least 1.
    X_train_la, y_train_la, X_train_un, X_valid_la, y_valid_la
        Labelled training data, unlabelled training data and labelled validation
        data, forwarded unchanged to ``fit_model_search``.

    Returns
    -------
    result : list of (int, dict)
        ``(trial_index, {"parameters": {...}, "val_loss": ...})`` pairs sorted by
        ascending validation loss.
    variational_encoder, variational_decoder, classifier, y_distribution
        The best trial's models, reloaded from the promoted ``.h5`` files.

    Raises
    ------
    ValueError
        If ``n_iter`` is smaller than 1 (no model would be trained or saved).
    KeyError
        If ``param_distribs`` lacks one of the required hyperparameters.

    Notes
    -----
    Reseeds NumPy's global RNG with 42 so that the sequence of sampled
    configurations is the same on every call.

    e.g. result,variational_encoder,variational_decoder,classifier,y_distribution =
            hyperparameter_search_mmd(param_distribs,500,10,n_iter=10)
    """

    # Sampling order is part of the reproducibility contract: changing it changes
    # which configuration each trial receives for a given seed.
    searched = ("N", "learning_rate", "beta", "n_neurons", "n_neurons_classifier",
                "n_hidden", "n_hidden_classifier", "codings_size")
    saved_models = ("variational_encoder", "variational_decoder", "classifier", "y_distribution")

    if n_iter < 1:
        # Otherwise the loads below would fail, or silently return stale files
        # left behind by an earlier search.
        raise ValueError("n_iter must be at least 1, got {}".format(n_iter))
    missing = [name for name in searched if name not in param_distribs]
    if missing:
        raise KeyError("param_distribs is missing hyperparameter(s): {}".format(", ".join(missing)))

    np.random.seed(42) #needs to be here so that everything that follows is consistent

    min_val_loss = []
    master = {}

    for i in range(n_iter): 
        K.clear_session()

        # One independent draw per hyperparameter, in the fixed order above.
        parameters = {}
        for name in searched:
            parameters[name] = np.random.choice(param_distribs[name])
        master[i] = {"parameters": parameters}

        variational_encoder,variational_decoder,classifier,y_distribution,model = build_model_mmd(
            n_hidden=parameters["n_hidden"],
            n_neurons=parameters["n_neurons"],
            beta=parameters["beta"],
            n_hidden_classifier=parameters["n_hidden_classifier"],
            n_neurons_classifier=parameters["n_neurons_classifier"],
            N=parameters["N"],
            codings_size=parameters["codings_size"])

        history,val_loss = fit_model_search(X_train_la=X_train_la, y_train_la=y_train_la, 
                                 X_train_un=X_train_un, epochs=epochs,X_valid_la=X_valid_la, 
                                 y_valid_la=y_valid_la,patience=patience,variational_encoder=variational_encoder,
                                variational_decoder=variational_decoder, classifier=classifier,
                                y_distribution=y_distribution,model=model,Sampling=Sampling,y_dist=y_dist,
                                batch_size=32,learning_rate=parameters["learning_rate"],
                                codings_size=parameters["codings_size"])

        master[i]["val_loss"] = val_loss
        min_val_loss.append(val_loss)

        #If val loss is lowest, save this model. 
        if val_loss <=  min(min_val_loss):
            for name in saved_models:
                # os.replace overwrites an existing destination on every platform;
                # os.rename raises FileExistsError on Windows in that case.
                os.replace(name + "_intermediate.h5", name + ".h5")

        print(master)

    #load best model#
    variational_encoder = keras.models.load_model("variational_encoder.h5", custom_objects={
       "Sampling": Sampling
    })
    variational_decoder = keras.models.load_model("variational_decoder.h5")
    classifier = keras.models.load_model("classifier.h5")     
    y_distribution = keras.models.load_model("y_distribution.h5", custom_objects={
       "y_dist": y_dist
    })    

    result = sorted(master.items(), key=lambda item: item[1]["val_loss"])
    return result,variational_encoder,variational_decoder,classifier,y_distribution


# Hyperparameter Search #

In [12]:
# Candidate values for the random search; one value per key is picked in each trial.
# randint.rvs(low, high) draws integers from the half-open range [low, high), so:
#   n_neurons            -> 20 draws from [50, 551)
#   n_neurons_classifier -> 20 draws from [20, 100)
#   codings_size         -> 30 draws from [20, 270)
# All three lists are generated with the same random_state.
# Fixed grids tried previously: n_neurons [300,500]; n_neurons_classifier [50,100,150];
# codings_size [20,50,70] or [120,60].
param_distribs = dict(
    n_hidden=[1, 2],
    n_hidden_classifier=[1, 2],
    beta=[1, 10, 15],
    n_neurons=randint.rvs(50, 600 - 49, size=20, random_state=random_state).tolist(),
    n_neurons_classifier=randint.rvs(20, 120 - 20, size=20, random_state=random_state).tolist(),
    codings_size=randint.rvs(20, 290 - 20, size=30, random_state=random_state).tolist(),
    N=[0.1, 1, 10, 50, 100],
    learning_rate=[0.001, 0.0005],
)

In [13]:
# Run 10 random-search trials (at most 100 epochs each, patience 10) and keep the
# best trial's models; the sorted search results are displayed as the cell output.
(result,
 variational_encoder,
 variational_decoder,
 classifier,
 y_distribution) = hyperparameter_search_mmd(
    param_distribs=param_distribs,
    epochs=100,
    patience=10,
    n_iter=10,
)
result

Epoch 0/100


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch

Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Elapsed/s:  105.71690797805786
Final training loss:  tf.Tensor(9.720639, shape=(), dtype=float32)
best val loss:  tf.Tensor(16.69674, shape=(), dtype=float32)
{0: {'parameters': {'n_neurons_classifier': 43, 'n_hidden_classifier': 1, 'learning_rate': 0.001, 'codings_size': 208, 'beta': 15, 'N': 50.0, 'n_neurons': 516, 'n_hidden': 1}, 'val_loss': <tf.Tensor: shape=(), dtype=float32, numpy=18.3234>}, 1: {'parameters': {'n_neuro

Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Elapsed/s:  57.837257862091064
Final training loss:  tf.Tensor(19.127752, shape=(), dtype=float32)
best val loss:  tf.Tensor(13.668679, shape=(), dtype=float32)
{0: {'parameters': {'n_neurons_classifier': 43, 'n_hidden_classifier': 1, 'learning_rate': 0.001, 'codings_size': 208, 'beta': 15, 'N': 50.0, 'n_neurons': 516, 'n_hidden': 1}, 'val_loss': <tf.Tensor: shape=(), dtype=float32, numpy=18.3234>}, 1: {'parameters': {'n_neurons_classifier': 49, 'n_hidden_classifier': 2, 'learning_rate': 0.0005, 'codings_size': 149, 'beta': 1, 'N': 50.0, '

Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Elapsed/s:  87.90529346466064
Final training loss:  tf.Tensor(4.6907015, shape=(), dtype=float32)
best val loss:  tf.Tensor(12.23399, shape=(), dtype=float32)
{0: {'parameters': {'n_neurons_classifier': 43, 'n_hidden_classifier': 1, 'learning_rate': 0.001, 'codings_size': 208, 'beta': 15, 'N': 50.0, 'n_neurons

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Elapsed/s:  97.48097538948059
Final training loss:  tf.Tensor(17.275396, shape=(), dtype=float32)
best val loss:  tf.Tensor(12.280304, shape=(), dtype=float32)
{0: {'parameters': {'n_neurons_classifier': 43, 'n_hidden_classifier': 1, 'learning_rate': 0.001, 'codings_size': 208, 'beta': 15, 'N': 50.0, 'n_neurons': 516, 'n_hidden': 1}, 'val_loss': <tf.Tensor: shape=(), dtype=float32, numpy=18.3234>}, 1: {'parameters': {'n_neurons_classifier': 49, 'n_hidden_classifier': 2, 'learning_rate': 0.0005, 'codings_size': 149, 'beta': 1, 'N': 50.0, 'n_neurons': 238, 'n_hidden': 1}, 'val_loss': <tf.Tensor: shape=(), dtype=float32, numpy=16.69674>}, 2: {'parameters': {'n_neurons_classifier': 94, 'n_hidden_classifier': 1, 'learning_rate': 0.0005, 'codings_size': 107, 'beta': 15, 'N': 100.0, 'n_neurons': 516, 'n_hidden': 2}, 'val_loss': <tf.Tensor: shape=(), dtype=float32, numpy=13.668679>}, 3: {'parameters': {'n_neurons_classifier': 52, 'n_hidden_classifier': 1, 'learning_rate': 0.001, 'codings_size'

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Elapsed/s:  103.80203437805176
Final training loss:  tf.Tensor(4.721092, shape=(), dtype=float32)
best val loss:  tf.Tensor(12.021252, shape=(), dtype=float32)
{0: {'parameters': {'n_neurons_classifier': 43, 'n_hidden_classifier': 1, 'learning_rate': 0.001, 'codings_size': 208, 'beta': 15, 'N': 50.0, 'n_neurons': 516, 'n_hidden': 1}, 'val_loss': <tf.Tensor: shape=(), dtype=float32, numpy=18.3234>}, 1: {'parameters': {'n_neurons_classifier': 49, 'n_hidden_classifier': 2, 'learning_rate': 0.0005, 'codings_size': 149, 'beta': 1, 'N': 50.0, 'n_neurons': 238, 'n_hidden': 1}, 'val_loss': <tf.Tensor: shape=(), dtype=float32, numpy=16.69674>}, 2: {'parameters': {'n_neurons_classifier': 94, 'n_hidden_classifier': 1, 'learning_rate': 0.0005, 'codings_size': 107, 'beta': 15, 'N': 100.0, 'n_neurons': 516, 'n_hidden': 2}, 'val_loss': <tf.Tensor: shape=(), dtype=float32, numpy=13.668679>}, 3: {'parameters': {'n_neurons_classifier': 52, 'n_hidden_classifier': 1, 'learning_rate': 0.001, 'codings_size'

Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Elapsed/s:  32.17266249656677
Final training loss:  tf.Tensor(7.5872684, shape=(), dtype=float32)
best val loss:  tf.Tensor(12.16589, shape=(), dtype=float32)
{0: {'parameters': {'n_neurons_classifier': 43, 'n_hidden_classifier': 1, 'learning_rate': 0.001, 'codings_size': 208, 'beta': 15, 'N': 50.0, 'n_neurons': 516, 'n_hidden': 1}, 'val_loss': <tf.Tensor: shape=(), dtype=float32, numpy=18.3234>}, 1: {'parameters': {'n_neurons_classifier': 49, 'n_hidden_classifier': 2, 'learning_rate': 0.0005, 'codings_size': 149, 'beta': 1, 'N': 50.0, 'n_neurons': 238, 'n_hidden': 1}, 'val_loss': <tf.Tensor: shape=(), dtype=float32, numpy=16.69674>}, 2: {'parameters': {'n_neurons_classifie

[(7,
  {'parameters': {'N': 1.0,
    'beta': 10,
    'codings_size': 255,
    'learning_rate': 0.001,
    'n_hidden': 2,
    'n_hidden_classifier': 2,
    'n_neurons': 137,
    'n_neurons_classifier': 79},
   'val_loss': <tf.Tensor: shape=(), dtype=float32, numpy=12.021252>}),
 (9,
  {'parameters': {'N': 10.0,
    'beta': 10,
    'codings_size': 21,
    'learning_rate': 0.0005,
    'n_hidden': 2,
    'n_hidden_classifier': 1,
    'n_neurons': 70,
    'n_neurons_classifier': 21},
   'val_loss': <tf.Tensor: shape=(), dtype=float32, numpy=12.16589>}),
 (8,
  {'parameters': {'N': 10.0,
    'beta': 10,
    'codings_size': 123,
    'learning_rate': 0.0005,
    'n_hidden': 2,
    'n_hidden_classifier': 2,
    'n_neurons': 156,
    'n_neurons_classifier': 41},
   'val_loss': <tf.Tensor: shape=(), dtype=float32, numpy=12.208234>}),
 (4,
  {'parameters': {'N': 1.0,
    'beta': 10,
    'codings_size': 91,
    'learning_rate': 0.0005,
    'n_hidden': 2,
    'n_hidden_classifier': 2,
    'n_neurons

The best MMD-based model has a validation negative log-likelihood (nloglik) of 11.8473425, obtained with 61 epochs of training.

'parameters': {'N': 0.1,
    'beta': 1,
    'codings_size': 119,
    'learning_rate': 0.001,
    'n_hidden': 2,
    'n_hidden_classifier': 2,
    'n_neurons': 137,
    'n_neurons_classifier': 80},
   'val_loss': <tf.Tensor: shape=(), dtype=float32, numpy=11.922641>}

# Single run # 

In [9]:
# Rebuild a fresh model for the single training run.
# NOTE(review): n_neurons_classifier=137 differs from the value 80 listed with the
# best hyperparameters and lies outside the searched range [20, 100) -- confirm
# this is intentional and not a copy of n_neurons.
(variational_encoder,
 variational_decoder,
 classifier,
 y_distribution,
 model) = build_model_mmd(
    input_shape=input_shape,
    n_hidden=2,
    n_neurons=137,
    n_hidden_classifier=2,
    n_neurons_classifier=137,
    codings_size=119,
    beta=1,
    N=0.1,
)

In [10]:
# Train the single-run model; the return value is displayed as the cell output.
# The positional values 100 and 10 are presumably epochs and patience, following
# the argument order used for fit_model_search -- confirm against fit_model's signature.
fit_model(
    X_train_omics_labelled,
    train_set_labelled_y,
    X_train_omics_unlabelled,
    100,
    X_valid_omics,
    valid_set_labelled_y,
    10,
    variational_encoder=variational_encoder,
    variational_decoder=variational_decoder,
    classifier=classifier,
    y_distribution=y_distribution,
    model=model,
    Sampling=Sampling,
    y_dist=y_dist,
    batch_size=32,
    learning_rate=0.001,
    valid_set=True,
    codings_size=119,
)

Epoch 0/100


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch

([<tf.Tensor: shape=(), dtype=float32, numpy=17.419695>,
  <tf.Tensor: shape=(), dtype=float32, numpy=8.539524>,
  <tf.Tensor: shape=(), dtype=float32, numpy=7.3899345>,
  <tf.Tensor: shape=(), dtype=float32, numpy=6.9031367>,
  <tf.Tensor: shape=(), dtype=float32, numpy=6.602461>,
  <tf.Tensor: shape=(), dtype=float32, numpy=6.2700095>,
  <tf.Tensor: shape=(), dtype=float32, numpy=6.0789304>,
  <tf.Tensor: shape=(), dtype=float32, numpy=5.9635687>,
  <tf.Tensor: shape=(), dtype=float32, numpy=5.7620687>,
  <tf.Tensor: shape=(), dtype=float32, numpy=5.6136518>,
  <tf.Tensor: shape=(), dtype=float32, numpy=5.445345>,
  <tf.Tensor: shape=(), dtype=float32, numpy=5.4114447>,
  <tf.Tensor: shape=(), dtype=float32, numpy=5.3443465>,
  <tf.Tensor: shape=(), dtype=float32, numpy=5.132705>,
  <tf.Tensor: shape=(), dtype=float32, numpy=5.2192755>,
  <tf.Tensor: shape=(), dtype=float32, numpy=5.12508>,
  <tf.Tensor: shape=(), dtype=float32, numpy=5.0868664>,
  <tf.Tensor: shape=(), dtype=float32

In [11]:
# Score the trained decoder on the held-out test set: the sign of the value returned
# by validation_log_lik_sampling (called with 2000 samples) is flipped to report a
# negative log-likelihood.
test_nlog_lik = -validation_log_lik_sampling(
    test_set_labelled_y,
    X_test_omics.to_numpy(),
    variational_decoder=variational_decoder,
    codings_size=119,
    samples=2000,
)
print("test_nlog_lik = {}".format(str(test_nlog_lik)))

test_nlog_lik = tf.Tensor(12.35483, shape=(), dtype=float32)


This is the best score and therefore the model that will be used.