# Hyperparameter optimization
This was done using Ray Tune with the ASHA Scheduler using a simple grid search approach. <br>
We used a random 20% subset of our full data for the hyperparameter optimization due to time constraints.

In [2]:
from IPython.display import Image, display, clear_output
import os
import torch
import math
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as stats
import warnings
import time
import sklearn
import ray

from typing import *
from collections import defaultdict
from warnings import simplefilter

from ray import tune, air
from ray.air import session
from ray.tune.schedulers import ASHAScheduler
from ray.air.checkpoint import Checkpoint

from sklearn.preprocessing import quantile_transform

from torch import nn, Tensor
from torch.nn.functional import softplus
from torch.distributions import Distribution
from torch.distributions.bernoulli import Bernoulli
from torch.distributions.binomial import Binomial
from torch.distributions.log_normal import LogNormal
from torch.utils.data import random_split, DataLoader, Subset

In [3]:
# Initialize a local Ray instance and make one GPU available to Ray Tune trials
ray.init(num_gpus = 1)

2022-12-21 14:06:16,783	INFO worker.py:1519 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


0,1
Python version:,3.8.13
Ray version:,2.1.0
Dashboard:,http://127.0.0.1:8265


### Function for plotting during training

In [4]:
def plotting(training_data, validation_data, x, x_hat, tmp_img="tmp_vae_out.png"):
    """
    Draw the training dashboard and show it in the notebook output area.

    The figure has three curve panels (ELBO, KL, log p(x|z)) on the top row and two
    full-width heatmaps below (observation `x` and reconstruction `x_hat`).
    The figure is written to `tmp_img`, displayed from that file, and the file is removed again.

    Args:
        training_data: mapping with the keys 'elbo', 'kl' and 'log_px', each a sequence of per-epoch values.
        validation_data: same layout as `training_data`, for the validation curves.
        x: observation matrix passed to `imshow` (assumed 2-D array-like -- TODO confirm against caller).
        x_hat: reconstruction matrix passed to `imshow` (assumed same layout as `x`).
        tmp_img: path of the temporary image file.
    """
    fig, axes = plt.subplot_mosaic([['top_left', 'top_centre', 'top_right'],
                                    ['mid', 'mid', 'mid'],
                                    ['bottom', 'bottom', 'bottom']])
    fig.set_size_inches(15, 10)

    # (panel, key in the data dicts, title) for the three training curves
    curve_panels = (
        ('top_left', 'elbo', r'ELBO: $\mathcal{L} ( \mathbf{x} )$'),
        ('top_centre', 'kl', r'$\mathcal{D}_{\operatorname{KL}}\left(q_\phi(\mathbf{z}|\mathbf{x})\ |\ p(\mathbf{z})\right)$'),
        ('top_right', 'log_px', r'$\log p_\theta(\mathbf{x} | \mathbf{z})$'),
    )
    for panel, key, title in curve_panels:
        ax = axes[panel]
        ax.set_title(title)
        ax.plot(training_data[key], label='Training')
        ax.plot(validation_data[key], label='Validation')
        ax.legend()

    # (panel, matrix, title) for the observation and its reconstruction
    heatmap_panels = (
        ('mid', x, r'Observation $\mathbf{x}$'),
        ('bottom', x_hat, r'Reconstruction $\mathbf{\hat x \sim p(x|z)}$'),
    )
    for panel, matrix, title in heatmap_panels:
        ax = axes[panel]
        ax.set_title(title)
        # Draw each heatmap exactly once; the returned image handle feeds the colorbar
        image = ax.imshow(matrix, cmap="plasma", aspect="auto")
        fig.colorbar(image, ax=ax, location='right')

    fig.tight_layout()

    try:
        fig.savefig(tmp_img)
        plt.close(fig)
        display(Image(filename=tmp_img))
        # wait=True postpones clearing until the next output arrives, which avoids flicker
        clear_output(wait=True)
    finally:
        # Release the figure and the temporary file even if saving or displaying failed
        plt.close(fig)
        if os.path.exists(tmp_img):
            os.remove(tmp_img)

# Dataloader function

In [5]:
def data_loaders(DATA_PATH=os.getcwd() + "/Data/archs4_data_transformed.npy", batch_size=32, num_workers=4, pin_memory=True, seed=None):
    """
    Load a `.npy` array and return shuffled train/test DataLoaders over an 80/20 random split.

    Args:
        DATA_PATH: path of the `.npy` file; the default is resolved relative to the working
            directory at the time this function is defined.
        batch_size: batch size used by both loaders.
        num_workers: number of worker processes used by both loaders.
        pin_memory: forwarded to both loaders.
        seed: optional integer. When given, the split is drawn from a dedicated generator seeded
            with this value so the same split can be reproduced; when None (default) the global
            torch RNG is used, as before.

    Returns:
        (train_loader, test_loader). Both loaders are created with shuffle=True.

    Raises:
        AssertionError: if the sum of the loaded array is NaN (i.e. the data contains NaN,
            or +inf and -inf together).
    """
    data = np.load(DATA_PATH)

    # Summing first avoids allocating a boolean mask as large as the data
    assert not np.isnan(np.sum(data)), f"NaN values found in {DATA_PATH}"

    inputs = torch.from_numpy(data)

    split_kwargs = {}
    if seed is not None:
        split_kwargs["generator"] = torch.Generator().manual_seed(seed)
    inputs_train, inputs_test = random_split(inputs, [0.8, 0.2], **split_kwargs)

    loader_kwargs = dict(batch_size=batch_size,
                         shuffle=True,
                         num_workers=num_workers,
                         pin_memory=pin_memory)

    # Training data:
    train_loader = DataLoader(dataset=inputs_train, **loader_kwargs)

    # Testing data:
    test_loader = DataLoader(dataset=inputs_test, **loader_kwargs)

    return train_loader, test_loader



## Defining custom distributions:

#### After log2(x+1) and quantile transformation, our data follows a hurdle normal distribution which is implemented below:

In [6]:
class HurdleNormal(Distribution):
    r"""
    Hurdle model combining a Bernoulli gate with a normal distribution, for data that is
    normally distributed with an excess probability mass at 0.

    With `p = sigmoid(p_logits)` being the probability of a NON-zero observation:
        p(x | mu, sigma, p) = 1 - p                        for x = 0
        p(x | mu, sigma, p) = p * Normal(x | mu, sigma)    for x > 0
    This is to be used as observation model p(x|z): p(x | z, mu, sigma, p)

    NOTE(review): `log_prob` gates on `x > 0`, so negative observations are scored like zeros --
    confirm the transformed data is non-negative.
    """
    # No parameter constraints are declared; an empty mapping keeps the base-class machinery
    # (validation, repr) from raising NotImplementedError.
    arg_constraints = {}

    def __init__(self, mu:Tensor, log_sigma:Tensor, p_logits:Tensor):
        """
        Args:
            mu: mean of the normal component.
            log_sigma: log standard deviation of the normal component.
            p_logits: logits of the Bernoulli gate (probability that the observation is non-zero).
        All three tensors must have the same shape.
        """
        assert mu.shape == log_sigma.shape == p_logits.shape, f"Tensors 'mu': {mu.shape}, 'log_sigma': {log_sigma.shape} and 'p': {p_logits.shape} must be of the same shape."
        super().__init__(batch_shape=mu.shape, validate_args=False)
        self.mu = mu
        self.sigma = log_sigma.exp() + 0.01 # to avoid sigma being zero
        self.p_logits = p_logits

        self.Bernoulli = torch.distributions.bernoulli.Bernoulli(logits = self.p_logits)

    def sample(self) -> Tensor:
        """sample `x ~ hurdle_normal(x | mu, sigma, p)` (without gradients)"""

        with torch.no_grad():
            # The gate is 0 or 1: multiplying keeps the normal draw where the gate is 1 and yields 0 elsewhere
            gate = self.Bernoulli.sample()
            normal_draw = torch.distributions.normal.Normal(self.mu, self.sigma).sample()
            return gate * normal_draw

    def log_prob(self, x:Tensor) -> Tensor:
        r"""
        return the element-wise log probability: log p(x)
        for x = 0 (as coded: every x that is not > 0):
            log p(x | mu, sigma, p) = log(1 - p)
        for x > 0:
            log p(x | mu, sigma, p) = log(p) - log(sigma * sqrt(2*pi)) - 0.5 * ((x - mu) / sigma)^2
        """
        is_positive = x > 0.

        # Gate term: log(p) where x > 0, log(1 - p) elsewhere
        log_prob_bernoulli = self.Bernoulli.log_prob(is_positive.to(x.dtype))

        # Normal term, counted only where the observation passed the hurdle
        log_prob_normal = torch.distributions.normal.Normal(loc = self.mu, scale = self.sigma).log_prob(x)
        log_prob_normal = torch.where(is_positive, log_prob_normal, torch.zeros_like(log_prob_normal))

        return log_prob_bernoulli + log_prob_normal

#### The reparameterization trick to be able to backpropagate despite random sampling is used as implemented in the course exercise.

In [7]:
class ReparameterizedDiagonalGaussian(Distribution):
    r"""
    A distribution `N(y | mu, sigma I)` compatible with the reparameterization trick given `epsilon ~ N(0, 1)`.

    The standard deviation is `exp(log_sigma) + 0.001`, so it is never exactly zero
    (and `log_sigma = 0` gives `sigma = 1.001`).
    """
    # No parameter constraints are declared; samples are reparameterized via `rsample`
    arg_constraints = {}
    has_rsample = True

    def __init__(self, mu: Tensor, log_sigma:Tensor):
        assert mu.shape == log_sigma.shape, f"Tensors `mu` : {mu.shape} and ` log_sigma` : {log_sigma.shape} must be of the same shape"
        super().__init__(batch_shape=mu.shape, validate_args=False)
        self.mu = mu
        self.sigma = log_sigma.exp() + 0.001

    def sample_epsilon(self) -> Tensor:
        r"""`\eps ~ N(0, I)`, with the same shape, dtype and device as `mu`"""
        return torch.empty_like(self.mu).normal_()

    def sample(self) -> Tensor:
        """sample `z ~ N(z | mu, sigma)` (without gradients)"""
        with torch.no_grad():
            return self.rsample()

    def rsample(self) -> Tensor:
        """sample `z ~ N(z | mu, sigma)` (with the reparameterization trick) """
        # z is a deterministic function of (mu, sigma) given epsilon, so gradients reach both
        return self.mu + self.sigma * self.sample_epsilon()

    def log_prob(self, z:Tensor) -> Tensor:
        """return the element-wise log probability: log `p(z)`"""
        return torch.distributions.normal.Normal(self.mu, self.sigma).log_prob(z)

    def __repr__(self):
        return f"ReparameterizedDiagonalGaussian(mu={self.mu.shape}, sigma={self.sigma.shape})"

## Defining the Variational Autoencoder network
##### Based on the original code from the course exercise, but modified to be able to control number of layers, dimensions of layers and use the hurdle-normal distribution as observation model.

In [8]:
class VariationalAutoencoder(nn.Module):
    r"""A Variational Autoencoder with
    * a hurdle-normal observation model `p_\theta(x | z) = HurdleNormal(x | g_\theta(z))`
    * a Gaussian prior `p(z)` with zero mean (parameters stored in the `prior_params` buffer)
    * a Gaussian posterior `q_\phi(z|x) = N(z | \mu(x), \sigma(x))`

    Encoder and decoder are stacks of `Linear -> BatchNorm1d -> LeakyReLU` blocks followed by a
    final `Linear` layer. The encoder outputs `2 * latent_features` values (mu, log_sigma); the
    decoder outputs `3 * observation_features` values (mu, log_sigma, gate logits).
    """

    def __init__(self, input_shape:torch.Size, latent_features:int, encoder_layer_sizes:list, decoder_layer_sizes:list) -> None:
        """
        Args:
            input_shape: shape of a single observation; its product is the number of input features.
            latent_features: dimensionality of the latent variable z.
            encoder_layer_sizes: hidden layer widths of the encoder, from input side to latent side.
            decoder_layer_sizes: hidden layer widths of the decoder, listed from the OUTPUT side to the
                latent side -- the decoder is built by walking this list in reverse, so passing the
                same list as for the encoder yields a mirrored network.
        """
        super(VariationalAutoencoder, self).__init__()

        # ------------------------------------------------------------------------------
        # Defining parameters of model
        self.input_shape = input_shape
        self.latent_features = latent_features
        self.observation_features = int(np.prod(input_shape)) # plain int instead of a numpy scalar

        self.encoder_layer_sizes = encoder_layer_sizes
        self.n_encoder_layers = len(encoder_layer_sizes)

        self.decoder_layer_sizes = decoder_layer_sizes
        self.n_decoder_layers = len(decoder_layer_sizes)

        # ------------------------------------------------------------------------------
        # Inference Network
        # Encode the observation `x` into the parameters of the posterior distribution
        # `q_\phi(z|x) = N(z | \mu(x), \sigma(x)), \mu(x),\log\sigma(x) = h_\phi(x)`
        encoder_layers = self._hidden_stack([self.observation_features, *self.encoder_layer_sizes])
        encoder_layers.append(nn.Linear(self.encoder_layer_sizes[-1], self.latent_features*2)) # output layer
        self.encoder = nn.Sequential(*encoder_layers)

        # ------------------------------------------------------------------------------
        # Generative Model
        # Decode the latent sample `z` into the parameters of the observation model `p_\theta(x | z)`
        decoder_layers = self._hidden_stack([self.latent_features, *reversed(self.decoder_layer_sizes)])
        decoder_layers.append(nn.Linear(self.decoder_layer_sizes[0], self.observation_features*3)) # output layer
        self.decoder = nn.Sequential(*decoder_layers)

        # ------------------------------------------------------------------------------
        # parameters of the prior (zeros for both mu and log_sigma); registered as a buffer so they
        # follow the module across devices but are not learnable
        self.register_buffer('prior_params', torch.zeros(torch.Size([1, 2*latent_features])))

    @staticmethod
    def _hidden_stack(layer_dims:list) -> list:
        """Return `Linear -> BatchNorm1d -> LeakyReLU` modules for every consecutive pair of widths in `layer_dims`."""
        modules = []
        for in_dim, out_dim in zip(layer_dims[:-1], layer_dims[1:]):
            modules.append(nn.Linear(in_dim, out_dim))
            modules.append(nn.BatchNorm1d(out_dim))
            modules.append(nn.LeakyReLU()) # Make it non-linear
        return modules

    # ------------------------------------------------------------------------------
    # Distributions
    # ------------------------------------------------------------------------------
    def posterior(self, x:Tensor) -> Distribution:
        r"""return the distribution `q(z|x) = N(z | \mu(x), \sigma(x))`"""
        h_x = self.encoder(x) # compute the parameters of the posterior
        mu, log_sigma = h_x.chunk(2, dim=-1) # first half of the last dim is mu, second half is log_sigma

        # return a distribution `q(z|x) = N(z | \mu(x), \sigma(x))`
        return ReparameterizedDiagonalGaussian(mu, log_sigma)

    def prior(self, batch_size:int=1)-> Distribution:
        """return the distribution `p(z)`, expanded to `batch_size` rows"""
        # expand the (1, 2*latent_features) buffer along the batch dimension without copying
        prior_params = self.prior_params.expand(batch_size, *self.prior_params.shape[-1:])
        mu, log_sigma = prior_params.chunk(2, dim=-1)

        return ReparameterizedDiagonalGaussian(mu, log_sigma)

    def observation_model(self, z:Tensor) -> Distribution:
        """return the distribution `p(x|z)`"""
        px_params = self.decoder(z)
        # the decoder output is split into three equal parts along the last dim
        px_mu, log_px_sigma, px_logit = px_params.chunk(3, dim=-1)

        return HurdleNormal(px_mu, log_px_sigma, px_logit)

    # ------------------------------------------------------------------------------
    def forward(self, x) -> Dict[str, Any]:
        """compute the posterior q(z|x) (encoder), sample z~q(z|x) and return the distribution p(x|z) (decoder)"""

        # define the posterior q(z|x) / encode x into q(z|x)
        qz = self.posterior(x)

        # define the prior p(z)
        pz = self.prior(batch_size=x.size(0))

        # sample the posterior using the reparameterization trick: z ~ q(z | x)
        z = qz.rsample()

        # define the hurdle-normal observation model p(x|z)
        px = self.observation_model(z)

        return {'px': px, 'pz': pz, 'qz': qz, 'z': z}


    def sample_from_prior(self, batch_size:int=100):
        """sample z~p(z) and return p(x|z)"""

        # define the prior p(z)
        pz = self.prior(batch_size=batch_size)

        # sample the prior
        z = pz.rsample()

        # define the hurdle-normal observation model p(x|z)
        px = self.observation_model(z)

        return {'px': px, 'pz': pz, 'z': z}

## Variational inference
##### Code unchanged from course exercise

In [9]:
def reduce(x:Tensor) -> Tensor:
    """for each datapoint: sum over all dimensions except the first (batch) one"""
    return x.view(x.size(0), -1).sum(dim=1)

class VariationalInference(nn.Module):
    r"""
    Evaluate the (beta-)ELBO of a VAE on a batch.

    Args:
        beta: weight of the KL term in the training objective (beta = 1 gives the standard ELBO).
    """
    def __init__(self, beta:float=1.):
        super().__init__()
        self.beta = beta

    def forward(self, model:nn.Module, x:Tensor) -> Tuple[Tensor, Dict[str, Tensor], Dict[str, Any]]:
        r"""
        Run `model` on `x` and compute the loss.

        `model(x)` must return a mapping with the keys "px", "pz", "qz" (objects exposing
        `log_prob`) and "z" (the latent sample).

        Returns:
            loss: scalar, the negative beta-ELBO averaged over the batch.
            diagnostics: per-datapoint tensors 'elbo', 'log_px' and 'kl', detached from the graph.
            outputs: the unmodified mapping returned by `model(x)`.
        """
        # forward pass through the model
        outputs = model(x)

        # unpack outputs
        px, pz, qz, z = [outputs[k] for k in ["px", "pz", "qz", "z"]]

        # evaluate log probabilities, summed per datapoint
        log_px = reduce(px.log_prob(x)) # log-likelihood of the input "x" under the observation model "px"
        log_pz = reduce(pz.log_prob(z)) # Prior distribution
        log_qz = reduce(qz.log_prob(z)) # Posterior distribution

        # compute the ELBO with and without the beta parameter:
        # `L^\beta = E_q [ log p(x|z) ] - \beta * D_KL(q(z|x) | p(z))`
        # where `D_KL(q(z|x) | p(z))` is estimated from the single sample z as `log q(z|x) - log p(z)`
        kl = log_qz - log_pz

        # Keep both quantities per datapoint; averaging happens once, in the loss.
        # (Subtracting the per-datapoint KL from the batch mean of log_px gives the same batch
        # average but makes the per-datapoint 'elbo' diagnostic meaningless.)
        elbo = log_px - kl
        beta_elbo = log_px - self.beta * kl

        # loss
        loss = -beta_elbo.mean()

        # prepare the output: detach so the diagnostics do not keep the autograd graph alive
        diagnostics = {'elbo': elbo.detach(), 'log_px': log_px.detach(), 'kl': kl.detach()}

        return loss, diagnostics, outputs

## Train and test as functions
##### This is in order to be able to use Ray Tune to search for optimal parameters

In [10]:
# Silence all warnings (no category is given, so this is not limited to FutureWarning)
simplefilter(action='ignore')


# Build the loaders once, outside of the training function, so that every hyperparameter
# configuration is intended to be trained and evaluated on the same train/test split
train_loader, test_loader = data_loaders(batch_size=256, 
                                         num_workers=4, 
                                         pin_memory=True)
# One batch, only used to read the shape of a single observation (data_train[0].shape) when building the model
data_train = next(iter(train_loader))


def train_and_test(config):
    """
    Ray Tune trainable: train a VAE with the hyperparameters in `config` and report after every epoch.

    Reads from `config`:
        latent_features, encoder_layer_sizes, decoder_layer_sizes: forwarded to VariationalAutoencoder.
        lr: learning rate of the AdamW optimizer.
        num_epochs: optional, total number of epochs (default 100).
        beta: optional, KL weight passed to VariationalInference (default 1).

    Uses the module-level `train_loader`, `test_loader` and `data_train`.

    After each epoch it reports `train_loss` (mean training ELBO over the epoch) and `test_loss`
    (ELBO of a single batch from `test_loader`). Despite the key names both values are ELBOs,
    i.e. higher is better.
    """
    # Function checkpointing (Ray Tune): continue counting epochs after the last completed one.
    # NOTE(review): only the epoch index is checkpointed, not the model/optimizer state, so a
    # restored trial continues with freshly initialised weights.
    start_epoch = 0
    loaded_checkpoint = session.get_checkpoint()
    if loaded_checkpoint:
        start_epoch = loaded_checkpoint.to_dict()["step"] + 1

    num_epochs = config.get('num_epochs', 100)

    # Initialize model; data_train[0] is a single observation of the batch
    vae = VariationalAutoencoder(data_train[0].shape,
                                 config['latent_features'],
                                 config['encoder_layer_sizes'],
                                 config['decoder_layer_sizes'])

    # Check if GPU is available, else use CPU
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Transfer model to device
    vae = vae.to(device)

    # Evaluator: Variational Inference
    beta = config.get('beta', 1)
    vi = VariationalInference(beta=beta)

    # Optimizer
    optimizer = torch.optim.AdamW(vae.parameters(),
                                  lr=config['lr'])

    # dictionaries storing one value per epoch for every diagnostic
    training_data = defaultdict(list)
    validation_data = defaultdict(list)

    for epoch in range(start_epoch, num_epochs):
        training_epoch_data = defaultdict(list)
        vae.train()

        # Go through each batch in the training dataset using the loader
        for x in train_loader:
            x = x.to(device)

            # perform a forward pass through the model and compute the ELBO
            loss, diagnostics, _ = vi(vae, x)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # gather data for the current batch
            for k, v in diagnostics.items():
                training_epoch_data[k].append(v.mean().item())

        # gather data for the full epoch
        for k, v in training_epoch_data.items():
            training_data[k].append(np.mean(v))

        # Evaluate on a single batch, do not propagate gradients
        with torch.no_grad():
            vae.eval()

            # A new iterator is created every epoch, so this is the first batch of a fresh pass
            # over the test loader
            x = next(iter(test_loader))
            x = x.to(device)

            # perform a forward pass through the model and compute the ELBO
            _, diagnostics, _ = vi(vae, x)

            # gather data for the validation step
            for k, v in diagnostics.items():
                validation_data[k].append(v.mean().item())

        # Record the epoch that just finished so a restored trial knows where to continue
        checkpoint = Checkpoint.from_dict({"step": epoch})
        session.report({"test_loss": validation_data['elbo'][-1],"train_loss": training_data['elbo'][-1]}, checkpoint=checkpoint)

        
        

#### Run the grid search using ASHA scheduler

In [11]:
# ASHA stops badly performing trials early. `test_loss` is the validation ELBO reported by
# `train_and_test`, which is why it is maximised.
asha_scheduler = ASHAScheduler(
    time_attr='training_iteration',
    metric='test_loss',
    mode='max',
    max_t=100,
    grace_period=20,
    reduction_factor=3,
    brackets=1)

# Every trial requests one GPU
trainable_with_gpu = tune.with_resources(train_and_test, {"gpu": 1})

# Hidden-layer layouts explored for both the encoder and the decoder; defined once so the two
# grids cannot drift apart.
layer_size_options = [[256], [512], [1024], [2048], [4096], [8192],
                      [512,512], [1024,1024], [2048,2048], [4096,4096], [8192,8192],
                      [4096,4096,4096], [4096,8192,4096], [4096, 16384, 4096],
                      [4096,4096,4096,4096], [4096,8192,8192,4096]]

# Here we define the hyperparameter search space for our grid search - this is nowhere near exhaustive as we are resource limited.
# Full grid: 3 latent sizes x 16 encoder layouts x 16 decoder layouts x 2 learning rates = 1536 trials.
search_space = {
    'latent_features': tune.grid_search([32, 64, 128]),
    # each grid gets its own copies of the layouts
    'encoder_layer_sizes': tune.grid_search([list(sizes) for sizes in layer_size_options]),
    'decoder_layer_sizes': tune.grid_search([list(sizes) for sizes in layer_size_options]),
    'lr': tune.grid_search([1e-4, 1e-5]),
}

# metric and mode are given to the scheduler above, so they are not repeated in TuneConfig
# https://docs.ray.io/en/latest/ray-air/package-ref.html#module-ray.tune.tune_config
tuner = tune.Tuner(trainable_with_gpu,
                   tune_config=tune.TuneConfig(scheduler=asha_scheduler),
                   run_config=air.RunConfig(name="20_subset-04-12-2022", local_dir="./ray_results"),
                   param_space=search_space)

results = tuner.fit()



0,1
Current time:,2022-12-21 14:08:20
Running for:,00:01:50.59
Memory:,5.9/50.1 GiB

Trial name,status,loc,decoder_layer_sizes,encoder_layer_sizes,latent_features,lr,iter,total time (s),test_loss,train_loss
train_and_test_48c56_00013,RUNNING,172.27.30.56:2474,"[4096, 16384, 4096]",[256],32,0.0001,9.0,1.84945,-5463.73,-2580.17
train_and_test_48c56_00014,PENDING,,"[4096, 4096, 40_aec0",[256],32,0.0001,,,,
train_and_test_48c56_00015,PENDING,,"[4096, 8192, 81_c1c0",[256],32,0.0001,,,,
train_and_test_48c56_00016,PENDING,,[256],[512],32,0.0001,,,,
train_and_test_48c56_00017,PENDING,,[512],[512],32,0.0001,,,,
train_and_test_48c56_00018,PENDING,,[1024],[512],32,0.0001,,,,
train_and_test_48c56_00019,PENDING,,[2048],[512],32,0.0001,,,,
train_and_test_48c56_00020,PENDING,,[4096],[512],32,0.0001,,,,
train_and_test_48c56_00021,PENDING,,[8192],[512],32,0.0001,,,,
train_and_test_48c56_00022,PENDING,,"[512, 512]",[512],32,0.0001,,,,


Trial name,date,done,episodes_total,experiment_id,hostname,iterations_since_restore,node_ip,pid,should_checkpoint,test_loss,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,train_loss,training_iteration,trial_id,warmup_time
train_and_test_48c56_00000,2022-12-21_14-06-44,True,,bbbeb8d506ff40419795dad4a4d4312e,DESKTOP-3I4L4RQ,100,172.27.30.56,2474,True,-3934.77,12.6599,0.111371,12.6599,1671628004,0,,-3938.52,100,48c56_00000,0.00200653
train_and_test_48c56_00001,2022-12-21_14-06-54,True,,bbbeb8d506ff40419795dad4a4d4312e,DESKTOP-3I4L4RQ,100,172.27.30.56,2474,True,-2691.8,10.5467,0.104403,10.5467,1671628014,0,,-2621.32,100,48c56_00001,0.00200653
train_and_test_48c56_00002,2022-12-21_14-07-05,True,,bbbeb8d506ff40419795dad4a4d4312e,DESKTOP-3I4L4RQ,100,172.27.30.56,2474,True,-2224.8,10.6576,0.109168,10.6576,1671628025,0,,-2126.46,100,48c56_00002,0.00200653
train_and_test_48c56_00003,2022-12-21_14-07-16,True,,bbbeb8d506ff40419795dad4a4d4312e,DESKTOP-3I4L4RQ,100,172.27.30.56,2474,True,-1907.0,10.9911,0.0966794,10.9911,1671628036,0,,-1897.19,100,48c56_00003,0.00200653
train_and_test_48c56_00004,2022-12-21_14-07-28,True,,bbbeb8d506ff40419795dad4a4d4312e,DESKTOP-3I4L4RQ,100,172.27.30.56,2474,True,-1637.8,11.7266,0.0959854,11.7266,1671628048,0,,-1568.45,100,48c56_00004,0.00200653
train_and_test_48c56_00005,2022-12-21_14-07-39,True,,bbbeb8d506ff40419795dad4a4d4312e,DESKTOP-3I4L4RQ,100,172.27.30.56,2474,True,-1347.55,11.3516,0.104327,11.3516,1671628059,0,,-1232.82,100,48c56_00005,0.00200653
train_and_test_48c56_00006,2022-12-21_14-07-41,True,,bbbeb8d506ff40419795dad4a4d4312e,DESKTOP-3I4L4RQ,20,172.27.30.56,2474,True,-7425.43,2.05978,0.100195,2.05978,1671628061,0,,-6930.75,20,48c56_00006,0.00200653
train_and_test_48c56_00007,2022-12-21_14-07-43,True,,bbbeb8d506ff40419795dad4a4d4312e,DESKTOP-3I4L4RQ,20,172.27.30.56,2474,True,-5077.32,2.04304,0.10283,2.04304,1671628063,0,,-4227.73,20,48c56_00007,0.00200653
train_and_test_48c56_00008,2022-12-21_14-07-46,True,,bbbeb8d506ff40419795dad4a4d4312e,DESKTOP-3I4L4RQ,20,172.27.30.56,2474,True,-3260.7,2.8136,0.158887,2.8136,1671628066,0,,-2447.28,20,48c56_00008,0.00200653
train_and_test_48c56_00009,2022-12-21_14-07-58,True,,bbbeb8d506ff40419795dad4a4d4312e,DESKTOP-3I4L4RQ,100,172.27.30.56,2474,True,-1573.34,12.0654,0.118652,12.0654,1671628078,0,,-1537.2,100,48c56_00009,0.00200653


2022-12-21 14:08:20,593	ERROR tune.py:773 -- Trials did not complete: [train_and_test_48c56_00013, train_and_test_48c56_00014, train_and_test_48c56_00015, train_and_test_48c56_00016, train_and_test_48c56_00017, train_and_test_48c56_00018, train_and_test_48c56_00019, train_and_test_48c56_00020, train_and_test_48c56_00021, train_and_test_48c56_00022, train_and_test_48c56_00023, train_and_test_48c56_00024, train_and_test_48c56_00025, train_and_test_48c56_00026, train_and_test_48c56_00027, train_and_test_48c56_00028, train_and_test_48c56_00029, train_and_test_48c56_00030]
2022-12-21 14:08:20,594	INFO tune.py:777 -- Total run time: 110.88 seconds (110.55 seconds for the tuning loop).


In [12]:
# Configuration of the trial with the highest reported `test_loss` (the metric is maximised, mode="max")
print("Best config is:", results.get_best_result(metric="test_loss", mode="max").config)

Best config is: {'latent_features': 32, 'encoder_layer_sizes': [256], 'decoder_layer_sizes': [8192, 8192], 'lr': 0.0001}


In [None]:
# Save the results dataframe to disk as a pickle under ./ray_results (relative to the current working directory)
tune_df = results.get_dataframe()
tune_df.to_pickle(os.getcwd() + "/ray_results/20_subset-04-12-2022.pkl")

In [None]:
# Show the full dataframe of tested hyperparameter combinations, sorted by `test_loss` in descending order (highest value first)
tune_df.sort_values(by = ['test_loss'], ascending=False)