[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/sebascarag/AI-SyntheticSound/blob/main/Model_VAE.ipynb)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import time
import copy

In [None]:
class VariationalEncoder(nn.Module):
  """Encoder half of a VAE: one hidden layer followed by two linear heads.

  The heads produce the mean and the log-variance of the approximate
  posterior. ``forward`` returns a latent sample drawn with the
  reparameterization trick and stores the batch-averaged KL divergence in
  ``self.kl_divergence`` as a side effect.

  Args:
    latent_dims: size of the latent code.
    num_features: size of each input sample.
    num_hidden_layers: width (number of units) of the single hidden layer.
  """

  def __init__(self, latent_dims, num_features, num_hidden_layers):
    super().__init__()
    # Three layers: one shared hidden layer and two heads (mean / log-variance).
    self.hidden_1 = nn.Linear(num_features, num_hidden_layers)
    self.z_mean = nn.Linear(num_hidden_layers, latent_dims)
    self.z_log_var = nn.Linear(num_hidden_layers, latent_dims)
    # Updated on every forward pass; 0 until the first call.
    self.kl_divergence = 0
    # Kept for backward compatibility. Sampling no longer relies on it: the
    # noise is created on whatever device the incoming tensors live on.
    self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

  def latentVector(self, z_mu, z_log_var):
    """Draw ``z = mu + eps * sigma`` with ``eps ~ N(0, I)``.

    Args:
      z_mu: mean of the approximate posterior.
      z_log_var: log-variance of the approximate posterior (same shape).

    Returns:
      A tensor with the same shape, dtype and device as ``z_mu``.
    """
    # randn_like matches the device and dtype of z_mu, so the module works
    # wherever it has been moved, independent of CUDA availability.
    eps = torch.randn_like(z_mu)

    # log(x^2) = 2*log(x), hence std_dev = exp(log(var) / 2).
    sigma = torch.exp(z_log_var / 2.)
    return z_mu + eps * sigma

  def _kl_from_stats(self, z_mu, z_log_var):
    """Return KL(N(mu, sigma^2) || N(0, I)) averaged over all samples."""
    # Sum over the latent (last) dimension; for 2-D input this is dim=1.
    kl = -0.5 * torch.sum(1 + z_log_var - z_mu**2 - torch.exp(z_log_var), dim=-1)
    return kl.mean()

  # Kullback-Leibler divergence
  def klDivergence(self, x):
    """Compute the KL term from hidden activations ``x`` and store it.

    Args:
      x: output of the hidden layer (after the ReLU), as fed to both heads.
    """
    self.kl_divergence = self._kl_from_stats(self.z_mean(x), self.z_log_var(x))

  def forward(self, features):
    """Encode ``features`` and return a sampled latent vector.

    Args:
      features: floating-point tensor whose last dimension is ``num_features``.

    Returns:
      A latent sample whose last dimension is ``latent_dims``.
    """
    x = F.relu(self.hidden_1(features))

    z_mean = self.z_mean(x)
    z_log_var = self.z_log_var(x)

    # Reuse the statistics computed above instead of running both heads a
    # second time just for the KL term.
    self.kl_divergence = self._kl_from_stats(z_mean, z_log_var)

    return self.latentVector(z_mean, z_log_var)


In [None]:
class Decoder(nn.Module):
  """Decoder half of the autoencoder: latent code -> hidden -> features.

  Args:
    latent_dims: size of the incoming latent code.
    num_features: size of each reconstructed sample.
    num_hidden_layers: width (number of units) of the single hidden layer.
  """

  def __init__(self, latent_dims, num_features, num_hidden_layers):
    super().__init__()
    # Same layer sizes as the encoder, traversed in the opposite direction.
    self.linear1 = nn.Linear(in_features=latent_dims, out_features=num_hidden_layers)
    self.linear2 = nn.Linear(in_features=num_hidden_layers, out_features=num_features)

  def forward(self, z_encoded):
    """Reconstruct features from the latent code ``z_encoded``.

    Returns:
      A tensor whose last dimension is ``num_features``; the final sigmoid
      squashes every value into the (0, 1) range.
    """
    # First layer + ReLU on the latent code coming from the encoder.
    hidden = torch.relu(self.linear1(z_encoded))
    # Second layer projects back to feature space.
    logits = self.linear2(hidden)
    # Sigmoid normalises the output between 0 and 1.
    return logits.sigmoid()


In [None]:
class VariationalAutoencoder(nn.Module):
  """Variational autoencoder built from a VariationalEncoder and a Decoder.

  Args:
    latent_dims: size of the latent code.
    num_features: size of each (flattened) input sample.
    num_hidden_layers: width (number of units) of the hidden layer used by
      both the encoder and the decoder.
    random_seed: optional seed for the torch RNGs. When given, it is applied
      before the sub-modules are created so that weight initialisation is
      reproducible as well as the latent sampling.
  """

  # Constructor
  def __init__(self, latent_dims, num_features, num_hidden_layers, random_seed=None):
    super().__init__()
    # Seed first: nn.Linear draws its initial weights from the global RNG, so
    # seeding after building the layers would leave them non-reproducible.
    if random_seed is not None:
      torch.manual_seed(random_seed)
      torch.cuda.manual_seed(random_seed)
    self.encoder = VariationalEncoder(latent_dims, num_features, num_hidden_layers)
    self.decoder = Decoder(latent_dims, num_features, num_hidden_layers)
    self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Place the weights on the advertised device so that the batches sent
    # there by train_fit meet parameters on the same device.
    self.to(self.device)
    print('Device:', self.device)

  def forward(self, features):
    """Encode ``features``, sample a latent code and decode it."""
    z = self.encoder(features)
    return self.decoder(z)

  def train_fit(self, data, learning_rate=1e-3, num_epochs=20, flatten=False, optuna_trial=None, log_interval=None):
    """Train the model in place with Adam and keep the best epoch.

    Args:
      data: sized iterable (e.g. a DataLoader) yielding ``(features, labels)``
        pairs. The labels are ignored.
      learning_rate: Adam learning rate.
      num_epochs: number of passes over ``data``.
      flatten: when True, each batch is flattened from dim 1 onwards.
      optuna_trial: optional Optuna trial; the epoch loss is reported to it
        and ``optuna.TrialPruned`` is raised when it asks for pruning.
      log_interval: print progress every this many batches. Defaults to
        ``num_epochs`` (the historical behaviour); 0 disables batch logging.

    Returns:
      ``(best_model, best_loss, epoch_losses)`` where ``best_model`` is a copy
      of this model holding the weights of the epoch with the lowest mean
      loss, ``best_loss`` is that mean loss, and ``epoch_losses`` is an array
      of shape ``(num_epochs, 3)`` with per-epoch means of
      ``[loss, kl, pixelwise]``.

    Raises:
      optuna.TrialPruned: if ``optuna_trial`` requests pruning.
    """
    start_time = time.time()
    optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate)
    if optuna_trial is not None:
      import optuna

    # Follow the device the parameters actually live on, so training keeps
    # working if the caller moved the model after construction.
    device = next(self.parameters()).device
    interval = num_epochs if log_interval is None else log_interval
    num_batches = len(data)

    best_model = copy.deepcopy(self)  # copy of the base model
    best_loss = None
    epoch_history = []
    for epoch in range(num_epochs):
      start_time_elapsed = time.time()
      batch_history = []
      for batch_idx, (features, _labels) in enumerate(data):
        if flatten:
          features = torch.flatten(features, start_dim=1)
        features = features.to(device)

        optimizer.zero_grad()  # important: reset gradients before the forward pass

        decoded = self(features)
        # NOTE(review): the KL term is averaged over the batch by the encoder
        # while the reconstruction error below is summed over the whole batch,
        # so their relative weight depends on the batch size.
        kl_divergence = self.encoder.kl_divergence
        pixelwise = ((features - decoded)**2).sum()

        # cost = reconstruction loss + Kullback-Leibler divergence
        loss = kl_divergence + pixelwise
        loss.backward()
        optimizer.step()

        loss_value, kl_value, pw_value = loss.item(), kl_divergence.item(), pixelwise.item()
        batch_history.append((loss_value, kl_value, pw_value))

        # progress logging
        if interval and batch_idx % interval == 0:
          print('Epoch: %03d/%03d | Batch %03d/%03d | kl: %.4f + pw: %.4f = loss: %.4f'
                %(epoch+1, num_epochs, batch_idx, num_batches, kl_value, pw_value, loss_value))

      # average of every loss component over the epoch
      epoch_means = np.array(batch_history, dtype=float).reshape(-1, 3).mean(axis=0)
      epoch_loss = epoch_means[0]
      epoch_history.append(epoch_means)

      if best_loss is None or epoch_loss < best_loss:
        best_loss = epoch_loss
        best_model.load_state_dict(self.state_dict())  # snapshot current weights

      print('Time elapsed: %.2f min' % ((time.time() - start_time_elapsed)/60))

      if optuna_trial is not None:
        optuna_trial.report(epoch_loss, epoch)
        if optuna_trial.should_prune():
          print('Prune on epoch: {:0>3} | loss:{:.4f}'.format(epoch, epoch_loss))
          raise optuna.TrialPruned()

    print('Total Training Time: %.2f min' % ((time.time() - start_time)/60))
    epoch_losses = np.array(epoch_history, dtype=float).reshape(-1, 3)
    return best_model, best_loss, epoch_losses

