
Commit

Improved VAE documentation and general fixes.
- Added improved VAE documentation.
- The VAELoss function now works as expected.
- Ran `black` over the whole code base.
muammar committed Nov 6, 2019
1 parent f7cb1c9 commit 1028721
Showing 7 changed files with 39 additions and 22 deletions.
Binary file added docs/source/_static/Autoencoder_schema.png
Binary file added docs/source/_static/vae.png
30 changes: 23 additions & 7 deletions docs/source/models.rst
@@ -172,9 +172,10 @@ because of a hidden layer that serves as an informational bottleneck as shown
in the figure below. In addition, this latent code is used by the decoder to
reconstruct the input data.

.. image:: https://upload.wikimedia.org/wikipedia/commons/3/37/Autoencoder_schema.png


.. image:: _static/Autoencoder_schema.png
   :alt: Vanilla autoencoder
   :scale: 50 %
   :align: center
::

    from ml4chem.models.autoencoders import AutoEncoder
@@ -186,18 +186,33 @@ reconstruct the input data.
    autoencoder.prepare_model(input_dimension, output_dimension, data=data_handler)


ML4Chem also provides access to variational autoencoders. It just suffices to
change the snippet above as follows:
ML4Chem also provides access to variational autoencoders (VAE). These
architectures differ from an AE in that the encoder produces a distribution,
parameterized by a mean and a variance (two vectors with the desired latent
space dimension), instead of a single latent vector. This distribution is
then sampled, and the sample is passed to the decoder to reconstruct the
input. The result is a generative model: sampling the latent distribution
allows a continuous transition from one class to another.

.. image:: _static/vae.png
   :alt: VAE
   :scale: 50 %
   :align: center
::

To use this architecture, it suffices to change the AE snippet shown above
as follows:


::

    from ml4chem.models.autoencoders import VAE

    hiddenlayers = {"encoder": (20, 10, 4), "decoder": (4, 10, 20)}
    activation = "tanh"
    autoencoder = VAE(hiddenlayers=hiddenlayers, activation=activation)
    vae = VAE(hiddenlayers=hiddenlayers, activation=activation, multivariate=True)
    data_handler.get_unique_element_symbols(images, purpose=purpose)
    autoencoder.prepare_model(input_dimension, output_dimension, data=data_handler)
    vae.prepare_model(input_dimension, output_dimension, data=data_handler)
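
A note on the sampling step described above: VAE implementations typically
use the reparameterization trick so that gradients can flow through the
random draw. A minimal PyTorch sketch, assuming ``mu`` and ``logvar`` are the
encoder outputs (illustrative names, not ML4Chem's internal API):

::

    import torch

    def reparameterize(mu, logvar):
        """Draw z ~ N(mu, sigma^2) in a differentiable way."""
        std = torch.exp(0.5 * logvar)  # logvar is log(sigma^2)
        eps = torch.randn_like(std)    # eps ~ N(0, I)
        return mu + eps * std

    # Example with a latent space of dimension 4, as in the snippet above.
    mu, logvar = torch.zeros(1, 4), torch.zeros(1, 4)
    z = reparameterize(mu, logvar)     # z has shape (1, 4)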


Kernel Ridge Regression
7 changes: 3 additions & 4 deletions examples/autoencoder/cu_inference.py
@@ -18,9 +18,8 @@ def autoencode():
for e in list(latent_space.values()):
for symbol, features in e:
latent_load.append(features)

latent_load = np.array(latent_load).flatten()

latent_load = np.array(latent_load).flatten()

images = Trajectory("cu_training.traj")
purpose = "training"
@@ -59,8 +58,8 @@ def autoencode():
for e in list(fingerprints.values()):
for symbol, features in e:
latent_svm.append(features)
latent_svm = np.array(latent_svm).flatten()

latent_svm = np.array(latent_svm).flatten()

assert np.allclose(latent_load, latent_svm)

5 changes: 3 additions & 2 deletions ml4chem/data/preprocessing.py
@@ -86,9 +86,10 @@ def set(self, purpose):
self.preprocessor = joblib.load(self.preprocessing)

else:
logger.warning("{} with {} is not supported.".format(self.preprocessing, self.kwargs))
logger.warning(
"{} with {} is not supported.".format(self.preprocessing, self.kwargs)
)
self.preprocessor = preprocessor_name = None


if purpose == "training" and preprocessor_name is not None:
logger.info("Data preprocessing")
2 changes: 1 addition & 1 deletion ml4chem/data/visualization.py
@@ -174,7 +174,7 @@ def read_log(logfile, metric="loss", refresh=None):
plt.show(block=True)


def plot_atomic_features(latent_space, method="PCA", dimensions=3, backend="seaborn"):
def plot_atomic_features(latent_space, method="PCA", dimensions=2, backend="seaborn"):
"""Plot high dimensional atomic feature vectors
This function can take a feature space dictionary, or a database file
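A hypothetical call with the updated default, assuming ``latent_space`` holds
a feature-space dictionary such as the one built in the inference example
above:

::

    from ml4chem.data.visualization import plot_atomic_features

    # Project the atomic features onto two principal components and plot
    # them with seaborn (dimensions=2 is now the default).
    plot_atomic_features(latent_space, method="PCA", dimensions=2,
                         backend="seaborn")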
17 changes: 9 additions & 8 deletions ml4chem/models/loss.py
@@ -296,15 +296,16 @@ def VAELoss(

loss = []

dim = 1
if multivariate:
# loss_rec = LOG_2_PI + logvar_x + (x - mu_x)**2 / (2*torch.exp(logvar_x))
# loss_rec = -torch.mean(torch.sum(-(0.5 * np.log(2 * np.pi) + 0.5 * logvars_decoder) - 0.5 * ((targets - mus_decoder)**2 / torch.exp(logvars_decoder)), dim=0))
loss_rec = torch.sum(
loss_rec = -torch.sum(
(-0.5 * np.log(2.0 * np.pi))
+ (-0.5 * logvars_decoder)
+ ((-0.5 / torch.exp(logvars_decoder)) * (targets - mus_decoder) ** 2.0)
+ ((-0.5 / torch.exp(logvars_decoder)) * (targets - mus_decoder) ** 2.0),
dim=dim,
)
loss_rec *= -1.0

else:
loss_rec = torch.nn.functional.binary_cross_entropy(
@@ -319,20 +320,20 @@
# https://arxiv.org/abs/1312.6114
# 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)

annealing = 1.0

kld = (
-0.5
* torch.sum(1 + logvars_latent - mus_latent.pow(2) - logvars_latent.exp())
* torch.sum(
1 + logvars_latent - mus_latent.pow(2) - logvars_latent.exp(), dim=dim
)
* annealing
)
loss.append(kld)

if latent is not None:
activation_reg = torch.mean(torch.pow(latent, 2))
activation_reg = torch.mean(torch.pow(latent, 2), dim=dim)
loss.append(activation_reg)

print(loss)
# Mini-batch mean
loss = torch.mean(torch.stack(loss))

return loss
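
For reference, a self-contained sketch of the two terms combined above in the
multivariate case: the Gaussian negative log-likelihood of the reconstruction,
and the closed-form KL divergence from Kingma & Welling. Tensor names are
illustrative, not the exact ``VAELoss`` signature:

::

    import numpy as np
    import torch

    def gaussian_nll(targets, mus_decoder, logvars_decoder, dim=1):
        # Negative log-likelihood of targets under N(mu, sigma^2),
        # summed over the feature dimension.
        return -torch.sum(
            -0.5 * np.log(2.0 * np.pi)
            - 0.5 * logvars_decoder
            - 0.5 * (targets - mus_decoder) ** 2 / torch.exp(logvars_decoder),
            dim=dim,
        )

    def kld(mus_latent, logvars_latent, dim=1):
        # -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2), following the
        # reference cited above.
        return -0.5 * torch.sum(
            1 + logvars_latent - mus_latent.pow(2) - logvars_latent.exp(),
            dim=dim,
        )

    # Toy batch: 2 samples, 3 output features, 2-dimensional latent space.
    targets = torch.rand(2, 3)
    rec = gaussian_nll(targets, torch.zeros(2, 3), torch.zeros(2, 3))
    kl = kld(torch.zeros(2, 2), torch.zeros(2, 2))
    loss = torch.mean(rec + kl)  # average over the mini-batch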
