General improvements to autoencoders modules.
- VAE reparameterize() function returns just the mean values at prediction time.
- Updated examples/autoencoder directory.
- Renamed docs/source/ml4chem.fingerprints.rst to docs/source/ml4chem.features.rst.
- Potentials() class updated to work properly with the new VAE changes.
muammar committed Nov 15, 2019
1 parent 97efd97 commit 1b4dd95
Showing 11 changed files with 437 additions and 788 deletions.
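The headline change: VAE.reparameterize() now draws a random sample only during training and returns the posterior mean at prediction time, so inference is deterministic. A minimal standalone sketch of that pattern (plain PyTorch, not the exact ml4chem implementation):

```python
import torch

def reparameterize(mu, logvar, purpose="training"):
    """Reparameterization trick: sample z = mu + std * eps while training,
    return the deterministic mean mu at prediction time."""
    if purpose == "training":
        std = torch.exp(0.5 * logvar)  # logvar stores log(sigma^2)
        eps = torch.randn_like(std)    # eps ~ N(0, I)
        return mu + eps * std
    return mu

mu, logvar = torch.zeros(3), torch.zeros(3)
z_train = reparameterize(mu, logvar, purpose="training")   # stochastic sample
z_pred = reparameterize(mu, logvar, purpose="inference")   # equals mu exactly
```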
docs/source/ml4chem.features.rst (renamed from docs/source/ml4chem.fingerprints.rst)
@@ -1,35 +1,35 @@
 ml4chem.features package
-============================
+========================

 Submodules
 ----------

 ml4chem.features.autoencoders module
-----------------------------------------
+------------------------------------

 .. automodule:: ml4chem.features.autoencoders
    :members:
    :undoc-members:
    :show-inheritance:

 ml4chem.features.cartesian module
--------------------------------------
+---------------------------------

 .. automodule:: ml4chem.features.cartesian
    :members:
    :undoc-members:
    :show-inheritance:

 ml4chem.features.cutoff module
-----------------------------------
+------------------------------

 .. automodule:: ml4chem.features.cutoff
    :members:
    :undoc-members:
    :show-inheritance:

 ml4chem.features.gaussian module
-------------------------------------
+--------------------------------

 .. automodule:: ml4chem.features.gaussian
    :members:
2 changes: 1 addition & 1 deletion docs/source/models.rst
@@ -187,7 +187,7 @@ reconstruct the input data.
     autoencoder.prepare_model(input_dimension, output_dimension, data=data_handler)


-ML4Chem also provides access to variational autoencoders (VAE)[Kingma2013]_.
+ML4Chem also provides access to variational autoencoders (VAE) [Kingma2013]_.
 These architectures differ from an AE in that the encoder codes a
 distribution with mean and variance (two vectors with the desired latent
 space dimension) instead of a single latent vector. Subsequently, this
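For reference, the sampling step this paragraph goes on to describe is the standard VAE reparameterization trick:

```latex
z = \mu + \sigma \odot \epsilon, \qquad
\sigma = \exp\!\left(\tfrac{1}{2}\log\sigma^{2}\right), \qquad
\epsilon \sim \mathcal{N}(0, I)
```

Per this commit, the sampling is skipped at prediction time and the model uses z = \mu directly.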
2 changes: 1 addition & 1 deletion examples/autoencoder/cu_inference.py
@@ -67,5 +67,5 @@ def autoencode():
 if __name__ == "__main__":
     logger("cu_inference.log")
     cluster = LocalCluster()
-    client = Client(cluster, asyncronous=True)
+    client = Client(cluster)
     autoencode()
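Worth noting about the dropped keyword: `asyncronous=True` appears to be a misspelling of Dask's `asynchronous` flag, which in any case only applies inside an async event loop, so the plain synchronous client is the right call here. A minimal sketch of the intended setup:

```python
from dask.distributed import Client, LocalCluster

cluster = LocalCluster()   # local scheduler plus worker processes
client = Client(cluster)   # synchronous (blocking) client, as used above
# Inside async code one would instead write:
#   client = await Client(cluster, asynchronous=True)
```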
Binary file modified examples/autoencoder/cu_training.latent
Binary file not shown.
1,173 changes: 404 additions & 769 deletions examples/autoencoder/cu_training.log

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion examples/autoencoder/cu_training.py
@@ -82,5 +82,5 @@ def autoencode():
 if __name__ == "__main__":
     logger(filename="cu_training.log")
     cluster = LocalCluster()
-    client = Client(cluster, asyncronous=True)
+    client = Client(cluster)
     inputs, outputs, data_handler = autoencode()
Binary file modified examples/autoencoder/cu_training.scaler
Binary file not shown.
Binary file modified examples/autoencoder/inference.scaler
Binary file not shown.
4 changes: 3 additions & 1 deletion ml4chem/features/autoencoders.py
@@ -170,7 +170,9 @@ def calculate(self, images, purpose="training", data=None, svm=False):
             del _latent_space

         else:
-            latent_space = encoder.get_latent_space(feature_space, svm=svm)
+            if encoder.name() == "VAE":
+                purpose = "inference"
+            latent_space = encoder.get_latent_space(feature_space, svm=svm, purpose=purpose)

         return latent_space

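The practical effect of this hunk: when the encoder is a VAE, calculate() pins purpose="inference" so the returned latent features are posterior means rather than random draws, making feature extraction reproducible. A hedged sketch of the difference, with `encoder` and `feature_space` standing in for objects built earlier in calculate():

```python
# Hypothetical: querying the same trained VAE encoder twice.
# With purpose="training", reparameterize() samples, so features differ per call;
# with purpose="inference", it returns the mean, so repeated calls agree.
z_sampled = encoder.get_latent_space(feature_space, svm=False, purpose="training")
z_mean = encoder.get_latent_space(feature_space, svm=False, purpose="inference")
```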
22 changes: 16 additions & 6 deletions ml4chem/models/autoencoders.py
@@ -579,7 +579,7 @@ def decode(self, z, symbol=None):
         else:
             raise NotImplementedError

-    def reparameterize(self, mu, logvar):
+    def reparameterize(self, mu, logvar, purpose=None):
         """Reparameterization trick
         This trick samples the posterior (a latent vector) from a
@@ -598,9 +598,16 @@ def reparameterize(self, mu, logvar):
         Sample vector
             A sample from the distribution.
         """
-        std = torch.exp(0.5 * logvar)
-        eps = torch.randn_like(std)
-        return mu + eps * std
+        if purpose is None:
+            raise ValueError("You need to provide a purpose")
+
+        elif purpose == "training":
+            std = torch.exp(0.5 * logvar)
+            eps = torch.randn_like(std)
+            return mu + eps * std
+
+        else:
+            return mu

     def forward(self, X):
         """Forward propagation
@@ -692,6 +699,9 @@ def get_latent_space(self, X, svm=False, purpose=None):
         forward propagate and get the latent_space.
         """

+        if purpose is None:
+            raise ValueError("You need to provide a purpose")
+
         # FIXME parallelize me
         if purpose == "preprocessing":
             hashes = []
@@ -706,7 +716,7 @@
                     mu_latent, logvar_latent = self.encode(x)
                 else:
                     mu_latent, logvar_latent = self.encode(x, symbol=symbol)
-                latent_vector = self.reparameterize(mu_latent, logvar_latent)
+                latent_vector = self.reparameterize(mu_latent, logvar_latent, purpose="latent")
                 _symbols.append(symbol)

                 if svm:
@@ -735,7 +745,7 @@
                     mu_latent, logvar_latent = self.encode(x)
                 else:
                     mu_latent, logvar_latent = self.encode(x, symbol=symbol)
-                latent_vector = self.reparameterize(mu_latent, logvar_latent)
+                latent_vector = self.reparameterize(mu_latent, logvar_latent, purpose=purpose)

                 if svm:
                     _latent_vector = latent_vector.detach().numpy()
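One subtlety in the hunks above: the preprocessing branch passes purpose="latent", which is not "training", so it also takes the deterministic branch of reparameterize() and returns the mean. Using the standalone reparameterize() sketched near the top of this page:

```python
import torch

mu = torch.tensor([0.1, 0.2])
logvar = torch.zeros(2)
# Any purpose other than "training" falls through to the mean:
assert torch.equal(reparameterize(mu, logvar, purpose="latent"), mu)
```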
10 changes: 6 additions & 4 deletions ml4chem/potentials.py
@@ -240,16 +240,17 @@ def train(
         None.
         """

-        data_handler = Data(training_set, purpose="training")
+        purpose = "training"
+        data_handler = Data(training_set, purpose=purpose)
         # Raw input and targets aka X, y
-        training_set, targets = data_handler.get_data(purpose="training")
+        training_set, targets = data_handler.get_data(purpose=purpose)

         # Now let's train
         # SVM models
         if self.model.name() in Potentials.svm_models:
             # Mapping raw positions into a feature space aka X
             feature_space, reference_features = self.features.calculate(
-                training_set, data=data_handler, purpose="training", svm=True
+                training_set, data=data_handler, purpose=purpose, svm=True
             )
             self.model.prepare_model(
                 feature_space, reference_features, data=data_handler
@@ -258,8 +259,9 @@
             self.model.train(feature_space, targets)
         else:
             # Mapping raw positions into a feature space aka X
+
             feature_space = self.features.calculate(
-                training_set, data=data_handler, purpose="training", svm=False
+                training_set, data=data_handler, purpose=purpose, svm=False
             )

             # Fixed fingerprint dimension
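For context, a hedged sketch of how this train() method is typically driven; the constructor arguments and the images object are assumptions, not shown in this diff:

```python
from ml4chem.potentials import Potentials

# `features` (a feature extractor) and `model` are assumed to be configured
# elsewhere; `images` is a training set of atomic structures.
calc = Potentials(features=features, model=model)
calc.train(training_set=images)  # builds Data(training_set, purpose="training") internally
```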
