More advances towards a VAE.

muammar committed Oct 15, 2019
1 parent bff44b6 commit 1c258a2
Showing 4 changed files with 165 additions and 56 deletions.
10 changes: 5 additions & 5 deletions README.md
@@ -6,7 +6,7 @@
[![License](https://img.shields.io/badge/license-BSD-green)](https://github.com/muammar/ml4chem/blob/master/LICENSE)
[![Downloads](https://img.shields.io/github/downloads/muammar/ml4chem/total.svg?maxAge=2592000?style=flat-square)](https://github.com/muammar/ml4chem/releases)
[![GitHub release](https://img.shields.io/github/release/muammar/ml4chem.svg)](https://github.com/muammar/ml4chem/releases/latest)

--------------------------------------------------------------------------------

This package is written in Python 3, and intends to offer modern and rich
features to perform machine learning workflows for chemical physics.
@@ -29,14 +29,14 @@ A list of features and methods are shown below.
- [Messagepack serialization](https://msgpack.org/index.html).


## Citing
### Citing

If you find this software useful, please use this DOI to cite it:

[![DOI](https://zenodo.org/badge/161847010.svg)](https://zenodo.org/badge/latestdoi/161847010)


## Documentation
### Documentation

You can read the documentation at [https://ml4chem.dev](https://ml4chem.dev)
where you can get started. It is arranged in a way that you can go through
@@ -46,12 +46,12 @@ index](https://ml4chem.dev/genindex.html) to get more information about
different classes and functions of ML4Chem.


## Dask dashboard
### Dask dashboard
![](https://raw.githubusercontent.com/muammar/ml4chem/master/docs/source/_static/dask_dashboard.png)

Note: This package is under development.

## Copyright
### Copyright
ML4Chem: Machine Learning for Chemistry and Materials (ML4Chem) Copyright (c) 2019, The
Regents of the University of California, through Lawrence Berkeley National
Laboratory (subject to receipt of any required approvals from the U.S.
1 change: 1 addition & 0 deletions ml4chem/data/visualization.py
@@ -4,6 +4,7 @@
import seaborn as sns
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from ml4chem.data.serialization import load
import time


def parity(predictions, true, scores=False, filename=None, **kwargs):
184 changes: 145 additions & 39 deletions ml4chem/models/autoencoders.py
@@ -97,8 +97,8 @@ def prepare_model(
logger.info("==============")
logger.info("Model name: {}.".format(self.name()))
logger.info(
"Structure of Autoencoder: {}".format(
"(input, " + str(self.hiddenlayers)[1:-1] + ", output)"
"Structure of {}: {}".format(
self.name(), "(input, " + str(self.hiddenlayers)[1:-1] + ", output)"
)
)

@@ -133,21 +133,24 @@ def prepare_model(

# Stacking up the layers.
if self.name() == "VAE":
keys = ["pre", "mu", "logvar"]
keys = ["h", "mu", "logvar"]
mu = []
logvar = []

index = -3
for i in range(2):
for _ in range(2):
index += 1
mu.append(encoder.pop(index))
if index == -2:
mu.append(encoder.pop(index))
else:
encoder.pop(index)

pre = torch.nn.Sequential(*encoder)
h = torch.nn.Sequential(*encoder)
logvar = torch.nn.Linear(inp_dim, out_dim)
logvar = torch.nn.Sequential(*[logvar, activation[self.activation]()])
logvar = torch.nn.Sequential(*[logvar])
mu = torch.nn.Sequential(*mu)

values = [pre, mu, logvar]
values = [h, mu, logvar]
encoder = torch.nn.ModuleDict(list(map(list, zip(keys, values))))
else:
encoder = torch.nn.Sequential(*encoder)
@@ -158,20 +161,31 @@
Decoder
"""
for inp_dim, out_dim in zip(decoder_layers, decoder_layers[1:]):
_decoder = torch.nn.Linear(inp_dim, out_dim)
decoder.append(_decoder)
decoder.append(torch.nn.Linear(inp_dim, out_dim))
decoder.append(activation[self.activation]())

# The last decoder layer for symbol
inp_dim = out_dim
_decoder = torch.nn.Linear(inp_dim, output_dimension)
decoder.append(_decoder)

"""
if self.name() == "VAE":
h = torch.nn.Sequential(*decoder)
mu = torch.nn.Linear(inp_dim, output_dimension)
mu = torch.nn.Sequential(*[mu])
logvar = torch.nn.Linear(inp_dim, output_dimension)
logvar = torch.nn.Sequential(*[logvar])
values = [h, mu, logvar]
decoder = torch.nn.ModuleDict(list(map(list, zip(keys, values))))
else:
"""
# The last decoder layer for symbol
decoder.append(torch.nn.Linear(inp_dim, output_dimension))
# According to this video https://youtu.be/xTU79Zs4XKY?t=416
# real numbered inputs need no activation function in the output
# layer decoder.append(activation[self.activation]())

# Stacking up the layers.
decoder = torch.nn.Sequential(*decoder)

symbol_decoder_pair.append([symbol, decoder])

self.encoders = torch.nn.ModuleDict(symbol_encoder_pair)
@@ -180,7 +194,7 @@ def prepare_model(
logger.info(self.decoders)

if purpose == "training":
# Iterate over all modules and just intialize those that are
# Iterate over all modules and just initialize those that are
# a linear layer.
logger.warning(
"Initialization of weights with Xavier Uniform by " "default."
@@ -331,6 +345,34 @@ def get_latent_space(self, X, svm=False, purpose=None):


class VAE(AutoEncoder):
"""Variational Autoencoder (VAE)
This module uses variational autoencoders for pipelines in chemistry.
Parameters
----------
hiddenlayers : dict
Dictionary with encoder, and decoder layers in the Auto Encoder.
activation : str
The activation function.
Notes
-----
When defining the hiddenlayers keyword argument, input and output
dimensions are automatically determined. For example, suppose you have an
input data point with 10 dimensions and you want to autoencode with
targets having 14 dimensions, a latent space with 4 dimensions and just one
hidden layer with 5 nodes between input-layer / latent-layer and
latent-layer / output-layer. Your `hiddenlayers` dictionary would look like
this:
>>> hiddenlayers = {'encoder': (5, 4), 'decoder': (4, 5)}
That would generate an autoencoder with topology (10, 5, 4 | 4, 5, 14).
"""

NAME = "VAE"

@classmethod
@@ -350,15 +392,59 @@ def encode(self, symbol, x):
Returns
-------
z
Latent vector.
mu, logvar
Mean and variance.
"""
pre = self.encoders[symbol]["pre"](x)
mu = self.encoders[symbol]["mu"](pre)
logvar = self.encoders[symbol]["logvar"](pre)
h = self.encoders[symbol]["h"](x)
mu = self.encoders[symbol]["mu"](h)
logvar = self.encoders[symbol]["logvar"](h)
return mu, logvar

# def decode(self, symbol, z):
# """Decode latent vector, z
#
# Parameters
# ----------
# symbol : str
# Chemical symbol.
# z : array
# Latent vector.
#
# Returns
# -------
# mu, logvar
# Mean and variance.
#
# Notes
# -----
# See page 11 "Kingma, D. P. & Welling, M. Auto-Encoding Variational
# Bayes. (2013)".
# """

# h = self.decoders[symbol]["h"](z)
# mu = self.decoders[symbol]["mu"](h)
# logvar = self.decoders[symbol]["logvar"](h)
# return mu, logvar

def reparameterize(self, mu, logvar):
"""Reparameterization trick
This trick samples the posterior (a latent vector) from a
multivariate Gaussian probability distribution. At the same time it
allows the model to be backward-propagated.
Parameters
----------
mu : [type]
[description]
logvar : [type]
[description]
Returns
-------
[type]
[description]
"""
std = torch.exp(0.5 * logvar)
eps = torch.randn_like(std)
return mu + eps * std
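The snippet below is a self-contained sketch of the reparameterization trick implemented above, run on dummy tensors; it shows that sampling z = mu + eps * std keeps the graph differentiable with respect to mu and logvar. Shapes and values are illustrative only.

```python
# Minimal sketch of the reparameterization trick on dummy tensors.
import torch

mu = torch.zeros(3, requires_grad=True)      # mean of q(z|x)
logvar = torch.zeros(3, requires_grad=True)  # log variance of q(z|x)

std = torch.exp(0.5 * logvar)  # sigma = exp(0.5 * log sigma^2)
eps = torch.randn_like(std)    # noise drawn outside the graph
z = mu + eps * std             # differentiable w.r.t. mu and logvar

z.sum().backward()
print(mu.grad, logvar.grad)    # gradients flow through the sampling step
```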
@@ -375,25 +461,35 @@ def forward(self, X):
Returns
-------
outputs : tensor
mu and logvar for two multivariate Gaussians
Decoded latent vector.
"""

mus_latent = []
logvars_latent = []
# mus_output = []
# logvars_output = []
outputs = []
mus = []
logvars = []
for hash, image in X.items():
for symbol, x in image:
mu, logvar = self.encode(symbol, x)
z = self.reparameterize(mu, logvar)
output = self.decode(symbol, z)
outputs.append(output)
mus.append(mu)
logvars.append(logvar)
mu_latent, logvar_latent = self.encode(symbol, x)
z = self.reparameterize(mu_latent, logvar_latent)
mus_latent.append(mu_latent)
logvars_latent.append(logvar_latent)
reconstruction = self.decode(symbol, z)
# mu_output, logvar_output = self.decode(symbol, z)
# mus_output.append(mu_output)
# logvars_output.append(logvar_output)
outputs.append(reconstruction)

mus_latent = torch.stack(mus_latent)
logvars_latent = torch.stack(logvars_latent)
# mus_output = torch.stack(mus_output)
# logvars_output = torch.stack(logvars_output)
outputs = torch.stack(outputs)
mus = torch.stack(mus)
logvars = torch.stack(logvars)
return outputs, mus, logvars

# return outputs, mus_latent, logvars_latent, mus_output, logvars_output
return outputs, mus_latent, logvars_latent
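For orientation, the following hedged sketch mimics the per-atom loop in `forward()` with toy stand-ins for the per-element encoders and decoders; the chemical symbols, dimensions, and `Linear` stand-ins are assumptions for illustration, not the modules ML4Chem actually builds.

```python
# Toy stand-ins for the per-element encoders/decoders; X maps a structure
# hash to a list of (symbol, feature-vector) pairs, as in forward() above.
from collections import OrderedDict

import torch

feat, latent = 8, 3
encoder_mu = torch.nn.Linear(feat, latent)      # stand-in for encoders[symbol]["mu"]
encoder_logvar = torch.nn.Linear(feat, latent)  # stand-in for encoders[symbol]["logvar"]
decoder = torch.nn.Linear(latent, feat)         # stand-in for decoders[symbol]

X = OrderedDict({"hash-0": [("Cu", torch.randn(feat)), ("O", torch.randn(feat))]})

outputs, mus_latent, logvars_latent = [], [], []
for hash_, image in X.items():
    for symbol, x in image:
        mu, logvar = encoder_mu(x), encoder_logvar(x)
        std = torch.exp(0.5 * logvar)
        z = mu + torch.randn_like(std) * std    # reparameterization
        outputs.append(decoder(z))
        mus_latent.append(mu)
        logvars_latent.append(logvar)

outputs = torch.stack(outputs)                  # (n_atoms, feat)
mus_latent = torch.stack(mus_latent)            # (n_atoms, latent)
logvars_latent = torch.stack(logvars_latent)    # (n_atoms, latent)
```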


class train(object):
@@ -697,11 +793,11 @@ def train_batches(
"""
inputs = OrderedDict(chunk)
if model.name() == "VAE":
outputs, mus, logvars = model(inputs)
# outputs, mus_latent, logvars_latent, mus_output, logvars_output = model(inputs)
outputs, mus_latent, logvars_latent, = model(inputs)
else:
outputs = model(inputs)

args = {"outputs": outputs, "targets": targets[index]}
args = {"outputs": outputs, "targets": targets[index]}

_args, _varargs, _keywords, _defaults = inspect.getargspec(lossfxn)

@@ -713,11 +809,21 @@ def train_batches(
# In the case of using EncoderMapLoss the inputs are needed, too.
args.update({"inputs": inputs_chunk_vals[index]})

elif "mus" in _args and "logvars" in _args:
mus = {"mus": mus}
logvars = {"logvars": logvars}
args.update(mus)
args.update(logvars)
elif "mus_latent" in _args and "logvars_latent" in _args:
args = {
"outputs": outputs,
"targets": targets[index],
"mus_latent": mus_latent,
"logvars_latent": logvars_latent,
}
# elif "mus_latent" in _args and "logvars_latent" in _args:
# args = {
# "targets": targets[index],
# "mus_latent": mus_latent,
# "logvars_latent": logvars_latent,
# "mus_output": mus_output,
# "logvars_output": logvars_output,
# }

loss = lossfxn(**args)
loss.backward()
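The branching above keys the keyword arguments off the loss function's signature. A hedged sketch of that dispatch pattern is shown below; the two toy loss functions and the `call_loss` helper are hypothetical, and `inspect.getfullargspec` stands in for the deprecated `getargspec` used in the diff.

```python
# Hypothetical sketch of signature-based dispatch: pass only the keyword
# arguments that the user-supplied loss function actually declares.
import inspect

import torch


def mse_loss(outputs, targets):
    return torch.nn.functional.mse_loss(outputs, targets)


def vae_like_loss(outputs, targets, mus_latent, logvars_latent):
    rec = torch.nn.functional.mse_loss(outputs, targets)
    kld = -0.5 * torch.sum(
        1 + logvars_latent - mus_latent.pow(2) - logvars_latent.exp()
    )
    return rec + kld


def call_loss(lossfxn, **available):
    # getfullargspec replaces the deprecated getargspec used in the diff.
    wanted = inspect.getfullargspec(lossfxn).args
    return lossfxn(**{k: v for k, v in available.items() if k in wanted})


outputs, targets = torch.randn(4, 2), torch.randn(4, 2)
mus, logvars = torch.zeros(4, 2), torch.zeros(4, 2)
print(call_loss(mse_loss, outputs=outputs, targets=targets))
print(call_loss(vae_like_loss, outputs=outputs, targets=targets,
                mus_latent=mus, logvars_latent=logvars))
```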
26 changes: 14 additions & 12 deletions ml4chem/models/loss.py
@@ -1,4 +1,5 @@
import torch
import numpy as np


def AtomicMSELoss(outputs, targets, atoms_per_image):
@@ -33,8 +34,6 @@ def AtomicMSELoss(outputs, targets, atoms_per_image):
def SumSquaredDiff(outputs, targets):
"""Sum of squared differences loss function
This is the default loss function for a real-valued autoencoder.
Parameters
----------
outputs : tensor
@@ -57,10 +56,7 @@ def SumSquaredDiff(outputs, targets):


def MSELoss(outputs, targets):
"""Default loss function
If user does not input loss function we provide mean-squared error loss
function.
"""Mean-squared error loss function
Parameters
----------
@@ -254,15 +250,21 @@ def get_pairwise_distances(positions, squared=False):

return distances

def VAELoss(outputs, targets, mus, logvars):
# BCE = F.binary_cross_entropy(recon_x, x.view(-1, 784), reduction='sum')

## def VAELoss(targets, mus_latent, logvars_latent, mus_output, logvars_output):
def VAELoss(outputs, targets, mus_latent, logvars_latent):

# LOG_2_PI = np.log(2 * np.pi)
# loss_rec = LOG_2_PI + torch.sum(logvars_output + (targets - mus_output) ** 2 / (2 * torch.exp(logvars_output)))

loss_rec = MSELoss(outputs, targets)

# see Appendix B from VAE paper:
# Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
# https://arxiv.org/abs/1312.6114
# 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)

criterion = torch.nn.MSELoss()
mse = criterion(outputs, targets) * 0.5
kld = -0.5 * torch.sum(1 + logvars - mus.pow(2) - logvars.exp())
return mse + kld
kld = -0.5 * torch.sum(
1 + logvars_latent - mus_latent.pow(2) - logvars_latent.exp()
)
return loss_rec + kld
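Below is an illustrative, self-contained computation of the objective assembled in `VAELoss`: a reconstruction term plus the closed-form KL divergence -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2) from Kingma and Welling (2014). The tensors are dummies, and the plain sum-of-squared-differences term is only a stand-in for ML4Chem's `MSELoss`.

```python
# Dummy-tensor illustration of the VAE objective: reconstruction + KL term.
import torch

outputs = torch.randn(4, 8)         # decoder reconstructions
targets = torch.randn(4, 8)         # reference feature vectors
mus_latent = torch.zeros(4, 3)      # means of q(z|x)
logvars_latent = torch.zeros(4, 3)  # log variances of q(z|x)

# Sum-of-squared-differences stand-in for the reconstruction term.
loss_rec = 0.5 * torch.sum((outputs - targets) ** 2)

# Closed-form KL divergence between q(z|x) and the unit Gaussian prior.
kld = -0.5 * torch.sum(
    1 + logvars_latent - mus_latent.pow(2) - logvars_latent.exp()
)

print(loss_rec + kld)
```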
