Verification of autoencoder feature consistency.
- Some small changes were needed in fingerprints/autoencoders.py to fix
  the get_latent_space() function.
- Updated README.
- Improved the autoencoder example; the consistency check it now performs is sketched below.
muammar committed Oct 24, 2019
1 parent 4df8988 commit ddb1d42
Showing 4 changed files with 28 additions and 15 deletions.
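
In essence, the verification added to `examples/autoencoder/cu_inference.py` loads the latent vectors serialized during training and recomputes them through the `LatentFeatures` pipeline, then asserts that both routes agree. A condensed sketch of that check is shown below; it assumes the artifacts produced by the training example (`cu_training.traj`, `cu_training.latent`, `ml4chem.ml4c`, `ml4chem.params`) are present, and the `DataSet` import path plus `normalized=True` are assumptions not visible in the excerpted diff.

```python
import numpy as np
from ase.io import Trajectory

from ml4chem.data.handler import DataSet  # import path assumed
from ml4chem.data.serialization import load
from ml4chem.fingerprints import LatentFeatures

# 1. Latent features that were serialized during training.
latent_space = load("cu_training.latent")
latent_load = []
for atoms in latent_space.values():
    for symbol, features in atoms:
        latent_load.append(features)
latent_load = np.array(latent_load).flatten()

# 2. The same features recomputed through the LatentFeatures pipeline.
images = Trajectory("cu_training.traj")
data_handler = DataSet(images, purpose="training")
images, energies = data_handler.get_data(purpose="training")

gaussian = (
    "Gaussian",
    {
        "cutoff": 6.5,
        "normalized": True,  # assumed; the example reuses a `normalized` variable
        "preprocessor": ("MinMaxScaler", {"feature_range": (-1, 1)}),
        "save_preprocessor": "inference.scaler",
    },
)
encoder = {"model": "ml4chem.ml4c", "params": "ml4chem.params"}

latent = LatentFeatures(
    features=gaussian,
    encoder=encoder,
    preprocessor=None,
    save_preprocessor="latent_space_min_max.scaler",
).calculate_features(images, purpose="training", data=data_handler, svm=True)

latent_svm = []
for atoms in latent.values():
    for symbol, features in atoms:
        latent_svm.append(features)
latent_svm = np.array(latent_svm).flatten()

# Both routes must yield the same flattened feature vector.
assert np.allclose(latent_load, latent_svm)
```
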
9 changes: 3 additions & 6 deletions README.md
@@ -16,18 +16,15 @@ features to perform machine learning workflows for chemical physics.

A list of features and methods is shown below.

- Atom-centered Neural Networks, and Kernel Ridge Regression for the prediction
  of total energies.
- Atom-centered Neural Networks, Kernel Ridge Regression, and Gaussian
  processes for the prediction of properties.
- PyTorch backend.
- GPU support.
- ASE interface.
- Completely modular. You can use any part of this package in your project.
- Free software <3. No secrets! Pull requests and additions are more than
welcome!
- Good documentation (I hope!).
- Documentation (work in progress).
- Explicit and idiomatic: `ml4chem.get_me_a_coffee()`.
- Distributed training in a data parallelism paradigm (mini-batches).
- Scalability and distributed computations are powered by Dask <3.
- Scalability and distributed computations are powered by Dask.
- Real-time tools to track status of your computations.
- [Messagepack serialization](https://msgpack.org/index.html).

30 changes: 23 additions & 7 deletions examples/autoencoder/cu_inference.py
@@ -7,13 +7,20 @@
from ml4chem.fingerprints import LatentFeatures
from ml4chem.data.serialization import load
from ml4chem.utils import logger
import numpy as np


def autoencode():
    # Load the images with ASE
    latent_space = load("cu_training.latent")
    print("Latent space from file")
    print(latent_space)

    latent_load = []
    for e in list(latent_space.values()):
        for symbol, features in e:
            latent_load.append(features)

    latent_load = np.array(latent_load).flatten()


    images = Trajectory("cu_training.traj")
    purpose = "training"
@@ -24,29 +31,38 @@ def autoencode():
    data_handler = DataSet(images, purpose=purpose)
    images, energies = data_handler.get_data(purpose=purpose)

    preprocessor = ("MinMaxScaler", {"feature_range": (-1, 1)})

    fingerprints = (
        "Gaussian",
        {
            "cutoff": 6.5,
            "normalized": normalized,
            "preprocessor": preprocessor,
            "save_preprocessor": "inference.scaler",
        },
    )
    encoder = {"model": "ml4chem.ml4c", "params": "ml4chem.params"}
    preprocessor = ("MinMaxScaler", {"feature_range": (-1, 1)})

    fingerprints = LatentFeatures(
        features=fingerprints,
        encoder=encoder,
        preprocessor=preprocessor,
        preprocessor=None,
        save_preprocessor="latent_space_min_max.scaler",
    )

    fingerprints = fingerprints.calculate_features(
        images, purpose=purpose, data=data_handler, svm=False
        images, purpose=purpose, data=data_handler, svm=True
    )

    print("Latent space from LatentFeatures class")
    print(fingerprints)
    latent_svm = []
    for e in list(fingerprints.values()):
        for symbol, features in e:
            latent_svm.append(features)

    latent_svm = np.array(latent_svm).flatten()

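    # The latent vectors loaded from disk must match those recomputed
    # through the LatentFeatures pipeline above.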
    assert np.allclose(latent_load, latent_svm)


if __name__ == "__main__":
2 changes: 1 addition & 1 deletion ml4chem/data/visualization.py
@@ -174,7 +174,7 @@ def read_log(logfile, metric="loss", refresh=None):
plt.show(block=True)


def plot_atomic_features(latent_space, method="PCA", dimensions=3, backend="seaborn"):
def plot_atomic_features(latent_space, method="PCA", dimensions=2, backend="seaborn"):
"""Plot high dimensional atomic feature vectors
This function can take a feature space dictionary, or a database file
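
For context, a minimal call reflecting the new default is sketched below; it assumes that the latent space dictionary loaded from `cu_training.latent` counts as the feature space dictionary the docstring mentions.

```python
from ml4chem.data.serialization import load
from ml4chem.data.visualization import plot_atomic_features

# Reduce the stored latent features to 2 components with PCA (the new default).
latent_space = load("cu_training.latent")
plot_atomic_features(latent_space, method="PCA", dimensions=2, backend="seaborn")
```
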
2 changes: 1 addition & 1 deletion ml4chem/fingerprints/autoencoders.py
@@ -103,7 +103,7 @@ def calculate_features(self, images, purpose="training", data=None, svm=False):
        features = features(**kwargs)

        feature_space = features.calculate_features(
            images, data=data, purpose=purpose, svm=svm
            images, data=data, purpose=purpose, svm=False
        )

        preprocessor = Preprocessing(self.preprocessor, purpose=purpose)
