Verification of autoencoder feature consistency.
- Some small changes were needed in fingerprints/autoencoders.py to fix
  the get_latent_space() function.
- Updated README.
- Improved the autoencoder example; the consistency check it now performs is sketched below.
muammar committed Oct 24, 2019
1 parent 4df8988 commit ddb1d42
Showing 4 changed files with 28 additions and 15 deletions.
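
In essence, the verification added to `examples/autoencoder/cu_inference.py` loads the latent vectors serialized during training and recomputes them through the `LatentFeatures` pipeline, then asserts that both routes agree. A condensed sketch of that check is shown below; it assumes the artifacts produced by the training example (`cu_training.traj`, `cu_training.latent`, `ml4chem.ml4c`, `ml4chem.params`) are present, and the `DataSet` import path plus `normalized=True` are assumptions not visible in the excerpted diff.

```python
import numpy as np
from ase.io import Trajectory

from ml4chem.data.handler import DataSet  # import path assumed
from ml4chem.data.serialization import load
from ml4chem.fingerprints import LatentFeatures

# 1. Latent features that were serialized during training.
latent_space = load("cu_training.latent")
latent_load = []
for atoms in latent_space.values():
    for symbol, features in atoms:
        latent_load.append(features)
latent_load = np.array(latent_load).flatten()

# 2. The same features recomputed through the LatentFeatures pipeline.
images = Trajectory("cu_training.traj")
data_handler = DataSet(images, purpose="training")
images, energies = data_handler.get_data(purpose="training")

gaussian = (
    "Gaussian",
    {
        "cutoff": 6.5,
        "normalized": True,  # assumed; the example reuses a `normalized` variable
        "preprocessor": ("MinMaxScaler", {"feature_range": (-1, 1)}),
        "save_preprocessor": "inference.scaler",
    },
)
encoder = {"model": "ml4chem.ml4c", "params": "ml4chem.params"}

latent = LatentFeatures(
    features=gaussian,
    encoder=encoder,
    preprocessor=None,
    save_preprocessor="latent_space_min_max.scaler",
).calculate_features(images, purpose="training", data=data_handler, svm=True)

latent_svm = []
for atoms in latent.values():
    for symbol, features in atoms:
        latent_svm.append(features)
latent_svm = np.array(latent_svm).flatten()

# Both routes must yield the same flattened feature vector.
assert np.allclose(latent_load, latent_svm)
```
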
9 changes: 3 additions & 6 deletions README.md
@@ -16,18 +16,15 @@ features to perform machine learning workflows for chemical physics.

A list of features and methods is shown below.

- Atom-centered Neural Networks, and Kernel Ridge Regression for the prediction
  of total energies.
- Atom-centered Neural Networks, Kernel Ridge Regression, and Gaussian
  processes for the prediction of properties.
- PyTorch backend.
- GPU support.
- ASE interface.
- Completely modular. You can use any part of this package in your project.
- Free software <3. No secrets! Pull requests and additions are more than
welcome!
- Good documentation (I hope!).
- Documentation (work in progress).
- Explicit and idiomatic: `ml4chem.get_me_a_coffee()`.
- Distributed training in a data parallelism paradigm (mini-batches).
- Scalability and distributed computations are powered by Dask <3.
- Scalability and distributed computations are powered by Dask.
- Real-time tools to track status of your computations.
- [Messagepack serialization](https://msgpack.org/index.html).

30 changes: 23 additions & 7 deletions examples/autoencoder/cu_inference.py
@@ -7,13 +7,20 @@
from ml4chem.fingerprints import LatentFeatures
from ml4chem.data.serialization import load
from ml4chem.utils import logger
import numpy as np


def autoencode():
    # Load the images with ASE
    latent_space = load("cu_training.latent")
    print("Latent space from file")
    print(latent_space)

    latent_load = []
    for e in list(latent_space.values()):
        for symbol, features in e:
            latent_load.append(features)

    latent_load = np.array(latent_load).flatten()


    images = Trajectory("cu_training.traj")
    purpose = "training"
@@ -24,29 +31,38 @@ def autoencode():
    data_handler = DataSet(images, purpose=purpose)
    images, energies = data_handler.get_data(purpose=purpose)

    preprocessor = ("MinMaxScaler", {"feature_range": (-1, 1)})

    fingerprints = (
        "Gaussian",
        {
            "cutoff": 6.5,
            "normalized": normalized,
            "preprocessor": preprocessor,
            "save_preprocessor": "inference.scaler",
        },
    )
    encoder = {"model": "ml4chem.ml4c", "params": "ml4chem.params"}
    preprocessor = ("MinMaxScaler", {"feature_range": (-1, 1)})

    fingerprints = LatentFeatures(
        features=fingerprints,
        encoder=encoder,
        preprocessor=preprocessor,
        preprocessor=None,
        save_preprocessor="latent_space_min_max.scaler",
    )

    fingerprints = fingerprints.calculate_features(
        images, purpose=purpose, data=data_handler, svm=False
        images, purpose=purpose, data=data_handler, svm=True
    )

    print("Latent space from LatentFeatures class")
    print(fingerprints)
    latent_svm = []
    for e in list(fingerprints.values()):
        for symbol, features in e:
            latent_svm.append(features)

    latent_svm = np.array(latent_svm).flatten()

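    # The latent vectors loaded from disk must match those recomputed
    # through the LatentFeatures pipeline above.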
    assert np.allclose(latent_load, latent_svm)


if __name__ == "__main__":
2 changes: 1 addition & 1 deletion ml4chem/data/visualization.py
@@ -174,7 +174,7 @@ def read_log(logfile, metric="loss", refresh=None):
plt.show(block=True)


def plot_atomic_features(latent_space, method="PCA", dimensions=3, backend="seaborn"):
def plot_atomic_features(latent_space, method="PCA", dimensions=2, backend="seaborn"):
"""Plot high dimensional atomic feature vectors
This function can take a feature space dictionary, or a database file
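
For context, a minimal call reflecting the new default is sketched below; it assumes that the latent space dictionary loaded from `cu_training.latent` counts as the feature space dictionary the docstring mentions.

```python
from ml4chem.data.serialization import load
from ml4chem.data.visualization import plot_atomic_features

# Reduce the stored latent features to 2 components with PCA (the new default).
latent_space = load("cu_training.latent")
plot_atomic_features(latent_space, method="PCA", dimensions=2, backend="seaborn")
```
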
2 changes: 1 addition & 1 deletion ml4chem/fingerprints/autoencoders.py
@@ -103,7 +103,7 @@ def calculate_features(self, images, purpose="training", data=None, svm=False):
        features = features(**kwargs)

        feature_space = features.calculate_features(
            images, data=data, purpose=purpose, svm=svm
            images, data=data, purpose=purpose, svm=False
        )

        preprocessor = Preprocessing(self.preprocessor, purpose=purpose)
