Renamed CoulombMatrix module and fixed KernelRidge class.

- data.parser: `ani_to_ase()` function now supports a list of hdf5-loaded ANI datasets.
- data.features:
  * __init__.py: renamed CoulombMatrix module.
  * coulombmatrix module is more efficient when preparing SVM features, and its parameters can be saved to file.
- model.kernelridge: KernelRidge class was missing a variable.
- ml4chem.potentials: changed some of the functions to allow compatibility.
muammar · Jan 15, 2020 · 94ff3d5 · 94ff3d5
1 parent bb83fd1
commit 94ff3d5
Show file tree

Hide file tree

Showing 6 changed files with 374 additions and 22 deletions.
diff --git a/ml4chem/data/parser.py b/ml4chem/data/parser.py
@@ -119,8 +119,8 @@ def ani_to_ase(hdf5file, data_keys, trajfile=None):
     
     Parameters
     ----------
-    hdf5file : hdff
-        hdf5 file loaded using pyanitools.
+    hdf5file : hdf5, list
+        hdf5 file loaded using pyanitools (or list of them).
     data_keys : list
         List of keys to extract data.
     trajfile : str, optional
@@ -132,35 +132,39 @@ def ani_to_ase(hdf5file, data_keys, trajfile=None):
         A list of Atoms objects.
     """
 
+    if isinstance(hdf5file, list) is False:
+        hdf5file = [hdf5file]
+
     atoms = []
     prop = {"energies": "energy", "energy": "energy"}
 
     if trajfile is not None:
         traj = Trajectory(trajfile, mode="w")
 
-    for data in hdf5file:
+    for hdf5 in hdf5file:
+        for data in hdf5:
+
+            symbols = data["species"]
+            conformers = data["coordinates"]
 
-        symbols = data['species']
-        conformers = data['coordinates']
+            for index, conformer in enumerate(conformers):
+                molecule = Atoms(positions=conformer, symbols=symbols)
+                molecule.set_calculator(SinglePointCalculator())
 
-        for index, conformer in enumerate(conformers):
-            molecule = Atoms(positions=conformer, symbols=symbols)
-            molecule.set_calculator(SinglePointCalculator())
+                _prop = {}
 
-            _prop = {}
+                for key in data_keys:
+                    value = data[key][index]
 
-            for key in data_keys:
-                value = data[key][index]
+                    # Mutate key because ANI naming is not standard.
+                    key = prop[key]
+                    _prop[key] = value
 
-                # Mutate key because ANI naming is not standard.
-                key = prop[key]
-                _prop[key] = value
+                    molecule.calc.results[key] = value
 
-                molecule.calc.results[key] = value
-
-            atoms.append(molecule)
+                atoms.append(molecule)
 
-            if trajfile is not None:
-                traj.write(molecule, **_prop)
+                if trajfile is not None:
+                    traj.write(molecule, **_prop)
 
     return atoms
diff --git a/ml4chem/features/__init__.py b/ml4chem/features/__init__.py
@@ -1,6 +1,6 @@
 from ml4chem.features.autoencoders import LatentFeatures
 from ml4chem.features.cartesian import Cartesian
-from ml4chem.features.coulomb_matrix import CoulombMatrix
+from ml4chem.features.coulombmatrix import CoulombMatrix
 from ml4chem.features.gaussian import Gaussian
 
 __all__ = ["LatentFeatures", "Cartesian", "CoulombMatrix", "Gaussian"]
diff --git a/ml4chem/features/base.py b/ml4chem/features/base.py
@@ -65,7 +65,6 @@ def restack_image(self, index, image, scaled_feature_space, svm):
 
         return hash, features
 
-    @dask.delayed
     def restack_atom(self, image_index, atom, scaled_feature_space):
         """Restack atoms to a raveled list to use with SVM