In [1]:
from aglaia import aglaia
import qml
import os
import numpy as np
import joblib

  from ._conv import register_converters as _register_converters


In [2]:
def list_files(dir, key):
    """
    Recursively collect the paths of all files whose name contains a given string.

    The directory tree rooted at ``dir`` is traversed once with ``os.walk`` and
    every file whose name contains ``key`` as a substring is recorded.

    :param dir: path to the directory to explore (sub-directories are included)
    :param key: string to look for in file names
    :return: list of paths (each prefixed with the directory the file was found
        in) of the files containing "key" in their filename. The list is empty
        if nothing matches or if ``dir`` cannot be walked. The order follows
        ``os.walk`` and is therefore file-system dependent.
    """
    matches = []
    # A single os.walk pass already yields the file names of every directory,
    # so there is no need to walk each sub-directory a second time.
    for subdir, _, files in os.walk(dir):
        for file in files:
            if key in file:
                # os.path.join avoids doubled separators (e.g. when ``dir``
                # ends with "/") and uses the platform's separator.
                matches.append(os.path.join(subdir, file))
    return matches

In [4]:
# Load every file matching ".xyz" under the data set directory as a qml Compound
filenames = list_files("/Volumes/Transcend/data_sets/vr_ccsd", ".xyz")
compounds = [qml.Compound(xyz=xyz_path) for xyz_path in filenames]

In [5]:
# Collect the nuclear charges of every compound, then derive the SLATM
# many-body types from the whole set.
nuclear_charge_sets = [mol.nuclear_charges for mol in compounds]
mbtypes = qml.representations.get_slatm_mbtypes(nuclear_charge_sets)

In [6]:
# Generate the local SLATM representation of each compound and keep it
list_descriptors = []
for compound in compounds:
    compound.generate_slatm(mbtypes, local=True, dgrids=[0.06, 0.06])
    list_descriptors.append(compound.representation)

# Largest first dimension over all descriptors (0 when there are none);
# it is used later as the padded size.
max_n_atoms = max((rep.shape[0] for rep in list_descriptors), default=0)

print(len(list_descriptors))
print(max_n_atoms)

17864
7


In [7]:
# Data set dimensions: number of descriptors and the second dimension of the
# first descriptor (taken as the feature count for all of them).
n_samples, n_features = len(list_descriptors), list_descriptors[0].shape[1]
print(n_samples, n_features)

17864 1245


In [8]:
# Zero-pad every descriptor along its first axis so they all fit in one array
padded_descriptors = np.zeros((n_samples, max_n_atoms, n_features))
for idx, descriptor in enumerate(list_descriptors):
    n_rows = descriptor.shape[0]
    # Rows beyond n_rows keep their initial zeros
    padded_descriptors[idx, :n_rows, :] = descriptor
padded_descriptors.shape

(17864, 7, 1245)

In [9]:
# Read column 1 (zero-based) of the properties file as the energies.
# NOTE(review): nothing here ties the row order of this file to the order of
# `compounds`, which follows the directory walk - confirm they are aligned.
properties_path = "/Volumes/Transcend/data_sets/vr_ccsd/properties.txt"
energies = np.loadtxt(properties_path, usecols=1)
energies.shape

(17864,)

In [11]:
# Zero-padded table of nuclear charges, one row per compound
zs = np.zeros((n_samples, max_n_atoms))
for idx, mol in enumerate(compounds):
    charges = mol.nuclear_charges
    # Entries past the compound's own charges stay at zero
    zs[idx, :charges.shape[0]] = charges
zs.shape

(17864, 7)