In [1]:
import qml 

In [2]:
import os
from glob import glob

import numpy as np

In [3]:
# Lookup table: element symbol -> nuclear charge (atomic number).
NUCLEAR_CHARGE = {
    "H": 1,
    "C": 6,
    "O": 8,
    "N": 7,
    "F": 9,
    "Cl": 17,
    "S": 16,
}

In [4]:
def read_xyz(filename):
    """Read the heavy (non-hydrogen) atoms of a molecule from an XYZ file.

    The first line of the file is the atom count; atom records are read
    starting at the third line. Records whose element symbol is 'H' are
    skipped, so hydrogens appear in neither returned list.

    Parameters
    ----------
    filename : str
        Path of the XYZ file to read.

    Returns
    -------
    nuclear_charges : list of int
        Nuclear charge of each kept atom, looked up in NUCLEAR_CHARGE.
    coordinates : list of list of float
        The three coordinate fields of each kept atom, in the same order.

    Raises
    ------
    KeyError
        If a non-hydrogen element symbol is missing from NUCLEAR_CHARGE.
    ValueError
        If the atom count or a coordinate field cannot be parsed.
    """
    with open(filename, "r") as handle:
        content = handle.readlines()

    atom_count = int(content[0])
    nuclear_charges, coordinates = [], []

    for record in content[2:atom_count + 2]:
        fields = record.split()

        # A record with fewer than four fields ends parsing altogether.
        if len(fields) < 4:
            break

        symbol = fields[0]
        if symbol == 'H':
            continue

        nuclear_charges.append(NUCLEAR_CHARGE[symbol])
        coordinates.append([float(value) for value in fields[1:4]])

    return nuclear_charges, coordinates

In [5]:
# Gather every XYZ file of the data set, in sorted (reproducible) order.
qm7_files = glob("../qm7-xyz/*.xyz")
qm7_files.sort()

In [6]:
# One (nuclear_charges, coordinates) pair per file, in the order of qm7_files.
conf_data = [read_xyz(xyz_path) for xyz_path in qm7_files]

In [7]:
# Transpose the per-file (nuclear_charges, coordinates) pairs into two
# parallel tuples: one of charge lists and one of coordinate lists.
ncharges_list, coords_list = zip(*conf_data)

In [8]:
# Nuclear charges passed as `elements` to the representation generator below;
# per NUCLEAR_CHARGE these are C, N, O and S (hydrogen is dropped by read_xyz).
elements = [6, 7, 8, 16]

In [9]:
# One vector per molecule: generate the FCHL-ACSF representation and sum it
# along axis 0 (presumably the atom axis -- confirm against the qml docs).
qm7_reps = [
    np.array(
        qml.representations.generate_fchl_acsf(
            charges,
            coords,
            elements=elements,
        )
    ).sum(axis=0)
    for charges, coords in zip(ncharges_list, coords_list)
]

In [10]:
# Convert the list of per-molecule vectors into a single NumPy array.
qm7_reps = np.array(qm7_reps)

In [11]:
# Sanity check: shape of the first molecule's representation vector.
qm7_reps[0].shape

(496,)

In [12]:
# Label each molecule by its file name without directory or extension.
# os.path handles the platform's path separators and strips only the final
# extension, unlike splitting the raw path string on "/" and ".xyz".
qm7_labels = [os.path.splitext(os.path.basename(xyz_path))[0] for xyz_path in qm7_files]

In [34]:
# Save the labels, nuclear charges and global representations to an .npz archive.

In [13]:
# The per-molecule nuclear-charge lists can differ in length. Implicitly
# converting such a ragged sequence to an array is deprecated in NumPy and
# raises ValueError in recent releases, so request dtype=object explicitly
# when the lengths differ (dtype=None keeps the default conversion otherwise).
# An object array must be loaded with np.load(..., allow_pickle=True).
np.savez("../representations/qm7_FCHL_global_data.npz",
         qm7_labels=qm7_labels,
         qm7_ncharges=np.array(
             ncharges_list,
             dtype=object if len(set(map(len, ncharges_list))) > 1 else None),
         qm7_reps=qm7_reps)

  return array(a, dtype, copy=False, order=order, subok=True)
