In [None]:
!pip install qml --user -U

In [None]:
import qml
from glob import glob
import os
import pickle
import sys
import numpy as np
import copy
import scipy
import scipy.io
from sklearn.decomposition import PCA, TruncatedSVD

from tqdm.notebook import tqdm
# Seed NumPy's legacy global random number generator so that anything drawing
# from `np.random` later in the notebook gives the same values on every run.
np.random.seed(123)

In [None]:
# Fetch the QM7 .mat file once and reuse the local copy on later runs.
QM7_URL = 'http://www.quantum-machine.org/data/qm7.mat'
QM7_PATH = 'qm7.mat'

if not os.path.exists(QM7_PATH):
    # os.system returns the command's exit status; check it so that a failed
    # download is reported here rather than as an obscure loadmat error below.
    exit_status = os.system('wget -O {} {}'.format(QM7_PATH, QM7_URL))
    if exit_status != 0:
        # wget can leave an empty or partial file behind, which would satisfy
        # the existence check on the next run and hide the failure.
        if os.path.exists(QM7_PATH):
            os.remove(QM7_PATH)
        raise RuntimeError(
            'Downloading {} failed (wget exit status {})'.format(QM7_URL, exit_status)
        )

# loadmat returns a dict mapping MATLAB variable names to arrays.
dataset = scipy.io.loadmat(QM7_PATH)

In [None]:
# Pull the four arrays this notebook refers to out of the loaded .mat dict.
# R[i] and Z[i] are later handed to Mol as one molecule's coordinates and
# nuclear charges; X is only consulted for the number of entries (X.shape[0]).
# NOTE(review): T is not used by any of the cells visible here.
X, T, R, Z = (dataset[key] for key in ('X', 'T', 'R', 'Z'))

In [None]:
class Mol:
    """One molecule's geometry with zero-charge (padding) entries removed.

    Parameters
    ----------
    coordinates : array-like
        Per-atom coordinates, one row per atom slot (first axis aligned with
        ``nuclear_charges``).
    nuclear_charges : array-like
        1-D sequence of nuclear charges; slots whose charge is 0 are treated
        as padding and dropped.

    Attributes
    ----------
    coordinates : numpy.ndarray
        Rows of the input coordinates whose nuclear charge is non-zero.
    nuclear_charges : numpy.ndarray
        The non-zero nuclear charges, in their original order.
    """

    def __init__(self, coordinates, nuclear_charges):
        # asarray lets plain lists be passed; ndarrays are stored unchanged.
        self.coordinates = np.asarray(coordinates)
        self.nuclear_charges = np.asarray(nuclear_charges)
        self.remove_zero()

    def remove_zero(self):
        """Drop every atom slot whose nuclear charge is zero (in place).

        If no non-zero charge is present, the charges are printed as a
        diagnostic and both attributes end up empty.
        """
        # np.nonzero returns a tuple with one index array per dimension. Take
        # the array itself: len() of the tuple is always 1 for 1-D input, and
        # indexing with ``[tuple, :]`` would add a spurious leading axis.
        keep = np.nonzero(self.nuclear_charges)[0]
        if keep.size == 0:
            print(self.nuclear_charges)
        self.coordinates = self.coordinates[keep]
        self.nuclear_charges = self.nuclear_charges[keep]

In [None]:
# Wrap every dataset entry in a Mol, which strips the zero-charge slots.
# The entry count is taken from X; coordinates and charges come from R and Z.
mols = [Mol(R[i], Z[i]) for i in range(X.shape[0])]

In [None]:
# Collect the (zero-stripped) nuclear charges of all molecules and let qml
# derive the SLATM mbtypes from them; the result is passed to every
# generate_slatm call further down.
charges_per_mol = [mol.nuclear_charges for mol in mols]
mbtypes = qml.representations.get_slatm_mbtypes(charges_per_mol)

In [None]:
# Build one SLATM representation per molecule with local=True, converted to
# an ndarray. tqdm only provides the progress bar.
# NOTE(review): presumably each result has one row per atom — confirm against
# the qml documentation.
reps = [
    np.array(
        qml.representations.generate_slatm(
            mol.coordinates, mol.nuclear_charges, mbtypes, local=True
        )
    )
    for mol in tqdm(mols, total=len(mols))
]

In [None]:
# Reduce each molecule's representation with its own freshly fitted PCA
# (default settings, i.e. all components are kept).
# NOTE(review): because a separate PCA is fitted per molecule, the output
# columns are not expressed in a basis shared between molecules.
reduced_feats = [
    PCA().fit_transform(rep)
    for rep in tqdm(reps, total=len(reps))
]

In [None]:
# Number of columns every reduced feature matrix is zero-padded to in the next
# cell; a matrix with more than this many columns would make the padding fail.
# NOTE(review): presumably the largest atom count in QM7 — confirm.
max_len = 23

In [None]:
# Right-pad each reduced feature matrix with zero columns up to max_len,
# replacing the entries of reduced_feats in place.
for i, feat in enumerate(reduced_feats):
    n_rows, n_cols = feat.shape
    padded = np.zeros((n_rows, max_len))
    padded[:, :n_cols] = feat
    reduced_feats[i] = padded

In [None]:
# reduced_feats is a list of 2-D matrices whose row counts can differ from
# molecule to molecule. Passing such a ragged list straight to np.save makes
# NumPy try to build a rectangular array, which raises ValueError on
# NumPy >= 1.24. Pack the matrices into a 1-D object array explicitly instead;
# element k of the saved array is the matrix of molecule k.
feats_to_save = np.empty(len(reduced_feats), dtype=object)
for i, feat in enumerate(reduced_feats):
    feats_to_save[i] = feat

# Object arrays are pickled on save; read the file back with
# np.load("feat.npy", allow_pickle=True).
np.save("feat.npy", feats_to_save, allow_pickle=True)