# SOAP example

To install rascal:
(NOTE: See the top-level README for the most up-to-date installation instructions.)
+ mkdir ../build
+ cd ../build
+ cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTS=ON ..
+ make -j 4
+ make install

TODO: Compare against an existing SOAP implementation in Python (e.g. quippy)

In [1]:
%env OMP_NUM_THREADS=1

env: OMP_NUM_THREADS=1


In [2]:
%matplotlib inline
from matplotlib import pyplot as plt

In [3]:
import os, sys
from ase.io import read
sys.path.insert(0,"../build/")

In [4]:
import rascal
import json

In [None]:
import ase
#from ase.io import read
from ase.build import make_supercell
from ase.visualize import view
import numpy as np
import sys

In [None]:
from multiprocessing.dummy import Pool as ThreadPool
import queue
from copy import deepcopy,copy

In [None]:
import json

# from rascal.neighbourlist import get_neighbourlist
# from rascal.lib import RepresentationManager, FeatureManager
# from rascal.representation.base import RepresentationFactory
from rascal.representation import SphericalExpansion
# from rascal.neighbourlist.base import NeighbourListFactory


In [None]:
def setup_neighbourlist(options):
    """Build a chain of neighbour-list managers and return the last one.

    Parameters
    ----------
    options : sequence of dict
        One entry per manager, each with a ``'name'`` and an ``'args'`` list.

    Returns
    -------
    The manager built from the last entry of ``options``.

    Raises
    ------
    ValueError
        If ``options`` is empty.

    Notes
    -----
    The factory name of the i-th manager is the ``'name'`` of entries 0..i
    joined last-to-first with ``'_'`` (e.g. ``'centers'``, then
    ``'neighbourlist_centers'``).  The first manager is created from its own
    args only; every later manager receives the previously built manager as
    its first positional argument, followed by its own args.
    """
    names = []
    args = []
    reversed_names = []
    for opt in options:
        # Newest name goes first, so the joined name reads outermost-to-innermost.
        reversed_names.insert(0, opt['name'])
        names.append('_'.join(reversed_names))
        args.append(opt['args'])

    if not names:
        raise ValueError(
            "setup_neighbourlist needs at least one neighbour-list option")

    # Only the most recent manager is needed to build the next one.
    manager = NeighbourListFactory(names[0], *args[0])
    for name, arg in zip(names[1:], args[1:]):
        manager = NeighbourListFactory(name, manager, *arg)
    return manager

In [None]:
class SphericalExpansion_test(object):
    """Thread-pooled driver for the spherical expansion (benchmark variant).

    ``transform`` creates ``n_thread`` neighbour-list managers and
    representations, then distributes the frames over a thread pool; each
    task borrows one manager/representation pair through a queue of slot
    indices (see ``compute_wrapper``).

    Parameters
    ----------
    interaction_cutoff, cutoff_smooth_width, max_radial, max_angular,
    gaussian_sigma_type, gaussian_sigma_constant, n_species
        Hyperparameters stored in ``self.hypers`` and serialised to JSON for
        the representation.  ``interaction_cutoff`` is also passed to the
        'neighbourlist' and 'strict' neighbour-list options.
    n_thread : int
        Number of worker threads and of manager/representation pairs.
    """

    def __init__(self, interaction_cutoff, cutoff_smooth_width,
                 max_radial, max_angular, gaussian_sigma_type,
                 gaussian_sigma_constant=0., n_species=1, n_thread=5):
        self.name = 'sphericalexpansion'
        self.hypers = dict()
        self.update_hyperparameters(
            interaction_cutoff=interaction_cutoff,
            cutoff_smooth_width=cutoff_smooth_width,
            max_radial=max_radial, max_angular=max_angular,
            gaussian_sigma_type=gaussian_sigma_type,
            gaussian_sigma_constant=gaussian_sigma_constant,
            n_species=n_species)

        self.nl_options = [
            dict(name='centers', args=[]),
            dict(name='neighbourlist', args=[interaction_cutoff]),
            dict(name='strict', args=[interaction_cutoff])
        ]

        # The representation name is suffixed with the name of the last
        # neighbour-list manager in the chain.
        neighbourlist_full_name = self._neighbourlist_full_name(
            self.nl_options)
        self.name = self.name + '_' + neighbourlist_full_name

        self.n_thread = n_thread

    @staticmethod
    def _neighbourlist_full_name(nl_options):
        """Join the option names last-to-first with '_'.

        This is the naming rule ``setup_neighbourlist`` uses for the last
        manager it builds.
        NOTE(review): replaces a call to ``get_neighbourlist_full_name``,
        which is not defined or imported in this notebook; presumably the
        rascal helper follows the same rule -- confirm.
        """
        return '_'.join(opt['name'] for opt in reversed(nl_options))

    def update_hyperparameters(self, **hypers):
        """Store the recognised hyperparameters; unknown keys are ignored."""
        allowed_keys = {'interaction_cutoff', 'cutoff_smooth_width',
                        'max_radial', 'max_angular', 'gaussian_sigma_type',
                        'gaussian_sigma_constant', 'n_species'}
        hypers_clean = {key: hypers[key] for key in hypers
                        if key in allowed_keys}
        self.hypers.update(hypers_clean)

    def transform(self, frames):
        """Compute the features of ``frames`` using ``n_thread`` threads.

        Each frame is converted with ``unpack_ase`` and handed to
        ``compute_wrapper``; the per-frame matrices are concatenated along
        axis 0 in the order of ``frames``.

        Side effect: (re)binds the module-level globals ``managers`` and
        ``representations``, which ``compute_wrapper`` indexes by slot id.
        """
        hypers_str = json.dumps(self.hypers)
        n_features = self.get_num_coefficients()

        # compute_wrapper reads these as module-level globals.
        global managers, representations
        managers = [setup_neighbourlist(self.nl_options)
                    for _ in range(self.n_thread)]
        representations = [
            RepresentationFactory(self.name, manager, hypers_str)
            for manager in managers]

        # One token per manager/representation pair; a task holds a token
        # for as long as it uses the corresponding pair.
        q = queue.Queue()
        for ii in range(self.n_thread):
            q.put(ii)

        # Build the task list before starting the pool so a conversion error
        # cannot leave worker threads behind.
        structures = [(unpack_ase(frame), q, n_features, hypers_str)
                      for frame in frames]

        pool = ThreadPool(self.n_thread)
        try:
            cms = pool.starmap(compute_wrapper, structures)
        finally:
            # Always release the worker threads, even if a task raised.
            pool.close()
            pool.join()

        return np.concatenate(cms, axis=0)

    def get_num_coefficients(self):
        """Return the number of coefficients in the spherical expansion

        (this is the descriptor size per atomic centre)

        """
        return (self.hypers['n_species'] * self.hypers['max_radial']
                * (self.hypers['max_angular'] + 1)**2)


def compute_wrapper(structure, q, n_features, hypers_str):
    """Compute the feature matrix of one structure on a borrowed slot.

    Parameters
    ----------
    structure : dict
        Keyword arguments forwarded to the manager's ``update``.
    q : queue.Queue
        Queue of free slot indices into the module-level ``managers`` and
        ``representations`` lists.  Blocks until a slot is available.
    n_features, hypers_str
        Arguments forwarded to ``FeatureManager.Dense_double``.

    Returns
    -------
    The transposed feature matrix of the structure.
    """
    idx = q.get()
    try:
        features = FeatureManager.Dense_double(n_features, hypers_str)
        managers[idx].update(**structure)
        representations[idx].compute()
        features.append(representations[idx])
        # Read the matrix while the slot is still reserved: whether `features`
        # copies or references the representation's data is not visible here,
        # so do not let another task reuse the slot before the result is taken.
        return features.get_feature_matrix().T
    finally:
        # Always hand the slot back; a slot lost on an exception would leave
        # the remaining tasks blocked in q.get().
        q.put(idx)


In [None]:
# Read the structures selected by index ':10' from the reference data set.
frames = ase.io.read('../tests/reference_data/dft-smiles_500.xyz', index=':10')
# np.eye(3) * 1 is the identity matrix, i.e. a 1x1x1 "supercell" of each frame;
# change the factor to replicate the cell.
new_frames = [make_supercell(frame, np.eye(3) * 1) for frame in frames]
# Length of the largest frame.
print(max(len(frame) for frame in new_frames))

21


In [None]:
# Hyperparameters passed as keyword arguments to the spherical expansion.
test_hypers = dict(
    interaction_cutoff=6.0,
    cutoff_smooth_width=1.0,
    max_radial=10,
    max_angular=10,
    gaussian_sigma_type="Constant",
    gaussian_sigma_constant=0.5,
)

In [None]:
# SphericalExpansion imported from rascal.representation, configured with test_hypers.
sph_expn = SphericalExpansion(**test_hypers)
# Threaded variant defined in this notebook. It is disabled here, although the
# `%timeit` cells further down still reference `sph_expn_test`.
# sph_expn_test = SphericalExpansion_test(n_thread=5,**test_hypers)

In [None]:
# (disabled) the same computation with the threaded variant, kept for comparison
# X_test = sph_expn_test.transform(new_frames)
# Run the expansion on all frames, then take the transposed feature matrix.
expansions = sph_expn.transform(new_frames)
X = expansions.get_feature_matrix().T
# (disabled) consistency check between the two implementations
# np.allclose(X,X_test)

In [None]:
# Indices of any entries of X larger than 1e12 (empty index arrays mean there are none).
np.where(X > 1e12)

In [47]:
# Display the atomic positions of the first frame in new_frames.
new_frames[0].get_positions()

array([[6.98317171, 8.30586308, 7.12062128],
       [6.93583192, 6.96431033, 7.0455309 ],
       [5.87723484, 6.1931998 , 7.03875667],
       [6.46803413, 4.97494009, 7.00081846],
       [7.77060465, 4.92298065, 6.98009097],
       [8.10112648, 6.2922714 , 7.01042973],
       [5.68535004, 3.7303275 , 6.97814458],
       [4.47376713, 3.85444197, 6.98233375],
       [6.31750187, 2.68820005, 6.95641409],
       [7.84655425, 8.77185729, 6.8970139 ],
       [6.12347267, 8.80379422, 6.96024197]])

In [26]:
%timeit -n 2 -r 1 expansions = sph_expn_test.transform(new_frames)

2.51 s ± 0 ns per loop (mean ± std. dev. of 1 run, 2 loops each)


In [27]:
%timeit -n 2 -r 1 expansions = sph_expn.transform(new_frames)

9.11 s ± 0 ns per loop (mean ± std. dev. of 1 run, 2 loops each)


In [None]:
%timeit -n 2 -r 1 expansions = sph_expn_test.transform(new_frames)

In [17]:
# Display the feature matrix of `expansions` (not transposed here, unlike `X` above).
expansions.get_feature_matrix()

array([[ 1.45614444e-01,  8.34125492e-02,  8.34121180e-02,
         8.34118823e-02,  8.34129731e-02],
       [ 1.51520440e-01,  8.56712202e-02,  8.56707820e-02,
         8.56705343e-02,  8.56716713e-02],
       [ 1.15017638e-01,  6.47833515e-02,  6.47830269e-02,
         6.47828393e-02,  6.47836962e-02],
       ...,
       [ 1.33473428e-09,  2.00386882e-08, -1.36507936e-02,
        -9.58668851e-03,  2.32344582e-02],
       [ 4.83086450e-10,  2.04702850e-08, -1.19357179e-02,
        -8.38226947e-03,  2.03149496e-02],
       [ 1.13720106e-10,  1.24756641e-08, -6.45847379e-03,
        -4.53570756e-03,  1.09923510e-02]])

Good: the feature matrix should have 5 centres (5 atoms) × 10 radial × 81 angular components, where $81 = (l+1)^2$.

In [None]:
# load a small subset of structures from QM9
# The path is spelled out relative to the notebook, like the earlier loading
# cell; the previous `p +` prefix referred to a variable that is never defined.
frames = read('../tests/reference_data/dft-smiles_500.xyz', ':')
cutoff = 3.

In [None]:
# Compute the sorted coulomb matrices for the list of structures
#%timeit -n 20 -r 5 features = rep.transform(frames)

In [None]:
# Compare with reference
#%timeit -n 20 -r 5 ref = get_coulomb_refs(frames,rep.size,cutoff,flavour='row-norm')
#np.allclose(test,ref)