To install rascal:
(NOTE: See the top-level README for the most up-to-date installation instructions.)
+ mkdir ../build
+ cd ../build
+ cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTS=ON ..
+ make -j 4
+ make install

In [None]:
# Restrict OpenMP and MKL to a single thread.
# A `!export ...` line runs in a throw-away subshell, so the variable never
# reaches this kernel; set it on the running process instead.
import os
os.environ["OMP_NUM_THREADS"] = "1"
from mkl import set_num_threads
set_num_threads(1)

In [None]:
%matplotlib inline
from matplotlib import pylab as plt

import os, sys
from ase.io import read
sys.path.insert(0,"../build/")

import sys
import time
import rascal
import json

import ase
from ase.io import read, write
from ase.build import make_supercell
from ase.visualize import view
import numpy as np
import sys

import json

from rascal.representations import SphericalInvariants
from rascal.models import Kernel
from rascal.utils import CURFilter

# test computing sparse kernel gradient with finite differences

In [None]:
# Load the small molecules (index ':100' keeps the first 100 structures of the file)
frames = read('../reference_data/inputs/small_molecules-1000.xyz',':100')

In [None]:
# Hyperparameters of the SOAP power-spectrum representation. Gradients are
# switched off for this instance: it is only used in this cell to compute the
# features and to select the sparse points.
hypers = dict(soap_type="PowerSpectrum",
              interaction_cutoff=3.5, 
              max_radial=3, 
              max_angular=3, 
              gaussian_sigma_constant=0.4,
              gaussian_sigma_type="Constant",
              cutoff_smooth_width=0.5,
              normalize=False,
              radial_basis="GTO",
              compute_gradients=False,
              expansion_by_species_method='structure wise',
              )
soap = SphericalInvariants(**hypers)

# features of all loaded structures
managers = soap.transform(frames)

# Select pseudo input with CUR decomposition
# NOTE(review): keys are presumably atomic numbers (H, C, O, N) and values the
# number of sparse points kept per species -- confirm against CURFilter
n_pseudo = {1:10,6:10,8:10,7:10}

compressor = CURFilter(soap, n_pseudo, act_on='sample per specie')

X_pseudo = compressor.fit_transform(managers)

In [None]:
# Rebuild the representation with gradients enabled; X_pseudo keeps the sparse
# points selected with the gradient-free instance.
hypers['compute_gradients'] =  True
soap = SphericalInvariants(**hypers)

# sparse GAP kernel with structure-wise targets, used for the analytical reference
kernel = Kernel(soap,name='GAP', zeta=1, target_type='Structure', kernel_type='Sparse')

In [None]:
# compute the reference: kernel gradients for the first structure only
# NOTE(review): compute_gradients=(True, False) presumably requests gradients
# with respect to the first argument only -- confirm against the Kernel docs
managers = soap.transform([frames[0]])
KNM_ref = kernel(managers, X_pseudo, compute_gradients=(True, False))

In [None]:
# test for one row
def update_frame(frame, i_at, disp):
    """Return a copy of `frame` in which atom `i_at` is shifted by `disp`.

    The shift is applied to the object returned by `frame.copy()`, which is
    what gets returned. `disp` is converted with `np.asarray` before being
    added to row `i_at` of the copy's `positions`.
    """
    displaced = frame.copy()
    displaced.positions[i_at] += np.asarray(disp)
    return displaced

def compute_displaced_kernel(kernel, rep, frame, i_atom, disp, X_pseudo):
    """Evaluate the gradient-free kernel of `frame` with one atom displaced.

    Atom `i_atom` is shifted by `disp` on a copy of `frame` (via
    `update_frame`), the copy is transformed with `rep`, and the kernel
    against `X_pseudo` is returned with both gradient flags set to False.
    """
    shifted_managers = rep.transform([update_frame(frame, i_atom, disp)])
    return kernel(shifted_managers, X_pseudo, compute_gradients=(False, False))


# Central finite difference along x for one atom of the first structure.
frame = frames[0]
eps = 1e-5
i_atom = 0
# +/- eps steps along x, y and z; only the x pair (rows 0 and 1) is used in this cell
disps = eps*np.array([[1,0,0],[-1,0,0],[0,1,0],[0,-1,0],[0,0,1],[0,0,-1],])
# Rebind `kernel` to an atom-wise kernel; the displaced kernels are summed over axis 0 below.
# NOTE(review): presumably one row per atom, so the sum matches the structure-wise kernel -- confirm
kernel = Kernel(soap,name='GAP', zeta=1, target_type='Atom', kernel_type='Sparse')

KNM_p = compute_displaced_kernel(kernel, soap, frame, i_atom, disps[0],X_pseudo)
KNM_m = compute_displaced_kernel(kernel, soap, frame, i_atom, disps[1],X_pseudo)
KNM_r_x = ((KNM_p - KNM_m) / (2*eps)).sum(axis=0)
# compare with row 0 of the analytical reference
# NOTE(review): assumes row 0 of KNM_ref is d/dx for atom 0 -- confirm
np.allclose(KNM_r_x,KNM_ref[0])

In [None]:
# test for a whole structure
def compute_numerical_kernel_gradient(kernel, representation, frame, X_pseudo, eps=1e-5):
    """Central finite-difference kernel gradient for every atom of `frame`.

    Each atom is displaced by +eps and -eps along x, y and z in turn. The
    difference of the two displaced kernels, divided by 2*eps and summed over
    axis 0, fills row `3*i_atom + direction` of the result.

    Returns an array of shape (3 * len(frame), X_pseudo.size()).
    """
    n_atoms = len(frame)
    # steps[direction] holds the (forward, backward) displacement pair
    steps = eps*np.array([[[1,0,0],[-1,0,0]],[[0,1,0],[0,-1,0]],[[0,0,1],[0,0,-1]]])
    gradient = np.zeros((n_atoms*3, X_pseudo.size()))

    for row in range(n_atoms*3):
        i_atom, i_der = divmod(row, 3)
        forward, backward = steps[i_der]
        k_plus = compute_displaced_kernel(kernel, representation, frame, i_atom, forward, X_pseudo)
        k_minus = compute_displaced_kernel(kernel, representation, frame, i_atom, backward, X_pseudo)
        gradient[row] = ((k_plus - k_minus) / (2*eps)).sum(axis=0)
    return gradient

# finite-difference gradients for every atom of `frame`, compared with the analytical reference
KNM_test = compute_numerical_kernel_gradient(kernel, soap, frame, X_pseudo)
np.allclose(KNM_test,KNM_ref)

In [None]:
# sanity check: shape of the stacked numerical + reference arrays, and of the reference alone
np.vstack([KNM_test,KNM_ref]).shape,KNM_ref.shape

# test the rascal infrastructure

In [None]:
# Load the small molecules (index ':100' keeps the first 100 structures of the file)
frames = read('../reference_data/inputs/small_molecules-1000.xyz',':100')

In [None]:
# Hyperparameters of the SOAP power-spectrum representation. Gradients are
# switched off for this instance: it is only used in this cell to compute the
# features and to select the sparse points.
hypers = dict(soap_type="PowerSpectrum",
              interaction_cutoff=3.5, 
              max_radial=3, 
              max_angular=3, 
              gaussian_sigma_constant=0.4,
              gaussian_sigma_type="Constant",
              cutoff_smooth_width=0.5,
              normalize=False,
              radial_basis="GTO",
              compute_gradients=False,
              expansion_by_species_method='structure wise',
              )
soap = SphericalInvariants(**hypers)

# features of all loaded structures
managers = soap.transform(frames)

# Select pseudo input with CUR decomposition
# NOTE(review): keys are presumably atomic numbers (H, C, O, N) and values the
# number of sparse points kept per species -- confirm against CURFilter
n_pseudo = {1:10,6:10,8:10,7:10}

compressor = CURFilter(soap, n_pseudo, act_on='sample per specie')

X_pseudo = compressor.fit_transform(managers)

In [None]:
# Rebuild the representation with gradients enabled; X_pseudo keeps the sparse
# points selected with the gradient-free instance.
hypers['compute_gradients'] =  True
soap = SphericalInvariants(**hypers)

# sparse GAP kernel with structure-wise targets
kernel = Kernel(soap, name='GAP', zeta=1, target_type='Structure', kernel_type='Sparse')

In [None]:
%%time
# Timing baseline: kernel evaluated WITHOUT gradients (both flags False).
# The KNM_ref bound here is overwritten by the gradient reference in the next cell.
managers = soap.transform(frames)
KNM_ref = kernel(managers, X_pseudo, compute_gradients=(False, False))

In [None]:
%%time
# compute the reference: kernel gradients for all loaded structures
managers = soap.transform(frames)
KNM_ref = kernel(managers, X_pseudo, compute_gradients=(True, False))

In [None]:
%%time
# Numerical gradients requested from the kernel itself, compared with the reference.
# The raw frames are passed here rather than the precomputed managers.
KNM_test = kernel(frames, X_pseudo, compute_gradients=(True, False), numerical_gradients=True)
np.allclose(KNM_test,KNM_ref)

In [None]:
# the numerical gradients are quite slow to compute so here is a way to speed it up
# the kernel used has to be already in the namespace and in variable 'kernel'
from concurrent.futures import as_completed, ProcessPoolExecutor, ThreadPoolExecutor
from tqdm.notebook import tqdm

def get_strides(frames):
    """Return the row layout of a stacked per-structure gradient matrix.

    Every frame contributes 3 * len(frame) rows.

    Returns a tuple (number of frames, total number of rows, offsets), where
    `offsets` is a NumPy array of length len(frames) + 1 and the rows of frame
    i span offsets[i]:offsets[i + 1].
    """
    rows_per_frame = [len(frame)*3 for frame in frames]
    # leading 0 so that offsets[i] is the first row of frame i
    offsets = np.cumsum([0] + rows_per_frame)
    return len(frames), sum(rows_per_frame), offsets

def compute_grad(i_frame, frame):
    """Return the kernel gradient rows of one frame against `X_pseudo`.

    Reads the notebook globals `kernel` and `X_pseudo`. `i_frame` is accepted
    but not used in the body.
    """
    # NOTE(review): every other kernel call in this notebook passes
    # `compute_gradients=`; this one passes `grad=` -- confirm which keyword
    # the installed Kernel accepts.
    return kernel(kernel._rep.transform([frame]), X_pseudo, grad=(True, False))

def compute_grad_numerical(i_frame,frame):
    """Return the numerically differentiated kernel gradient rows of one frame.

    Reads the notebook globals `kernel` and `X_pseudo`; the raw frame is passed
    to the kernel directly. `i_frame` is accepted but not used in the body.
    """
    # NOTE(review): other cells of this notebook call the kernel with
    # `compute_gradients=` / `numerical_gradients=`; this call uses `grad=` /
    # `numerical_grad=` -- confirm which spelling the installed Kernel accepts.
    return kernel([frame], X_pseudo, grad=(True, False), numerical_grad=True)

def compute_KNM(frames,max_workers=20, numerical_gradients=False):
    """Assemble the kernel-gradient matrix of `frames` with a process pool.

    One task per frame is submitted to a ProcessPoolExecutor. Each result is
    written into the row range given by `get_strides`, so the output order
    follows `frames` whatever the completion order.

    Parameters
    ----------
    frames : sequence of structures; each contributes 3 * len(frame) rows.
    max_workers : number of worker processes handed to the pool.
    numerical_gradients : if True use `compute_grad_numerical`, otherwise
        `compute_grad`.

    Returns
    -------
    Array of shape (total number of rows, X_pseudo.size()).

    Relies on the notebook globals `X_pseudo` and (through the workers)
    `kernel`.
    """
    _, Ngrads, Ngrad_stride = get_strides(frames)
    KNM = np.zeros((Ngrads, X_pseudo.size()))
    # The original body tested the undefined name `numerical_grad`; the flag
    # is the `numerical_gradients` parameter.
    worker = compute_grad_numerical if numerical_gradients else compute_grad
    # The context managers close the progress bar and shut the pool down even
    # if a worker raises.
    with tqdm(frames,desc='kernel',leave=False) as pbar, \
            ProcessPoolExecutor(max_workers=max_workers) as executor:
        future_to_compute = {executor.submit(worker, i_frame, frame): i_frame
                             for i_frame, frame in enumerate(frames)}
        for future in as_completed(future_to_compute):
            i_frame = future_to_compute[future]
            grad_rows = future.result()
            KNM[Ngrad_stride[i_frame]:Ngrad_stride[i_frame+1]] = grad_rows
            pbar.update()
    return KNM


In [None]:
%%time
# gradients from compute_grad through the process pool (single worker), compared with the reference
kk = compute_KNM(frames, max_workers=1, numerical_gradients=False)
np.allclose(kk,KNM_ref)

In [None]:
%%time
# numerical gradients spread over 4 worker processes, compared with the reference
kk = compute_KNM(frames, max_workers=4, numerical_gradients=True)
np.allclose(kk,KNM_ref)

# Compute derivatives with the bispectrum

In [None]:
# Load the small molecules (index ':100' keeps the first 100 structures of the file)
frames = read('../reference_data/inputs/small_molecules-1000.xyz',':100')

In [None]:
# Hyperparameters of the bispectrum representation, with smaller
# max_radial / max_angular than the power-spectrum sections of this notebook.
# Gradients are switched off on the representation.
hypers = dict(soap_type="BiSpectrum",
              interaction_cutoff=3.5, 
              max_radial=2, 
              max_angular=2, 
              gaussian_sigma_constant=0.5,
              gaussian_sigma_type="Constant",
              cutoff_smooth_width=0.5,
              normalize=False,
              radial_basis="GTO",
              compute_gradients=False,
              expansion_by_species_method='structure wise',
              )
soap = SphericalInvariants(**hypers)

# features of all loaded structures
managers = soap.transform(frames)

# Select pseudo input with CUR decomposition
# NOTE(review): keys are presumably atomic numbers (H, C, O, N) and values the
# number of sparse points kept per species -- confirm against CURFilter
n_pseudo = {1:10,6:10,8:10,7:10}

compressor = CURFilter(soap, n_pseudo, act_on='sample per specie')

X_pseudo = compressor.fit_transform(managers)

In [None]:
# NOTE(review): unlike the power-spectrum sections, gradients stay disabled on
# the representation here; the only gradients requested in this section are
# numerical -- confirm this is intentional.
hypers['compute_gradients'] =  False
soap = SphericalInvariants(**hypers)

# sparse GAP kernel with structure-wise targets
kernel = Kernel(soap, name='GAP', zeta=1, target_type='Structure', kernel_type='Sparse')

In [None]:
%%time
# numerical kernel gradients for the bispectrum; the raw frames are passed rather than managers
KNM = kernel(frames, X_pseudo, compute_gradients=(True, False), numerical_gradients=True)