# Compute the atom-centered Coulomb Matrix representation

To install the QML library (used as a reference to compare the rascal results against) together with ASE:
+ pip install qml ase

To install rascal:
+ mkdir ../build
+ cd ../build
+ cmake -DCMAKE_BUILD_TYPE=Release ..
+ make install -j 4

In [6]:
%env OMP_NUM_THREADS=1

env: OMP_NUM_THREADS=1


In [7]:
%matplotlib inline
from matplotlib import pyplot as plt

In [13]:
import qml
from ase.io import read
from ase.visualize import view
import ase
import numpy as np
import sys, os
from copy import copy
from ase.build import make_supercell

In [14]:
import sys, os
sys.path.insert(0,'../build/')
import rascal as rc
from rascal.representations import SphericalInvariant
from rascal.models import Kernel

In [15]:
from rascal.lib import NeighbourList
from rascal.neighbourlist.structure_manager import convert_to_structure_list
from rascal.neighbourlist.base import StructureCollectionFactory

In [16]:
# Cutoff shared by the neighbour list and by its strict filter.
cutoff = 3

# Options for the structure managers, listed in the order
# centers -> neighbourlist -> strict.
nl_options = [
    {'name': 'centers', 'args': {}},
    {'name': 'neighbourlist', 'args': {'cutoff': cutoff}},
    {'name': 'strict', 'args': {'cutoff': cutoff}},
]

# Build the manager collection from these options and display its repr.
aa = StructureCollectionFactory(nl_options)
aa

<rascal.lib._rascal.NeighbourList.ManagerCollection_Strict_NeighbourList_Centers at 0x7f43d33ec3e8>

In [17]:
# Print the parameters reported by the manager collection.
print(aa.get_parameters())

[
  {
    "initialization_arguments": {
      "cutoff": 3
    },
    "name": "neighbourlist"
  },
  {
    "initialization_arguments": {
      "cutoff": 3
    },
    "name": "strict"
  }
]


In [18]:
# Add structures read from the UBJSON reference file to the collection.
# NOTE(review): the trailing arguments (0, 20) presumably select a start index
# and a number of structures -- confirm against the add_structures signature.
aa.add_structures('../tests/reference_data/dft-smiles_500.ubjson', 0,20)

In [19]:
# Read the first ten frames of the xyz reference file with ASE, then convert
# them to the list of structure dictionaries used by rascal.
frames = read('../tests/reference_data/dft-smiles_500.xyz', index=':10')
structures = convert_to_structure_list(frames)

In [20]:
# Add the structures converted from the ASE frames to the manager collection.
aa.add_structures(structures)

In [21]:
# Open the ASE viewer on the current `frames`.
view(frames)

In [21]:
# SphericalInvariant representation; one hyperparameter per line so that
# individual values are easy to spot and change.
rep = SphericalInvariant(
    interaction_cutoff=3,
    cutoff_smooth_width=0.5,
    max_radial=6,
    max_angular=6,
    gaussian_sigma_type='Constant',
    gaussian_sigma_constant=0.4,
)

In [22]:
# Compute the representation of the frames; the cell output is len(ee).
ee = rep.transform(frames)
len(ee)

10

In [24]:
Kernel?

In [31]:
# Load the kernel reference data stored as UBJSON and display the first entry
# found under data['rep_info']['spherical_invariants'].
import ubjson
with open('../'+"tests/reference_data/kernel_reference.ubjson",'rb') as f:
    data = ubjson.load(f)
data['rep_info']['spherical_invariants'][0]

[{'kernel_matrix': [[0.529983827541185,
    0.3696433751038865,
    0.37875916154245304,
    0.4061923213685353,
    0.3791555142155063],
   [0.3696433751038865,
    0.76306623623047,
    0.7391219549564123,
    0.7019336072288856,
    0.7039398406743274],
   [0.37875916154245304,
    0.739121954956412,
    0.7538498686500099,
    0.6931319247321315,
    0.7111996728711326],
   [0.4061923213685351,
    0.701933607228885,
    0.693131924732132,
    0.6673470869856871,
    0.6784774294820263],
   [0.3791555142155063,
    0.7039398406743272,
    0.7111996728711317,
    0.6784774294820265,
    0.7786376202808231]],
  'hypers_rep': {'max_radial': 6,
   'max_angular': 0,
   'n_species': 1,
   'soap_type': 'RadialSpectrum',
   'normalize': True,
   'cutoff_function': {'type': 'Cosine',
    'cutoff': {'value': 3.5, 'unit': 'A'},
    'smooth_width': {'value': 0.5, 'unit': 'A'}},
   'gaussian_density': {'type': 'Constant',
    'gaussian_sigma': {'value': 0.5, 'unit': 'A'}},
   'radial_contributi

In [25]:
# Scratch check of str.lower().
# NOTE(review): this rebinds `aa`, the name an earlier cell uses for the
# structure manager collection.
aa = 'SADF'
aa.lower()

'sadf'

In [23]:
# Build a kernel from the representation with zeta=2 and evaluate it on the
# features `ee`; the resulting matrix is the cell output.
# NOTE(review): `ee` must hold the output of rep.transform when this runs;
# another cell of this notebook rebinds `ee` to a lambda.
kernel = Kernel(rep, zeta=2)
kernel(ee)

array([[0.20215704, 0.07144007, 0.07137627, 0.0893289 , 0.0701958 ,
        0.09587878, 0.08068481, 0.06840189, 0.06463663, 0.06607975],
       [0.07144007, 0.36635113, 0.33585565, 0.27939179, 0.28290875,
        0.27815235, 0.309234  , 0.33704307, 0.33964739, 0.35948015],
       [0.07137627, 0.33585565, 0.3649611 , 0.25795098, 0.27918284,
        0.25565276, 0.29993178, 0.35284034, 0.33284948, 0.34314555],
       [0.0893289 , 0.27939179, 0.25795098, 0.2482194 , 0.23830768,
        0.24513184, 0.24674855, 0.25922117, 0.26277728, 0.27774887],
       [0.0701958 , 0.28290875, 0.27918284, 0.23830768, 0.35122181,
        0.21506458, 0.25148475, 0.30149473, 0.28314907, 0.31387769],
       [0.09587878, 0.27815235, 0.25565276, 0.24513184, 0.21506458,
        0.28735338, 0.26221447, 0.25023787, 0.26734011, 0.28274654],
       [0.08068481, 0.309234  , 0.29993178, 0.24674855, 0.25148475,
        0.26221447, 0.31290544, 0.31188068, 0.31651137, 0.32808109],
       [0.06840189, 0.33704307, 0.3528403

In [9]:
# Create an AtomicStructureList and append the first structure twice; the
# dictionary entries of structures[0] are passed as keyword arguments.
ll = NeighbourList.AtomicStructureList()
ll.append(**structures[0])
ll.append(**structures[0])

In [10]:
# Print the positions of every structure stored in the list.
for structure in ll:
    print(structure.get_positions())

[[6.98317171 6.93583192 5.87723484 6.46803413 7.77060465 8.10112648
  5.68535004 4.47376713 6.31750187 7.84655425 6.12347267]
 [8.30586308 6.96431033 6.1931998  4.97494009 4.92298065 6.2922714
  3.7303275  3.85444197 2.68820005 8.77185729 8.80379422]
 [7.12062128 7.0455309  7.03875667 7.00081846 6.98009097 7.01042973
  6.97814458 6.98233375 6.95641409 6.8970139  6.96024197]]


In [8]:
# Number of structures held by the list.
# NOTE(review): the recorded output (1) appears to come from a run made before
# the cell that appends twice -- re-run to confirm.
len(ll)

1

In [6]:
# Inspect the first converted structure (keys: cell, positions, atom_types, pbc).
structures[0]

{'cell': array([[14.,  0.,  0.],
        [ 0., 14.,  0.],
        [ 0.,  0., 14.]]),
 'positions': array([[6.98317171, 6.93583192, 5.87723484, 6.46803413, 7.77060465,
         8.10112648, 5.68535004, 4.47376713, 6.31750187, 7.84655425,
         6.12347267],
        [8.30586308, 6.96431033, 6.1931998 , 4.97494009, 4.92298065,
         6.2922714 , 3.7303275 , 3.85444197, 2.68820005, 8.77185729,
         8.80379422],
        [7.12062128, 7.0455309 , 7.03875667, 7.00081846, 6.98009097,
         7.01042973, 6.97814458, 6.98233375, 6.95641409, 6.8970139 ,
         6.96024197]]),
 'atom_types': array([[7],
        [6],
        [7],
        [6],
        [7],
        [8],
        [7],
        [8],
        [8],
        [1],
        [1]]),
 'pbc': array([[0],
        [0],
        [0]])}

In [9]:
# Display the Kernel class to see which module it comes from.
Kernel

rascal.models.kernels.Kernel

In [11]:
# NOTE(review): the recorded output of this cell is
# "AttributeError: module 'rascal' has no attribute 'lib'". Another cell of
# this notebook imports NeighbourList with `from rascal.lib import NeighbourList`.
rc.lib.NeighbourList

AttributeError: module 'rascal' has no attribute 'lib'

In [1]:
import json,ubjson

In [6]:
# Convert the reference structures from JSON to UBJSON: load the .json file,
# then write the same data to the .ubjson file in the same directory.
with open("../tests/reference_data/dft-smiles_500.json", 'r') as f:
    data = json.load(f)
with open("../tests/reference_data/dft-smiles_500.ubjson", 'wb') as f:
    ubjson.dump(data,f)

In [9]:
!ls -la  ../tests/reference_data/

total 18572
drwxr-xr-x 2 musil cosmo    4096 giu 14 16:46 .
drwxr-xr-x 4 musil cosmo    4096 giu 14 14:20 ..
-rw-r--r-- 1 musil cosmo    5016 apr 15 14:42 behler_parinello_pair_hypers.json
-rw-r--r-- 1 musil cosmo    1174 feb 19 14:19 CaCrP2O7_mvc-11955_symmetrized.cif
-rw-r--r-- 1 musil cosmo    2447 giu 14 14:06 CaCrP2O7_mvc-11955_symmetrized.json
-rw-r--r-- 1 musil cosmo  499992 giu 14 16:28 dft-smiles_500.json
-rw-r--r-- 1 musil cosmo  381444 giu 14 16:47 dft-smiles_500.ubjson
-rw-r--r-- 1 musil cosmo  545257 feb 19 14:19 dft-smiles_500.xyz
-rw-r--r-- 1 musil cosmo  288802 giu  6 10:12 hyp1f1_reference.ubjson
-rw-r--r-- 1 musil cosmo     451 apr 12 10:51 methane.json
-rw-r--r-- 1 musil cosmo     232 mag  9 19:17 methane.xyz
-rw-r--r-- 1 musil cosmo   16738 giu 14 14:06 molecular_crystal.json
-rw-r--r-- 1 musil cosmo     349 giu 14 14:06 simple_cubic_3.json
-rw-r--r-- 1 musil cosmo     433 giu 14 14:06 simple_cubic_8.json
-rw-r--r-- 1 musil cosmo    1276 giu 14 14:06 

In [5]:
json.dump?

In [4]:
ubjson.dump?

In [None]:
# Load a small subset of structures from QM9 and set up the sorted Coulomb
# matrix representation (rows sorted with the 'row_norm' algorithm).
# SortedCoulombMatrix is imported here because no other cell imports it: only
# SphericalInvariant is imported from rascal.representations, so this cell
# would otherwise raise a NameError.
from rascal.representations import SortedCoulombMatrix

frames = read('../tests/reference_data/dft-smiles_500.xyz',':')
cutoff = 3.
rep = SortedCoulombMatrix(cutoff,sorting_algorithm='row_norm')

In [None]:
# Pass every frame through make_supercell with np.eye(3)*1 as the
# transformation, then print the largest number of atoms among the results.
# `aa` is reset to an empty list and is not used further in this cell.
aa = []
new_frames = [make_supercell(frame, np.eye(3) * 1) for frame in frames]
print(np.max([len(new_frame) for new_frame in new_frames]))

In [None]:
# Open the ASE viewer to have a look at the loaded frames.
view(frames)

In [None]:
# Time the computation of the sorted Coulomb matrices for the list of
# structures (-n 3 loops, -r 3 repeats).
# NOTE(review): the assignment made inside %timeit presumably does not persist
# in the notebook namespace; the next cell computes `features` explicitly.
%timeit -n 3 -r 3 features = rep.transform(new_frames)

In [None]:
# Compute the features and extract the transposed feature matrix `test`,
# which is meant to be compared with the QML reference.
features = rep.transform(new_frames)
test = features.get_feature_matrix().T

In [31]:
import json
def load_json(fn):
    """Read the JSON file `fn` and return the entry of its first listed id.

    The file must contain an 'ids' list; the value returned is the one stored
    under the string form of the first element of that list.
    """
    with open(fn, 'r') as handle:
        content = json.load(handle)
    first_id = content['ids'][0]
    return content[str(first_id)]
def json2ase(f):
    """Build an `ase.Atoms` object from a structure dictionary.

    The entries 'positions', 'atom_types', 'pbc' and 'cell' of `f` are handed
    to `ase.Atoms` as the keyword arguments `positions`, `numbers`, `pbc` and
    `cell` respectively. Other entries of `f` are ignored.
    """
    key_to_kwarg = (
        ('positions', 'positions'),
        ('atom_types', 'numbers'),
        ('pbc', 'pbc'),
        ('cell', 'cell'),
    )
    atoms_kwargs = {}
    for key, kwarg in key_to_kwarg:
        atoms_kwargs[kwarg] = f[key]
    return ase.Atoms(**atoms_kwargs)

# Reference Coulomb matrices computed with QML
def get_coulomb_ref(frame,size,cutoff,flavour):
    """Compute the atomic Coulomb matrices of a single frame with QML.

    Parameters
    ----------
    frame : object providing get_atomic_numbers() and get_positions()
    size : forwarded to QML as `size`
    cutoff : forwarded to QML as `central_cutoff`
    flavour : forwarded to QML as `sorting`

    Returns
    -------
    The value returned by qml's generate_atomic_coulomb_matrix.
    """
    from qml.representations import generate_atomic_coulomb_matrix

    # Options that are fixed for every call, plus the forwarded arguments.
    qml_options = dict(
        size=size,
        sorting=flavour,
        central_cutoff=cutoff,
        central_decay=-1,
        interaction_cutoff=1e6,
        interaction_decay=-1,
        indices=None,
    )
    return generate_atomic_coulomb_matrix(
        frame.get_atomic_numbers(), frame.get_positions(), **qml_options)
def get_coulomb_refs(frames,size,cutoff,flavour='distance'):
    """Stack the QML Coulomb matrices of several frames.

    get_coulomb_ref is called on each frame with the same `size`, `cutoff`
    and `flavour`; the per-frame results are stacked vertically with
    np.vstack and returned as one array.
    """
    per_frame = [get_coulomb_ref(frame, size, cutoff, flavour) for frame in frames]
    return np.vstack(per_frame)

In [32]:
# Rebind `frames` to a one-element list holding the CaCrP2O7 structure loaded
# from its JSON reference file.
frames = [json2ase(load_json('../tests/reference_data/CaCrP2O7_mvc-11955_symmetrized.json'))]

In [33]:
from scipy.spatial.distance import pdist, cdist, squareform

In [34]:
def ee(x):
    """Return x*(x+1)/2, using true division (a float for integer input)."""
    return x * (x + 1) / 2

ee(5)


15.0

In [35]:
# Compute the QML reference Coulomb matrices for the current `frames` with
# 'row-norm' sorting and display the first row.
# NOTE(review): the displayed row has 55 entries, which equals
# size*(size+1)/2 for size = 10, with trailing zeros -- confirm that this is
# the intended layout.
cutoff = 3
size = 10
ref = get_coulomb_refs(frames,
                       size,cutoff,flavour='row-norm')
# Disabled comparison of the rascal features (`test`) with the QML reference:
# ref = get_coulomb_refs(new_frames,rep.size,cutoff,flavour='row-norm')
# np.allclose(test,ref)
ref[0]

array([662.89080347,  68.65083598,  73.51669472,  66.46023916,
        22.54405928,  73.51669472,  65.3230041 ,  16.61846506,
        13.46061234,  73.51669472,  60.74626661,  17.35297313,
        21.70425427,  18.59569351,  73.51669472,   0.        ,
         0.        ,   0.        ,   0.        ,   0.        ,
         0.        ,   0.        ,   0.        ,   0.        ,
         0.        ,   0.        ,   0.        ,   0.        ,
         0.        ,   0.        ,   0.        ,   0.        ,
         0.        ,   0.        ,   0.        ,   0.        ,
         0.        ,   0.        ,   0.        ,   0.        ,
         0.        ,   0.        ,   0.        ,   0.        ,
         0.        ,   0.        ,   0.        ,   0.        ,
         0.        ,   0.        ,   0.        ,   0.        ,
         0.        ,   0.        ,   0.        ])

In [38]:
# Full pairwise distance matrix of the first frame.
ff = frames[0]
pos = ff.get_positions()
dd = squareform(pdist(pos))

# Indices of the atoms closer than 3.1 to atom 0 (atom 0 itself included).
ids = np.flatnonzero(dd[0] < 3.1)
ids

array([ 0, 10, 17, 18, 19])

In [39]:
# Distances from atom 0 to the selected atoms (the first entry is atom 0 itself).
dd[0, ids]

array([0.        , 2.44936684, 2.63390672, 2.3306344 , 2.40745447])

In [23]:
# Same selection as dd[0, ids], written with a boolean mask.
dd[0,dd[0]<3.1]

array([0.        , 2.44936684, 2.63390672, 2.3306344 , 2.40745447])

In [41]:
# Distance sub-matrix restricted to the selected atoms (rows and columns `ids`).
dd[np.ix_(ids,ids)]

array([[0.        , 2.44936684, 2.63390672, 2.3306344 , 2.40745447],
       [2.44936684, 0.        , 3.44165707, 3.85113786, 4.75461282],
       [2.63390672, 3.44165707, 0.        , 3.6881288 , 2.94873066],
       [2.3306344 , 3.85113786, 3.6881288 , 0.        , 2.83888537],
       [2.40745447, 4.75461282, 2.94873066, 2.83888537, 0.        ]])

In [40]:
# Positions of all atoms of the frame, one row per atom.
pos

array([[3.68954016, 5.03568186, 4.64369552],
       [5.12301681, 2.13482791, 2.66220405],
       [1.99411973, 0.94691001, 1.25068234],
       [6.81843724, 6.22359976, 6.05521724],
       [2.63005662, 4.16863452, 0.86090529],
       [6.18250036, 3.00187525, 6.44499428],
       [2.11497733, 1.98032773, 4.53610884],
       [6.69757964, 5.19018203, 2.76979073],
       [1.39215545, 2.94386142, 5.60917746],
       [7.42040152, 4.22664834, 1.69672212],
       [2.43224207, 5.4571615 , 6.70305327],
       [6.3803149 , 1.71334827, 0.6028463 ],
       [1.11265639, 1.50166318, 3.48760997],
       [7.69990058, 5.66884659, 3.8182896 ],
       [3.56971588, 5.20836551, 1.43673437],
       [5.2428411 , 1.96214426, 5.8691652 ],
       [3.12282634, 2.72812741, 1.05450432],
       [5.68973063, 4.44238236, 6.25139525],
       [3.24868468, 2.83997522, 3.99842386],
       [5.56387229, 4.33053455, 3.30747571],
       [2.60835346, 0.74421609, 5.3236629 ],
       [6.20420351, 6.42629368, 1.98223667]])

In [None]:
from rascal.utils import fps

In [None]:
# Select 500 points from the feature matrix `test` with rascal's `fps` helper
# (presumably farthest point sampling -- confirm).
# NOTE(review): `test` is defined by the cell that extracts the feature matrix
# of the sorted Coulomb matrices, so that cell has to be run first.
result = fps(test,n_select=500,starting_index=None,method='simple')

In [None]:
# Plot the 'fps_minmax_d2' entry of the fps result.
plt.plot(result['fps_minmax_d2'])