In [1]:
%load_ext autoreload
%autoreload 2
%load_ext line_profiler

In [2]:
import numpy as np
import json
from equistore import Labels, TensorBlock, TensorMap
from utils.builder import TensorBuilder
import ase.io
from itertools import product
from utils.acdc_mini import acdc_standardize_keys, cg_increment, cg_combine, _remove_suffix
from utils.clebsh_gordan import ClebschGordanReal
from utils.hamiltonians import hamiltonian_features
import matplotlib.pyplot as plt

from utils.librascal import  RascalSphericalExpansion, RascalPairExpansion
from rascal.representations import SphericalExpansion
import copy
from utils.model_hamiltonian import *
from itertools import product

In [3]:
from utils.mp_utils import * 

In [4]:
frames1 = ase.io.read("./data/hamiltonian/water-hamiltonian/water_coords_1000.xyz",":2")
# frames2 = ase.io.read("mp_equistore/data/hamiltonian/ethanol-hamiltonian/ethanol_4500.xyz",":1")
# frames3= [ase.build.molecule('NH3')]
frames = frames1#+frames2+frames3#+frames2
for f in frames:
    f.cell = [100,100,100]
    f.positions += 50

In [5]:
rascal_hypers = {
    "interaction_cutoff": 3.5,
    "cutoff_smooth_width": 0.3,
    "max_radial": 2,
    "max_angular": 2,
    "gaussian_sigma_type": "Constant",
    "compute_gradients":  False,
#     "expansion_by_species_method": "user defined",
#     "global_species": [1,6,8,7]
    
}
spex = RascalSphericalExpansion(rascal_hypers)
rhoi = spex.compute(frames)

In [6]:
cg = ClebschGordanReal(5)

In [7]:
pairs = RascalPairExpansion(rascal_hypers)
gij = pairs.compute(frames)

In [8]:
rho1i = acdc_standardize_keys(rhoi)
rho1i.keys_to_properties(['species_neighbor'])
gij =  acdc_standardize_keys(gij)

In [9]:
rhoii1i2_nu0 = cg_combine(gij, gij, clebsch_gordan=cg, other_keys_match=['species_center'])

In [10]:
rhoii1i2_nu0

TensorMap with 64 blocks
keys: ['order_nu' 'inversion_sigma' 'spherical_harmonics_l' 'species_center' 'species_neighbor_a' 'species_neighbor_b']
           2             1                    0                   1                 1                   1
           2             1                    0                   1                 1                   8
           2             1                    1                   1                 1                   1
        ...
           2             1                    4                   8                 8                   1
           2            -1                    3                   8                 8                   8
           2             1                    4                   8                 8                   8

In [11]:
print(rhoii1i2_nu0.keys[rhoii1i2_nu0.blocks_matching(species_center=8, species_neighbor_a = 8, species_neighbor_b=8)])
print(rhoii1i2_nu0.blocks_matching(species_center=8, species_neighbor_a = 8, species_neighbor_b=8))

[(2,  1, 0, 8, 8, 8) (2,  1, 1, 8, 8, 8) (2,  1, 2, 8, 8, 8)
 (2, -1, 1, 8, 8, 8) (2, -1, 2, 8, 8, 8) (2,  1, 3, 8, 8, 8)
 (2, -1, 3, 8, 8, 8) (2,  1, 4, 8, 8, 8)]
[19, 21, 23, 43, 46, 47, 62, 63]


In [12]:
rhoii1i2_nu0.block(19).samples

Labels([(0, 0, 0, 0), (1, 0, 0, 0)],
       dtype=[('structure', '<i4'), ('center', '<i4'), ('neighbor_1', '<i4'), ('neighbor_2', '<i4')])

In [14]:
rhoii1i2_nu1 =  cg_combine(rho1i, rhoii1i2_nu0, clebsch_gordan=cg, other_keys_match = ['species_center'])

ValueError: ('Requested CG entry ', (2, 4, 6), ' has not been precomputed')

## old

In [84]:
x_a = rho1i#
x_b = rhoii1i2_nu0 #gij#rho1i#
feature_names=None
clebsch_gordan=None
lcut=None
other_keys_match=['species_center']
mp=False

In [85]:
print(x_b.sample_names)
if any([x.find("neighbor") for x in x_b.sample_names]):
    print('h')

('structure', 'center', 'neighbor_1', 'neighbor_2')
h


In [86]:
any([x.find("neighbor") for x in x_b.sample_names])

True

In [95]:
# determines the cutoff in the new features
lmax_a = max(x_a.keys["spherical_harmonics_l"])
lmax_b = max(x_b.keys["spherical_harmonics_l"])
if lcut is None:
    lcut = lmax_a + lmax_b

clebsch_gordan = cg 

similar=True
if "neighbor" or "neighbor_1" in x_b.sample_names: #and "neighbor" not in x_a.sample_names:
    #similar only when combining two rho1i's (not rho1i with gij or |r_ij> with |r_ik>)
    similar = False

other_keys_a = tuple(name for name in x_a.keys.names if name not in ["spherical_harmonics_l", "order_nu", "inversion_sigma"] )
other_keys_b = tuple(name for name in x_b.keys.names if name not in ["spherical_harmonics_l", "order_nu", "inversion_sigma"] )
if mp: 

    if other_keys_match is None:
        OTHER_KEYS = [ k+"_a" for k in other_keys_a ] + [ k+"_b" for k in other_keys_b ]
    else:     
        OTHER_KEYS =  [ 
            k+("_a" if k in other_keys_b else "") for k in other_keys_a if k not in other_keys_match ] + [
            k+("_b" if k in other_keys_a else "") for k in other_keys_b if k not in other_keys_match ]  +other_keys_match    
else: 
    if other_keys_match is None:
        OTHER_KEYS = [ k+"_a" for k in other_keys_a ] + [ k+"_b" for k in other_keys_b ]
    else:     
        OTHER_KEYS = other_keys_match + [ 
            k+("_a" if k in other_keys_b else "") for k in other_keys_a if k not in other_keys_match ] + [
            k+("_b" if k in other_keys_a else "") for k in other_keys_b if k not in other_keys_match ]  

if x_a.block(0).has_gradient("positions"):
    grad_components = x_a.block(0).gradient("positions").components
else:
    grad_components = None

# automatic generation of the output features names
# "x1 x2 x3 ; x1 x2 -> x1_a x2_a x3_a k_nu x1_b x2_b l_nu"
if feature_names is None:
    NU = x_a.keys[0]["order_nu"] + x_b.keys[0]["order_nu"]
    feature_names = (
        tuple(n + "_a" for n in x_a.property_names)
        + ("k_" + str(NU),)
        + tuple(n + "_b" for n in x_b.property_names)
        + ("l_" + str(NU),)
    )

X_idx = {}
X_blocks = {}
X_samples = {}
X_grad_samples = {}
X_grads = {}

In [96]:
for index_a, block_a in x_a:
    lam_a = index_a["spherical_harmonics_l"]
    sigma_a = index_a["inversion_sigma"]
    order_a = index_a["order_nu"]                
    properties_a = block_a.properties  # pre-extract this block as accessing a c property has a non-zero cost
    samples_a = block_a.samples
    for index_b, block_b in x_b:
        lam_b = index_b["spherical_harmonics_l"]
        sigma_b = index_b["inversion_sigma"]
        order_b = index_b["order_nu"]       
        properties_b = block_b.properties
        samples_b = block_b.samples
        samples_final = samples_b
        b_slice = list(range(len(samples_b)))
        if similar and lam_b<lam_a:
            continue

        if other_keys_match is None:            
            OTHERS = tuple( index_a[name] for name in other_keys_a ) + tuple( index_b[name] for name in other_keys_b )
        else:
            OTHERS = tuple(index_a[k] for k in other_keys_match if index_a[k]==index_b[k])
            if len(OTHERS)<len(other_keys_match):
                continue
            # adds non-matching keys to build outer product
            if mp: 

                OTHERS = tuple(index_a[k] for k in other_keys_a if k not in other_keys_match) + OTHERS 
                OTHERS = tuple(index_b[k] for k in other_keys_b if k not in other_keys_match) + OTHERS 
            else: 
                OTHERS = OTHERS + tuple(index_a[k] for k in other_keys_a if k not in other_keys_match)
                OTHERS = OTHERS + tuple(index_b[k] for k in other_keys_b if k not in other_keys_match)
        
        if mp: 
            if "neighbor" in samples_b.names and "neighbor" not in samples_a.names:
                center_slice = []
                smp_a, smp_b = 0, 0
                while smp_b < samples_b.shape[0]:               
                    #print(index_b, samples_b[smp_b][["structure", "center", "neighbor"]], index_a, samples_a[smp_a])
                    idx= [idx for idx, tup in enumerate(samples_a) if tup[0] ==samples_b[smp_b]["structure"] and tup[1] == samples_b[smp_b]["neighbor"] ][0]
                    center_slice.append(idx)
                    smp_b+=1
                center_slice = np.asarray(center_slice)
#                     print(index_a, index_b, center_slice,  block_a.samples, block_b.samples)
            else: 
                center_slice = slice(None)
        else:
            if "neighbor" in samples_b.names and "neighbor" not in samples_a.names:
                #rhoi and g_ij
                neighbor_slice = []
                smp_a, smp_b = 0, 0
                while smp_b < samples_b.shape[0]:                    
                    if samples_b[smp_b][["structure","center"]] != samples_a[smp_a]:
                        if(smp_a+1 < samples_a.shape[0]):
                            smp_a+=1
                    neighbor_slice.append(smp_a)
                    smp_b+=1
                neighbor_slice = np.asarray(neighbor_slice)
                print(index_a, index_b, neighbor_slice,  block_a.samples[neighbor_slice], block_b.samples)
                
            elif "neighbor" in samples_b.names and "neighbor" in samples_a.names:
                #taking tensor products of gij and gik
                neighbor_slice = []
                b_slice = []
                samples_final = []
                smp_a, smp_b = 0, 0
                while smp_b < samples_b.shape[0]:
                    idx= [idx for idx, tup in enumerate(samples_a) if tup[0] ==samples_b[smp_b]["structure"] and tup[1] == samples_b[smp_b]["center"]]
                    neighbor_slice.extend(idx)
                    b_slice.extend([smp_b]*len(idx))
                    samples_final.extend(flatten(list(product([samples_b[smp_b]],block_a.samples.asarray()[idx][:,-1]))))
                    smp_b+=1
                neighbor_slice = np.asarray(neighbor_slice)
#                 print(index_a, index_b, neighbor_slice)#,  block_a.samples[neighbor_slice], block_b.samples)
                samples_final = Labels(["structure", "center", "i1", "i2"], np.asarray(samples_final, dtype=np.int32))
            
            elif "neighbor_1" in samples_b.names: 
                # combining three center feature with rho_{i i1 i2}
                neighbor_slice = []
                b_slice = []
                smp_a, smp_b = 0, 0
                while smp_b < samples_b.shape[0]:
                    idx= [idx for idx, tup in enumerate(samples_a) if tup[0] ==samples_b[smp_b]["structure"] and tup[1] == samples_b[smp_b]["center"]]
                    neighbor_slice.extend(idx)
                    b_slice.extend([smp_b]*len(idx))
                    smp_b+=1
                neighbor_slice = np.asarray(neighbor_slice)
                print(samples_b[b_slice], samples_a[neighbor_slice])
            else:
                neighbor_slice = slice(None) 
                
                
        # determines the properties that are in the select list  
        sel_feats = []
        sel_idx = []
        sel_feats = np.indices((len(properties_a), len(properties_b))).reshape(2,-1).T

        prop_ids_a = []
        prop_ids_b = []
        for n_a, f_a in enumerate(properties_a):
            prop_ids_a.append( tuple(f_a) + (lam_a,))
        for n_b, f_b in enumerate(properties_b):
            prop_ids_b.append( tuple(f_b) + (lam_b,))
        prop_ids_a = np.asarray(prop_ids_a) 
        prop_ids_b = np.asarray(prop_ids_b)
        sel_idx = np.hstack([prop_ids_a[sel_feats[:,0]],prop_ids_b[sel_feats[:,1]] ])    #creating a tensor product          
        if len(sel_feats) == 0:
            continue            
        # loops over all permissible output blocks. note that blocks will
        # be filled from different la, lb
        for L in range(np.abs(lam_a - lam_b), 1 + min(lam_a + lam_b, lcut)):
            # determines parity of the block
            S = sigma_a * sigma_b * (-1) ** (lam_a + lam_b + L)
            NU = order_a + order_b                
            KEY = (NU, S, L,) + OTHERS
            if not KEY in X_idx:
                X_idx[KEY] = []
                X_blocks[KEY] = []
                X_samples[KEY] = samples_final
                if grad_components is not None:
                    X_grads[KEY] = []  
                    X_grad_samples[KEY] = block_b.gradient("positions").samples
                
            # builds all products in one go
            if mp:
                if isinstance(center_slice,slice) or  len(center_slice):
                    one_shot_blocks = clebsch_gordan.combine_einsum(
                        block_a.values[center_slice][:, :, sel_feats[:, 0]],
                        block_b.values[:, :, sel_feats[:, 1]],
                        L,
                        combination_string="iq,iq->iq",
                    )

                    if grad_components is not None: 
                        raise ValueError("grads not implemented with MP") 
                else:
                    one_shot_blocks = []

            else: 
                if isinstance(neighbor_slice,slice) or  len(neighbor_slice) :
                    one_shot_blocks = clebsch_gordan.combine_einsum(
                    block_a.values[neighbor_slice][:, :, sel_feats[:, 0]],
                    block_b.values[b_slice][:, :, sel_feats[:, 1]],
                    L,
                    combination_string="iq,iq->iq",
                )

                    if grad_components is not None:
                        grad_a = block_a.gradient("positions")
                        grad_b = block_b.gradient("positions")
                        grad_a_data = np.swapaxes(grad_a.data, 1,2)
                        grad_b_data = np.swapaxes(grad_b.data, 1,2)
                        one_shot_grads = clebsch_gordan.combine_einsum(
                            block_a.values[grad_a.samples["sample"]][neighbor_slice, :, sel_feats[:, 0]],
                            grad_b_data[b_slice][..., sel_feats[:, 1]],
                            L=L,
                            combination_string="iq,iaq->iaq",
                        ) + clebsch_gordan.combine_einsum(
                            block_b.values[grad_b.samples["sample"]][b_slice][:, :, sel_feats[:, 1]],
                            grad_a_data[neighbor_slice, ..., sel_feats[:, 0]],
                            L=L,
                            combination_string="iq,iaq->iaq",
                        )
                else:
                    one_shot_blocks = []



            # now loop over the selected features to build the blocks

            X_idx[KEY].append(sel_idx)
            if len(one_shot_blocks):
                X_blocks[KEY].append(one_shot_blocks)
            if grad_components is not None:
                X_grads[KEY].append(one_shot_grads)

# turns data into sparse storage format (and dumps any empty block in the process)
nz_idx = []
nz_blk = []
for KEY in X_blocks:
    L = KEY[2]
    # create blocks
    if len(X_blocks[KEY]) == 0:
        continue  # skips empty blocks
    nz_idx.append(KEY)
#         print(KEY, X_samples[KEY], len(X_blocks[KEY]) , X_blocks[KEY][0])
    block_data = np.concatenate(X_blocks[KEY], axis=-1)
    sph_components = Labels(
            ["spherical_harmonics_m"], np.asarray(range(-L, L + 1), dtype=np.int32).reshape(-1, 1)
        )
    newblock = TensorBlock(
        values=block_data,
        samples=X_samples[KEY],
        components=[sph_components],
        properties=Labels(feature_names, np.asarray(np.vstack(X_idx[KEY]), dtype=np.int32)),
    )

    nz_blk.append(newblock)
X = TensorMap(
    Labels(["order_nu", "inversion_sigma", "spherical_harmonics_l"] + OTHER_KEYS, np.asarray(nz_idx, dtype=np.int32)), nz_blk
)

here
[(0, 1, 1, 1) (0, 1, 1, 2) (0, 1, 2, 1) (0, 1, 2, 2) (0, 2, 1, 1)
 (0, 2, 1, 2) (0, 2, 2, 1) (0, 2, 2, 2) (1, 1, 1, 1) (1, 1, 1, 2)
 (1, 1, 2, 1) (1, 1, 2, 2) (1, 2, 1, 1) (1, 2, 1, 2) (1, 2, 2, 1)
 (1, 2, 2, 2)] [(0, 1) (0, 1) (0, 1) (0, 1) (0, 2) (0, 2) (0, 2) (0, 2) (1, 1) (1, 1)
 (1, 1) (1, 1) (1, 2) (1, 2) (1, 2) (1, 2)]
here
[(0, 1, 0, 1) (0, 1, 0, 2) (0, 2, 0, 1) (0, 2, 0, 2) (1, 1, 0, 1)
 (1, 1, 0, 2) (1, 2, 0, 1) (1, 2, 0, 2)] [(0, 1) (0, 1) (0, 2) (0, 2) (1, 1) (1, 1) (1, 2) (1, 2)]
here
[(0, 1, 1, 1) (0, 1, 1, 2) (0, 1, 2, 1) (0, 1, 2, 2) (0, 2, 1, 1)
 (0, 2, 1, 2) (0, 2, 2, 1) (0, 2, 2, 2) (1, 1, 1, 1) (1, 1, 1, 2)
 (1, 1, 2, 1) (1, 1, 2, 2) (1, 2, 1, 1) (1, 2, 1, 2) (1, 2, 2, 1)
 (1, 2, 2, 2)] [(0, 1) (0, 1) (0, 1) (0, 1) (0, 2) (0, 2) (0, 2) (0, 2) (1, 1) (1, 1)
 (1, 1) (1, 1) (1, 2) (1, 2) (1, 2) (1, 2)]
here
[(0, 1, 0, 1) (0, 1, 0, 2) (0, 2, 0, 1) (0, 2, 0, 2) (1, 1, 0, 1)
 (1, 1, 0, 2) (1, 2, 0, 1) (1, 2, 0, 2)] [(0, 1) (0, 1) (0, 2) (0, 2) (1, 1) (1, 1) (1, 2) (1

here
[(0, 1, 0, 0) (0, 2, 0, 0) (1, 1, 0, 0) (1, 2, 0, 0)] [(0, 1) (0, 2) (1, 1) (1, 2)]
here
[(0, 1, 1, 1) (0, 1, 1, 2) (0, 1, 2, 1) (0, 1, 2, 2) (0, 2, 1, 1)
 (0, 2, 1, 2) (0, 2, 2, 1) (0, 2, 2, 2) (1, 1, 1, 1) (1, 1, 1, 2)
 (1, 1, 2, 1) (1, 1, 2, 2) (1, 2, 1, 1) (1, 2, 1, 2) (1, 2, 2, 1)
 (1, 2, 2, 2)] [(0, 1) (0, 1) (0, 1) (0, 1) (0, 2) (0, 2) (0, 2) (0, 2) (1, 1) (1, 1)
 (1, 1) (1, 1) (1, 2) (1, 2) (1, 2) (1, 2)]
here
[(0, 1, 1, 1) (0, 1, 1, 2) (0, 1, 2, 1) (0, 1, 2, 2) (0, 2, 1, 1)
 (0, 2, 1, 2) (0, 2, 2, 1) (0, 2, 2, 2) (1, 1, 1, 1) (1, 1, 1, 2)
 (1, 1, 2, 1) (1, 1, 2, 2) (1, 2, 1, 1) (1, 2, 1, 2) (1, 2, 2, 1)
 (1, 2, 2, 2)] [(0, 1) (0, 1) (0, 1) (0, 1) (0, 2) (0, 2) (0, 2) (0, 2) (1, 1) (1, 1)
 (1, 1) (1, 1) (1, 2) (1, 2) (1, 2) (1, 2)]


ValueError: ('Requested CG entry ', (2, 4, 6), ' has not been precomputed')

In [90]:
rhoii1i2_nu0.block(0).samples

Labels([(0, 1, 1, 1), (0, 1, 1, 2), (0, 1, 2, 1), (0, 1, 2, 2),
        (0, 2, 1, 1), (0, 2, 1, 2), (0, 2, 2, 1), (0, 2, 2, 2),
        (1, 1, 1, 1), (1, 1, 1, 2), (1, 1, 2, 1), (1, 1, 2, 2),
        (1, 2, 1, 1), (1, 2, 1, 2), (1, 2, 2, 1), (1, 2, 2, 2)],
       dtype=[('structure', '<i4'), ('center', '<i4'), ('neighbor_1', '<i4'), ('neighbor_2', '<i4')])

In [91]:
samples_b

Labels([(0, 1, 1, 1), (0, 1, 1, 2), (0, 1, 2, 1), (0, 1, 2, 2),
        (0, 2, 1, 1), (0, 2, 1, 2), (0, 2, 2, 1), (0, 2, 2, 2),
        (1, 1, 1, 1), (1, 1, 1, 2), (1, 1, 2, 1), (1, 1, 2, 2),
        (1, 2, 1, 1), (1, 2, 1, 2), (1, 2, 2, 1), (1, 2, 2, 2)],
       dtype=[('structure', '<i4'), ('center', '<i4'), ('neighbor_1', '<i4'), ('neighbor_2', '<i4')])

In [92]:
samples_a

Labels([(0, 1), (0, 2), (1, 1), (1, 2)],
       dtype=[('structure', '<i4'), ('center', '<i4')])