In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import json
from equistore import Labels, TensorBlock, TensorMap
from utils.builder import TensorBuilder
import ase.io
from itertools import product
from utils.acdc_mini import acdc_standardize_keys, cg_increment, cg_combine
from utils.clebsh_gordan import ClebschGordanReal
from utils.hamiltonians import fix_pyscf_l1, dense_to_blocks, blocks_to_dense, couple_blocks, decouple_blocks
import matplotlib.pyplot as plt
from utils.librascal import  RascalSphericalExpansion, RascalPairExpansion
from rascal.representations import SphericalExpansion
import copy
from utils.pair_features import *

## Manipulate Hamiltonian into blocks

In [68]:
# Read the first 10 structures (ase index string ":10") of the water dataset.
frames = ase.io.read("data/hamiltonian/water-hamiltonian/water_coords_1000.xyz",":10")
for f in frames:
    # Give every frame a 100 x 100 x 100 cell and shift all atoms by +50 along
    # each axis. Presumably this centers the molecule in a box large enough that
    # no periodic image falls within the feature cutoff — TODO confirm.
    f.cell = [100,100,100]
    f.positions += 50

In [69]:
# The file content is decoded twice (json.load followed by json.loads), i.e. the
# file is expected to hold a JSON string that itself contains the JSON document.
# The context manager closes the file handle, which the bare open() never did.
with open('data/hamiltonian/water-hamiltonian/water_orbs.json', "r") as orbs_file:
    jorbs = json.loads(json.load(orbs_file))

# Re-key the orbital definitions by atomic number instead of chemical symbol.
orbs = {}
zdic = {"O" : 8, "H":1}
for k in jorbs:
    orbs[zdic[k]] = jorbs[k]

In [70]:
# Load the target matrices and keep only as many as there are frames.
# NOTE(review): allow_pickle=True can execute arbitrary code when loading an
# untrusted file — only use it on data you produced or trust.
hams = np.load("data/hamiltonian/water-hamiltonian/water_saph_orthogonal.npy", allow_pickle=True)[:len(frames)]
# NO NEED TO CORRECT L1 ORDER FOR SAPH ORTHOGONALIZED MATRICES...
#for i, f in enumerate(frames):
#    hams[i] = fix_pyscf_l1(hams[i], f, orbs)

# Split the dense matrices into blocks using the frames and the orbital table.
blocks = dense_to_blocks(hams, frames, orbs)

In [71]:
# Clebsch-Gordan helper used to couple the blocks below; the argument 5 is
# presumably the maximum angular momentum — confirm in utils.clebsh_gordan.
cg = ClebschGordanReal(5)

In [72]:
# Couple the matrix blocks with the Clebsch-Gordan helper.
coupled = couple_blocks(blocks, cg)

# `fock_bc` is an alias of `coupled` (same object, not a copy); it is the
# regression target used in the model cells further down.
fock_bc = coupled

## Feature computation

In [73]:
# Hyper-parameters shared by the atom-centered and the pair expansions below.
# NOTE(review): the units of the cutoff and smoothing width are not stated here
# (presumably Angstrom, matching the xyz file) — confirm.
rascal_hypers = {
    "interaction_cutoff": 2.5,
    "cutoff_smooth_width": 0.5,
    "max_radial": 4,
    "max_angular": 3,
    "gaussian_sigma_type": "Constant",
    "compute_gradients":  False,
}

# Atom-centered expansion of every frame.
spex = RascalSphericalExpansion(rascal_hypers)
rhoi = spex.compute(frames)

In [74]:
# Pair expansion computed with the same hyper-parameters as the atom-centered one.
pairs = RascalPairExpansion(rascal_hypers)
gij = pairs.compute(frames)

In [75]:
# Bring both expansions to the key layout expected by the acdc_mini helpers.
mc_rho1i = acdc_standardize_keys(rhoi)
# Move the neighbor species from the keys into the properties (applied to
# mc_rho1i itself; the return value is not used).
mc_rho1i.keys_to_properties(['species_neighbor'])
# The pair expansion keeps species_neighbor as a key.
mc_gij =  acdc_standardize_keys(gij)

In [76]:
# Combine the atom-centered features with themselves, matching blocks on
# species_center. Presumably lcut caps the angular momentum of the result — confirm.
mc_rho2i = cg_increment(mc_rho1i, mc_rho1i, lcut=4, other_keys_match=["species_center"])

In [77]:
# One more increment on top of mc_rho2i; used below as the on-site ("centers") features.
mc_rho3i = cg_increment(mc_rho2i, mc_rho1i, lcut=4, other_keys_match=["species_center"])

In [78]:
# Pair features built from mc_rho1i and the pair expansion.
# NOTE(review): mc_rho1ij is not used by any later cell visible in this notebook.
mc_rho1ij = cg_increment(mc_rho1i, mc_gij, lcut=4, other_keys_match=["species_center"])

In [79]:
# Pair features built from mc_rho2i and the pair expansion; used below as the
# off-site ("pairs") features.
mc_rho2ij = cg_increment(mc_rho2i, mc_gij, lcut=4, other_keys_match=["species_center"])

In [80]:
def mk_hamiltonian_feats(centers, pairs):
    """Build Hermitian, Hamiltonian-learning adapted features from center and pair features.

    Parameters
    ----------
    centers : TensorMap
        Atom-centered features. Every key must expose ``species_center``. The
        last sample column of each block is duplicated under the name
        ``neighbor`` (presumably it is the atom index, so that an on-site
        sample reads ``(..., i, i)`` — confirm against the caller).
    pairs : TensorMap
        Pair features. Keys must expose ``species_center`` and
        ``species_neighbor``; samples must expose ``structure``, ``center``
        and ``neighbor``.

    Returns
    -------
    TensorMap
        Keys are named ``pairs.keys.names + ("block_type",)``, so the key
        names of ``centers`` followed by the neighbor species are expected to
        line up with the key names of ``pairs``. ``block_type`` is

        * ``0``  for on-site blocks (taken from ``centers``, values not copied),
        * ``1``  for the symmetric combination ``(ij + ji) / sqrt(2)`` of
          same-species pairs,
        * ``-1`` for the antisymmetric combination ``(ij - ji) / sqrt(2)`` of
          same-species pairs,
        * ``2``  for pairs with ``species_center < species_neighbor``
          (values copied). Blocks with ``species_center > species_neighbor``
          are dropped.

        Same-species blocks keep only the samples with ``center < neighbor``.

    Raises
    ------
    ValueError
        If a same-species ``ij`` sample has no ``ji`` partner in the same
        structure, or if ``ji`` samples are left without an ``ij`` partner.
    """
    keys = []
    blocks = []

    # on-site blocks: same values, samples extended with neighbor == last column
    for k, b in centers:
        keys.append(tuple(k) + (k["species_center"], 0,))
        samples_array = np.vstack(b.samples.tolist())
        blocks.append(TensorBlock(
            samples=Labels(names=b.samples.names + ("neighbor",),
                           values=np.asarray(np.hstack([samples_array, samples_array[:, -1:]]), dtype=np.int32)),
            components=b.components,
            properties=b.properties,
            values=b.values
        ))

    for k, b in pairs:
        if k["species_center"] == k["species_neighbor"]:
            # off-site, same species
            structure = np.asarray(b.samples["structure"])
            center = np.asarray(b.samples["center"])
            neighbor = np.asarray(b.samples["neighbor"])

            idx_up = np.where(center < neighbor)[0]
            if len(idx_up) == 0:
                continue

            # Index every "ji" sample by (structure, center, neighbor). A list is
            # kept per key so that repeated pairs are consumed in order of
            # appearance instead of overwriting each other.
            partners = {}
            for lo in np.where(center > neighbor)[0]:
                pair_id = (int(structure[lo]), int(center[lo]), int(neighbor[lo]))
                partners.setdefault(pair_id, []).append(lo)

            # For each "ij" sample pick the "ji" sample of the same structure whose
            # center/neighbor are swapped. The explicit lookup does not rely on the
            # two triangles being stored in the same relative order.
            idx_lo = np.empty(len(idx_up), dtype=idx_up.dtype)
            for pos, up in enumerate(idx_up):
                candidates = partners.get((int(structure[up]), int(neighbor[up]), int(center[up])))
                if not candidates:
                    raise ValueError(f"Could not find matching ji term for sample {b.samples[up]}")
                idx_lo[pos] = candidates.pop(0)
            if any(partners.values()):
                # leftover "ji" samples would otherwise be silently discarded
                raise ValueError("Found ji samples without a matching ij term")

            up_samples = np.asarray(b.samples[idx_up].tolist(), dtype=np.int32)
            # +1: symmetric combination, -1: antisymmetric combination
            for block_type, sign in ((1, 1.0), (-1, -1.0)):
                keys.append(tuple(k) + (block_type,))
                blocks.append(TensorBlock(
                    samples=Labels(names=b.samples.names, values=up_samples),
                    components=b.components,
                    properties=b.properties,
                    values=(b.values[idx_up] + sign * b.values[idx_lo]) / np.sqrt(2)
                ))
        elif k["species_center"] < k["species_neighbor"]:
            # off-site, different species: keep one ordering only
            keys.append(tuple(k) + (2,))
            blocks.append(TensorBlock(
                samples=b.samples,
                components=b.components,
                properties=b.properties,
                values=b.values.copy()
            ))

    return TensorMap(
        keys=Labels(names=pairs.keys.names + ("block_type",), values=np.asarray(keys, dtype=np.int32)),
        blocks=blocks
    )

In [81]:
# On-site features come from mc_rho3i, off-site (pair) features from mc_rho2ij.
ham_feats = mk_hamiltonian_feats(mc_rho3i, mc_rho2ij)

## Testing model on one structure

In [82]:
from utils.Hamiltonian_model_utils import *

In [83]:
# Quick scale check: norm of the first feature block divided by the length of
# its first axis.
np.linalg.norm(ham_feats.block(0).values)/len(ham_feats.block(0).values)

1.4160044338588412e-06

In [106]:
# Regression model built from the orbital definitions.
# NOTE(review): alpha=1e-24 is presumably the regularization strength, i.e. the
# fit is effectively unregularized — confirm in utils.Hamiltonian_model_utils.
FR = Fock_regression(jorbs, alpha=1e-24)

In [107]:
# Fit on the features and coupled target blocks of all loaded frames.
FR.fit(ham_feats, fock_bc)

In [108]:
# Predict on the same features the model was fitted on, so the residuals shown
# below are training errors.
pred = FR.predict(ham_feats)

In [109]:
# Keys of the target blocks, for comparison with pred.keys further down.
fock_bc.keys

Labels([(0, 8, 2, 0, 8, 2, 0, 0), (0, 8, 2, 0, 8, 2, 1, 1),
        (0, 8, 2, 1, 8, 2, 1, 0), (0, 8, 2, 1, 8, 2, 1, 2),
        (2, 1, 1, 0, 8, 2, 0, 0), (2, 1, 1, 0, 8, 2, 1, 1),
        (0, 1, 1, 0, 1, 1, 0, 0), (1, 1, 1, 0, 1, 1, 0, 0)],
       dtype=[('block_type', '<i4'), ('a_i', '<i4'), ('n_i', '<i4'), ('l_i', '<i4'), ('a_j', '<i4'), ('n_j', '<i4'), ('l_j', '<i4'), ('L', '<i4')])

In [110]:
# Print the per-block residual (prediction minus target).
# Blocks are paired by position, which relies on pred and fock_bc listing their
# keys in the same order (the two key listings in this notebook agree).
for i in range(len(fock_bc.keys)):
    print(pred.keys[i])
    print(pred.block(i).values.flatten() - fock_bc.block(i).values.flatten())

(0, 8, 2, 0, 8, 2, 0, 0)
[ 2.26485497e-14 -1.63979941e-13  2.61679567e-13 -7.88258347e-14
  1.03983488e-12 -1.84741111e-13  2.62345701e-13 -7.73381359e-13
  8.20454815e-14 -7.20423721e-13]
(0, 8, 2, 0, 8, 2, 1, 1)
[ 2.94309022e-12 -2.92061474e-16 -1.52156066e-12  1.30799538e-12
 -4.23050211e-17  2.58419675e-12  1.69716310e-17 -1.38515643e-10
 -8.15546797e-11 -2.20126695e-12 -1.16991984e-16 -3.04652137e-13
  1.25871535e-13  2.74911317e-17 -5.55680502e-13 -7.58478003e-12
  1.13671163e-16  9.60621860e-12 -1.91894417e-11 -5.05634873e-17
  2.79035406e-11 -6.28736338e-11 -5.16147997e-16  4.38676873e-13
 -7.33932359e-12  1.81985154e-17  1.62306280e-12 -3.30496741e-12
  8.69280670e-17  2.41594245e-12]
(0, 8, 2, 1, 8, 2, 1, 0)
[-1.02140518e-13  1.48547841e-13  2.01172412e-12  2.23154828e-13
 -1.07536202e-12  7.99804667e-13  1.12576615e-13 -1.41375800e-12
 -6.16173779e-14  1.02917674e-13]
(0, 8, 2, 1, 8, 2, 1, 2)
[ 3.17633420e-12  1.28241623e-15  2.32180941e-12 -1.15190702e-15
  1.49269486e-13  

In [111]:
# Keys of the predicted blocks; they should match fock_bc.keys in content and order.
pred.keys

Labels([(0, 8, 2, 0, 8, 2, 0, 0), (0, 8, 2, 0, 8, 2, 1, 1),
        (0, 8, 2, 1, 8, 2, 1, 0), (0, 8, 2, 1, 8, 2, 1, 2),
        (2, 1, 1, 0, 8, 2, 0, 0), (2, 1, 1, 0, 8, 2, 1, 1),
        (0, 1, 1, 0, 1, 1, 0, 0), (1, 1, 1, 0, 1, 1, 0, 0)],
       dtype=[('block_type', '<i4'), ('a_i', '<i4'), ('n_i', '<i4'), ('l_i', '<i4'), ('a_j', '<i4'), ('n_j', '<i4'), ('l_j', '<i4'), ('L', '<i4')])

In [112]:
# Undo the coupling and reassemble one dense matrix per frame.
pred_decoupled = decouple_blocks(pred)
pred_dense = blocks_to_dense(pred_decoupled, frames, orbs)

In [113]:
# Reference eigenvalues of the first target matrix.
np.linalg.eigvalsh(hams[0])

array([-1.37730419, -0.69024968, -0.61827408, -0.51012916,  0.17444351,
        0.26734281])

In [114]:
# Eigenvalues of the first predicted matrix; compare with the reference above.
np.linalg.eigvalsh(pred_dense[0])

array([-1.37730419, -0.69024968, -0.61827408, -0.51012916,  0.17444351,
        0.26734281])

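## Rotated features for a rotated molecule

If the model is equivariant, the Hamiltonian predicted for the rotated frame should have the same eigenvalues as the prediction for the original frame.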

In [115]:
# Transformed copy of the first frame.
rot_frame = frames[0].copy()
# Cyclically permute the Cartesian axes ([2,0,1]) and reflect every atom through
# the point (50, 50, 50).
# NOTE(review): a cyclic permutation combined with a point reflection is an
# improper rotation (determinant -1), not a pure rotation — confirm this is intended.
rot_frame.positions = 50+(50-frames[0].positions[:,[2,0,1]])
# Display the transformed coordinates.
rot_frame.positions[:,]

array([[50.      , 50.      , 50.      ],
       [50.      , 49.165109, 50.      ],
       [50.      , 49.905969, 49.038666]])

In [116]:
# NOTE(review): this overwrites the `rhoi` computed for the original frames in
# the feature-computation section; re-running earlier cells afterwards would use
# the rotated-frame expansion.
rhoi = spex.compute([rot_frame])

In [117]:
# NOTE(review): this overwrites the `gij` computed for the original frames.
gij = pairs.compute([rot_frame])

In [118]:
# Same key standardization as for the original frames, applied to the
# rotated-frame expansions currently stored in `rhoi` and `gij`.
rot_rho1i = acdc_standardize_keys(rhoi)
rot_rho1i.keys_to_properties(['species_neighbor'])
rot_gij =  acdc_standardize_keys(gij)

In [119]:
# Rotated-frame counterpart of mc_rho2i.
rot_rho2i = cg_increment(rot_rho1i, rot_rho1i, lcut=4, other_keys_match=["species_center"])

In [None]:
# Rotated-frame counterpart of mc_rho3i (on-site features).
rot_rho3i = cg_increment(rot_rho2i, rot_rho1i, lcut=4, other_keys_match=["species_center"])

In [None]:
# Rotated-frame counterpart of mc_rho1ij.
# NOTE(review): rot_rho1ij is not used by any later cell visible in this notebook.
rot_rho1ij = cg_increment(rot_rho1i, rot_gij, lcut=4, other_keys_match=["species_center"])

In [None]:
# Rotated-frame counterpart of mc_rho2ij (pair features).
rot_rho2ij = cg_increment(rot_rho2i, rot_gij, lcut=4, other_keys_match=["species_center"])

In [None]:
# Same feature construction as ham_feats, for the rotated frame.
rot_feats = mk_hamiltonian_feats(rot_rho3i, rot_rho2ij)

In [None]:
# Predict with the model fitted on the original (unrotated) frames.
rot_pred = FR.predict(rot_feats)

In [None]:
# Decouple and reassemble the dense matrix for the single rotated frame.
rot_decoupled = decouple_blocks(rot_pred)
rot_dense = blocks_to_dense(rot_decoupled, [rot_frame], orbs)

In [None]:
# Eigenvalues of the prediction for the original first frame (same expression
# as in an earlier cell), shown again for side-by-side comparison.
np.linalg.eigvalsh(pred_dense[0])

In [None]:
# Eigenvalues of the prediction for the transformed frame; they should agree
# with the previous cell if the model is equivariant.
np.linalg.eigvalsh(rot_dense[0])

In [None]:
# Visualize the element-wise difference between the two dense predictions.
# NOTE(review): the matrices refer to differently oriented molecules, so entries
# of orientation-dependent orbitals presumably differ even for a perfectly
# equivariant model — compare eigenvalues for an invariant check.
plt.matshow(pred_dense[0]- rot_dense[0])

In [None]:
# Same difference as in the plot above, displayed as raw numbers.
pred_dense[0]- rot_dense[0]