In [452]:
# Enable IPython's autoreload extension in mode 2, so imported modules (e.g. the local utils.* helpers)
# are re-imported before each cell executes and edits are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [453]:
import numpy as np
import json
from equistore import Labels, TensorBlock, TensorMap
from utils.builder import TensorBuilder
import ase.io
from itertools import product
from utils.acdc_mini import acdc_standardize_keys, cg_increment, cg_combine
from utils.clebsh_gordan import ClebschGordanReal
from utils.hamiltonians import fix_pyscf_l1, dense_to_blocks, blocks_to_dense, couple_blocks, decouple_blocks
import matplotlib.pyplot as plt
from utils.librascal import  RascalSphericalExpansion, RascalPairExpansion
from rascal.representations import SphericalExpansion
import copy
from utils.pair_features import *

## Manipulate Hamiltonian into blocks

In [454]:
# Read the frames selected by the ":100" index from the water trajectory, then give every frame a
# 100 x 100 x 100 cell and shift all of its positions by +50.
# NOTE(review): presumably this places each molecule in the middle of a box much larger than the
# interaction cutoff — confirm the intent.
frames = ase.io.read("data/hamiltonian/water-hamiltonian/water_coords_1000.xyz",":100")
for f in frames:
    f.cell = [100,100,100]
    f.positions += 50

In [455]:
# The orbital file is decoded twice: json.load returns a string that is itself JSON, which json.loads
# then turns into the actual mapping. The context manager makes sure the file handle is closed.
with open('data/hamiltonian/water-hamiltonian/water_orbs.json', "r") as orbs_file:
    jorbs = json.loads(json.load(orbs_file))

# Re-key the orbital table from element symbol to atomic number.
zdic = {"O" : 8, "H":1}
orbs = {zdic[k]: jorbs[k] for k in jorbs}

In [456]:
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python objects — only use it on trusted files.
hams = np.load("data/hamiltonian/water-hamiltonian/water_saph_orthogonal.npy", allow_pickle=True)
# Replace, in place, the matrix of every loaded frame with the output of fix_pyscf_l1.
# NOTE(review): presumably this fixes the ordering/sign convention of the l=1 orbitals coming from PySCF —
# confirm in utils.hamiltonians. Entries of `hams` beyond len(frames) are left untouched by this loop.
for i, f in enumerate(frames):
    hams[i] = fix_pyscf_l1(hams[i], f, orbs)

# Convert the dense matrices into a block representation (see utils.hamiltonians.dense_to_blocks).
blocks = dense_to_blocks(hams, frames, orbs)

In [466]:
# Clebsch-Gordan helper shared by the coupling and pair-feature cells below.
# NOTE(review): the argument 5 is presumably the maximum angular momentum handled — confirm in utils.clebsh_gordan.
cg = ClebschGordanReal(5)

In [467]:
# Couple the Hamiltonian blocks using the Clebsch-Gordan helper.
coupled = couple_blocks(blocks, cg)

# Alias used as the regression target in the model cells further down.
fock_bc = coupled

## Feature computation

In [468]:
# Hyper-parameters shared by the atom-centred and the pair expansions computed in this notebook.
rascal_hypers = dict(
    interaction_cutoff=2.5,
    cutoff_smooth_width=0.5,
    max_radial=4,
    max_angular=3,
    gaussian_sigma_type="Constant",
    compute_gradients=False,
)

# Atom-centred spherical expansion of every frame.
spex = RascalSphericalExpansion(rascal_hypers)
rhoi = spex.compute(frames)

In [469]:
# Pair expansion of every frame, built from the same hyper-parameter dictionary as the spherical expansion.
pairs = RascalPairExpansion(rascal_hypers)
gij = pairs.compute(frames)

In [470]:
# Pass both expansions through acdc_standardize_keys (presumably to obtain the key layout that
# cg_increment expects — confirm in utils.acdc_mini).
mc_rho1i = acdc_standardize_keys(rhoi)
# NOTE(review): the return value is discarded, so this relies on keys_to_properties modifying
# mc_rho1i in place — confirm for the installed equistore version.
mc_rho1i.keys_to_properties(['species_neighbor'])
mc_gij =  acdc_standardize_keys(gij)

In [472]:
# Combine the atom-centred features with themselves via cg_increment, matching blocks on "species_center".
# NOTE(review): presumably this raises the body order from nu=1 to nu=2 — confirm in utils.acdc_mini.
mc_rho2i = cg_increment(mc_rho1i, mc_rho1i, lcut=4, other_keys_match=["species_center"])

In [473]:
# Inspect the property names of the first block: each input contributes its own suffixed
# (species_neighbor, n, l) triple, as the output below shows.
mc_rho2i.block(0).properties.names

('species_neighbor_1', 'n_1', 'l_1', 'species_neighbor_2', 'n_2', 'l_2')

In [475]:
# One more cg_increment of mc_rho2i with the atom-centred features; the result is the "centers"
# input of mk_hamiltonian_feats further down.
mc_rho3i = cg_increment(mc_rho2i, mc_rho1i, lcut=4, other_keys_match=["species_center"])

In [476]:
# Combine the atom-centred features with the pair expansion, matching blocks on "species_center";
# used later to cross-check against rho1ij.
mc_rho1ij = cg_increment(mc_rho1i, mc_gij, lcut=4, other_keys_match=["species_center"])

In [477]:
# Combine mc_rho2i with the pair expansion; the result is the "pairs" input of mk_hamiltonian_feats.
mc_rho2ij = cg_increment(mc_rho2i, mc_gij, lcut=4, other_keys_match=["species_center"])

In [478]:
def _match_transposed_pairs(samples, idx_up, idx_lo):
    """Return, for each "ij" row listed in idx_up, the row of `samples` that holds the "ji" pair.

    Rows are matched on (structure, center, neighbor) with center and neighbor swapped; only rows
    listed in idx_lo are considered as candidates.

    Raises:
        ValueError: if some "ij" sample has no "ji" counterpart among the idx_lo rows.
    """
    structure = np.asarray(samples["structure"])
    center = np.asarray(samples["center"])
    neighbor = np.asarray(samples["neighbor"])

    # Plain-int tuples are used as keys so the comparison does not depend on how NumPy compares
    # structured scalars whose fields are listed in a different order.
    row_of = {
        (int(structure[row]), int(center[row]), int(neighbor[row])): int(row)
        for row in idx_lo
    }

    matched = np.empty(len(idx_up), dtype=np.intp)
    for position, row in enumerate(idx_up):
        transposed = (int(structure[row]), int(neighbor[row]), int(center[row]))
        if transposed not in row_of:
            raise ValueError(f"Could not find matching ji term for sample {samples[row]}")
        matched[position] = row_of[transposed]
    return matched


def mk_hamiltonian_feats(centers, pairs):
    """ Builds Hermitian, HAM-learning adapted features starting from generic center and pair atoms feats

    Every output key is the input key extended with a trailing "block_type" entry:

    *  0 : on-site blocks, taken from `centers`. The key additionally gets `species_center`
           repeated right before the block type, and the samples get a "neighbor" column that is a
           copy of their last column.
    *  1 : same-species pair blocks, (ij + ji) / sqrt(2), stored on the samples with center < neighbor.
    * -1 : same-species pair blocks, (ij - ji) / sqrt(2), stored on the same samples.
    *  2 : pair blocks with species_center < species_neighbor, values copied unchanged.

    Pair blocks with species_center > species_neighbor, and same-species blocks without any
    center < neighbor sample, are dropped.

    NOTE(review): the output key names are `pairs.keys.names + ("block_type",)`, so the keys of
    `centers` followed by the neighbor species are assumed to line up with `pairs.keys.names`, and
    the last sample column of `centers` is assumed to be the center index — confirm with the callers.

    Args:
        centers: TensorMap of atom-centred features, iterated as (key, block) pairs.
        pairs: TensorMap of pair features whose samples expose "structure", "center" and "neighbor".

    Returns:
        TensorMap collecting the blocks described above.

    Raises:
        ValueError: if a same-species "ij" sample has no matching "ji" sample.
    """
    keys = []
    blocks = []
    # central blocks
    for k, b in centers:
        keys.append(tuple(k)+(k["species_center"], 0,))
        samples_array = np.vstack(b.samples.tolist())
        blocks.append(TensorBlock(
            samples = Labels(names = b.samples.names + ("neighbor",),
                             values = np.asarray(np.hstack([samples_array, samples_array[:,-1:]]), dtype=np.int32)),
            components = b.components,
            properties = b.properties,
            values = b.values
        ))

    for k, b in pairs:
        if k["species_center"] == k["species_neighbor"]:
            # off-site, same species
            idx_up = np.where(b.samples["center"]<b.samples["neighbor"])[0]
            if len(idx_up) == 0:
                continue
            # Rows holding the transposed "ji" pairs, re-ordered so that idx_lo[n] is the partner
            # of idx_up[n]; the combinations below rely on this alignment.
            idx_lo = _match_transposed_pairs(
                b.samples,
                idx_up,
                np.where(b.samples["center"]>b.samples["neighbor"])[0],
            )
            up_samples = np.asarray(b.samples[idx_up].tolist(), dtype=np.int32)

            keys.append(tuple(k)+(1,))
            keys.append(tuple(k)+(-1,))
            # symmetric combination
            blocks.append(TensorBlock(
                samples = Labels(names = b.samples.names, values = up_samples),
                components = b.components,
                properties = b.properties,
                values = (b.values[idx_up] + b.values[idx_lo])/np.sqrt(2)
            ))
            # antisymmetric combination
            blocks.append(TensorBlock(
                samples = Labels(names = b.samples.names, values = up_samples),
                components = b.components,
                properties = b.properties,
                values = (b.values[idx_up] - b.values[idx_lo])/np.sqrt(2)
            ))
        elif k["species_center"] < k["species_neighbor"]:
            # off-site, different species
            keys.append(tuple(k)+(2,))
            blocks.append(TensorBlock(
                samples = b.samples,
                components = b.components,
                properties = b.properties,
                values = b.values.copy()
            ))

    return TensorMap(
        keys = Labels(names=pairs.keys.names + ("block_type",), values =np.asarray(keys, dtype=np.int32)),
        blocks = blocks
    )

In [491]:
# NOTE(review): this rebinds `hams`, which held the dense Hamiltonian matrices loaded earlier, to the
# feature TensorMap that is passed to FR.fit / FR.predict below. Re-running the earlier cells after
# this one changes what `hams` refers to; a separate name would be safer.
hams = mk_hamiltonian_feats(mc_rho3i, mc_rho2ij)

In [482]:
# NOTE(review): rho0ij_builder is not imported by name; presumably it comes from the star import of
# utils.pair_features — confirm.
rho0ij = rho0ij_builder(rascal_hypers, frames)

In [483]:
# Distinct centre species present in the spherical-expansion keys, in ascending order.
total_species = sorted(set(rhoi.keys['species_center']))
# Angular and radial limits, read back from the hyper-parameter dictionary.
lmax, nmax = rascal_hypers["max_angular"], rascal_hypers["max_radial"]

In [484]:
# Repackage the raw spherical expansion `rhoi` into a TensorMap keyed by
# (L, nu, sigma, species_i, species_neighbor), with nu = sigma = 1 for every block.
# Keys and blocks are appended in the same loop iteration so their order cannot drift apart.
nu1_keys = []
blocks = []
for l, sp_i, sp_k in product(range(lmax + 1), total_species, total_species):
    de_block = rhoi.block(species_center=sp_i, species_neighbor=sp_k, spherical_harmonics_l=l)
    blocks.append(TensorBlock(
        values=de_block.values,
        samples=de_block.samples,
        # one component axis "m" running over -l..l
        components=[Labels(["m"], np.asarray(range(-l, l + 1), dtype=np.int32).reshape(-1, 1))],
        # one property axis "n" running over 0..nmax-1
        properties=Labels(["n"], np.asarray([[n] for n in range(nmax)], dtype=np.int32)),
    ))
    nu1_keys.append([l, 1, 1, sp_i, sp_k])

acdc_nu1 = TensorMap(
    keys=Labels(
        names=["L", "nu", "sigma", "species_i", "species_neighbor"],
        values=np.asarray(nu1_keys, dtype=np.int32),
    ),
    blocks=blocks,
)
#move neighbor species to features  
# acdc_nu1.sparse_to_features('species_neighbor')

In [485]:
# Move the "species_neighbor" key into the properties of acdc_nu1.
# NOTE(review): an earlier cell calls keys_to_properties without using its return value, which suggests
# it works in place; if so, `rho1` may be None and acdc_nu1 itself is modified here — confirm.
rho1 = acdc_nu1.keys_to_properties("species_neighbor")

In [145]:
# Build pair features from rho0ij and acdc_nu1 using the Clebsch-Gordan helper; the call prints
# one "... done" line per generated key (see output below).
# NOTE(review): tensor_g_rho_nu is presumably provided by the star import of utils.pair_features — confirm.
rho1ij=tensor_g_rho_nu(rho0ij, acdc_nu1, rascal_hypers, cg)

  if i not in new_sparse_labels:


(0, 0, 1, 1, 1, 1) done
(0, 1, 1, 1, 1, 1) done
(0, 1, 1, -1, 1, 1) done
(0, 2, 1, 1, 1, 1) done
(0, 2, 1, -1, 1, 1) done
(1, 0, 1, 1, 1, 1) done
(-1, 0, 1, 1, 1, 1) done
(1, 1, 1, 1, 1, 1) done
(1, 1, 1, -1, 1, 1) done
(-1, 1, 1, 1, 1, 1) done
(-1, 1, 1, -1, 1, 1) done
(1, 2, 1, 1, 1, 1) done
(1, 2, 1, -1, 1, 1) done
(-1, 2, 1, 1, 1, 1) done
(-1, 2, 1, -1, 1, 1) done
(2, 0, 1, 1, 1, 8) done
(2, 1, 1, 1, 1, 8) done
(2, 1, 1, -1, 1, 8) done
(2, 2, 1, 1, 1, 8) done
(2, 2, 1, -1, 1, 8) done
(0, 0, 1, 1, 8, 8) done
(0, 1, 1, 1, 8, 8) done
(0, 1, 1, -1, 8, 8) done
(0, 2, 1, 1, 8, 8) done
(0, 2, 1, -1, 8, 8) done


In [181]:
# Inspect the component labels of the second block of rho0ij (a single "mu" axis in the output below).
rho0ij.block(1).components

[Labels([(-1,), ( 0,), ( 1,)], dtype=[('mu', '<i4')])]

In [196]:
# Sorted values of entry [0, 0] of one species 1 / species 8 block of rho1ij; sorting removes any
# dependence on property ordering so the result can be compared with the mc_rho1ij cell that follows.
np.sort(rho1ij.block(block_type=2, species_i=1, species_j=8, L=1, sigma=1).values[0,0])

array([-2.22295075e-05, -1.34518727e-05, -2.04396491e-06, -1.30506543e-06,
       -1.05731498e-06, -9.13467773e-07, -7.55466596e-07, -6.72168965e-07,
       -6.12050654e-07, -3.89100186e-07, -1.16211902e-07, -4.49275155e-08,
       -1.25317547e-08,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  

In [197]:
# Same check on the cg_increment-based features: the sorted values printed below coincide with those
# printed for the rho1ij block in the preceding cell.
np.sort(mc_rho1ij.block(species_center=1, species_neighbor_b=8, spherical_harmonics_l=1, inversion_sigma=1).values[0,0])

array([-2.22295075e-05, -1.34518727e-05, -2.04396491e-06, -1.30506543e-06,
       -1.05731498e-06, -9.13467773e-07, -7.55466596e-07, -6.72168965e-07,
       -6.12050654e-07, -3.89100186e-07, -1.16211902e-07, -4.49275155e-08,
       -1.25317547e-08,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  

In [189]:
# Inspect the property labels of one same-species (1, 1) block of mc_rho1ij.
mc_rho1ij.block(species_center=1, species_neighbor_b=1, spherical_harmonics_l=1, inversion_sigma=1).properties

Labels([(1, 0, 0, 0, 1), (1, 0, 0, 1, 1), (1, 0, 0, 2, 1),
        (1, 1, 0, 0, 1), (1, 1, 0, 1, 1), (1, 1, 0, 2, 1),
        (1, 2, 0, 0, 1), (1, 2, 0, 1, 1), (1, 2, 0, 2, 1),
        (8, 0, 0, 0, 1), (8, 0, 0, 1, 1), (8, 0, 0, 2, 1),
        (8, 1, 0, 0, 1), (8, 1, 0, 1, 1), (8, 1, 0, 2, 1),
        (8, 2, 0, 0, 1), (8, 2, 0, 1, 1), (8, 2, 0, 2, 1),
        (1, 0, 1, 0, 0), (1, 0, 1, 1, 0), (1, 0, 1, 2, 0),
        (1, 1, 1, 0, 0), (1, 1, 1, 1, 0), (1, 1, 1, 2, 0),
        (1, 2, 1, 0, 0), (1, 2, 1, 1, 0), (1, 2, 1, 2, 0),
        (8, 0, 1, 0, 0), (8, 0, 1, 1, 0), (8, 0, 1, 2, 0),
        (8, 1, 1, 0, 0), (8, 1, 1, 1, 0), (8, 1, 1, 2, 0),
        (8, 2, 1, 0, 0), (8, 2, 1, 1, 0), (8, 2, 1, 2, 0),
        (1, 0, 1, 0, 2), (1, 0, 1, 1, 2), (1, 0, 1, 2, 2),
        (1, 1, 1, 0, 2), (1, 1, 1, 1, 2), (1, 1, 1, 2, 2),
        (1, 2, 1, 0, 2), (1, 2, 1, 1, 2), (1, 2, 1, 2, 2),
        (8, 0, 1, 0, 2), (8, 0, 1, 1, 2), (8, 0, 1, 2, 2),
        (8, 1, 1, 0, 2), (8, 1, 1, 1, 2), (8, 1, 1, 2, 2

In [15]:
# Alias for the pair features. NOTE(review): `feats` is not used by any cell visible in this notebook —
# confirm whether it is still needed.
feats = rho1ij

## Testing model on one structure

In [16]:
from utils.Hamiltonian_model_utils import *

In [492]:
# Regression model built from the orbital table.
# NOTE(review): alpha=1e-16 is presumably a (near-zero) regularisation strength — confirm in
# utils.Hamiltonian_model_utils.
FR = Fock_regression(jorbs, alpha=1e-16)

In [493]:
# Fit on the Hamiltonian-adapted features (`hams` is the output of mk_hamiltonian_feats at this point)
# against the coupled Hamiltonian blocks.
FR.fit(hams, fock_bc)

In [494]:
# Predict on the same features that were used for fitting, so the residuals printed below are
# training errors rather than a held-out test.
pred = FR.predict(hams)

In [495]:
# Show the keys of the predicted blocks.
pred.keys

Labels([(0, 8, 2, 0, 8, 2, 0, 0), (0, 8, 2, 0, 8, 2, 1, 1),
        (0, 8, 2, 1, 8, 2, 1, 0), (0, 8, 2, 1, 8, 2, 1, 2),
        (2, 1, 1, 0, 8, 2, 0, 0), (2, 1, 1, 0, 8, 2, 1, 1),
        (0, 1, 1, 0, 1, 1, 0, 0), (1, 1, 1, 0, 1, 1, 0, 0)],
       dtype=[('block_type', '<i4'), ('a_i', '<i4'), ('n_i', '<i4'), ('l_i', '<i4'), ('a_j', '<i4'), ('n_j', '<i4'), ('l_j', '<i4'), ('L', '<i4')])

In [496]:
# Print, for each block index, the prediction key and the flattened difference prediction - target.
# NOTE(review): blocks are paired by position, so this assumes `pred` and `fock_bc` store their
# blocks in the same order and with matching shapes — confirm.
for i in range(len(fock_bc.keys)):
    print(pred.keys[i])
    print(pred.block(i).values.flatten() - fock_bc.block(i).values.flatten())

(0, 8, 2, 0, 8, 2, 0, 0)
[ 1.03181868e-05  2.51833542e-05 -4.47999140e-05 -1.26436921e-04
 -5.21515355e-05 -4.60978033e-05 -1.55661799e-04 -3.58206924e-05
  8.14545137e-05  9.67463023e-06 -1.21725770e-05  1.71874356e-05
 -3.25408391e-05  2.68985238e-05  3.25493702e-05  3.39773099e-05
 -7.08484537e-05 -4.48001506e-05 -4.48002891e-05  5.60322316e-05
 -4.48000772e-05 -3.62584788e-05 -9.83950439e-05  8.30184605e-05
 -9.10475001e-06  6.96145348e-05  1.44992423e-05 -4.03802049e-05
  6.07944404e-05  5.57001393e-05 -8.18997186e-05  1.47451628e-05
  4.80493784e-05  6.00737348e-05 -4.48004571e-05 -4.37981967e-06
  9.53959591e-06  1.10090997e-05 -4.48001530e-05 -3.27575401e-05
  2.27241026e-05  4.94007180e-05  9.14671134e-05  4.93070630e-06
  4.51955266e-05 -3.15522561e-05 -6.38548040e-05  8.40010721e-05
  5.03228817e-05  5.28972633e-05 -2.97679111e-05  7.33941642e-05
  7.56846482e-05  1.08160837e-04  8.21122686e-05 -7.11143709e-05
  5.78641285e-05 -7.06121010e-05  2.89325286e-07  2.99792196e-05


In [451]:
# Repeats the pred.keys inspection; NOTE(review): this cell's execution count (451) predates the
# cells above it, so its output comes from an earlier kernel state.
pred.keys

Labels([(0, 8, 2, 0, 8, 2, 0, 0), (0, 8, 2, 0, 8, 2, 1, 1),
        (0, 8, 2, 1, 8, 2, 1, 0), (0, 8, 2, 1, 8, 2, 1, 2),
        (2, 1, 1, 0, 8, 2, 0, 0), (2, 1, 1, 0, 8, 2, 1, 1),
        (0, 1, 1, 0, 1, 1, 0, 0), (1, 1, 1, 0, 1, 1, 0, 0)],
       dtype=[('block_type', '<i4'), ('a_i', '<i4'), ('n_i', '<i4'), ('l_i', '<i4'), ('a_j', '<i4'), ('n_j', '<i4'), ('l_j', '<i4'), ('L', '<i4')])