In [1]:
print("Loading numpy...")
import numpy as np
print("Loading torch...")
import torch
torch.set_default_dtype(torch.float64)
print("Loading e3nn...")
import e3nn
import torch_geometric as tg
print("Loading time...")
import time
from collections.abc import Mapping
print("Loading sparse_kernel_conv...")
from sparse_kernel_conv import SparseKernelConv, DummyConvolution
print("Loading laurent...")
from laurent import LaurentPolynomial
print("Loading functools...")
from functools import partial
print("Loading variable_networks...")
from variable_networks import VariableParityNetwork
print("Loading diagnostics...")
from diagnostics import print_parameter_size, count_parameters, get_object_size
print("Loading collections...")
from collections import deque
print("Loading copy...")
from copy import copy
print("Loading datetime...")
from datetime import timedelta
print("Loading re...")
import re
print("Loading sys...")
import sys
print("Loading os...")
import os
import traceback
print("Loading math...")
import math
print("Loading glob...")
from glob import glob
print("done loading modules.\n", flush=True)

os.environ["CUDA_VISIBLE_DEVICES"]=""
device = "cpu"
torch.device(device)
temp_tensor = torch.rand(10).to(device)
print("test tensor:")
print(temp_tensor)

Loading numpy...
Loading torch...
Loading e3nn...
Loading time...
Loading sparse_kernel_conv...
Loading laurent...
Loading functools...
Loading variable_networks...
Loading diagnostics...
Loading collections...
Loading copy...
Loading datetime...
Loading re...
Loading sys...
Loading os...
Loading math...
Loading glob...
done loading modules.

test tensor:
tensor([0.5690, 0.8332, 0.9633, 0.2949, 0.5114, 0.2948, 0.2132, 0.1833, 0.8012,
        0.7929])


In [2]:
# read the model from disk
model_filename = "checkpoints/fluidstack_7-e003_b172869-checkpoint.torch"

print(f"Loading model from {model_filename}...", end="", flush=True)
model_dict = torch.load(model_filename, map_location=torch.device('cpu'))
print("done.", flush=True)

Loading model from checkpoints/fluidstack_7-e003_b172869-checkpoint.torch...done.


In [3]:
model_kwargs = model_dict['model_kwargs']
for k,v in model_kwargs.items():
    print(k, ":", v)

kernel : <class 'sparse_kernel_conv.SparseKernelConv'>
convolution : <class 'sparse_kernel_conv.DummyConvolution'>
batch_norm : False
muls : [[30, 20, 10, 5, 5], [30, 20, 10, 5], [30, 30, 15], [30, 30, 15]]
lmaxes : [4, 3, 2, 2]
max_radius : 3.0
number_of_basis : 20
radial_h : 20
radial_layers : 1
n_norm : 8.0
batch_norm_momentum : 0.02
radial_model : None
Rs_in : [(7, 0, 1)]
Rs_out : [(1, 0, 1)]


In [4]:
model = VariableParityNetwork(**model_kwargs)

In [5]:
model.load_state_dict(model_dict["state_dict"])

<All keys matched successfully>

In [6]:
all_elements = model_dict["all_elements"]
print(all_elements)

[6, 1, 7, 8, 16, 9, 17]


In [7]:
# read a random geometry of acetone and its atomic numbers
import h5py
with h5py.File("../acetone/acetone-b3lyp_d3bj-631gd-gas-NMR-pcSseg_1.hdf5", "r") as h5:
    geoms_and_shieldings = np.array(h5.get("data"))
with h5py.File("../acetone/acetone-b3lyp_d3bj-631gd-gas-equilibrium_geometry.hdf5", "r") as h5:
    atomic_numbers = np.array(h5.get("atomic_numbers"))
# x,y,z,shielding
geometry = torch.tensor(geoms_and_shieldings[0,:,:3], dtype=torch.float64)
shieldings = torch.tensor(geoms_and_shieldings[0,:,-1], dtype=torch.float64)
print(geometry)
print(atomic_numbers)

tensor([[ 0.0187,  0.1518,  0.0397],
        [-0.0137,  1.3979,  0.0135],
        [-1.3000, -0.5873, -0.0089],
        [-1.0145, -1.4166, -0.5953],
        [-1.5199, -1.2240,  0.9375],
        [-2.2508,  0.0196, -0.4406],
        [ 1.2645, -0.5870, -0.0262],
        [ 1.5569, -1.0289,  1.0086],
        [ 1.2408, -1.4952, -0.7166],
        [ 2.1978,  0.0966, -0.3300]])
[6 8 6 1 1 1 6 1 1 1]


In [8]:
# generates one-hots for a list of atomic_symbols
def get_one_hots(atomic_symbols):
    one_hots = []
    for symbol in atomic_symbols:
        inner_list = [ 1. if symbol == i else 0. for i in all_elements ]
        one_hots.append(inner_list)
    return torch.tensor(one_hots, dtype=torch.float64)
one_hots = get_one_hots(atomic_numbers)
print(all_elements)
print(atomic_numbers)
print(one_hots)

[6, 1, 7, 8, 16, 9, 17]
[6 8 6 1 1 1 6 1 1 1]
tensor([[1., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0.]])


In [9]:
# turn input into a form that e3nn can understand
import e3nn.point.data_helpers as dh 
data = dh.DataNeighbors(x=one_hots, Rs_in = model_kwargs["Rs_in"], pos = geometry, r_max=model_kwargs["max_radius"], Rs_out=model_kwargs["Rs_out"])

In [10]:
model.eval()  # because of batchnorm
with torch.no_grad():  # do not compute gradients
    output = model(data.x, data.edge_index, data.edge_attr, n_norm=model_kwargs["n_norm"])
print(output) # this is the prediction of the model
print(shieldings) # sanity check

tensor([[ -32.3423],
        [-445.4471],
        [ 149.4786],
        [  30.5736],
        [  27.0376],
        [  26.9489],
        [ 152.5420],
        [  27.5167],
        [  28.2830],
        [  27.9769]])
tensor([  91.3635, -402.2257,  199.6065,   31.0486,   28.9889,   28.2823,
         200.1423,   29.0782,   29.3665,   28.2578])


In [11]:
print("  #     Z   predicted   expected   residual")
for i,atomic_number in enumerate(atomic_numbers):
    error = float(output[i])-float(shieldings[i])
    print(f"{i:3d}    {atomic_number:2d}    {float(output[i]):8.2f}   {float(shieldings[i]):8.2f}   {error:8.2f}")

  #     Z   predicted   expected   residual
  0     6      -32.34      91.36    -123.71
  1     8     -445.45    -402.23     -43.22
  2     6      149.48     199.61     -50.13
  3     1       30.57      31.05      -0.47
  4     1       27.04      28.99      -1.95
  5     1       26.95      28.28      -1.33
  6     6      152.54     200.14     -47.60
  7     1       27.52      29.08      -1.56
  8     1       28.28      29.37      -1.08
  9     1       27.98      28.26      -0.28


In [12]:
import cctk, glob
import e3nn.point.data_helpers as dh 

def compare_predictions(gaussian_output):
    molecule = gaussian_output.get_molecule()
    # make geometry and shieldings a tensor
    geometry = torch.tensor(molecule.geometry.view(np.ndarray), dtype=torch.float64)
    shieldings = torch.tensor(gaussian_output.ensemble[:,"isotropic_shielding"], dtype=torch.float64)
    # generate one-hots
    one_hots = []
    for symbol in molecule.atomic_numbers.view(np.ndarray):
        inner_list = [ 1. if symbol == i else 0. for i in all_elements ]
        one_hots.append(inner_list)
    one_hots = torch.tensor(one_hots, dtype=torch.float64)
    
    assert len(one_hots) == molecule.num_atoms()
    
    # something data something
    data = dh.DataNeighbors(x=one_hots, Rs_in = model_kwargs["Rs_in"], pos = geometry, r_max=model_kwargs["max_radius"], Rs_out=model_kwargs["Rs_out"])
    
    model.eval()  # because of batchnorm
    with torch.no_grad():  # do not compute gradients
        output = model(data.x, data.edge_index, data.edge_attr, n_norm=model_kwargs["n_norm"])

    residuals = [float(output[i]) - float(shieldings[i]) for i in range(molecule.num_atoms())]
    return output, shieldings, np.array(residuals)


In [18]:
filenames = "../../../testing/final_nmrs/*.out" # buncha nmr outfiles i have. need to change relative path for your system
for filename in glob.glob(filenames):
    name = filename.split("/")[-1].replace("_nmr.out", "")
    print(name + " (11 files):")
    outfiles = cctk.GaussianFile.read_file(filename)
    Z = outfiles[0].get_molecule().atomic_numbers.view(np.ndarray)
    
    row1 = "#"
    row2 = "H"
    row3 = "C"
    
    avgh = 0
    avgc = 0
    
    for jigglenum, outfile in enumerate(outfiles):
        #print("pred\tactual\tresid")
        pred, actual, resid = compare_predictions(outfile)
        #for i in range(len(resid)):
            #print(f"{float(pred[i]):.2f}\t{float(actual[i]):.2f}\t{resid[i]:.2f}")
        h_resids = []
        c_resids = []
        
        for i in range(len(resid)):
            if Z[i] == 1:
                h_resids.append(resid[i])
            elif Z[i] == 6:
                c_resids.append(resid[i])
                
        h_mae, c_mae = 0, 0
        if len(h_resids):
            h_mae = sum([abs(h) for h in h_resids]) / len(h_resids)
        if len(c_resids):
            c_mae = sum([abs(c) for c in c_resids]) / len(c_resids)

        if jigglenum:
            avgh += h_mae
            avgc += c_mae
        
        row1 += f"\t{jigglenum:02d}  "
        row2 += f"\t{h_mae:.2f}"
        row3 += f"\t{c_mae:.2f}"
        
    row1 += "\tmean"
    row2 += f"\t{avgh/10:.2f}"
    row3 += f"\t{avgc/10:.2f}"
    
    print(row1)
    print(row2)
    print(row3)
    print("")

thiophene (11 files):
#	00  	01  	02  	03  	04  	05  	06  	07  	08  	09  	10  	mean
H	0.31	0.58	0.51	0.28	0.30	0.58	0.51	0.31	0.89	0.47	0.56	0.50
C	1.09	0.52	0.82	1.60	2.35	1.75	1.89	1.02	0.76	2.15	1.13	1.40

tetrahydrofuran (11 files):
#	00  	01  	02  	03  	04  	05  	06  	07  	08  	09  	10  	mean
H	0.23	0.22	0.29	0.23	0.28	0.15	0.21	0.40	0.17	0.44	0.19	0.26
C	1.06	0.78	1.33	0.72	1.31	0.54	0.80	0.63	0.99	1.21	0.89	0.92

triethylamine (11 files):
#	00  	01  	02  	03  	04  	05  	06  	07  	08  	09  	10  	mean
H	0.32	0.31	0.38	0.34	0.30	0.34	0.49	0.28	0.34	0.38	0.33	0.35
C	1.14	1.36	0.76	0.89	0.33	0.78	0.97	0.94	0.61	0.97	0.89	0.85

methane (11 files):
#	00  	01  	02  	03  	04  	05  	06  	07  	08  	09  	10  	mean
H	0.70	0.53	0.21	0.59	0.16	0.49	0.37	0.40	0.45	0.38	0.26	0.38
C	0.24	0.02	0.28	0.45	0.52	0.52	0.08	0.22	0.19	0.33	0.09	0.27

pyridine (11 files):


KeyboardInterrupt: 

In [15]:
filename = "../../../testing/final_nmrs/methane_nmr.out" 

name = filename.split("/")[-1].replace("_nmr.out", "")
print(name + " (11 files):")
outfiles = cctk.GaussianFile.read_file(filename)
Z = outfiles[0].get_molecule().atomic_numbers.view(np.ndarray)

H_shifts = np.zeros(shape=(11,4))
H_expect = np.zeros(shape=(11,4))

C_shifts = np.zeros(shape=(11,1))
C_expect = np.zeros(shape=(11,1))

for i, outfile in enumerate(outfiles):
    pred, actual, resid = compare_predictions(outfile)
    
    for j in range(len(resid)):
        if Z[j] == 1:
            next_idx = (H_shifts[i] == 0)[0]
            H_shifts[i][next_idx] = float(pred[j])
            H_expect[i][next_idx] = float(actual[j])
        elif Z[j] == 6:
            next_idx = (C_shifts[i] == 0)[0]
            C_shifts[i][next_idx] = float(pred[j])
            C_expect[i][next_idx] = float(actual[j])

print(H_shifts)
print(H_expect)

print(C_shifts)
print(C_expect)

methane (11 files):


FileNotFoundError: [Errno 2] No such file or directory: '../../../testing/final_nmrs/methane_nmr.out'