In [1]:
from functools import partial

import numpy as np
import h5py
import pandas as pd
from pandas import DataFrame, Series

import torch
torch.set_default_dtype(torch.float64)

import e3nn
import e3nn.point
import e3nn.radial
import e3nn.kernel
from e3nn.point.operations import Convolution
from e3nn.non_linearities import rescaled_act

In [2]:
# read equilibrium geometry data
with h5py.File("carbon_monoxide-b3lyp_d3bj-631gd-gas-equilibrium_geometry.hdf5", "r") as h5:
    equilibrium_geometry = np.array(h5.get("equilibrium_geometry"))
    atomic_numbers = np.array(h5.get("atomic_numbers"), dtype=np.int16)
    isotropic_shieldings = np.array(h5.get("isotropic_shieldings"))
    hirshfeld_charges = np.array(h5.get("hirshfeld_charges"))
    mulliken_charges = np.array(h5.get("mulliken_charges"))

In [3]:
# print out equilibrium data
# not including charge properties for now
print("equilibrium geometry")
print(atomic_numbers)
print(equilibrium_geometry)
equilibrium_list = [atomic_numbers, isotropic_shieldings, hirshfeld_charges, mulliken_charges]
equilibrium_list = np.array(equilibrium_list).T.copy()
columns = ["Z", "sigma", "z_hir", "z_mul"]
equilibrium_df = DataFrame(equilibrium_list, columns=columns)
equilibrium_df.index = [ i+1 for i in range(len(equilibrium_df)) ]
equilibrium_df.index.name = "atom#"
print()
print("equilibrium properties")
display(equilibrium_df)

equilibrium geometry
[6 8]
[[ 0.        0.       -0.650261]
 [ 0.        0.        0.487695]]

equilibrium properties


Unnamed: 0_level_0,Z,sigma,z_hir,z_mul
atom#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,6.0,-3.6465,0.0828,0.143074
2,8.0,-64.82,-0.0828,-0.143074


In [4]:
# read perturbed data
with h5py.File("carbon_monoxide-b3lyp_d3bj-631gd-gas-NMR-pcSseg_1.hdf5", "r") as h5:
    geoms_and_shieldings = np.array(h5.get("data"))

In [5]:
# print out non-equilbrium data
print("near-equilibrium data")
shape = np.shape(geoms_and_shieldings)
n_geometries = shape[0]
print(f"there are {n_geometries} geometries ({shape[1]} atoms per geometry)")
print()
geometries = geoms_and_shieldings[:,:,:3]
shieldings = geoms_and_shieldings[:,:,-1]
geoms_and_shieldings = None
print("first geometry")
print(geometries[0])
print()
print("first set of shieldings")
print(shieldings[0])

near-equilibrium data
there are 126 geometries (2 atoms per geometry)

first geometry
[[ 0.        0.       -0.428571]
 [ 0.        0.        0.321429]]

first set of shieldings
[119.2698 152.1464]


In [6]:
# create one-hot encodings for Z (atomic number)
atomic_number_dummies = pd.get_dummies(equilibrium_df.Z)
atomic_number_dummies["equilibrium shielding"] = equilibrium_df["sigma"]
display(atomic_number_dummies)
atomic_number_dummies = atomic_number_dummies.to_numpy()
atomic_number_dummies = np.repeat(atomic_number_dummies[None,:], n_geometries, axis=0)
print(np.shape(atomic_number_dummies))
print(atomic_number_dummies[0])

Unnamed: 0_level_0,6.0,8.0,equilibrium shielding
atom#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1,0,-3.6465
2,0,1,-64.82


(126, 2, 3)
[[  1.       0.      -3.6465]
 [  0.       1.     -64.82  ]]


In [7]:
# setup torch training data
torch_geometry = torch.tensor(geometries)
print("point clouds: 126 geometries, 2 atoms, xyz")
print(torch_geometry[0])
print(torch_geometry.shape)
print()
torch_features = torch.tensor(atomic_number_dummies)
print("features: two one-hots for carbon, oxygen; one shielding in the equilibrium structure")
print(torch_features[0])
print(torch_features.shape)
print()
print("outputs: perturbed shieldings")
torch_outputs = torch.tensor(shieldings)
print(torch_outputs[0])
print(torch_outputs.shape)

point clouds: 126 geometries, 2 atoms, xyz
tensor([[ 0.0000,  0.0000, -0.4286],
        [ 0.0000,  0.0000,  0.3214]], dtype=torch.float32)
torch.Size([126, 2, 3])

features: two one-hots for carbon, oxygen; one shielding in the equilibrium structure
tensor([[  1.0000,   0.0000,  -3.6465],
        [  0.0000,   1.0000, -64.8200]], dtype=torch.float32)
torch.Size([126, 2, 3])

outputs: perturbed shieldings
tensor([119.2698, 152.1464], dtype=torch.float32)
torch.Size([126, 2])


In [8]:
### "hello world": train on non-rotated data, test on rotated data

# define radial basis functions
# what are "h" and "number_of_basis"?
radial_layers = 2
softplus = rescaled_act.Softplus(beta=5)
max_radius = 3.0
number_of_basis = 3

RadialModel = partial(e3nn.radial.CosineBasisModel, max_radius=max_radius,
                      number_of_basis=number_of_basis, h=100,
                      L=radial_layers, act=softplus)
radial_kernel = partial(e3nn.kernel.Kernel, RadialModel=RadialModel)

# define the convolution
Rs_in = [(3, 0)] # Two (3) scalar (L=0) channels: carbon one-hot, oxygen one-hot, equilibrium shielding
Rs_out = [(4, 0), (4, 1), (4, 2)]
convolution = Convolution(radial_kernel, Rs_in, Rs_out) 

TypeError: __init__() takes 2 positional arguments but 4 were given