In [1]:
import sys
from pathlib import Path

# change to your NFF path
sys.path.insert(0, "..")
sys.path.insert(0, "../..")
sys.path.insert(0, "../../../")

import os
import shutil
import numpy as np
import matplotlib.pyplot as plt
import copy

import torch
from torch.optim import Adam
from torch.utils.data import DataLoader
from torch.utils.data.sampler import RandomSampler

from nff.data import Dataset, split_train_validation_test, collate_dicts, to_tensor
from nff.train import Trainer, get_trainer, get_model, load_model, loss, hooks, metrics, evaluate

import argparse
from sigopt import Connection

from train import train
from forceconv import *

from MD17data import *

from forcepai import ForcePai
# from nff.nn.models import Painn
from forcedime import ForceDime

In [2]:
# Notebook configuration, expressed as an argparse parser so the same options
# can be reused from a script. An empty argv list is parsed on purpose: the
# kernel's own command line is ignored and every option takes its default.
parser = argparse.ArgumentParser()
parser.add_argument("-logdir", default='./output', type=str)
parser.add_argument("-device", default=0, type=int)
parser.add_argument("-data", default='ethanol_dft', type=str)
params = vars(parser.parse_args([]))

# Training hyper-parameters.
n_epochs = 100
lr = 1e-5
BATCH_SIZE = 10

# Device index handed to `.to(...)` / `batch_to(...)` in later cells.
DEVICE = params['device']
# Output directory for logs and checkpoints.
# NOTE(review): the folder is named 'test_ForcePai' although the model cell
# builds a ForceDime model -- confirm the intended directory name.
OUTDIR = f"{params['logdir']}/test_ForcePai/sandbox"

In [3]:
# Load the raw MD17 data selected by params['data'] and pack it into `dataset`.
# NOTE(review): the second argument (10000) is presumably the number of
# configurations to keep (the progress bar below counts to 10000) -- confirm
# against pack_MD17data.
data = get_MD17data(params['data'])
dataset = pack_MD17data(data, 10000)
# Precompute the angle list and the kj / ji index arrays on the dataset.
# Presumably these are what model.get_prelims(...) unpacks later -- TODO confirm.
dataset.generate_angle_list()
dataset.generate_kj_ji()

100%|██████████| 10000/10000 [00:31<00:00, 316.11it/s]


Adding kj and ji indices with 1 parallel processes


100%|██████████| 10000/10000 [00:03<00:00, 2741.51it/s]


In [4]:
# Split the packed dataset into train / validation / test subsets.
# NOTE(review): rebinding `train` here shadows the `train` function imported
# with `from train import train` in the imports cell.
train, val, test = split_train_validation_test(
    dataset,
    val_size=0.05,
    test_size=0.85,
)

# One loader per subset, all with the same batch size and collate function.
train_loader, val_loader, test_loader = (
    DataLoader(subset, batch_size=BATCH_SIZE, collate_fn=collate_dicts)
    for subset in (train, val, test)
)

In [5]:
# modelparams = {"feat_dim": 128,
#               "activation": "swish",
#               "n_rbf": 20,
#               "cutoff": 5.0,
#               "num_conv": 3,
#               "output_keys": ["energy"],
#               "grad_keys": ["energy_grad"],
#                # whether to sum outputs from all blocks in the model
#                # or just the final output block. False in the original
#                # implementation
#               "skip_connection": False,
#                # Whether the k parameters in the Bessel basis functions
#                # are learnable. False originally
#               "learnable_k": False,
#                # dropout rate in the convolution layers, originally 0
#                "conv_dropout": 0.0,
#                # dropout rate in the readout layers, originally 0
#                "readout_dropout": 0.0,
#                # dictionary of means to add to each output key
#                # (this is optional - if you don't supply it then
#                # nothing will be added)
#                # "means": {"energy": train.props['energy'].mean().item()},
#                # dictionary of standard deviations with which to 
#                # multiply each output key
#                # (this is optional - if you don't supply it then
#                # nothing will be multiplied)
#                # "stddevs": {"energy": train.props['energy'].std().item()}
#               }
# model = ForcePai(modelparams).to(DEVICE)
# Build the ForceDime model with the hyper-parameters below and move it to
# the configured device.
model = ForceDime(
    n_rbf=16,
    cutoff=5,
    envelope_p=8,
    l_spher=6,
    n_spher=6,
    embed_dim=128,
    activation='ReLU',
    n_bilinear=8,
    n_convolutions=6,
).to(DEVICE)

In [6]:
# MSE loss built with a coefficient of 1 on the 'energy_grad' key only.
loss_fn = loss.build_mse_loss(loss_coef={'energy_grad': 1})

# Optimise only the parameters that require gradients. This is a one-shot
# iterator, consumed when the optimizer is constructed.
trainable_params = (p for p in model.parameters() if p.requires_grad)
optimizer = Adam(trainable_params, lr=lr)

# Metric reported during training: mean absolute error on 'energy_grad'.
train_metrics = [metrics.MeanAbsoluteError('energy_grad')]

In [None]:
# Hooks attached to the trainer: an epoch cap, CSV and console logging of
# `train_metrics` under OUTDIR, and a reduce-on-plateau learning-rate schedule.
train_hooks = [
    hooks.MaxEpochHook(n_epochs),
    hooks.CSVHook(
        OUTDIR,
        metrics=train_metrics,
    ),
    hooks.PrintingHook(
        OUTDIR,
        metrics=train_metrics,
        separator=' | ',
        time_strf='%M:%S',
    ),
    # Halves the learning rate after `patience` epochs without improvement.
    # NOTE(review): stop_after_min=True presumably ends training once min_lr
    # is reached -- confirm against the hook implementation.
    hooks.ReduceLROnPlateauHook(
        optimizer=optimizer,
        patience=20,
        factor=0.5,
        min_lr=1e-7,
        window_length=1,
        stop_after_min=True,
    ),
]

# Trainer wiring the model, loss, optimizer and loaders together; checkpoints
# and logs go to OUTDIR.
T = Trainer(
    model_path=OUTDIR,
    model=model,
    loss_fn=loss_fn,
    optimizer=optimizer,
    train_loader=train_loader,
    validation_loader=val_loader,
    checkpoint_interval=1,
    hooks=train_hooks,
    mini_batches=1,
)

# Use the configured epoch budget rather than a second hard-coded value
# (previously 1000), so it agrees with MaxEpochHook(n_epochs) above.
T.train(device=DEVICE, n_epochs=n_epochs)

In [6]:
from nff.utils.cuda import batch_to, batch_detach

# Take only the first batch from the training loader and move it to DEVICE.
# `data` stays None if the loader yields nothing.
batch = next(iter(train_loader), None)
data = batch_to(batch, DEVICE) if batch is not None else None

In [7]:
# 3x3 rotation by 45 degrees about the z axis: cos(45 deg) = sin(45 deg) = sqrt(2)/2.
half_sqrt2 = 2 ** .5 / 2
rotate = torch.Tensor([
    [half_sqrt2, -half_sqrt2, 0],
    [half_sqrt2, half_sqrt2, 0],
    [0, 0, 1],
]).to(DEVICE)
rotate

tensor([[ 0.7071, -0.7071,  0.0000],
        [ 0.7071,  0.7071,  0.0000],
        [ 0.0000,  0.0000,  1.0000]], device='cuda:0')

In [10]:
# Overwrite columns 1:4 of the batch's 'nxyz' tensor with their product with
# `rotate`. This mutates `data`, so re-running the cell rotates again.
# NOTE(review): columns 1:4 are presumably the x, y, z coordinates -- confirm.
data['nxyz'][:, 1:4] = data['nxyz'][:, 1:4] @ rotate

In [8]:
# Forward pass on the batch currently held in `data`.
# NOTE(review): this cell's execution count (In [8]) is lower than that of the
# in-place rotation cell (In [10]) although it sits below it in the file, so it
# was presumably run on the unrotated batch. On a top-to-bottom run `data` is
# already rotated here, and `results` and `new_results` come from the same
# coordinates.
results = model(data)

In [14]:
# Display the 'energy_grad' output of `results` multiplied by the same rotation
# matrix, for visual comparison with new_results['energy_grad'].
results['energy_grad'] @ rotate

tensor([[ 2.4089e+03,  3.0791e+03,  1.4598e+03],
        [-2.3518e+03,  2.8206e+02, -4.4779e+03],
        [-1.1090e+02, -4.7694e+03,  1.5016e+03],
        [ 1.9310e+03,  1.5531e+02, -1.0136e+02],
        [-1.8984e+03,  1.6706e+03,  7.3232e+02],
        [ 1.4531e+03,  1.1911e+03,  7.2989e+02],
        [ 1.5092e+03, -1.2851e+03, -9.3721e+02],
        [-2.2094e+03, -6.2140e+02,  1.5650e+03],
        [-7.3174e+02,  2.9770e+02, -4.7213e+02],
        [-2.0865e+03, -6.4635e+02, -2.4769e+03],
        [ 2.9930e+02,  3.2812e+03,  2.7352e+03],
        [ 2.3394e+03, -3.1117e+03,  1.7292e+02],
        [-1.1095e+03, -1.0708e+03,  1.8805e+03],
        [ 9.8922e+02,  1.0066e+03, -9.4703e+02],
        [-1.2877e+03, -2.5153e+02, -7.2854e+02],
        [ 7.5023e+02, -9.5565e+02,  1.5086e+03],
        [ 8.6068e+02,  5.7083e+02, -1.4099e+03],
        [-7.5514e+02,  1.1775e+03, -7.3489e+02],
        [-2.8446e+03, -5.9177e+01,  2.6900e+02],
        [ 2.0062e+03,  2.7296e+03,  1.4062e+03],
        [ 1.5942e+03

In [11]:
# Second forward pass on `data`; its execution count (In [11]) follows the
# in-place rotation cell (In [10]), so it presumably saw the rotated coordinates.
new_results = model(data)

In [13]:
# Display the 'energy_grad' output of the second forward pass, to compare
# against results['energy_grad'] @ rotate.
new_results['energy_grad']

tensor([[ 2.4089e+03,  3.0791e+03,  1.4598e+03],
        [-2.3518e+03,  2.8206e+02, -4.4779e+03],
        [-1.1090e+02, -4.7694e+03,  1.5016e+03],
        [ 1.9310e+03,  1.5532e+02, -1.0137e+02],
        [-1.8984e+03,  1.6706e+03,  7.3232e+02],
        [ 1.4531e+03,  1.1911e+03,  7.2989e+02],
        [ 1.5092e+03, -1.2851e+03, -9.3721e+02],
        [-2.2094e+03, -6.2140e+02,  1.5650e+03],
        [-7.3174e+02,  2.9770e+02, -4.7213e+02],
        [-2.0865e+03, -6.4635e+02, -2.4769e+03],
        [ 2.9930e+02,  3.2812e+03,  2.7352e+03],
        [ 2.3394e+03, -3.1117e+03,  1.7292e+02],
        [-1.1095e+03, -1.0708e+03,  1.8805e+03],
        [ 9.8922e+02,  1.0066e+03, -9.4703e+02],
        [-1.2877e+03, -2.5153e+02, -7.2854e+02],
        [ 7.5023e+02, -9.5565e+02,  1.5086e+03],
        [ 8.6068e+02,  5.7083e+02, -1.4099e+03],
        [-7.5514e+02,  1.1775e+03, -7.3490e+02],
        [-2.8446e+03, -5.9175e+01,  2.6899e+02],
        [ 2.0062e+03,  2.7296e+03,  1.4062e+03],
        [ 1.5942e+03

In [82]:
from nff.utils.cuda import batch_to, batch_detach

# Re-fetch the first training batch and move it to DEVICE, rebinding `data`.
# `data` stays None if the loader yields nothing.
batch = next(iter(train_loader), None)
data = batch_to(batch, DEVICE) if batch is not None else None

In [83]:
# Unpack the nine quantities the model derives from the batch.
# NOTE(review): their meanings are inferred from the names only (e.g. e_rbf /
# a_sbf presumably radial / spherical basis expansions) -- confirm against
# ForceDime.get_prelims.
xyz, e_rbf, a_sbf, nbr_list, angle_list, num_atoms, z, kj_idx, ji_idx = model.get_prelims(data)


# Small constant added to every squared component inside `norm`.
EPS = 1e-15


def norm(vec):
    """Return the length of `vec` along its last axis.

    EPS is added to each squared component before summing, so the result is
    strictly positive even for an all-zero vector.
    """
    shifted_squares = vec.pow(2) + EPS
    return shifted_squares.sum(dim=-1).pow(0.5)

def xyz2adjoint(xyz):
    """Compute the two per-angle adjoint vectors for the coordinates `xyz`.

    Reads the module-level `angle_list`. Column 1 is the vertex shared by both
    bond vectors; columns 0 and 2 are their other endpoints (i and k).

    For each bond vector r with length d, the 3x3 matrix
    (-I*d + r r^T / d) / d**2 is contracted over its first index with the unit
    vector of the other bond.

    Returns:
        (angle_adjoint_ji, angle_adjoint_jk), each with one 3-vector per row
        of `angle_list`.
    """
    # 1*3*3 identity with the same dtype and device as xyz
    identity = torch.eye(3).unsqueeze(0).to(xyz)

    def bond(end_col):
        # vector from the shared vertex (column 1) to the atom in `end_col`,
        # its length as N*1*1, and its unit vector as N*3
        vec = xyz[angle_list[:, end_col]] - xyz[angle_list[:, 1]]
        length = norm(vec)[:, None, None]
        return vec, length, vec / length.squeeze(-1)

    def adjoint(vec, length, other_unit):
        # outer product r r^T (N*3*3), then contract with the other bond's unit vector
        outer = vec.unsqueeze(-1) * vec.unsqueeze(-2)
        return torch.einsum('ijk,ij->ik', (-identity*length + outer/length)/length**2, other_unit)

    r_ji, d_ji, unit_ji = bond(0)
    r_jk, d_jk, unit_jk = bond(2)

    angle_adjoint_ji = adjoint(r_ji, d_ji, unit_jk)
    angle_adjoint_jk = adjoint(r_jk, d_jk, unit_ji)

    return angle_adjoint_ji, angle_adjoint_jk

# Adjoint pairs for the original coordinates (a) and for the coordinates
# multiplied by `rotate` (b).
a, b = (xyz2adjoint(coords) for coords in (xyz, xyz @ rotate))

In [84]:
# First adjoint (angle_adjoint_ji) of the unrotated coordinates, multiplied by
# `rotate` afterwards; compare against b[0].
a[0] @ rotate

tensor([[-0.1572,  0.3815, -0.0041],
        [ 0.3508, -0.0146,  0.0154],
        [-0.1712, -0.2301, -0.0092],
        ...,
        [-0.2771, -0.0931, -0.1060],
        [-0.2363, -0.2324, -0.0254],
        [ 0.0182, -0.3153,  0.1435]], device='cuda:0')

In [85]:
# First adjoint (angle_adjoint_ji) computed from the rotated coordinates; the
# displayed values agree with `a[0] @ rotate`.
b[0]

tensor([[-0.1572,  0.3815, -0.0041],
        [ 0.3508, -0.0146,  0.0154],
        [-0.1712, -0.2301, -0.0092],
        ...,
        [-0.2771, -0.0931, -0.1060],
        [-0.2363, -0.2324, -0.0254],
        [ 0.0182, -0.3153,  0.1435]], device='cuda:0')