In [1]:
import sys
import numpy as np
from importlib import reload
import networkx as nx

# sys.path.insert(0, '/path/to/temp_graph/')

import torch
import torch.nn as nn
import copy
import torch.nn.functional as F

from nff.train import Trainer, get_trainer, get_model, loss, hooks, metrics, evaluate, load_model
from torch.optim import Adam
from sklearn.metrics import mean_absolute_error

from torch.utils.data import DataLoader
import nff.data as d
import pickle

from nff.data import Dataset, split_train_validation_test, collate_dicts, sparsify_tensor
from nff.io.ase import * 
from nff.nn.glue import Stack

from ase import Atoms
from ase.neighborlist import neighbor_list
from nff.data.sparse import sparsify_array

from nff.md.nvt import * 
from ase import units
from nff.io import NeuralFF
from nff.md.nve import * 

import pandas as pd



In [8]:
def data_loader():
    """Load the coarse-grained trajectories of all temperatures and build loaders.

    One pickled property dictionary per temperature (300, 350, 450, 500) is
    read from ``./data``. The dictionaries are concatenated key by key, the
    bond topology and an inverse-temperature label are attached to every
    frame, and the resulting dataset is split into train / validation / test
    subsets that are each wrapped in a ``DataLoader`` with batch size 1.

    Returns:
        tuple: ``(dataset, train_loader, val_loader, test_loader)``.
    """
    ## There are more file names, because when training on the full dataset with
    ## multiple temperatures, all of them are loaded together
    PATH = './data'
    temperatures = [300, 350, 450, 500]
    cg_paths = ['{}/p300_CG5_T{}_intra7_inter8.pkl'.format(PATH, t) for t in temperatures]

    per_temp_props = []
    for cg_path in cg_paths:
        # NOTE(security): pickle can execute arbitrary code on load; only use
        # trusted data files here.
        with open(cg_path, "rb") as handle:  # context manager closes the file
            per_temp_props.append(pickle.load(handle))

    for frame_props in per_temp_props:
        frame_props['cell'] = [torch.Tensor(cell) for cell in frame_props['cell']]

    # Record how many frames each temperature contributes *before* merging,
    # so the temperature labels below always line up with the data.
    frames_per_temp = [len(frame_props['nxyz']) for frame_props in per_temp_props]

    # Concatenate every property across temperatures, in file order.
    props = per_temp_props[0]
    for key in props:
        for other in per_temp_props[1:]:
            props[key] = props[key] + other[key]

    # Bond index pairs (5i, 5i+1), (5i+1, 5i+2), (5i+2, 5i+3) for i in 0..299,
    # giving a (900, 2) LongTensor that is shared by every frame.
    bonds = torch.LongTensor([[5 * i + j, 5 * i + j + 1]
                              for i in range(300) for j in range(3)])

    n_frames = len(props['nxyz'])
    props['bonds'] = [bonds for _ in range(n_frames)]
    props['num_bonds'] = [torch.LongTensor([3]) for _ in range(n_frames)]
    props['bond_len'] = [torch.Tensor([2.2439, 2.8182, 2.5558]) for _ in range(n_frames)]

    props['smiles'] = ['CCCC'] * n_frames

    # Each frame is labelled with the inverse of its temperature.
    inv_temps = 1 / np.array(temperatures)
    props['temp'] = torch.cat([torch.zeros(n_temp_frames) + inv_temp
                               for n_temp_frames, inv_temp in zip(frames_per_temp, inv_temps)])

    dataset = d.Dataset(props.copy(), units='kcal/mol')

    train, val, test = split_train_validation_test(dataset, val_size=0.1, test_size=0.01)

    train_loader = DataLoader(train, batch_size=1, collate_fn=collate_dicts)
    val_loader = DataLoader(val, batch_size=1, collate_fn=collate_dicts)
    test_loader = DataLoader(test, batch_size=1, collate_fn=collate_dicts)

    return dataset, train_loader, val_loader, test_loader

In [9]:
# Build the combined dataset together with its train / validation / test loaders.
dataset, train_loader, val_loader, test_loader = data_loader()

In [10]:
from nff.utils import batch_to
# Take the first batch from the training loader and pass it through batch_to
# with target "cpu"; it serves as a sample input for the model.
batch = batch_to( next(iter(train_loader)), "cpu")

### Running the default model, without any changes

In [11]:
# Parameters for the model described in the paper
modelparams = {
    'n_atom_basis': 240,
    'n_filters': 256,
    'n_gaussians': 80,
    'mol_n_convolutions': 3,
    'sys_n_convolutions': 2,
    'mol_cutoff': 7,
    'sys_cutoff': 8,
    'V_ex_power': 7,
    'V_ex_sigma': 5.730579,
    'dropout_rate': 0,
    'temp_type': 'mult',
}

# Parameters handed to the 'BondPrior' model.
bondparams = {
    'k': 11.286249,
    'dif_bond_len': True,
}

# Instantiate both sub-models through get_model (bond prior first, then the
# temperature-aware CG graph model).
bondprior = get_model(bondparams, model_type='BondPrior')
temp_transfer = get_model(modelparams, model_type='cg_temp_graph')

In [12]:
from nff.nn.glue import Stack

# Combine the bond prior and the temperature-aware model in a Stack built
# with mode='sum'.
model_dict = {
    'bondprior': bondprior,
    'temp_transfer': temp_transfer,
}
stack = Stack(model_dict, mode='sum')

# Forward pass on the sample batch; the returned dict is shown as cell output.
stack(batch)

{'energy': tensor([[31372.5039]], grad_fn=<AsStridedBackward>),
 'energy_grad': tensor([[ -9.5633,  -7.0947,   0.6039],
         [ -4.9302,  -7.5194,  23.1142],
         [ -6.7983,   9.3507,   7.6828],
         ...,
         [ 14.8118,  10.5061,   4.0798],
         [-20.4616, -12.3678,  -2.4416],
         [ 10.1174,   0.5738,   7.3089]], grad_fn=<AddBackward0>)}

In [23]:
# Output directory handed to the training hooks and to the Trainer.
OUTDIR = './models/t_nff'

In [16]:
from shutil import rmtree
import os

# Loss built by build_mse_loss, with a coefficient of 1 on 'energy_grad' only.
loss_fn = loss.build_mse_loss(loss_coef={'energy_grad': 1})

# Optimise only the parameters that require gradients (change this selection
# to train a different subset of parameters). A list is used instead of a lazy
# filter() object so that `trainable_params` is still populated after the
# optimizer has consumed it.
trainable_params = [p for p in stack.parameters() if p.requires_grad]
optimizer = Adam(trainable_params, lr=3e-4)

# Metrics handed to the CSV and printing hooks.
train_metrics = [
    metrics.MeanAbsoluteError('energy_grad')
]

train_hooks = [
    hooks.MaxEpochHook(100),
    hooks.CSVHook(
        OUTDIR,
        metrics=train_metrics,
    ),
    hooks.PrintingHook(
        OUTDIR,
        metrics=train_metrics,
        separator = ' | ',
        time_strf='%M:%S'
    ),
    hooks.ReduceLROnPlateauHook(
        optimizer=optimizer,
        patience=30,
        factor=0.5,
        min_lr=1e-7,
        window_length=1,
        stop_after_min=True
    )
]

# WARNING: an existing OUTDIR is deleted recursively, including any model
# files already stored there. Back them up before running this cell.
if os.path.exists(OUTDIR):
    print('exists')
    rmtree(OUTDIR)

In [17]:
# Assemble the trainer from the model, the data loaders, the optimisation
# objects and the hooks defined in the previous cells.
T = Trainer(
    # what is trained and where its outputs go
    model=stack,
    model_path=OUTDIR,
    # data
    train_loader=train_loader,
    validation_loader=val_loader,
    # optimisation
    loss_fn=loss_fn,
    optimizer=optimizer,
    # bookkeeping
    hooks=train_hooks,
    checkpoint_interval=1,
)

In [None]:
# Start training with device=0 and n_epochs=15.
T.train(device=0, n_epochs=15)

 Time | Epoch | Learning rate | Train loss | Validation loss | MAE_energy_grad | GPU Memory (MB)
11:56 |     1 |     3.000e-04 |   364.5730 |        355.2454 |         13.4167 |            1187
32:33 |     2 |     3.000e-04 |   355.2182 |        353.7612 |         13.3803 |            1187


#### The models used in the paper are provided in the `./models` directory.
The temperature-transferable embedding model is located in the `t_nff` subdirectory, and the model without temperature embedding is in the `nff` subdirectory.

In [16]:
# Directory of the model that is loaded and simulated in the following cells.
OUTDIR = './models/t_nff'

In [24]:
# Load the model stored under OUTDIR.
model = load_model(OUTDIR)

In [None]:
from ase import Atoms
from ase.neighborlist import neighbor_list
from nff.data.sparse import sparsify_array

from nff.md.nvt import * 
from ase import units
from nff.io import NeuralFF
from nff.md.nve import * 


DEFAULT_CUTOFF = 5.0

# Properties of the simulated system: the first entry of every dataset
# property, then the two cutoffs and the temperature label (1/400, stored as
# a one-element tensor) set explicitly.
system_prop = {key: val[0] for key, val in dataset.props.items()}
system_prop.update(
    atoms_cutoff=7,
    system_cutoff=8,
    temp=torch.zeros(1) + 1/400,
)

In [None]:
from nff.io.ase import BulkPhaseMaterials

# The 5-bead pattern (types and masses) is tiled 300 times to describe the
# whole system.
n_repeats = 300
bead_numbers = [1, 2, 3, 4, 5]
bead_masses = [29.0407, 53.0607, 28.052, 29.06, 86.804612]  # mass of cg atoms

# Positions are columns 1:4 of the first 'nxyz' entry (presumably x, y, z -
# TODO confirm); the cell is taken from the same first frame.
bulk = BulkPhaseMaterials(
    numbers=bead_numbers * n_repeats,
    positions=dataset.props['nxyz'][0][:, 1:4],
    cell=dataset.props['cell'][0],
    pbc=True,
    props=system_prop,
)
bulk.set_masses(bead_masses * n_repeats)
bulk.update_nbr_list()

In [None]:
# `os` is otherwise only imported in the training cell; importing it here
# keeps this cell runnable when training is skipped and a saved model is loaded.
import os

timestep = 1
steps = 10000
temperature = 400

# Output folder of this run, named after the simulation temperature.
path = '{}/ase_T{}'.format(OUTDIR, temperature)
if os.path.exists(path):
    print('exists')
else:
    os.makedirs(path)

# Settings dictionary passed to Dynamics below.
DEFAULTNVEPARAMS = {
    'T_init': temperature,
    'thermostat': NoseHoover,
    # timestep is scaled by ase.units.fs and the temperature by ase.units.kB
    'thermostat_params': {'timestep': timestep * units.fs, "temperature": temperature * units.kB,  "ttime": 20.0},
    'nbr_list_update_freq': 10,
    'steps': steps/timestep,
    'save_frequency': 100/timestep,
    'thermo_filename': '{}/thermo.log'.format(path),
    'traj_filename': '{}/atoms.traj'.format(path),
    'skip': 0
}


# Attach the loaded model as calculator (device 0) and run the dynamics.
calc = NeuralFF(model=model, device=0)
bulk.set_calculator(calc)
nve = Dynamics(bulk, DEFAULTNVEPARAMS)

nve.run()