In [1]:
import sys
import numpy as np
from importlib import reload
import networkx as nx

# sys.path.insert(0, '/path/to/temp_graph/')
sys.path.insert(0, '/home/jurgis/PAPER/Temperature-transferable-NFF')

import torch
import torch.nn as nn
import copy
import torch.nn.functional as F

from nff.train import Trainer, get_trainer, get_model, loss, hooks, metrics, evaluate, load_model
from torch.optim import Adam
from sklearn.metrics import mean_absolute_error

from torch.utils.data import DataLoader
import nff.data as d
import pickle

from nff.data import Dataset, split_train_validation_test, collate_dicts, sparsify_tensor
from nff.io.ase import * 
from nff.nn.glue import Stack

from ase import Atoms
from ase.neighborlist import neighbor_list
from nff.data.sparse import sparsify_array

from nff.md.nvt import * 
from ase import units
from nff.io import NeuralFF
from nff.md.nve import * 

import pandas as pd



In [2]:
def data_loader(data_dir='./data'):
    """Build the 300 K coarse-grained dataset and its train/val/test loaders.

    Loads ``p300_CG_T300_intra7_inter8.pkl`` from ``data_dir``, converts the
    per-frame cells to tensors, attaches the bond topology, reference bond
    lengths, a SMILES label and the inverse temperature to every frame, and
    wraps everything into an ``nff.data.Dataset``.

    Multi-temperature training: the T350/T450/T500 pickles are loaded the same
    way, every property list is concatenated key by key, and ``props['temp']``
    is extended with the matching ``1 / T`` entry for each extra block of frames.

    Args:
        data_dir: directory that contains the pickled property dictionary.

    Returns:
        tuple: ``(dataset, train_loader, val_loader, test_loader)``; all three
        loaders use ``batch_size=1`` and ``collate_dicts``.
    """
    cg_path = '{}/p300_CG_T300_intra7_inter8.pkl'.format(data_dir)

    # NOTE(security): pickle executes arbitrary code on load; only open trusted files.
    # The context manager closes the file handle (the bare open() leaked it).
    with open(cg_path, 'rb') as handle:
        props = pickle.load(handle)

    n_frames = len(props['nxyz'])

    props['cell'] = [torch.Tensor(cell) for cell in props['cell']]

    # Three consecutive bonds per 5-site group: (5i, 5i+1), (5i+1, 5i+2), (5i+2, 5i+3)
    # for i in range(300), giving a (900, 2) index tensor. Site 5i+4 is not bonded here.
    bonds = torch.LongTensor([[5 * mol + offset, 5 * mol + offset + 1]
                              for mol in range(300)
                              for offset in range(3)])

    # The same bond tensor object is shared by every frame.
    props['bonds'] = [bonds for _ in range(n_frames)]
    props['num_bonds'] = [torch.LongTensor([3]) for _ in range(n_frames)]
    # One reference length per bond type; presumably the mean length of each
    # bond type over the trajectory -- TODO confirm.
    props['bond_len'] = [torch.Tensor([2.2439, 2.8182, 2.5558]) for _ in range(n_frames)]

    props['smiles'] = ['CCCC'] * n_frames

    # Inverse temperatures for 300, 350, 450 and 500; only the first (300) is
    # used here. Sized from the data instead of a hard-coded frame count so the
    # tensor can never go out of sync with the number of loaded frames.
    inv_temps = 1 / np.array([300, 350, 450, 500])
    props['temp'] = torch.zeros(n_frames) + inv_temps[0]

    dataset = d.Dataset(props.copy(), units='kcal/mol')

    train, val, test = split_train_validation_test(dataset, val_size=0.1, test_size=0.01)

    train_loader = DataLoader(train, batch_size=1, collate_fn=collate_dicts)
    val_loader = DataLoader(val, batch_size=1, collate_fn=collate_dicts)
    test_loader = DataLoader(test, batch_size=1, collate_fn=collate_dicts)

    return dataset, train_loader, val_loader, test_loader

In [3]:
# Build the dataset and the train / validation / test loaders with data_loader() defined above.
dataset, train_loader, val_loader, test_loader = data_loader()

In [4]:
from nff.utils import batch_to

# Take the first collated batch of the training loader and move it to the CPU;
# it is used as a sample input for the model assembled below.
first_train_batch = next(iter(train_loader))
batch = batch_to(first_train_batch, "cpu")

### Running the default model, without any changes

In [5]:
# Hyperparameters of the model described in the paper.
# mol_cutoff / sys_cutoff equal the "intra7" / "inter8" values in the data file
# name -- presumably the intra- and inter-molecular cutoffs; TODO confirm.
modelparams = {
    'n_atom_basis': 160,
    'n_filters': 256,
    'n_gaussians': 64,
    'mol_n_convolutions': 3,
    'sys_n_convolutions': 3,
    'mol_cutoff': 7,
    'sys_cutoff': 8,
    'V_ex_power': 10,
    'V_ex_sigma': 3.8553023965125024,
    'dropout_rate': 0,
    'temp_type': 'mult',
}

# Settings of the bond prior term.
bondparams = {
    'k': 20.758080279097705,
    'dif_bond_len': True,
}

# Instantiate both sub-models through the NFF model factory.
bondprior = get_model(bondparams, model_type='BondPrior')
temp_transfer = get_model(modelparams, model_type='cg_temp_graph')

In [6]:
from nff.nn.glue import Stack

# Combine the bond prior and the temperature-aware network into one model
# (mode='sum' presumably adds the sub-model outputs -- confirm in nff.nn.glue).
model_dict = {
    'bondprior': bondprior,
    'temp_transfer': temp_transfer,
}
stack = Stack(model_dict, mode='sum')

# Forward pass on the sample batch; the cell displays the returned dict.
stack(batch)

{'energy': tensor([[3161.5049]], grad_fn=<AsStridedBackward>),
 'energy_grad': tensor([[  0.2339,  -0.5899,   1.5959],
         [ -2.0033,   0.6025,  -3.8252],
         [-11.3451,  22.1112,   7.0605],
         ...,
         [  6.2783,  -3.3613,   2.4024],
         [  0.7824,   1.4860,  -0.0779],
         [  1.8330,  -0.6764,   4.4105]], grad_fn=<AddBackward0>)}

In [7]:
# Output directory handed to the logging hooks and to the Trainer (as model_path) below.
OUTDIR = './example/training/dummy'

In [8]:
import os
from shutil import rmtree

# Mean-squared-error loss on the energy gradients only.
loss_fn = loss.build_mse_loss(loss_coef={'energy_grad': 1})

# Parameters handed to the optimizer; edit this selection to train a different
# subset of parameters. Materialised as a list because a bare filter() object is
# a one-shot iterator: Adam would exhaust it and leave the name empty afterwards.
trainable_params = [p for p in stack.parameters() if p.requires_grad]
optimizer = Adam(trainable_params, lr=3e-4)

# Metric reported alongside the loss in the logs.
train_metrics = [
    metrics.MeanAbsoluteError('energy_grad')
]

train_hooks = [
    hooks.MaxEpochHook(100),
    hooks.CSVHook(
        OUTDIR,
        metrics=train_metrics,
    ),
    hooks.PrintingHook(
        OUTDIR,
        metrics=train_metrics,
        separator=' | ',
        time_strf='%M:%S'
    ),
    hooks.ReduceLROnPlateauHook(
        optimizer=optimizer,
        patience=30,
        factor=0.5,
        min_lr=1e-7,
        window_length=1,
        stop_after_min=True
    )
]

# Remove the output of any previous run so training starts from an empty directory.
if os.path.exists(OUTDIR):
    print('exists')
    rmtree(OUTDIR)

exists


In [9]:
# Wire the stacked model, optimizer, loss, data loaders and hooks into the
# trainer; OUTDIR is where it keeps its files for this run.
T = Trainer(
    model=stack,
    model_path=OUTDIR,
    optimizer=optimizer,
    loss_fn=loss_fn,
    train_loader=train_loader,
    validation_loader=val_loader,
    hooks=train_hooks,
    checkpoint_interval=1,
)

In [10]:
# Train for 15 epochs on device 0 (presumably the first CUDA GPU -- confirm for this Trainer).
T.train(device=0, n_epochs=15)

 Time | Epoch | Learning rate | Train loss | Validation loss | MAE_energy_grad | GPU Memory (MB)
25:54 |     1 |     3.000e-04 |   309.8748 |        298.8344 |         12.3961 |             522


#### The models used in the paper are provided in the `./models` directory.
The temperature-transferable embedding model is located in the `t_nff` subdirectory, and the model without temperature embedding in the `nff` one.

In [16]:
# Rebinds OUTDIR (previously the training output directory) to the pretrained model directory.
OUTDIR = './models/t_nff'

In [17]:
# Load the pretrained model stored under OUTDIR.
model = load_model(OUTDIR)

In [18]:
# Same imports as in the first cell, repeated so this section can be run on its
# own. The order of the two star imports is kept: it decides which names win.
from ase import Atoms
from ase.neighborlist import neighbor_list
from nff.data.sparse import sparsify_array

from nff.md.nvt import *
from ase import units
from nff.io import NeuralFF
from nff.md.nve import *


# Not referenced by the visible cells; kept for anything that relies on it.
DEFAULT_CUTOFF = 5.0

# Start from the first entry of every dataset property, then override / add the
# values the simulation needs: the two cutoffs and the inverse temperature 1/300.
system_prop = {name: values[0] for name, values in dataset.props.items()}
system_prop.update({
    'atoms_cutoff': 7,
    'system_cutoff': 8,
    'temp': torch.zeros(1) + 1/300,
})

In [19]:
from nff.io.ase import BulkPhaseMaterials

# First stored frame; columns 1:4 of 'nxyz' are used as the positions.
first_frame_nxyz = dataset.props['nxyz'][0]

# 300 repeats of the five site labels 1..5, placed in the first frame's cell
# with periodic boundaries.
bulk = BulkPhaseMaterials(
    numbers=[1, 2, 3, 4, 5] * 300,
    positions=first_frame_nxyz[:, 1:4],
    cell=dataset.props['cell'][0],
    pbc=True,
    props=system_prop,
)

# mass of cg atoms, one value per site label, repeated for all 300 groups
bulk.set_masses([29.0407, 53.0607, 28.052, 29.06, 86.804612] * 300)
bulk.update_nbr_list()

In [20]:
# Simulation settings. `timestep` is in fs (it is multiplied by units.fs below);
# `steps` is divided by `timestep` to get the number of integration steps, so it
# effectively acts as the total simulated time in fs.
timestep = 1
steps = 2000
temperature = 300

# Derive the output folder from `temperature` instead of hard-coding "T300",
# so changing the temperature cannot overwrite the 300 K results.
path = '{}/ase_T{}'.format(OUTDIR, temperature)
if os.path.exists(path):
    print('exists')
else:
    os.makedirs(path)

# Despite the name, these settings attach a Nose-Hoover thermostat.
DEFAULTNVEPARAMS = {
    'T_init': temperature,
    'thermostat': NoseHoover,
    'thermostat_params': {'timestep': timestep * units.fs,
                          'temperature': temperature * units.kB,
                          'ttime': 20.0},
    'nbr_list_update_freq': 10,
    # Step counts are integers; true division alone would yield floats.
    'steps': int(steps / timestep),
    'save_frequency': int(100 / timestep),
    'thermo_filename': '{}/thermo.log'.format(path),
    'traj_filename': '{}/atoms.traj'.format(path),
    'skip': 0
}


# Evaluate the loaded model on device 0 and use it as the calculator of `bulk`.
calc = NeuralFF(model=model, device=0)
bulk.set_calculator(calc)
nve = Dynamics(bulk, DEFAULTNVEPARAMS)

nve.run()

Time[ps]      Etot[eV]     Epot[eV]     Ekin[eV]    T[K]
0.0000           153.51        92.00        61.51   317.2

0.1000           119.29        65.89        53.41   275.5

0.2000           105.91        51.37        54.54   281.3

0.3000           108.45        46.59        61.86   319.1

0.4000           108.63        46.00        62.63   323.0

0.5000            97.73        42.52        55.21   284.7

0.6000            95.32        40.07        55.25   284.9

0.7000            98.35        37.83        60.52   312.2

0.8000           101.52        38.78        62.75   323.6

0.9000            91.97        36.13        55.84   288.0

1.0000            91.23        36.74        54.49   281.0

1.1000            94.46        35.69        58.76   303.1

1.2000            97.60        35.55        62.05   320.0

1.3000            91.03        34.07        56.96   293.8

1.4000            88.79        33.75        55.03   283.8

1.5000            92.47        31.37        61.09   315.1
