# Tutorial: building a condensed-phase dataset for CG simulations

In [38]:
import sys
import numpy as np
from importlib import reload
import torch
from torch.utils.data import DataLoader

# make sure NeuralForceField is in your path, if not add it to your python paths
# sys.path.insert(0, '/home/wwj/Repo/playgrounds/NeuralForceField/')

import pickle 
from nff.io.ase import NeuralFF, AtomsBatch, BulkPhaseMaterials
from ase import Atoms

This dataset contains 64 ethane molecules in a periodic box; each molecule has 8 atoms.
The coarse-grained representation reduces the 8 atoms of each molecule to 2 pseudo-atoms.

In [39]:
# System size: molecules per frame, atoms per molecule, CG pseudo-atoms per molecule.
N, N_atom, N_cg = 64, 8, 2

This dataset contains 64 ethane molecules in a periodic box; each molecule has 8 atoms.
The coarse-grained representation reduces the 8 atoms of each molecule to 2 pseudo-atoms.

In [40]:

# Load the all-atom trajectory and forces, then view each one as
# (frame, molecule, atom, xyz) using the system sizes N and N_atom.
# NOTE: pickle can execute arbitrary code while loading; only read trusted files.
# Context managers guarantee the file handles are closed after reading.
with open("data/C2_64_small_xyz.pickle", "rb") as xyz_file:
    xyz = pickle.load(xyz_file).reshape(-1, N, N_atom, 3)

with open("data/C2_64_small_force.pickle", "rb") as force_file:
    force = pickle.load(force_file).reshape(-1, N, N_atom, 3)

In [41]:
# A single ethane molecule: two carbons followed by six hydrogens.
ethane = Atoms(numbers=[6, 6] + [1] * 6)

# Mapping operator: row k flags (with 1.0) the atoms assigned to CG pseudo-atom k.
CG_map = np.array([
    [0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0],
    [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0],
])

# Weight the mapping by atomic mass, then normalize every row to sum to one
# so that each row yields the center of mass of its pseudo-atom.
CG_com = CG_map * ethane.get_masses()
CG_com = CG_com / CG_com.sum(axis=1, keepdims=True)

# CG coordinates come from the mass-weighted map; CG forces are the plain
# sum of the atomic forces flagged in each row of CG_map.
cg_xyz = np.matmul(CG_com, xyz)
cg_force = np.matmul(CG_map, force)

Here we use a new object called BulkPhaseMaterials to simulate periodic amorphous materials. It defines interactions among atoms differently: it keeps two adjacency matrices. It is designed to be compatible with HybridGraphConv. See part 2 of this tutorial for training.

It might take a while to build the dataset:

In [42]:
Atoms_batch_list = []

# Number of leading trajectory frames converted into the dataset. The loop
# below slices with this value so it stays in sync with any later cell that
# also uses N_frames.
N_frames = 300

# Cutoffs handed to the two neighbor-list builders below.
# NOTE(review): presumably the intramolecular and intermolecular cutoffs,
# in the same length unit as the cell — confirm.
ATOMS_NBR_CUTOFF = 4.0
SYSTEM_NBR_CUTOFF = 5.0

# Use the first GPU when available; otherwise fall back to the CPU so the
# cell still runs on machines without CUDA.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# define cells
cell = [[15.9808, 0, 0],
        [0, 16.887199, 0],
        [0, 0, 19.932]]

for i, frame in enumerate(cg_xyz[:N_frames]):

    if i % 50 == 0:
        print("processing frame number {}".format(i))

    # Per-frame properties attached to the box.
    props = dict()
    props["num_subgraphs"] = torch.LongTensor([N_cg] * N)  # N_cg pseudo-atoms for each of the N molecules
    props["num_atoms"] = torch.LongTensor([N * N_cg])  # total pseudo-atoms in the frame
    props["energy_grad"] = -torch.Tensor(cg_force[i])  # negated CG forces

    box = BulkPhaseMaterials(numbers=[1, 1] * N,
                             positions=frame.reshape(N * N_cg, 3),
                             cell=cell,
                             pbc=True,
                             nbr_torch=True,
                             device=device,
                             props=props)

    box.update_atoms_nbr_list(ATOMS_NBR_CUTOFF)
    box.update_system_nbr_list(SYSTEM_NBR_CUTOFF)

    Atoms_batch_list.append(box)

processing frame number 0
processing frame number 50
processing frame number 100
processing frame number 150
processing frame number 200
processing frame number 250


In [49]:
# Gather the per-frame quantities into one dictionary of lists (one entry per frame).
# Two neighbor lists are stored; they were built with two different cutoffs,
# corresponding to the intramolecular and intermolecular cutoff.
props = {}

# atomic number and xyz
props['nxyz'] = [box.get_nxyz() for box in Atoms_batch_list]
# negative forces
props['energy_grad'] = [-frame_force for frame_force in cg_force[:N_frames]]
# number of subgraphs
props['num_subgraphs'] = [box.props['num_subgraphs'] for box in Atoms_batch_list]
# total number of atoms
props['num_atoms'] = [box.props['num_atoms'] for box in Atoms_batch_list]
# intramolecular neighbor list
props['atoms_nbr_list'] = [box.atoms_nbr_list for box in Atoms_batch_list]
# intermolecular neighbor list
props['nbr_list'] = [box.nbr_list for box in Atoms_batch_list]
# PyTorch glitch: a torch.sparse tensor has no storage, so convert it back to a dense tensor
props['offsets'] = [box.offsets.to_dense() for box in Atoms_batch_list]
# cell dimensions
props['cell'] = [box.get_cell() for box in Atoms_batch_list]

In [50]:
# Write the dataset to disk. The context manager flushes and closes the file
# as soon as the dump finishes, instead of leaving the handle open.
with open("./data/ethane_data.pkl", "wb") as dataset_file:
    pickle.dump(props, dataset_file)

In [48]:
# Sanity check: number of frames stored in the dataset.
len(props["energy_grad"])

300

In [38]:
# reload dataset (same path the dataset was written to above)
# props = pickle.load(open("./data/ethane_data.pkl", "rb"))

In [None]:
# Next step: see the companion tutorial for bulk-CG training and sampling.