In [1]:
import pickle
import os 
import numpy as np 
import dgl
from gaspy_utils import make_atoms_from_doc
from pymatgen.io.ase import AseAtomsAdaptor
import json
from pymatgen.core.structure import Structure
from pymatgen.analysis.structure_analyzer import VoronoiConnectivity
from ase.constraints import FixAtoms
import copy

In [2]:
# Load the pickled collection of documents; the cells below iterate over it and
# read each entry's 'adsorbate' key.
# NOTE(review): pickle.load can execute arbitrary code while unpickling — only
# open files that come from a trusted source.
with open('gaspy_docs/docs.pkl','rb') as infile:
    gasdb = pickle.load(infile)

In [3]:
# Collect the 'adsorbate' entry of every document (duplicates included).
adsorbates = [doc['adsorbate'] for doc in gasdb]

In [4]:
# Drop duplicates. Going through a set does not keep the original ordering.
adsorbates = list(set(adsorbates))

In [5]:
# Report the number of entries currently held in `adsorbates`.
print(f"We have {len(adsorbates)} unique adsorbates")

We have 5 unique adsorbates
We have 5 unique adsorbates


In [6]:
# Build an atoms object from the first document; it is the working example for
# the rest of the notebook (converted via AseAtomsAdaptor and queried for tags
# and constraints further down).
test_atoms = make_atoms_from_doc(gasdb[0])

In [7]:
# Convert the atoms object into the structure type consumed by VoronoiConnectivity.
crystal = AseAtomsAdaptor.get_structure(test_atoms)

In [8]:
class AtomInitializer(object):
    """
    Common base for objects that map an atom type to its vector representation.

    !!! Use one AtomInitializer per dataset !!!
    """
    def __init__(self, atom_types):
        # The embedding table starts empty; it is filled later, either by a
        # subclass or through load_state_dict.
        self._embedding = {}
        self.atom_types = set(atom_types)

    def get_atom_fea(self, atom_type):
        """Return the stored representation of `atom_type`, which must be a known type."""
        assert atom_type in self.atom_types
        return self._embedding[atom_type]

    def load_state_dict(self, state_dict):
        """Adopt `state_dict` as the embedding table and rebuild the derived lookups."""
        self._embedding = state_dict
        self.atom_types = set(state_dict.keys())
        self._decodedict = dict(
            (value, atom_type) for atom_type, value in state_dict.items())

    def state_dict(self):
        """Return the embedding table itself (not a copy)."""
        return self._embedding

    def decode(self, idx):
        """Map an embedding value back to its atom type.

        The reverse table is built on first use and cached on the instance.
        """
        try:
            reverse_table = self._decodedict
        except AttributeError:
            reverse_table = dict(
                (value, atom_type)
                for atom_type, value in self._embedding.items())
            self._decodedict = reverse_table
        return reverse_table[idx]


class AtomCustomJSONInitializer(AtomInitializer):
    """
    Atom initializer whose feature vectors are read from a JSON file.

    The file holds a dictionary that maps an element number to the list of
    values making up that element's feature vector.

    Parameters
    ----------

    elem_embedding_file: str
        The path to the .json file
    """
    def __init__(self, elem_embedding_file):
        with open(elem_embedding_file) as handle:
            raw_table = json.load(handle)

        # Keys come back from JSON as strings; convert them to ints.
        table_by_number = {}
        for raw_key, feature_list in raw_table.items():
            table_by_number[int(raw_key)] = feature_list

        super().__init__(set(table_by_number))

        # Store every feature list as a float numpy array.
        for number, feature_list in table_by_number.items():
            self._embedding[number] = np.array(feature_list, dtype=float)

In [9]:
# Load the per-element feature table. This `ari` instance is read as a global
# inside crystal_atom_featurizer.
ari = AtomCustomJSONInitializer('../atom_init.json')

In [10]:
from collections import defaultdict
from dgl import backend as F

In [11]:
def crystal_atom_featurizer(atoms):
    """
    Build the per-atom (node) feature tensor for an ASE atoms object.

    Each atom's row is its element feature vector (looked up by atomic number
    in the module-level ``ari`` initializer) followed by two extra entries:
    the atom's tag and a flag telling whether the atom is listed in a
    ``FixAtoms`` constraint of ``atoms``.

    Parameters
    ----------
    atoms: ase.Atoms
        The structure to featurize. Its own tags and constraints are used;
        a structure without any ``FixAtoms`` constraint is treated as having
        no fixed atoms.

    Returns
    -------
    atom_feats_dict: defaultdict(list)
        ``atom_feats_dict['n_feat']`` holds the float32 feature rows of all
        atoms stacked along dimension 0.
    """
    atom_feats_dict = defaultdict(list)
    num_atoms = atoms.get_global_number_of_atoms()
    atomic_numbers = atoms.get_atomic_numbers()
    tags = atoms.get_tags()

    # Read the constraints from the atoms object that was passed in (not from
    # a notebook global), and gather the indices of every FixAtoms constraint.
    # An empty set is the natural result when no such constraint exists.
    fix_atoms_indices = set()
    for constraint in atoms.constraints:
        if isinstance(constraint, FixAtoms):
            fix_atoms_indices.update(constraint.get_indices())

    for i in range(num_atoms):
        # Start from the element features loaded from JSON, as a plain list
        atom_feats = list(ari.get_atom_fea(atomic_numbers[i]))
        atom_feats.append(tags[i])
        atom_feats.append(i in fix_atoms_indices)
        # Everything (including the boolean flag) is cast to float32
        atom_feats_dict['n_feat'].append(
            F.tensor(np.array(atom_feats).astype(np.float32)))
    # Replace the list of per-atom tensors by one stacked tensor
    atom_feats_dict['n_feat'] = F.stack(atom_feats_dict['n_feat'], dim=0)
    return atom_feats_dict

In [12]:
# Node features for the example structure; attached to the graph's ndata later.
atoms_feats_dict = crystal_atom_featurizer(test_atoms)

In [13]:
# Atoms object built from the 'initial_configuration' sub-document of the same entry.
test_atoms_initial_config = make_atoms_from_doc(gasdb[0]['initial_configuration'])

In [14]:
# Convert a deep copy of the initial-configuration atoms, leaving the object
# created above untouched.
atoms_initial_config = copy.deepcopy(test_atoms_initial_config)
crystal_initial_config = AseAtomsAdaptor.get_structure(atoms_initial_config)

In [15]:
# Voronoi connectivity of the `crystal` structure and of the initial configuration.
# The arrays are indexed further down as [central atom][neighbor atom][PBC image].
# Independent deep copies of both connectivity arrays are kept.
VC = VoronoiConnectivity(crystal)
VC_initial_config = VoronoiConnectivity(crystal_initial_config)
conn = copy.deepcopy(VC.connectivity_array)
conn_initial_config = copy.deepcopy(VC_initial_config.connectivity_array)

In [16]:
# Selects how neighbor strengths are computed in the neighbor loop. The values
# handled explicitly there are 'initial', 'final' and 'final-adsorbate'; any
# other value is treated like 'final'.
train_geometry='final-adsorbate'

In [17]:
# Peek at one row of the connectivity array: entry [0][0] over its last axis.
conn[0][0]

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.24319872, 0.31252986, 0.        , 0.        ,
       0.        , 0.00551201, 0.        , 0.00551201, 0.        ,
       0.        , 0.        , 0.31252986, 0.24319872, 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.     

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.24319872, 0.31252986, 0.        , 0.        ,
       0.        , 0.00551201, 0.        , 0.00551201, 0.        ,
       0.        , 0.        , 0.31252986, 0.24319872, 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.     

In [18]:
# Build, for every central atom, one [central index, strength, neighbor index]
# row per (neighbor atom, PBC image) combination. Combinations that do not
# count as neighbors get strength 0.0 so every atom ends up with the same
# number of rows.
all_nbrs = []
# Tags do not change inside the loops, so read them once.
atom_tags = test_atoms.get_tags()
# The initial-geometry connectivity is only consulted for these two modes.
needs_initial = train_geometry in ('initial', 'final-adsorbate')

# Loop over central atom
for ii in range(conn.shape[0]):
    curnbr = []
    # Normalisation constants depend only on the central atom, so compute
    # them once per atom instead of once per (neighbor, image) entry.
    final_max = np.max(conn[ii])
    initial_max = np.max(conn_initial_config[ii]) if needs_initial else None

    # Loop over neighbor atoms
    for jj in range(conn.shape[1]):

        # Loop over each possible PBC image of the chosen neighbor
        for kk in range(conn.shape[2]):
            # Compare by value: an identity test (`is not`) on ints only
            # gives the expected answer while both values fall inside
            # CPython's small-integer cache.
            # NOTE(review): this compares the neighbor index with the image
            # index, while the intent described for this check is to skip the
            # central atom itself — confirm which comparison is wanted.
            if jj != kk and conn[ii][jj][kk] != 0:
                # Pick the neighbor strength according to train_geometry
                if train_geometry == 'initial':
                    strength = conn_initial_config[ii][jj][kk] / initial_max
                elif train_geometry == 'final-adsorbate':
                    # In order for this to work, each adsorbate atom should
                    # be set to tag==1 in the atoms object
                    if atom_tags[ii] == 1 or atom_tags[jj] == 1:
                        # Adsorbate bonds are binarised on the final geometry
                        if conn[ii][jj][kk] / final_max > 0.3:
                            strength = 1.0
                        else:
                            strength = 0.0
                    else:
                        strength = conn_initial_config[ii][jj][kk] / initial_max
                else:
                    # 'final' and every unrecognised value use the final geometry
                    strength = conn[ii][jj][kk] / final_max
            else:
                strength = 0.0
            curnbr.append([ii, strength, jj])
    all_nbrs.append(np.array(curnbr))

In [19]:
# Stack the per-atom row arrays into a single array.
all_nbrs = np.array(all_nbrs)

In [20]:
# Sanity check of the stacked array's dimensions.
all_nbrs.shape

(49, 5145, 3)

(49, 5145, 3)

In [21]:
# Order each atom's rows by strength (column 1), strongest first.
# After this cell `all_nbrs` is a list of sorted row lists, no longer an array.
all_nbrs = [sorted(nbrs, key=lambda x: x[1],reverse=True) for nbrs in all_nbrs]

In [22]:
# Keep only the first 12 rows per atom and split them into two arrays:
# column 2 (neighbor index) and column 1 (strength).
nbr_fea_idx = np.array([[row[2] for row in rows[:12]] for rows in all_nbrs])
nbr_fea = np.array([[row[1] for row in rows[:12]] for rows in all_nbrs])

In [23]:
class GaussianDistance(object):
    """
    Gaussian basis expansion of distances.

    Unit: angstrom
    """
    def __init__(self, dmin, dmax, step, var=None):
        """
        Parameters
        ----------

        dmin: float
          Minimum interatomic distance
        dmax: float
          Maximum interatomic distance
        step: float
          Step size for the Gaussian filter
        var: float, optional
          Width of each Gaussian; it is squared in the denominator of the
          exponent. Falls back to `step` when left as None.
        """
        assert dmin < dmax
        assert dmax - dmin > step
        # Gaussian centers: dmin, dmin + step, ... generated up to dmax + step (exclusive)
        self.filter = np.arange(dmin, dmax+step, step)
        self.var = step if var is None else var

    def expand(self, distances):
        """
        Evaluate every Gaussian of the filter at every entry of `distances`.

        Parameters
        ----------

        distance: np.array shape n-d array
          A distance matrix of any shape

        Returns
        -------
        expanded_distance: shape (n+1)-d array
          Expanded distance matrix with the last dimension of length
          len(self.filter)
        """
        # A trailing axis lets each distance broadcast against all centers.
        offsets = distances[..., np.newaxis] - self.filter
        return np.exp(-offsets**2 / self.var**2)

In [24]:
# Gaussian filter with centers from 0 to 8 in steps of 0.2.
# NOTE(review): the values expanded with this filter (`nbr_fea`) are normalised
# connectivity strengths rather than distances — confirm that a 0..8 range is
# the intended one.
gdf = GaussianDistance(0,8,0.2)

In [25]:
# Expand every neighbor strength on the Gaussian basis; this adds a trailing
# axis of length len(gdf.filter).
gdf_nbr = gdf.expand(nbr_fea)

In [26]:
# Container for the per-edge features ('e_feat' and 'gdf_feat' are filled below).
bond_feats_dict = defaultdict(list)

In [27]:
# Build the edge list of the graph together with the matching edge features.
# Every kept neighbor entry (i, j) contributes two directed edges, i -> j and
# j -> i, in that order.
src_list = []
dst_list = []
# Start from empty feature lists so that re-running this cell does not append
# a second copy of every edge feature.
bond_feats_dict['e_feat'] = []
bond_feats_dict['gdf_feat'] = []
for i in range(len(nbr_fea_idx)):
    # `slot` is the position of this entry in atom i's neighbor list. Using it
    # directly (instead of searching for the first occurrence of j) keeps the
    # right feature when the same neighbor index appears more than once.
    for slot, j in enumerate(nbr_fea_idx[i]):
        if i == j:
            # Entries pointing back at the central atom are skipped
            continue
        src_list.extend([int(i), int(j)])
        dst_list.extend([int(j), int(i)])
        # Edge i -> j carries the strength and its Gaussian expansion
        bond_feats_dict['e_feat'].append(np.array(nbr_fea[i][slot]))
        bond_feats_dict['gdf_feat'].append(np.array(gdf_nbr[i][slot]))
        # Edge j -> i carries zeros. The scalar zero is 0-d like the forward
        # feature, so the list converts to a regular (num_edges,) array
        # instead of a ragged one.
        # NOTE(review): confirm that zero features on the reverse edge are intended.
        bond_feats_dict['e_feat'].append(np.array(0.0))
        bond_feats_dict['gdf_feat'].append(np.zeros(gdf_nbr.shape[-1]))

In [28]:
# Number of edge feature entries; should equal the number of edges.
len(bond_feats_dict['e_feat'])

1156

1156

In [29]:
# Number of directed edges in the edge list.
len(src_list)

1156

1156

In [30]:
# Start from an empty graph; nodes and edges are added in the next cells.
g = dgl.DGLGraph()

In [31]:
# One node per atom of the example structure.
g.add_nodes(test_atoms.get_global_number_of_atoms())

In [32]:
# Add the directed edges src_list[k] -> dst_list[k].
g.add_edges(src_list,dst_list)

In [37]:
# Convert both per-edge feature lists into float32 backend tensors.
# After this cell the dict entries are tensors, no longer Python lists.
bond_feats_dict['e_feat'] = F.tensor(np.array(bond_feats_dict['e_feat']).astype(np.float32))
bond_feats_dict['gdf_feat'] = F.tensor(np.array(bond_feats_dict['gdf_feat']).astype(np.float32))

In [39]:
# GPU memory usage before the graph is moved to the device.
!nvidia-smi

Fri Apr 10 16:18:06 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 410.48                 Driver Version: 410.48                    |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla V100-PCIE...  Off  | 00000000:3B:00.0 Off |                    0 |
| N/A   42C    P0    38W / 250W |      0MiB / 16130MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  Tesla V100-PCIE...  Off  | 00000000:D8:00.0 Off |                    0 |
| N/A   40C    P0    39W / 250W |      0MiB / 16130MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-------

In [40]:
# Attach the node features and the edge features to the graph.
g.ndata.update(atoms_feats_dict)
g.edata.update(bond_feats_dict)

In [47]:
import torch

In [48]:
# Move the graph and its features to the first GPU.
# Depending on the DGL version, `to` either works in place or returns the
# moved graph; keep whichever object actually lives on the device so the
# result of the call is never silently discarded.
moved_graph = g.to(torch.device('cuda:0'))
if moved_graph is not None:
    g = moved_graph

In [49]:
# GPU memory usage after the graph has been moved to the device.
!nvidia-smi

Fri Apr 10 16:20:52 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 410.48                 Driver Version: 410.48                    |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla V100-PCIE...  Off  | 00000000:3B:00.0 Off |                    0 |
| N/A   42C    P0    37W / 250W |   1000MiB / 16130MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  Tesla V100-PCIE...  Off  | 00000000:D8:00.0 Off |                    0 |
| N/A   40C    P0    39W / 250W |     11MiB / 16130MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-------

In [57]:
# Memory footprint of the Gaussian-expanded edge features in gigabytes.
# element_size() is already a byte count, so bytes -> GB is a factor of 1e-9
# (a factor of 8e-9 would give gigabits, while the value is reported as GB).
size = bond_feats_dict['gdf_feat'].nelement() * bond_feats_dict['gdf_feat'].element_size() * 1e-9

In [62]:
# Report the computed size with two decimals.
print('\nSize of the gdf tensor will be {:4.2f} GB.'.format(size))


Size of the gdf tensor will be 0.00 GB.

Size of the gdf tensor will be 0.00 GB.


In [64]:
# Display the raw first document for reference (the output is long).
gasdb[0]

{'mongo_id': ObjectId('5d83021130582ea2977b252c'),
 'adsorbate': 'H',
 'mpid': 'mp-1184026',
 'miller': [1, 1, 1],
 'shift': 0.0,
 'top': False,
 'coordination': 'Cu-Ru',
 'neighborcoord': ['Ru:Cu-Cu-Cu-Cu-Cu-Cu', 'Cu:Cu-Cu-Cu-Cu-Ru-Ru'],
 'energy': -0.49401431499998916,
 'atoms': {'atoms': [{'symbol': 'H',
    'position': [3.552991350440603, 2.9439251093529757, 24.593403827707267],
    'tag': 1,
    'index': 0,
    'charge': 0.0,
    'momentum': [0.0, 0.0, 0.0],
    'magmom': 0.0},
   {'symbol': 'Cu',
    'position': [0.7547291571080015, 1.6457380302055582, 20.019014261702793],
    'tag': 0,
    'index': 1,
    'charge': 0.0,
    'momentum': [0.0, 0.0, 0.0],
    'magmom': 0.0},
   {'symbol': 'Cu',
    'position': [5.323580412083899, 0.0060754820913478, 22.12576864970429],
    'tag': 0,
    'index': 2,
    'charge': 0.0,
    'momentum': [0.0, 0.0, 0.0],
    'magmom': 0.0},
   {'symbol': 'Cu',
    'position': [5.345545771199834, 3.3027449169688223, 17.950312304551982],
    'tag': 0,
   

{'mongo_id': ObjectId('5d83021130582ea2977b252c'),
 'adsorbate': 'H',
 'mpid': 'mp-1184026',
 'miller': [1, 1, 1],
 'shift': 0.0,
 'top': False,
 'coordination': 'Cu-Ru',
 'neighborcoord': ['Ru:Cu-Cu-Cu-Cu-Cu-Cu', 'Cu:Cu-Cu-Cu-Cu-Ru-Ru'],
 'energy': -0.49401431499998916,
 'atoms': {'atoms': [{'symbol': 'H',
    'position': [3.552991350440603, 2.9439251093529757, 24.593403827707267],
    'tag': 1,
    'index': 0,
    'charge': 0.0,
    'momentum': [0.0, 0.0, 0.0],
    'magmom': 0.0},
   {'symbol': 'Cu',
    'position': [0.7547291571080015, 1.6457380302055582, 20.019014261702793],
    'tag': 0,
    'index': 1,
    'charge': 0.0,
    'momentum': [0.0, 0.0, 0.0],
    'magmom': 0.0},
   {'symbol': 'Cu',
    'position': [5.323580412083899, 0.0060754820913478, 22.12576864970429],
    'tag': 0,
    'index': 2,
    'charge': 0.0,
    'momentum': [0.0, 0.0, 0.0],
    'magmom': 0.0},
   {'symbol': 'Cu',
    'position': [5.345545771199834, 3.3027449169688223, 17.950312304551982],
    'tag': 0,
   