### Load QM9 model and predict

In [1]:
from megnet.models import MEGNetModel
import numpy as np
from operator import itemgetter
import json

def predict(model, graph):
    """
    Run one graph through a model and return a single value.

    Args:
        model: object exposing ``graph_convertor.graph_to_input`` and
            ``predict``
        graph: graph in the form accepted by ``model.graph_convertor``

    Returns:
        The first element of the flattened model output
    """
    model_input = model.graph_convertor.graph_to_input(graph)
    flat_output = model.predict(model_input).ravel()
    return flat_output[0]

def get_graph_from_doc(doc):
    """
    Convert a json document into a megnet graph

    Every entry of ``doc['atom_pairs']`` is added in both directions
    (a -> b and b -> a) and the resulting bonds are ordered by their
    first atom index.

    Args:
        doc (dict): document with an ``'atoms'`` list (each item has a
            ``'type'``) and an ``'atom_pairs'`` list (each item has
            ``'a_idx'``, ``'b_idx'`` and ``'spatial_distance'``)

    Returns:
        dict with keys ``'atom'``, ``'bond'``, ``'index1'``, ``'index2'``
        and ``'state'``. ``'bond'``, ``'index1'`` and ``'index2'`` are
        tuples, empty when the document has no atom pairs.
    """
    atom = [i['type'] for i in doc['atoms']]

    index1_temp = [i['a_idx'] for i in doc['atom_pairs']]
    index2_temp = [i['b_idx'] for i in doc['atom_pairs']]
    bond_temp = [i['spatial_distance'] for i in doc['atom_pairs']]

    # duplicate every pair so that both directions are present
    index1 = index1_temp + index2_temp
    index2 = index2_temp + index1_temp
    bond = bond_temp + bond_temp
    sort_key = np.argsort(index1)

    # Index explicitly instead of using itemgetter(*sort_key): itemgetter
    # raises TypeError when sort_key is empty (document without atom pairs).
    index1 = tuple(index1[k] for k in sort_key)
    index2 = tuple(index2[k] for k in sort_key)
    bond = tuple(bond[k] for k in sort_key)
    graph = {'atom': atom, 'bond': bond, 'index1': index1, 'index2': index2, 'state': [[0, 0]]}
    return graph

# Scaling entries used below, keyed by target name
with open('../mvl_models/qm9/scaler.json', 'r') as scaler_file:
    scaler = json.loads(scaler_file.read())

# Example QM9 document (entry 000001) and the megnet graph built from it
with open('../megnet/data/tests/qm9/000001.json', 'r') as doc_file:
    doc = json.loads(doc_file.read())
graph = get_graph_from_doc(doc)

Using TensorFlow backend.


In [2]:
# Targets to compare; one model file per name is loaded below
names = ['mu', 'alpha', 'HOMO', 'LUMO', 'gap', 'R2', 'ZPVE', 'U0', 'U', 'H', 'G', 'Cv', 'omega1']

y_pred = []
y_true = []

print('*** Result Comparisons ***')
print('Target\tMEGNet\tQM9')

for target in names:
    model = MEGNetModel.from_file('../mvl_models/qm9/' + target + '.hdf5')
    scaled_pred = predict(model, graph)
    target_scaler = scaler[target]

    # Targets flagged as per-atom are multiplied by the number of atoms in
    # the graph; every other target keeps a factor of 1.
    n_atoms = len(graph['atom']) if target_scaler['is_per_atom'] else 1

    # Undo the standardisation x_scaled = (x - mean) / std, then apply the factor
    pred = (scaled_pred * target_scaler['std'] + target_scaler['mean']) * n_atoms
    reference = doc['mol_info'][target]

    y_pred.append(pred)
    y_true.append(reference)
    print('%s\t%.3f\t%.3f' % (target, pred, float(reference)))





*** Result Comparisons ***
Target	MEGNet	QM9
mu	-0.008	0.000
alpha	13.127	13.210
HOMO	-10.557	-10.550
LUMO	3.241	3.186
gap	13.622	13.736
R2	35.975	35.364
ZPVE	1.215	1.218
U0	-17.166	-17.172
U	-17.353	-17.286
H	-17.420	-17.389
G	-16.107	-16.152
Cv	6.427	6.469
omega1	3151.626	3151.708


### Predict from SMILES

In [4]:
from pymatgen.io.babel import BabelMolAdaptor
import pybel as pb
import openbabel as ob

# Atomic number -> atom type mapping used as the default by the convertor below
ATOMNUM2TYPE = {1: 1, 6: 2, 7: 4, 8: 6, 9: 8}

class AtomNumberToTypeConvertor:
    """
    Translate atomic numbers into atom types through a lookup table.
    """
    def __init__(self, mapping=ATOMNUM2TYPE):
        """
        Args:
            mapping (dict): lookup table from atomic number to atom type,
                defaults to ATOMNUM2TYPE
        """
        self.mapping = mapping
    
    def convert(self, l):
        """
        Convert a sequence of atomic numbers to atom types.

        Args:
            l: iterable of atomic numbers

        Returns:
            list of atom types, in the same order as the input

        Raises:
            KeyError: if an atomic number is missing from the mapping
        """
        try:
            return [self.mapping[i] for i in l]
        except KeyError as err:
            # A bare "KeyError: 16" gives no hint about what is supported
            raise KeyError(
                'Atomic number %s is not in the mapping; supported atomic numbers: %s'
                % (err.args[0], list(self.mapping)))

In [5]:
MODEL_NAME = 'HOMO'

# Load the model file that belongs to the selected target
model_file = '../mvl_models/qm9/%s.hdf5' % MODEL_NAME
model = MEGNetModel.from_file(model_file)

# The default atom convertor yields atomic numbers (Z); replace it so that
# Z is translated to the atom type used in the QM9 dataset.
model.graph_convertor.atom_convertor = AtomNumberToTypeConvertor()

def get_pmg_mol_from_smiles(smiles):
    """
    Build a pymatgen molecule from a SMILES string.

    The string is read with pybel, ``make3D`` is called on the parsed
    molecule, and its underlying OBMol is wrapped in a BabelMolAdaptor.

    Args:
        smiles (str): SMILES string

    Returns:
        The ``pymatgen_mol`` of the adaptor
    """
    pybel_mol = pb.readstring('smi', smiles)
    pybel_mol.make3D()
    return BabelMolAdaptor(pybel_mol.OBMol).pymatgen_mol

In [6]:
# The smiles of qm9:000001 is just C
mol1 = get_pmg_mol_from_smiles('C')

# Same inverse transform as in the graph-based comparison: undo the
# standardisation, then multiply targets flagged as per-atom by the number
# of atoms so that changing MODEL_NAME to such a target stays correct.
n_atoms = len(mol1) if scaler[MODEL_NAME]['is_per_atom'] else 1
(model.predict_structure(mol1) * scaler[MODEL_NAME]['std'] + scaler[MODEL_NAME]['mean']) * n_atoms

array([-10.557696], dtype=float32)

This matches the HOMO value (-10.557) obtained above from the pre-computed graph.