# Neural Message Passing for Quantum Chemistry

Ref: https://arxiv.org/pdf/1704.01212.pdf

Assumptions:
1. Hidden states are updated only for atoms; edge hidden states are not updated.

## 1. Directed models

In [1]:
# Reload imported modules automatically before each cell runs, so edits to
# the local .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the cell output.
%matplotlib inline

In [2]:
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
import torch

from torch.autograd import Variable
from utils import rolling_mean, CUDA_wrapper
from data import DataPreprocessor
from mpnn.mpnn_directed import MPNNdirected
from mpnn.directed import Rd, Vd, Ud, Ed
from copy import deepcopy



In [3]:
# --- Experiment configuration ------------------------------------------------
AT_FEAT = 75            # atom feature width: inp_size of Rd / Vd, out_size of Ud
EDG_FEAT = 6            # edge feature width: inp_size of Ed
PASSES = 4              # passed as t= to MPNNdirected and to batch_operations
HID_SIZE = 32           # hid_size of Rd
# Use the GPU only when one is actually present, so the notebook also runs on
# CPU-only machines instead of failing on a hard-coded True.
CUDA = torch.cuda.is_available()
DATASET = 'data.test'   # file handed to DataPreprocessor
FLT_DOTS = True         # DataPreprocessor filter_dots flag
FLT_ATOMS = True        # DataPreprocessor filter_atoms flag

In [10]:
# Build the four components that are handed to MPNNdirected in the next cell.
# NOTE(review): judging by the names and the referenced paper these are the
# readout (R), vertex (V), update/message (U) and edge (E) functions — confirm
# against mpnn.directed. Construction order is kept as is, since it may affect
# random weight initialisation.
rd = Rd(inp_size=AT_FEAT, hid_size=HID_SIZE)
vd = Vd(inp_size=AT_FEAT)
# Input width is two atom feature vectors plus one edge feature vector
# (presumably concatenated — TODO confirm).
ud = Ud(inp_size=AT_FEAT*2+EDG_FEAT, out_size=AT_FEAT)
ed = Ed(inp_size=EDG_FEAT)

In [11]:
# Assemble the directed MPNN from its components; t=PASSES is presumably the
# number of message-passing steps (TODO confirm in mpnn.mpnn_directed).
mpnn = MPNNdirected(rd, ud, vd, ed, t=PASSES, cuda=CUDA)

In [12]:
# Preprocessor for the DATASET file; the two flags switch on its "dots" and
# "atoms" filters (the exact filter rules live in data.DataPreprocessor).
data_loader = DataPreprocessor(DATASET, filter_dots=FLT_DOTS, filter_atoms=FLT_ATOMS)

In [13]:
# Read the raw dataset file into the preprocessor (it reports the line count).
data_loader.load_dataset()

File data.test read. In total 5000 lines.


In [14]:
# Apply the configured filters; it reports how many SMILES were deleted.
data_loader.filter_data()

Data filtered, in total 1162 smiles deleted


In [15]:
# Fetch the train / validation / test SMILES and their labels.
# NOTE(review): the recorded output of this cell repeats the "File ... read" and
# "Data filtered" messages and then logs scaffold generation, which suggests
# get_data() loads, filters and scaffold-splits on its own — confirm whether the
# explicit load_dataset() / filter_data() calls above are still needed.
train_smiles, train_labels, valid_smiles, valid_labels, test_smiles, test_labels = data_loader.get_data()

File data.test read. In total 5000 lines.
Data filtered, in total 1162 smiles deleted
About to generate scaffolds
Generating scaffold 0/3838
Generating scaffold 1000/3838
Generating scaffold 2000/3838
Generating scaffold 3000/3838
About to sort in scaffold sets


In [24]:
# Labels are used as-is; inputs are featurised once, up front, per molecule.
train_y = train_labels
train_x = [
    mpnn.get_features_from_smiles(smiles, cuda=CUDA)
    for smiles in train_smiles
]

In [25]:
# Build (and time) the batched computation for the first 1500 training
# molecules, unshuffled. Returns the fold, its nodes and `ix`
# (presumably an index mapping — TODO confirm in mpnn.mpnn_directed).
%time fold, folded_nodes, ix = mpnn.batch_operations(train_x[:1500], PASSES, shuffle=False)

CPU times: user 2.45 s, sys: 12 ms, total: 2.46 s
Wall time: 2.46 s


In [None]:
# Accumulates the loss returned by every optimisation step in the cells below.
losses = []

In [19]:
# Number of featurised training molecules.
len(train_x)

3070

In [27]:
# Time a single evaluation of the folded batch through the model.
%time result = fold.apply(mpnn, folded_nodes)

CPU times: user 4.29 s, sys: 76 ms, total: 4.36 s
Wall time: 4.35 s


In [None]:
# Full-batch training on the pre-folded batch.
# `fold` / `folded_nodes` were built from train_x[:1500] with shuffle=False, so
# only the first 1500 labels belong to `results`; passing all of train_y would
# pair 1500 predictions with 3070 labels.
for i in range(2000):
    results = fold.apply(mpnn, folded_nodes)
    loss = mpnn.make_opt_step_batched(results, train_y[:1500])
    losses.append(loss)
    print(loss)

In [None]:
# Loss curve over all recorded optimisation steps. The smoothing window of 1 is
# kept from the original cell; increase it for a smoother curve.
fig, ax = plt.subplots()
ax.plot(rolling_mean(losses, 1))
# Label the figure so it can be read on its own; the trailing ';' hides the repr.
ax.set(title="Training loss", xlabel="Optimisation step", ylabel="Loss");

In [None]:
# 100 optimisation steps on a fixed subset made of the first 150 samples.
# Fresh deep copies are handed over on every step, exactly as before.
for j in range(100):
    batch_x = deepcopy(train_x[0:150])
    batch_y = deepcopy(train_y[0:150])
    loss = mpnn.make_opt_step_batched(batch_x, batch_y, 4)
    losses.append(loss)
    print(j)

In [None]:
# Sum of the first 150 labels divided by their count (the label mean).
np.sum(np.array(train_y[:150])) / len(train_y[:150])

In [None]:
# NOTE(review): this unpacks two values (used as fold and nodes in the next
# cell), whereas an earlier cell assigns the result of the same call to a single
# `loss` — confirm which return shape make_opt_step_batched currently has.
f, fn = mpnn.make_opt_step_batched(deepcopy(train_x[0:150]), deepcopy(train_y[0:150]), 4)

In [None]:
# Evaluate the fold built above through the model; `res` holds its outputs.
res = f.apply(mpnn, fn)

In [None]:
# Debugging aid: inspect what the fold has cached under the 'V_0' key.
f.cached_nodes['V_0']

In [None]:
# Debugging aid: show the fold object's repr.
f

In [None]:
# Histogram of the values in `res` (the result of f.apply on the model).
fig, ax = plt.subplots()
ax.hist(res)
# Label the figure so it can be read on its own; the trailing ';' hides the repr.
ax.set(title="Distribution of model outputs", xlabel="Model output", ylabel="Count");

In [None]:
def forward_pass(self, x, t):
    """Run ``t`` message-passing rounds for ``x`` and return the readout.

    ``x`` is featurised twice through ``self.get_features_from_smiles``. The
    first result (graph + hidden states) is handed to
    ``self.single_message_pass`` once per round, with the round index
    ``0 .. t-1``. The hidden states of the second featurisation are passed,
    untouched, as the second argument of the readout ``self.R``.

    Args:
        self: object providing ``get_features_from_smiles``,
            ``single_message_pass`` and ``R``.
        x: input accepted by ``get_features_from_smiles`` (a SMILES string,
            judging by the method name).
        t: number of message-passing rounds.

    Returns:
        Whatever ``self.R(hidden, initial_hidden)`` returns.
    """
    graph, hidden = self.get_features_from_smiles(x)
    # Presumably single_message_pass updates `hidden` in place, hence a second,
    # pristine featurisation for the readout — TODO confirm.
    _, initial_hidden = self.get_features_from_smiles(x)

    for step in range(t):
        self.single_message_pass(graph, hidden, step)

    return self.R(hidden, initial_hidden)

In [None]:
# Featurise one training molecule (index 10) for manual inspection; the call
# returns a pair, unpacked here as g and h.
g, h = mpnn.get_features_from_smiles(train_smiles[10])

In [None]:
# Average of the 50 most recently recorded losses.
np.mean(np.array(losses[-50:]))

In [None]:
# Mean of the first 100 training labels.
np.mean(np.array(train_labels[:100]))

In [None]:
# Print index, true label and predicted class for the first 50 samples.
# The raw output is thresholded at 0.5 (the same cut-off used for `r` in the
# next cell); a bare int() would truncate every score below 1.0 to 0.
for i in range(50):
    print(i, int(train_labels[i]), int(float(res[i]) > 0.5))

In [None]:
# Binarise the model outputs: 1 where the value exceeds 0.5, otherwise 0.
r = np.greater(np.array(res), 0.5).astype(int)

## 2. Undirected models