# Neural Message Passing for Quantum Chemistry

Ref: https://arxiv.org/pdf/1704.01212.pdf

Assumptions:
1. Hidden states for edges are not updated (only for atoms).

## 1. Directed models

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [12]:
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
import torch

from torch.autograd import Variable
from utils import rolling_mean, CUDA_wrapper
from data import DataPreprocessor
from mpnn.mpnn_directed import MPNNdirected
from mpnn.directed import Rd, Vd, Ud, Ed

In [3]:
# Notebook-wide configuration; every value below is consumed by a later cell.
AT_FEAT = 75  # atom feature size: inp_size of Rd and Vd, and part of Ud's input/output sizes
EDG_FEAT = 6  # edge feature size: inp_size of Ed, and part of Ud's input size
PASSES = 4  # passed as `t` to MPNNdirected; presumably the number of message passes — confirm
HID_SIZE = 32  # hid_size of Rd
CUDA = False  # forwarded as the `cuda` argument of MPNNdirected and get_features_from_smiles
DATASET = 'data.test'  # first argument of DataPreprocessor
FLT_DOTS = True  # DataPreprocessor(filter_dots=...)
FLT_ATOMS = True  # DataPreprocessor(filter_atoms=...)

In [4]:
# Sub-networks handed to MPNNdirected in the next cell; all sizes come from the config cell.
rd = Rd(inp_size=AT_FEAT, hid_size=HID_SIZE)
vd = Vd(inp_size=AT_FEAT)
# Input is two atom-feature vectors plus one edge-feature vector wide; output is atom-feature
# sized. Presumably (atom, neighbour, edge) concatenated -> new atom state — TODO confirm.
ud = Ud(inp_size=AT_FEAT*2+EDG_FEAT, out_size=AT_FEAT)
ed = Ed(inp_size=EDG_FEAT)

In [13]:
# Assemble the directed MPNN from the sub-networks built above; `t` is set from PASSES.
# NOTE: the positional order here is (rd, ud, vd, ed), not the order they were created in.
mpnn = MPNNdirected(rd, ud, vd, ed, t=PASSES, cuda=CUDA)

In [6]:
# Create the preprocessor for the DATASET file, with both filter flags taken from the config cell.
data = DataPreprocessor(DATASET, filter_dots=FLT_DOTS, filter_atoms=FLT_ATOMS)

In [7]:
# Read the raw dataset file (the saved output reports 586 lines).
# NOTE(review): the saved output of data.get_data() further down prints the same "read"
# message again, so this explicit call may be redundant — confirm against DataPreprocessor.
data.load_dataset()

File data.test read. In total 586 lines.


In [8]:
# Apply the preprocessor's filters (the saved output reports 112 SMILES deleted).
# NOTE(review): the saved output of data.get_data() further down prints the same "filtered"
# message again, so this explicit call may be redundant — confirm against DataPreprocessor.
data.filter_data()

Data filtered, in total 112 smiles deleted


In [9]:
# Unpack the train / validation / test SMILES and labels.
# The saved output shows get_data() generating and sorting scaffolds, so the split is
# presumably scaffold-based — TODO confirm in DataPreprocessor.
train_smiles, train_labels, valid_smiles, valid_labels, test_smiles, test_labels = data.get_data()

File data.test read. In total 586 lines.
Data filtered, in total 112 smiles deleted
About to generate scaffolds
Generating scaffold 0/474
About to sort in scaffold sets


In [10]:
# Featurise every training SMILES once up front. forward_pass (defined further down) unpacks
# the return value of get_features_from_smiles as a (g, h) pair.
train_x = [mpnn.get_features_from_smiles(x, cuda=CUDA) for x in train_smiles]
# Labels are used as-is.
train_y = train_labels

In [16]:
# Time one batched optimisation step on the 10-example slice starting at index 11.
# FIXME(review): in the saved run this call fails with `KeyError: 23` (see the output below).
# NOTE(review): the literal 4 equals PASSES — if it is the number of passes, use the constant.
i = 11
n = 10
%time res = mpnn.make_opt_step_batched(train_x[i:i+n], train_y[i:i+n], 4)

Error while executing node V_0[24] with args: [([23:3]fold_non_lin,), ([23:7]fold_non_lin,), ([23:13]fold_non_lin,), ([23:18]fold_non_lin,), ([23:22]fold_non_lin,), ([23:27]fold_non_lin,), ([23:32]fold_non_lin,), ([23:37]fold_non_lin,), ([23:44]fold_non_lin,), ([23:55]fold_non_lin,)]


KeyError: 23

In [17]:
# Display the feature batch that was passed to the failing step above, for inspection.
train_x[i:i+n]

[(OrderedDict([(0, [(Variable containing:
                   1  0  0  0  0  0
                  [torch.FloatTensor of size 1x6], 1)]),
               (1, [(Variable containing:
                   1  0  0  0  0  0
                  [torch.FloatTensor of size 1x6], 0)]),
               (2, [(Variable containing:
                   0  1  0  0  0  0
                  [torch.FloatTensor of size 1x6], 1)]),
               (3, [(Variable containing:
                   0  1  0  0  0  0
                  [torch.FloatTensor of size 1x6], 1)]),
               (4, [(Variable containing:
                   1  0  0  0  0  0
                  [torch.FloatTensor of size 1x6], 1)]),
               (5, [(Variable containing:
                   1  0  0  0  0  0
                  [torch.FloatTensor of size 1x6], 4)]),
               (6, [(Variable containing:
                   0  1  0  0  0  0
                  [torch.FloatTensor of size 1x6], 5)]),
               (7, [(Variable containing:
             

In [None]:
# Display the labels for the same [i, i+n) slice.
train_y[i:i+n]

In [None]:
# Display the SMILES strings behind the inspected slice. train_x / train_y were built from the
# training split, so index into `train_smiles`; a bare `smiles` is not assigned anywhere above
# and raises NameError on a fresh kernel.
train_smiles[i:i+n]

In [None]:
# Accumulator for the per-step losses appended by the training loop in the next cell.
# Re-running this cell discards the loss history collected so far.
losses = []

In [None]:
# Training loop: 100 outer iterations; the inner range(0, 1) yields only i == 0, so every
# step is run on the same first 100 training examples.
# NOTE(review): this passes raw SMILES/labels, whereas the timing cell above passed the
# pre-featurised train_x/train_y — confirm which input make_opt_step_batched expects.
for j in range(100):
    for i in range(0, 1):
        #print(i)
        # %time prints the wall/CPU time of each step; `loss` is still assigned in the notebook namespace.
        %time loss = mpnn.make_opt_step_batched(train_smiles[i:i+100], train_labels[i:i+100], 4)
        losses.append(loss)
    # Progress indicator: index of the outer iteration just finished.
    print(j)

In [None]:
# Smoothed training curve: rolling_mean over the recorded per-step losses
# (the second argument, 100, is presumably the window size — confirm in utils.rolling_mean).
plt.plot(rolling_mean(losses, 100))
# Label the figure so it can be read on its own when the notebook is skimmed.
plt.xlabel('optimisation step')
plt.ylabel('loss (rolling mean)')
plt.title('Training loss');  # trailing ';' hides the matplotlib return-value repr

In [None]:
# Collect the model output for the first 100 training molecules.
# NOTE: forward_pass is defined in a cell further down; on a fresh kernel that cell must be
# run before this one.
res = []
for i in range(100):
    # Use the PASSES config constant: a lowercase `passes` is not assigned anywhere above
    # and raises NameError on a fresh kernel.
    res.append(forward_pass(mpnn, train_smiles[i], PASSES).data[0][0])

In [None]:
# Histogram of the model outputs collected in `res`.
plt.hist(res)
# Label the figure so it can be read on its own when the notebook is skimmed.
plt.xlabel('model output')
plt.ylabel('count')
plt.title('Distribution of model outputs');  # trailing ';' hides the return-value repr

In [None]:
def forward_pass(self, x, t):
    """Run `t` message-passing steps for one input and return the readout.

    Args:
        self: model object; must provide `get_features_from_smiles`,
            `single_message_pass` and `R` (called here as a plain function,
            e.g. `forward_pass(mpnn, smiles, t)`).
        x: input handed unchanged to `self.get_features_from_smiles`.
        t: number of message passes; steps are numbered 0 .. t-1.

    Returns:
        Whatever `self.R` returns for (state after the passes, initial state).
    """
    graph, hidden = self.get_features_from_smiles(x)
    # Featurise a second time to keep an independent copy of the initial state;
    # only its second element is needed for the readout.
    _, hidden_initial = self.get_features_from_smiles(x)

    # The return value of single_message_pass is ignored, so any update to
    # `hidden` presumably happens in place — confirm in the model class.
    for step in range(t):
        self.single_message_pass(graph, hidden, step)

    return self.R(hidden, hidden_initial)

In [None]:
# Featurise a single training molecule (index 10) and keep both parts for ad-hoc inspection.
g, h = mpnn.get_features_from_smiles(train_smiles[10])

In [None]:
# Mean of the last 50 recorded training losses.
np.array(losses[-50:]).mean()

In [None]:
# Mean label over the first 100 training examples — the same slice the training loop uses.
np.array(train_labels[:100]).mean()

In [None]:
# Print index, label and collected model output side by side for the first 50 molecules.
# NOTE(review): int() truncates toward zero, so an output of 0.9 prints as 0, whereas the
# next cell binarises `res` with a 0.5 threshold — confirm which comparison is intended.
for i in range(50):
    print(i, int(train_labels[i]), int(res[i]))

In [None]:
# Binarise the collected outputs at a 0.5 threshold (1 where the value is > 0.5, else 0).
r = (np.array(res) > 0.5).astype(int)

## 2. Undirected models

In [None]:
# Print the requires_grad flag of every entry in mpnn.params.
# NOTE(review): this still inspects the `mpnn` object built in section 1 (MPNNdirected),
# although the cell sits under the "Undirected models" heading.
for p in mpnn.params:
    print(p.requires_grad)

In [None]:
# Display the second-to-last entry of mpnn.params.
mpnn.params[-2]

In [None]:
# Print the size of the underlying data of every entry in mpnn.params.
for p in mpnn.params:
    print(p.data.size())