# Neural Message Passing for Quantum Chemistry

Ref: https://arxiv.org/pdf/1704.01212.pdf

Assumptions:
1. Hidden states for edges (bonds) are not updated (only for atoms).

## 1. Directed models

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
import torch

from torch.autograd import Variable
from utils import rolling_mean, CUDA_wrapper
from data import DataPreprocessor
from mpnn.mpnn_directed import MPNNdirected
from mpnn.directed import Rd, Vd, Ud, Ed



In [3]:
# Sizes and switches shared by the model-construction and data-loading cells below.
AT_FEAT = 75  # passed as inp_size to Rd and Vd; also sizes Ud's input and output
EDG_FEAT = 6  # passed as inp_size to Ed; also contributes to Ud's input size
PASSES = 4  # passed as `t` to MPNNdirected (presumably the number of message passes — confirm)
HID_SIZE = 32  # hid_size argument of Rd
CUDA = False  # forwarded as the `cuda` flag to MPNNdirected and get_features_from_smiles
DATASET = 'data.test'  # file handed to DataPreprocessor
FLT_DOTS = True  # DataPreprocessor(filter_dots=...)
FLT_ATOMS = True  # DataPreprocessor(filter_atoms=...)

In [4]:
rd = Rd(inp_size=AT_FEAT, hid_size=HID_SIZE)
vd = Vd(inp_size=AT_FEAT)
ud = Ud(inp_size=AT_FEAT*2+EDG_FEAT, out_size=AT_FEAT)
ed = Ed(inp_size=EDG_FEAT)

In [5]:
mpnn = MPNNdirected(rd, ud, vd, ed, t=PASSES, cuda=CUDA)

In [6]:
data = DataPreprocessor(DATASET, filter_dots=FLT_DOTS, filter_atoms=FLT_ATOMS)

In [7]:
data.load_dataset()

File data.test read. In total 586 lines.


In [8]:
data.filter_data()

Data filtered, in total 112 smiles deleted


In [9]:
train_smiles, train_labels, valid_smiles, valid_labels, test_smiles, test_labels = data.get_data()

File data.test read. In total 586 lines.
Data filtered, in total 112 smiles deleted
About to generate scaffolds
Generating scaffold 0/474
About to sort in scaffold sets


In [26]:
train_x = [mpnn.get_features_from_smiles(x, cuda=CUDA) for x in train_smiles]
train_y = train_labels

In [25]:
i = 11
n = 10
%time res = mpnn.make_opt_step_batched(train_x[i:i+n], train_y[i:i+n], 4)

Error while executing node V_0[16] with args: [([15:0]U_3,), ([15:1]U_3,), ([15:2]U_3,), ([15:3]U_3,), ([15:5]U_3,), ([15:7]U_3,), ([15:9]U_3,), ([15:10]U_3,), ([15:11]U_3,), ([15:13]U_3,)]


KeyError: 15

In [None]:
train_x[i:i+n]

In [None]:
train_y[i:i+n]

In [None]:
# Show the SMILES strings for the same slice inspected via train_x / train_y.
# The bare name `smiles` is never assigned in this notebook; the list returned
# by data.get_data() is `train_smiles`.
train_smiles[i:i+n]

In [14]:
losses = []

In [27]:
for j in range(100):
    loss = mpnn.make_opt_step_batched(train_x, train_y, 4)
    losses.append(loss)
    print(j)

0
Error while executing node V_0[16] with args: [([15:0]U_3,), ([15:1]U_3,), ([15:2]U_3,), ([15:3]U_3,), ([15:5]U_3,), ([15:7]U_3,), ([15:8]U_3,), ([15:9]U_3,), ([15:10]U_3,), ([15:12]U_3,), ([15:14]U_3,), ([15:15]U_3,), ([15:16]U_3,), ([15:17]U_3,), ([15:18]U_3,), ([15:20]U_3,), ([15:22]U_3,), ([15:24]U_3,), ([15:25]U_3,), ([15:26]U_3,), ([15:28]U_3,), ([15:29]U_3,), ([15:31]U_3,), ([15:32]U_3,), ([15:33]U_3,), ([15:35]U_3,), ([15:36]U_3,), ([15:38]U_3,), ([15:39]U_3,), ([15:40]U_3,), ([15:41]U_3,), ([15:42]U_3,), ([15:43]U_3,), ([15:44]U_3,), ([15:45]U_3,), ([15:47]U_3,), ([15:48]U_3,), ([15:49]U_3,), ([15:50]U_3,), ([15:51]U_3,), ([15:53]U_3,), ([15:54]U_3,), ([15:55]U_3,), ([15:58]U_3,), ([15:59]U_3,), ([15:61]U_3,), ([15:62]U_3,), ([15:63]U_3,), ([15:64]U_3,), ([15:65]U_3,), ([15:66]U_3,), ([15:67]U_3,), ([15:68]U_3,), ([15:69]U_3,), ([15:70]U_3,), ([15:71]U_3,), ([15:72]U_3,), ([15:73]U_3,), ([15:74]U_3,), ([15:75]U_3,), ([15:76]U_3,), ([15:77]U_3,), ([15:78]U_3,), ([15:79]U_3,),

KeyError: 15

In [None]:
plt.plot(rolling_mean(losses, 100))

In [None]:
# Collect the model output for the first 100 training molecules.
# The number of message passes comes from the PASSES config constant; the
# lowercase `passes` used previously is never defined in this notebook.
res = []
for i in range(100):
    res.append(forward_pass(mpnn, train_smiles[i], PASSES).data[0][0])

In [None]:
plt.hist(res)

In [None]:
def forward_pass(self, x, t):
    """Run ``t`` message passes for one input and return the model's readout.

    Args:
        self: model object providing ``get_features_from_smiles``,
            ``single_message_pass`` and ``R`` (the notebook passes ``mpnn``).
        x: input handed to ``get_features_from_smiles`` (a SMILES string in
            the notebook).
        t: number of message-passing steps; steps are numbered ``0 .. t-1``.

    Returns:
        Whatever ``self.R`` returns for the two hidden-state collections.
    """
    graph, hidden = self.get_features_from_smiles(x)
    # Features are extracted a second time so that R also receives a copy of
    # the hidden states that was never handed to single_message_pass.
    _, untouched_hidden = self.get_features_from_smiles(x)

    for step in range(t):
        # Return value is ignored; presumably `hidden` is updated in place — confirm.
        self.single_message_pass(graph, hidden, step)

    return self.R(hidden, untouched_hidden)

In [None]:
g, h = mpnn.get_features_from_smiles(train_smiles[10])

In [None]:
np.array(losses[-50:]).mean()

In [None]:
np.array(train_labels[:100]).mean()

In [None]:
for i in range(50):
    print(i, int(train_labels[i]), int(res[i]))

In [None]:
r = (np.array(res) > 0.5).astype(int)

## 2. Undirected models

In [None]:
import numpy as np
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from torch.autograd import Variable

In [None]:
def one_hots(zeros, ix):
    """Write a 1 into one column of every row of ``zeros`` and return it.

    Row ``k`` receives its 1 at column ``ix[k]``. The tensor is modified in
    place; the returned object is the same ``zeros`` that was passed in.
    """
    n_rows = zeros.size(0)
    for row in range(n_rows):
        col = ix[row]
        zeros[row, col] = 1
    return zeros

def bleu(t, r, f, temp):
    """Soft, differentiable BLEU-like overlap between candidate ``t`` and reference ``r``.

    Args:
        t: 2-D tensor of candidate scores; rows are sliced as consecutive
            positions below.
        r: reference tensor laid out like ``t`` (the notebook passes one-hot rows).
        f: callable applied to ``t / temp`` (the notebook passes a softmax module).
        temp: divisor applied to ``t`` before ``f``.

    Returns:
        The product of the summed bigram term and the summed unigram term.
    """
    probs = f(t / temp)
    length = t.size()[0]

    # Views on positions [0, n-1) and [1, n): the two halves of every bigram.
    cand_first, cand_second = probs[:-1, :], probs[1:, :]
    ref_first, ref_second = r[:-1, :], r[1:, :]

    # Unigram similarity: candidate vs. itself and candidate vs. reference.
    cand_uni = torch.matmul(probs, probs.t())
    ref_uni = torch.matmul(probs, r.t())

    # Bigram similarity: both members of the pair must agree, hence the product.
    cand_bi = torch.matmul(cand_first, cand_first.t()) * torch.matmul(cand_second, cand_second.t())
    ref_bi = torch.matmul(cand_first, ref_first.t()) * torch.matmul(cand_second, ref_second.t())

    uni_total = cand_uni.sum(0)
    uni_matched = ref_uni.sum(1)
    bi_total = cand_bi.sum(0)
    bi_matched = ref_bi.sum(1)

    # Clipped counts (min with the candidate's own count), averaged over positions.
    overlap_1 = torch.min(uni_total, uni_matched) / uni_total / length
    # NOTE(review): as written this evaluates to (min + 1) / bi_total + 1, which
    # is always > 1; add-one smoothing would normally be (min + 1) / (bi_total + 1).
    # Left unchanged here — confirm the intended formula.
    overlap_2 = ((torch.min(bi_total, bi_matched) + 1) / bi_total + 1) / (length - 1 + 1)

    return overlap_2.sum() * overlap_1.sum()

In [None]:
# Toy problem for the soft-BLEU experiment: a random score matrix `t` is
# optimised against a random one-hot reference `r`.
# Seed both RNGs so that Restart & Run All reproduces the same reference words
# and the same initial scores.
np.random.seed(0)
torch.manual_seed(0)

vocab_size = 10000
length = 5
# One reference word index per position, sampled with replacement.
words = np.random.choice(vocab_size, size=length, replace=True)
t = Variable(torch.randn((length, vocab_size)), requires_grad=True)
r = one_hots(torch.zeros((length, vocab_size)), words)
r = Variable(r, requires_grad=True)
# NOTE(review): Softmax is built without an explicit `dim`; newer PyTorch
# releases deprecate the implicit choice — pass dim=1 if the installed version
# accepts it.
f = nn.Softmax()

In [None]:
# Gradient ascent on the soft BLEU score with respect to the candidate scores `t`.
res = []
norms = []
lr = 1
temp = 1  # constant for the whole run, so set once instead of on every iteration
for i in range(1000):
    # backward() adds into .grad; clear the previous step's gradient first so
    # that the update and the recorded norm reflect this step only. Clearing at
    # the top (not the bottom) leaves the last gradient in t.grad for the
    # inspection cells further down.
    if t.grad is not None:
        t.grad.data.zero_()
    b2 = bleu(t, r, f, temp)
    res.append(b2.data[0])
    b2.backward()
    # Ascent step: move `t` along the gradient to increase the score.
    t.data.add_(lr * t.grad.data)
    norms.append(t.grad.data.norm())
# Hard decision: one-hot at the arg-max column of every row of `t`.
hard_t = one_hots(torch.zeros(list(t.size())), torch.max(t, dim=1)[1].data)

In [None]:
plt.plot(t.data[1].numpy())

In [None]:
plt.plot(res[1:])

In [None]:
plt.plot(norms)

In [None]:
bleu(hard_t, r, f, 1)

In [None]:
(hard_t * r.data > 0).sum()

In [None]:
(hard_t * r.data > 0).sum()

In [None]:
torch.max(r, dim=1)[1]

In [None]:
torch.max(hard_t, dim=1)[1]

In [None]:
    # NOTE(review): this cell looks like the body of a loop whose header was
    # removed. `overlap` is not assigned at notebook scope in the visible cells
    # (it is only a local inside `bleu`), so the cell appears to depend on
    # state from a deleted cell — confirm before relying on it.
    overlap.backward()
    # Norm of the whole gradient of `t`.
    total_norm = t.grad.norm().data[0]
    # Gradient of the last column entry in row 0 of `t`.
    last_el_grad = t.grad.data[0].numpy()[-1]
    # Share of that single entry relative to the total gradient norm.
    ratio = last_el_grad / total_norm
    print('norm={},   last_el={},   ratio={},   temp={}'.format(total_norm, last_el_grad, ratio, temp))
# norms.append(t.grad.norm().data[0])
# plt.plot(norms)
# plt.show()

In [None]:
t.data[0].numpy()

In [None]:
t.data[1].numpy()

In [None]:
overlap_1

In [None]:
total_norm

In [None]:
# Small autograd experiment: attach a hook to the intermediate `z` and print
# the norm of the gradient that reaches it during backward().
x = Variable(torch.Tensor([1]), requires_grad=True)
y = Variable(torch.Tensor([2]), requires_grad=False)
z = x * y
w = z * 5


def _print_grad_norm(grad):
    """Hook for `z`: report the norm of the incoming gradient, leave it unchanged."""
    print(grad.norm())


z.register_hook(_print_grad_norm)
w.backward()

In [None]:
(1e-5)**(1/30000)

In [None]:
plt.hist(t.grad[0].data.numpy(), 100)
plt.show()

In [None]:
g = t.grad[0].data.numpy()

In [None]:
x.requires_grad = True