In [1]:
import chemprop_ish as C
import torch
import pandas as pd
import torch.nn as nn



In [9]:
#TESTING SINGLE MPNN

args = C.TrainArgs()
args.atom_messages = False
model = C.MPNN(args)
out = model(['CC(=O)C','C','O'])
print(out)

data = pd.read_csv('bordwell_pka.csv')
data.columns = ['SMILES','pka']
SMILES = data['SMILES'].tolist()
pka = data['pka'].tolist()
pka = [torch.Tensor([x]) for x in pka]

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
loss_function = nn.MSELoss()
for epoch in range(0, 5):
    for x in range(len(SMILES)):
        mol = [SMILES[x]]
        target = pka[x]
        optimizer.zero_grad()
        output = model(mol)[0]
        loss = loss_function(output, target)
        loss.backward()
        optimizer.step()
    print('Epoch:', epoch)
    print('Loss:', loss)

tensor([[0.0839],
        [0.0198],
        [0.0560]], grad_fn=<AddmmBackward>)
Epoch: 0
Loss: tensor(49.6656, grad_fn=<MseLossBackward>)
Epoch: 1
Loss: tensor(50.8828, grad_fn=<MseLossBackward>)
Epoch: 2
Loss: tensor(56.8960, grad_fn=<MseLossBackward>)
Epoch: 3
Loss: tensor(64.9495, grad_fn=<MseLossBackward>)
Epoch: 4
Loss: tensor(70.9275, grad_fn=<MseLossBackward>)


In [10]:
#TESTING DOUBLE MPNN

args = C.TrainArgs()
data = pd.read_csv('ETMdata.csv')
solute = data['solute'].tolist()
solvent = data['solvent'].tolist()
pka = data['pka'].tolist()
X_Y = [(x,y) for x,y in zip(solute,solvent)]
pka = [torch.Tensor([x]) for x in pka]
dmodel = C.double_MPNN(args)

optimizer = torch.optim.Adam(dmodel.parameters(), lr=1e-4)
loss_function = nn.MSELoss()
for epoch in range(0, 20):
    for x in range(len(X_Y)):
        mols = [X_Y[x]]
        target = pka[x]
        optimizer.zero_grad()
        output = dmodel(mols)[0]
        loss = loss_function(output, target)
        loss.backward()
        optimizer.step()
    print('Epoch:', epoch)
    print('Loss:', loss.item())

Epoch: 0
Loss: 147.4577178955078
Epoch: 1
Loss: 1.0285344123840332
Epoch: 2
Loss: 0.03546340391039848
Epoch: 3
Loss: 0.005249371286481619
Epoch: 4
Loss: 0.06924451887607574
Epoch: 5
Loss: 1.0274475812911987
Epoch: 6
Loss: 0.24772685766220093
Epoch: 7
Loss: 0.3669643700122833
Epoch: 8
Loss: 1.0308297872543335
Epoch: 9
Loss: 0.31833070516586304
Epoch: 10
Loss: 1.7746273279190063
Epoch: 11
Loss: 1.2564406394958496
Epoch: 12
Loss: 10.210241317749023
Epoch: 13
Loss: 5.803338050842285
Epoch: 14
Loss: 2.0440008640289307
Epoch: 15
Loss: 9.229966163635254
Epoch: 16
Loss: 5.338027477264404
Epoch: 17
Loss: 13.310317039489746
Epoch: 18
Loss: 8.062491416931152
Epoch: 19
Loss: 15.149951934814453


In [36]:
#TESTING DOUBLE MPNN WITH INTERACTION (CIGIN)

args = C.TrainArgs()
args.interaction = True

data = pd.read_csv('ETMdata.csv')
solute = data['solute'].tolist()
solvent = data['solvent'].tolist()
pka = data['pka'].tolist()
X_Y = [(x,y) for x,y in zip(solute,solvent)]
pka = [torch.Tensor([x]) for x in pka]
dmodel = C.double_MPNN(args)

In [30]:
print(dmodel(X_Y[0:5]))

tensor([[0.],
        [0.],
        [0.],
        [0.],
        [0.]], grad_fn=<ReluBackward0>)


In [15]:
optimizer = torch.optim.Adam(dmodel.parameters(), lr=1e-4)
loss_function = torch.nn.MSELoss()
for epoch in range(0, 20):
    for x in range(len(X_Y)):
        mols = [X_Y[x]]
        target = pka[x]
        optimizer.zero_grad()
        output = dmodel(mols)[0]
        loss = loss_function(output, target)
        loss.backward()
        optimizer.step()
    print('Epoch:', epoch)
    print('Loss:', loss.item())

Epoch: 0
Loss: 0.7457123398780823
Epoch: 1
Loss: 19.77728843688965
Epoch: 2
Loss: 17.387468338012695
Epoch: 3
Loss: 21.935728073120117
Epoch: 4
Loss: 19.714401245117188
Epoch: 5
Loss: 13.222643852233887
Epoch: 6
Loss: 14.366247177124023
Epoch: 7
Loss: 8.343389511108398
Epoch: 8
Loss: 14.248923301696777
Epoch: 9
Loss: 9.019465446472168
Epoch: 10
Loss: 16.309341430664062
Epoch: 11
Loss: 19.873170852661133
Epoch: 12
Loss: 18.615921020507812
Epoch: 13
Loss: 8.89854907989502
Epoch: 14
Loss: 23.737348556518555
Epoch: 15
Loss: 22.30984115600586
Epoch: 16
Loss: 18.19049644470215
Epoch: 17
Loss: 21.16509246826172
Epoch: 18
Loss: 6.100868225097656
Epoch: 19
Loss: 14.546182632446289


In [48]:
from more_itertools import chunked
from random import shuffle

data = list(zip(X_Y,pka))
shuffle(data)
data = list(chunked(data,3))
xy, z = zip(*data[0])
print(list(xy))
print(z)

[('C1=CC(=CC(=C1)Cl)N', 'CS(=O)C'), ('C1=CC=C(C=C1)S(=O)(=O)N', 'CS(=O)C'), ('CCNCC', 'O')]
(tensor([2.9800]), tensor([16.1000]), tensor([10.9800]))


In [114]:
#list batch

optimizer = torch.optim.Adam(dmodel.parameters(), lr=1e-4)
loss_function = torch.nn.MSELoss()
data = list(zip(X_Y,pka))
for epoch in range(0, 1):
    shuffle(data)
    batched_data = list(chunked(data,5))
    for x in range(len(batched_data)):
        xy, z = zip(*batched_data[x])
        mols = xy
        targets = torch.Tensor(z)
        optimizer.zero_grad()
        output = dmodel(mols)[0]
        loss = loss_function(output, targets)
        loss.backward()
        optimizer.step()
    print('Epoch:', epoch)
    print('Loss:', loss.item())

O
CO
O
O
CS(=O)C
CS(=O)C
CO
CO
CC#N
CC#N
O
CC#N
CC#N
O
O
Epoch: 0
Loss: 20.281612396240234


In [75]:
import numpy as np
from numpy.random import shuffle

In [117]:
data = pd.read_csv('ETMdata.csv')
data = data.to_numpy()
batched_data = np.array_split(data,15)
batched_data[0][:,2]

array([4.44, 11.4, 4.41, 20.3, 4.22], dtype=object)

In [102]:
#numpy batch

optimizer = torch.optim.Adam(dmodel.parameters(), lr=1e-4)
loss_function = torch.nn.MSELoss()

shuffle(data)
train,test = data[:60,:], data[60:,:]
test_xy, test_z = test[:,[0,1]], test[:,2]

def train_f():
    for epoch in range(0, 100):
        shuffle(train)
        batches = np.array_split(train,15)
        for x in range(15-1):
            mols = batches[x][:,[0,1]]
            targets = torch.Tensor(batches[x][:,2].astype('float64'))
            optimizer.zero_grad()
            output = dmodel(mols)[0]
            loss = loss_function(output, targets)
            loss.backward()
            optimizer.step()
        if epoch % 10 == 0:
            print('Epoch:', epoch)
            test_output = dmodel(test_xy)
            loss = loss_function(test_output, torch.Tensor(test_z.astype('float64')))
            print('Loss:', loss.item())

In [106]:
def train_f():
    shuffle(train)
    batches = np.array_split(train,15)
    for x in range(15-1):
        mols = batches[x][:,[0,1]]
        targets = torch.Tensor(batches[x][:,2].astype('float64'))
        optimizer.zero_grad()
        output = dmodel(mols)[0]
        loss = loss_function(output, targets)
        loss.backward()
        optimizer.step()
    print('Epoch:', epoch)
    test_output = dmodel(test_xy)
    loss = loss_function(test_output, torch.Tensor(test_z.astype('float64')))
    print('Loss:', loss.item())

In [2]:
class Dataset(torch.utils.data.Dataset):
    def __init__(self, list_IDs, datapoints, labels):
        self.labels = labels
        self.datapoints = datapoints
        self.list_IDs = list_IDs
        
    def __len__(self):
        return len(self.list_IDs)
    
    def __getitem__(self, index):
        ID = self.list_IDs[index]
        
        X = self.datapoints[ID]
        y = self.labels[ID]
        
        return X, y

In [3]:
data = pd.read_csv('ETMdata.csv')
solute = data['solute'].tolist()
solvent = data['solvent'].tolist()
pka = data['pka'].tolist()
X_Y = [[x,y] for x,y in zip(solute,solvent)]
pka = [torch.Tensor([x]) for x in pka]

In [18]:
dataset = Dataset([0,1,2], X_Y, pka)
print(dataset[3])

IndexError: list index out of range

In [5]:
training_generator = torch.utils.data.DataLoader(dataset, batch_size=5, shuffle=True)

In [14]:
args = C.TrainArgs()
args.interaction = True
dmodel = C.double_MPNN(args)

optimizer = torch.optim.Adam(dmodel.parameters(), lr=1e-4)
loss_function = torch.nn.MSELoss()
num_epochs = 1000

for epoch in range(num_epochs):
    for local_batch, local_labels in training_generator:
        mols = local_batch
        targets = local_labels
        optimizer.zero_grad()
        output = dmodel(mols)
        loss = loss_function(output, targets)
        loss.backward()
        optimizer.step()
    if epoch % 100 == 0:
        print('Epoch:', epoch)
        testing = [[X_Y[x][0] for x in range(len(X_Y))],[X_Y[x][1] for x in range(len(X_Y))]]
        outputs = dmodel(testing)
        loss = loss_function(outputs, torch.Tensor(pka))
        print('Loss:', loss.item())

Epoch: 0
torch.Size([75, 1])
Loss: 132.60633850097656


  return F.mse_loss(input, target, reduction=self.reduction)


Epoch: 100
torch.Size([75, 1])
Loss: 49.01560592651367
Epoch: 200
torch.Size([75, 1])
Loss: 53.052371978759766
Epoch: 300
torch.Size([75, 1])
Loss: 54.99067306518555
Epoch: 400
torch.Size([75, 1])
Loss: 57.39252853393555
Epoch: 500
torch.Size([75, 1])
Loss: 52.931949615478516
Epoch: 600
torch.Size([75, 1])
Loss: 55.98434829711914
Epoch: 700
torch.Size([75, 1])
Loss: 57.607444763183594
Epoch: 800
torch.Size([75, 1])
Loss: 58.209354400634766
Epoch: 900
torch.Size([75, 1])
Loss: 56.79232406616211


In [31]:
args1 = C.TrainArgs()
args2 = C.TrainArgs()
args1.__dict__.update({"depth":10, "dropout":0.5})

In [27]:
print(args1.__dict__)
print(args1.hidden_size)
print(args2.depth)

{'depth': 10, 'dropout': 0.5}
128
3
