In [203]:
# import tensorflow as tf 
# import tensorflow.keras.layers as layers
# import tensorflow.keras.models as models
# import tensorflow.keras.optimizers as optim

# import spektral as sp 
# import spektral.layers as SL
# import spektral.data.graph as graph
# import spektral.data.loaders as loaders

import torch as th
import torch.nn as nn 
import torch.nn.functional as F
import torch.optim as optim

import torch_geometric as thg
import torch_geometric.nn as gnn
import torch_geometric.nn.functional as gf
from torch_geometric.data import Data, HeteroData
from torch_geometric.loader import DataLoader
from torch_geometric.utils.convert import from_scipy_sparse_matrix as keys

import numpy as np
import pandas as pd
import rdkit, rdkit.Chem, rdkit.Chem.rdDepictor, rdkit.Chem.Draw
import networkx as nx

In [204]:
DENSE = r"C:\Users\suyash\Desktop\toxic\/tox21_dense_train.csv"
LABEL = r"C:\Users\suyash\Desktop\toxic\tox21_labels_train.csv"
SMILE = r"C:\Users\suyash\Desktop\toxic\nr-ar.smiles"

In [205]:
my_elements = {6: "C", 8: "O", 1: "H", 17: "Cl", 7: "N", 9: "F", 16: "S",
               35: "Br", 14: "Si", 11: "Na", 53: "I", 80: "Hg", 5: "B", 
               19: "K", 15: "P", 79: "Au", 24: "Cr", 50: "Sn", 20: "Ca",
               48: "Cd", 30: "Zn", 23: "V", 33: "As", 3: "Li", 29: "Cu",
               27:"Co", 47: "Ag", 34: "Se", 78: "Pt", 83: "Bi", 26: "Fe", 
               51: "Sb", 12: "Mg", 13: "Al", 81: "Tl", 56: "Ba"}

In [206]:
def smiles2graph(sml):
    """Argument for the RD2NX function should be a valid SMILES sequence
    returns: the graph
    """
    m = rdkit.Chem.MolFromSmiles(sml)
    m = rdkit.Chem.AddHs(m)
    order_string = {
        rdkit.Chem.rdchem.BondType.SINGLE: 1,
        rdkit.Chem.rdchem.BondType.DOUBLE: 2,
        rdkit.Chem.rdchem.BondType.TRIPLE: 3,
        rdkit.Chem.rdchem.BondType.AROMATIC: 4,
    }
    N = len(list(m.GetAtoms()))
    nodes = np.zeros((N, len(my_elements)))
    lookup = list(my_elements.keys())
    for i in m.GetAtoms():
        nodes[i.GetIdx(), lookup.index(i.GetAtomicNum())] = 1

    adj = np.zeros((N, N, 5))
    for j in m.GetBonds():
        u = min(j.GetBeginAtomIdx(), j.GetEndAtomIdx())
        v = max(j.GetBeginAtomIdx(), j.GetEndAtomIdx())
        order = j.GetBondType()
        if order in order_string:
            order = order_string[order]
        else:
            raise Warning("Ignoring bond order" + order)
        adj[u, v, order] = 1
        adj[v, u, order] = 1
    return nodes, adj

In [207]:
file = open(SMILE, 'r')
smile = file.read().split('\n')
xtrain = []
ytrain = []

for c, x in enumerate(smile):
    try:
        x = x.split('\t')
        if x[2]=='1' or c%20==0:
            nodes, adj= smiles2graph(x[0])
            adj_mat = np.sum(adj, axis=-1) + np.eye(adj.shape[0])
            degree = np.sum(adj_mat, axis=-1)
            new_nodes = np.einsum("i,ij,jk->ik", 1 / degree, adj_mat, nodes)
            # print(new_nodes.shape, adj_mat.shape)
            xtrain.append(Data(x=th.tensor(th.from_numpy(new_nodes), dtype=th.float32), edge_index=th.tensor(adj_mat, dtype=th.long).nonzero().t().contiguous()))
            ytrain.append(x[2])
    except:
        pass
ytrain = th.from_numpy(np.array(ytrain, dtype="float32"))

  xtrain.append(Data(x=th.tensor(th.from_numpy(new_nodes), dtype=th.float32), edge_index=th.tensor(adj_mat, dtype=th.long).nonzero().t().contiguous()))
[09:12:48] Explicit valence for atom # 3 Si, 8, is greater than permitted


In [208]:
len(xtrain)

822

In [209]:
dataset = [x for x in DataLoader(xtrain, batch_size=1)]

### Not Working

In [210]:
# class Net(models.Model):
#     def __init__(self):
#         super(Net, self).__init__()
#         self.mask = SL.GraphMasking()
#         self.conv1 = SL.GCSConv(32, activation="relu")
#         # self.pool = SL.MinCutPool(N // 2)
#         self.conv2 = SL.GCSConv(16, activation="relu")
#         self.conv3 = SL.GCSConv(8, activation="relu")
#         self.global_pool = SL.GlobalSumPool()
#         self.dense1 = layers.Dense(1, activation="sigmoid")

#     def call(self, inputs):
#         x, a = inputs
#         x = self.mask(x)
#         x = self.conv1([x, a])
#         # x_pool, a_pool = self.pool([x, a])
#         x_pool = self.conv2([x, a])
#         output = self.global_pool(x_pool)
#         output = self.dense1(output)

#         return output

# learning_rate=1e-4
# model = Net()
# opt = optim.Adam(lr=learning_rate)
# model.compile(optimizer=opt, loss="binary_crossentropy", metrics=["accuracy"])

### Should work 

In [241]:
class Classifier(nn.Module):
    def __init__(self, in_dim=36, hidden_dim=64):
        super(Classifier, self).__init__()
        self.conv1 = gnn.GraphConv(in_dim, hidden_dim)
        self.conv2 = gnn.GraphConv(hidden_dim, hidden_dim)
        self.conv3 = gnn.GraphConv(hidden_dim, hidden_dim*2)
        self.conv4 = gnn.GraphConv(hidden_dim*2, hidden_dim*2)
        self.pool  = gnn.MeanAggregation()
        self.dense = nn.Linear(hidden_dim*2, hidden_dim)
        self.classify = nn.Linear(hidden_dim,1)

    def forward(self, g, h):
        # Apply graph convolution and activation.
        h = F.relu(self.conv1(h, g))
        h = F.relu(self.conv2(h, g))
        h = F.relu(self.conv3(h, g))
        h = F.relu(self.conv4(h, g))
        h = self.pool(h)
        h = F.relu(self.dense(h))
        return F.sigmoid(self.classify(h))
        

model = Classifier()

In [242]:
def get_accuracy(y_true, y_prob):
    assert y_true.size() == y_prob.size()
    y_prob = y_prob > 0.5
    return (y_true == y_prob).sum().item() / y_true.size(0)

In [243]:
epochs = 100
criterian = nn.BCELoss()
learning_rate = 1e-4
decay = 1e-2
for epoch in range(epochs):
    lss = 0
    acc = 0
    learning_rate = learning_rate/(epoch*decay+1)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    for step, (xtr, ytr) in enumerate(zip(dataset, ytrain)):
        h = xtr.x
        g = th.tensor(xtr.edge_index, dtype=th.long)
        batch = xtr.batch
        optimizer.zero_grad()
        ypred = model(g,h)
        loss = criterian(ypred, ytr.reshape(-1,1))

        if ypred>0.5 and ytr>0.5:
            acc=acc+1
        elif ypred<=0.5 and ytr<=0.5:
            acc=acc+1
        # print(ypred, ytr)
        lss = lss+loss
        loss.backward()
        optimizer.step()
    print(f"Epoch: {epoch+1}/{epochs} || Loss: {lss/(step+1):.4f} || Accuracy: {acc/(step+1):.4f} \n")

  g = th.tensor(xtr.edge_index, dtype=th.long)


Epoch: 1/100 || Loss: 0.5818 || Accuracy: 0.6740 

Epoch: 2/100 || Loss: 0.5782 || Accuracy: 0.7044 

Epoch: 3/100 || Loss: 0.5234 || Accuracy: 0.7494 

Epoch: 4/100 || Loss: 0.5043 || Accuracy: 0.7530 

Epoch: 5/100 || Loss: 0.4929 || Accuracy: 0.7652 

Epoch: 6/100 || Loss: 0.4822 || Accuracy: 0.7652 

Epoch: 7/100 || Loss: 0.4753 || Accuracy: 0.7725 

Epoch: 8/100 || Loss: 0.4661 || Accuracy: 0.7725 

Epoch: 9/100 || Loss: 0.4612 || Accuracy: 0.7835 

Epoch: 10/100 || Loss: 0.4560 || Accuracy: 0.7798 

Epoch: 11/100 || Loss: 0.4523 || Accuracy: 0.7822 

Epoch: 12/100 || Loss: 0.4504 || Accuracy: 0.7835 

Epoch: 13/100 || Loss: 0.4480 || Accuracy: 0.7883 

Epoch: 14/100 || Loss: 0.4458 || Accuracy: 0.7871 

Epoch: 15/100 || Loss: 0.4438 || Accuracy: 0.7871 

Epoch: 16/100 || Loss: 0.4428 || Accuracy: 0.7859 

Epoch: 17/100 || Loss: 0.4413 || Accuracy: 0.7859 

Epoch: 18/100 || Loss: 0.4399 || Accuracy: 0.7847 

Epoch: 19/100 || Loss: 0.4393 || Accuracy: 0.7835 

Epoch: 20/100 || Loss

KeyboardInterrupt: 

In [None]:
ypred, ytr

(tensor([[0.0128]], grad_fn=<SigmoidBackward0>), tensor(0.))