In [2]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import rdDepictor
from rdkit.Chem.Draw import rdMolDraw2D
from rdkit.Chem import Draw

import networkx as nx

from utils.graph_utils import *
from predict_logp.predict_logp import *
import torch_geometric as pyg

In [3]:
# Run on the GPU when one is visible to PyTorch, otherwise fall back to CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

In [4]:
# Load the generated-molecule log. The file has no header row, so pandas
# labels the columns 0..11; column 0 holds the SMILES string. The log also
# contains "***** Iteration N *****" separator rows that are not molecules.
# NOTE(review): the path is relative to a personal Downloads folder; point it
# at a project data directory before sharing the notebook.
molecules = pd.read_csv(filepath_or_buffer="../../Downloads/molecules.csv", header=None)

In [5]:
# Show the raw frame as loaded (pandas truncates the middle rows in the repr).
molecules

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,N#CCl,2.0,0.404573,0.677747,-0.689285,1.300077,0.029264,0.884302,0.951709,True,True,False
1,***** Iteration 0 *****,,,,,,,,,,,
2,C#I=P12#P34(PI35=[SH]#P43=P(N)(=P)=NN(F)C31O5)...,0.0,0.116497,0.099583,-1.159525,-1.170163,0.000000,1.715745,1.003545,False,False,False
3,C[SH](#[PH]#CP(Br)CN)C(Cl)=I1=I#P(#N)OI(Cl)(C(...,0.0,0.124808,0.243159,-1.574953,-1.585591,0.000000,1.832623,0.546516,False,False,False
4,***** Iteration 1 *****,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
45515,CC=C(C=CC(S)=CC=CC1=CN1C)C1=CC=C1N,2.0,0.591782,0.595368,1.183565,3.172926,0.000142,0.003514,3.471982,True,True,True
45516,C=C1NCC2C(C3CC(N)C(C=O)C3)CN2C1C,2.0,0.693180,0.516870,1.386361,3.375723,0.001884,0.000753,3.739005,True,True,True
45517,COCCC1=CC2=C(Cl)C=CN(C2C)C(C)C1C,2.0,0.773161,0.518874,1.546322,3.535683,0.001684,0.008910,3.939542,True,True,True
45518,CCCOC1=NC1=C1C(Cl)=CC=CC=C1CO,2.0,0.838022,0.708149,1.676044,3.665405,0.004689,0.621927,4.642537,True,True,True


In [6]:
# Drop the "***** Iteration N *****" separator rows so that only SMILES rows
# remain in column 0.
#   * regex=False: "Iteration" is a literal substring, not a pattern.
#   * na=True: a missing value in column 0 would otherwise yield NaN in the
#     mask, and `~mask` / boolean indexing would raise. Rows without a SMILES
#     string cannot be scored, so they are dropped together with the markers.
is_marker_or_missing = molecules[0].str.contains("Iteration", regex=False, na=True)
molecules = molecules[~is_marker_or_missing]

In [7]:
# Show the frame again; at this point the iteration-marker rows have been
# filtered out, so the original row labels now have gaps.
molecules

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,N#CCl,2.0,0.404573,0.677747,-0.689285,1.300077,0.029264,0.884302,0.951709,True,True,False
2,C#I=P12#P34(PI35=[SH]#P43=P(N)(=P)=NN(F)C31O5)...,0.0,0.116497,0.099583,-1.159525,-1.170163,0.000000,1.715745,1.003545,False,False,False
3,C[SH](#[PH]#CP(Br)CN)C(Cl)=I1=I#P(#N)OI(Cl)(C(...,0.0,0.124808,0.243159,-1.574953,-1.585591,0.000000,1.832623,0.546516,False,False,False
5,CC(S)N(F)C1=P2(Cl)(C#N)=P(=O)(F)=I1(N=[SH]Br)N2Cl,0.0,0.120079,0.229736,-1.215512,-1.226150,0.000000,1.738464,0.528047,False,False,False
6,CN(N)C1N2C(Br)C12Cl,2.0,0.220044,0.497788,-1.477122,0.512240,0.040188,1.106027,1.845820,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...
45515,CC=C(C=CC(S)=CC=CC1=CN1C)C1=CC=C1N,2.0,0.591782,0.595368,1.183565,3.172926,0.000142,0.003514,3.471982,True,True,True
45516,C=C1NCC2C(C3CC(N)C(C=O)C3)CN2C1C,2.0,0.693180,0.516870,1.386361,3.375723,0.001884,0.000753,3.739005,True,True,True
45517,COCCC1=CC2=C(Cl)C=CN(C2C)C(C)C1C,2.0,0.773161,0.518874,1.546322,3.535683,0.001684,0.008910,3.939542,True,True,True
45518,CCCOC1=NC1=C1C(Cl)=CC=CC=C1CO,2.0,0.838022,0.708149,1.676044,3.665405,0.004689,0.621927,4.642537,True,True,True


In [12]:
# Keep only the SMILES column (as a one-column DataFrame) for the rows from
# position 31000 onward. The offset is positional (iloc), not a row label.
# NOTE(review): 31000 is a magic number -- presumably it selects the later
# generation iterations; confirm and promote it to a named constant.
molecules_smiles = molecules.iloc[31000:][[0]]

In [13]:
# Load the trained dock-score model. The checkpoint stores a whole pickled
# module rather than a state_dict, so the model class must be importable in
# this kernel.
#
# map_location=DEVICE remaps the stored tensors onto the device selected
# earlier; without it a checkpoint saved on a GPU cannot be loaded on a
# CPU-only machine, and the model may end up on a different device than the
# batches that are moved to DEVICE during scoring.
#
# SECURITY: torch.load unpickles arbitrary objects -- only load checkpoints
# from a trusted source.
# NOTE(review): recent PyTorch releases default to weights_only=True, which
# rejects whole-module pickles; pass weights_only=False there if the
# checkpoint is trusted.
gcn_net = torch.load(
    "dock_score_models/default_run/dock_score/best_model.pth",
    map_location=DEVICE,
)

In [14]:
# Print the module repr to inspect the loaded architecture.
gcn_net

GNN_MyGAT(
  (layers): ModuleList(
    (0): MyGATConv(121, 512, heads=1)
    (1): MyGATConv(512, 512, heads=1)
    (2): MyGATConv(512, 512, heads=1)
    (3): MyGATConv(512, 512, heads=1)
    (4): MyGATConv(512, 512, heads=1)
    (5): MyGATConv(512, 512, heads=1)
    (6): MyGATConv(512, 512, heads=1)
  )
  (final_layer): Linear(in_features=512, out_features=1, bias=True)
  (act): ReLU()
)

In [15]:
# Wrap the SMILES strings in the project's MolData dataset. The first argument
# is a list of zeros, one per molecule -- presumably placeholder targets, since
# this notebook only predicts (TODO confirm against MolData's signature).
smiles_to_score = molecules_smiles[0].values
placeholder_targets = [0] * len(smiles_to_score)
eval_data = MolData(placeholder_targets, smiles_to_score)

In [16]:
# Batch the dataset for inference. shuffle=False is the DataLoader default and
# is spelled out because the predictions are later mapped back to
# molecules_smiles by position, which requires the original order.
# NOTE(review): num_workers=24 assumes a many-core machine; lower it on
# smaller hosts.
eval_dataloader = DataLoader(
    eval_data,
    batch_size=512,
    shuffle=False,
    num_workers=24,
    collate_fn=my_collate,
)

In [17]:
# Sanity check: concatenating onto an empty 1-D tensor yields just the
# appended values, so an empty tensor is a valid starting accumulator.
torch.cat([torch.empty(0), torch.zeros(1)])

tensor([0.])

In [18]:
# Score every molecule with the trained network and collect the predictions
# into one 1-D CPU tensor `scores`, in dataloader order.
#
# Changes relative to the naive loop:
#   * The model is moved to DEVICE and put in eval mode, so it sits on the
#     same device as the batches and runs in inference mode.
#   * torch.no_grad() stops autograd from recording (and retaining) a graph
#     for every batch, which otherwise grows memory for the whole run.
#   * Each prediction is moved to the CPU before being stored; concatenating
#     a CUDA prediction onto a CPU accumulator raises a device-mismatch error.
#   * Batches are gathered in a list and concatenated once instead of
#     re-allocating the accumulator on every iteration.
gcn_net.to(DEVICE)
gcn_net.eval()

batch_scores = []
with torch.no_grad():
    # The middle element of each batch (the targets) is not needed here.
    for g1, _, g2 in eval_dataloader:
        g1 = g1.to(DEVICE)
        g2 = g2.to(DEVICE)
        # The model takes the first graph plus the edge index of the second.
        y_pred = gcn_net(g1, g2.edge_index)
        batch_scores.append(y_pred.cpu())

# An empty dataloader still yields an empty tensor, as before.
scores = torch.cat(batch_scores) if batch_scores else torch.empty(0)

In [24]:
# Convert the predictions to a NumPy array for sorting and indexing.
# The isinstance guard makes the cell safe to re-run: once `scores` is
# already an ndarray it is left untouched instead of raising AttributeError.
# .cpu() is a no-op for CPU tensors and is required if the predictions still
# live on the GPU.
if isinstance(scores, torch.Tensor):
    scores = scores.detach().cpu().numpy()

In [31]:
# Check the number of predictions; it should match len(molecules_smiles).
scores.shape

(10298,)

In [39]:
# Positions of the 9 smallest predicted scores (argsort is ascending).
# 9 matches the 3-per-row grid drawn at the end of the notebook.
# NOTE(review): presumably a lower dock score is better -- confirm.
top_mols = np.argsort(scores)[:9]

In [40]:
# Map the selected positions back to their SMILES strings. The lookup is
# positional (iloc) because `scores` follows the row order of molecules_smiles.
top_smiles = molecules_smiles[0].iloc[top_mols].values

In [48]:
# Parse the selected SMILES into RDKit molecules and render them as a grid
# with three 300x300 px tiles per row, saved as a PNG in the working directory.
# NOTE(review): Chem.MolFromSmiles returns None for SMILES it cannot parse;
# check for None entries if a tile comes out blank.
top_molecules = list(map(Chem.MolFromSmiles, top_smiles))
img = Draw.MolsToGridImage(
    top_molecules,
    molsPerRow=3,
    subImgSize=(300, 300),
    useSVG=False,
)
img.save('TopMolecules.png')