In [1]:
!nvidia-smi

Wed Jan 13 11:42:06 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.33.01    Driver Version: 440.33.01    CUDA Version: 10.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla V100-PCIE...  Off  | 00000000:06:00.0 Off |                    0 |
| N/A   35C    P0    34W / 250W |   1017MiB / 16160MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  Tesla V100-PCIE...  Off  | 00000000:2F:00.0 Off |                    0 |
| N/A   65C    P0   106W / 250W |  14693MiB / 16160MiB |     56%      Default |
+-------------------------------+----------------------+----------------------+
|   2  Tesla V100-PCIE...  Off  | 00000000:86:00.0 Off |                    0 |
| N/A   

In [2]:
from My_Pka_Model import Pka_basic,Pka_acidic
import torch

import dgl
import dgllife
from torch.utils.data import DataLoader
from dgllife.utils import smiles_to_bigraph, CanonicalAtomFeaturizer, CanonicalBondFeaturizer
import torch.optim as optim
import numpy as np
import random 
import pandas as pd

from torch.nn.utils import clip_grad_norm

Using backend: pytorch


In [3]:
# Run on the first GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Seed every random number generator this notebook touches so that the
# stochastic steps (dropout masks during MC-dropout inference, random.shuffle
# when splitting a dataset) start from the same state after a kernel restart.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)  # silently ignored when CUDA is unavailable

# Ask cuDNN for deterministic kernels and disable its auto-tuner, which may
# otherwise pick different algorithms from run to run.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [4]:
def collate(samples):
    """Merge a list of (graph, label) pairs into one batch.

    Parameters
    ----------
    samples : sequence of (graph, label) pairs, as stored by ``load_data``.

    Returns
    -------
    tuple
        ``(batched_graph, labels)`` where ``batched_graph`` is the result of
        ``dgl.batch`` over the graphs and ``labels`` is a tensor built from
        the labels in the same order.
    """
    # Transpose the list of pairs into two parallel lists.
    graphs, labels = (list(column) for column in zip(*samples))
    merged_graph = dgl.batch(graphs)
    label_tensor = torch.tensor(labels)
    return merged_graph, label_tensor

# Atom and bond featurizers handed to smiles_to_bigraph inside load_data.
# Both store their features under the field name 'h', which is the key the
# inference cells read back through bg.ndata['h'] and bg.edata['h'].
node_featurizer = CanonicalAtomFeaturizer(atom_data_field='h')
edge_featurizer = CanonicalBondFeaturizer(bond_data_field='h')

def _make_loader(samples, batch_size, shuffle):
    """Wrap a list of (graph, label) pairs in a DataLoader batched by ``collate``."""
    return DataLoader(samples, batch_size=batch_size, shuffle=shuffle, collate_fn=collate)


def load_data(file_name,batch_size = 128,shuffle = True,split_ratio = False):
    """Read a tab-separated ``SMILES<TAB>value`` file into DataLoader(s).

    Parameters
    ----------
    file_name : str
        Path of a text file with one molecule per line: a SMILES string, a
        tab, then a numeric label. Blank lines are ignored.
    batch_size : int
        Batch size used by every returned DataLoader.
    shuffle : bool
        Passed to every returned DataLoader.
    split_ratio : False, None or iterable of float
        When falsy (the default) a single DataLoader over the whole file is
        returned. Otherwise the samples are shuffled once with
        ``random.shuffle`` and cut into consecutive parts holding
        ``round(total * ratio)`` samples for each ratio, followed by one
        final part with whatever is left.

    Returns
    -------
    DataLoader or list of DataLoader
        One loader when no split is requested, else ``len(split_ratio) + 1``
        loaders.

    Raises
    ------
    ValueError
        If a non-blank line has no tab-separated label column, if the label
        is not a number, or if no graph could be built from the SMILES.
    """
    dataset = []
    with open(file_name) as f:
        # Iterate the file lazily instead of materialising it with readlines().
        for line_no, line in enumerate(f, start=1):
            line = line.rstrip('\n')
            if not line.strip():
                # A trailing or stray blank line must not abort the load.
                continue
            fields = line.split('\t')
            if len(fields) < 2:
                raise ValueError(
                    '{}:{}: expected "SMILES<TAB>value", got {!r}'.format(file_name, line_no, line))
            g = smiles_to_bigraph(smiles=fields[0],
                                  node_featurizer=node_featurizer,
                                  edge_featurizer=edge_featurizer,
                                  canonical_atom_order=False)
            if g is None:
                # NOTE(review): dgllife appears to return None for SMILES it cannot
                # parse; fail here with the line number rather than later in batching.
                raise ValueError(
                    '{}:{}: could not build a graph from SMILES {!r}'.format(file_name, line_no, fields[0]))
            dataset.append((g, float(fields[1])))

    # None / False / 0 all mean "no split"; any iterable of ratios requests one.
    no_split = split_ratio is None or (
        isinstance(split_ratio, (bool, int, float)) and not split_ratio)
    if no_split:
        return _make_loader(dataset, batch_size, shuffle)

    random.shuffle(dataset)
    total = len(dataset)
    dataloader_list = []
    start = 0
    for ratio in split_ratio:
        # Every part is sized against the full dataset, not the remainder.
        count = round(total * ratio)
        dataloader_list.append(_make_loader(dataset[start:start + count], batch_size, shuffle))
        start += count
    # Whatever is left after the requested parts becomes the final loader.
    dataloader_list.append(_make_loader(dataset[start:], batch_size, shuffle))
    return dataloader_list

In [5]:
# Hyper-parameters. Among the cells shown here only `layer_num` is read (it
# sets `num_layers` when the models are constructed); the inference cells pass
# their own batch size to load_data and never train.
# NOTE(review): batch_size, epoch_num, learning_rate and weight_decay look like
# leftovers from the training notebook — confirm before relying on them.
batch_size = 1024
epoch_num = 1500
layer_num = 6
learning_rate = 0.0003
weight_decay = 0.0003

In [6]:
# Build the acidic pKa model and move it to `device`. Its weights are filled
# in later from saved checkpoints via load_state_dict, so these constructor
# arguments have to match the ones the checkpoints were trained with.
# NOTE(review): node_feat_size / edge_feat_size presumably equal the feature
# widths produced by the canonical atom / bond featurizers — confirm.
model = Pka_acidic(node_feat_size = 74,
                   edge_feat_size = 12,
                   output_size = 1,
                   num_layers= layer_num,
                   graph_feat_size=200,
                   dropout=0.2).to(device)

In [7]:
# Acidic test set, un-shuffled so every pass sees the molecules in file order.
# batch_size equals the number of test molecules (the count printed below),
# so the loader yields the whole set as a single batch.
test_loader = load_data(
    './Dataset/acidic_test_0.15_smiles.txt',
    batch_size=1358,
    shuffle=False,
)
print(len(test_loader.dataset))

1358


In [8]:
import csv

In [9]:
# Monte-Carlo dropout on the acidic test set: for each of the three saved
# checkpoints run N_MC_PASSES stochastic forward passes with dropout left on.
# Each pass contributes one CSV row holding one prediction per test molecule.
N_MC_PASSES = 200

prediction_all = []
for i in range(1, 4):
    # map_location=device (instead of a hard-coded 'cuda:0') keeps the cell
    # usable on a CPU-only machine, consistent with how `device` is chosen.
    # NOTE(review): torch.load unpickles the file — only load trusted checkpoints.
    state_dict = torch.load('./Trained_model/acidic_ramdom_split_{}.pkl'.format(i), map_location=device)
    model.load_state_dict(state_dict)
    # train() mode is what keeps dropout stochastic at inference time.
    # NOTE(review): it also puts any normalisation layers in training mode —
    # confirm the model has none, otherwise enable only the dropout modules.
    model.train()
    with torch.no_grad():
        for epoch in range(N_MC_PASSES):
            pass_predictions = []
            for bg, _ in test_loader:
                bg = bg.to(device)
                prediction = model(bg, bg.ndata['h'], bg.edata['h'])
                # reshape(-1) flattens to 1-D even for a batch of one, where
                # squeeze() would collapse to a scalar and break writerows.
                pass_predictions.append(prediction.reshape(-1).cpu())
            # Concatenate all batches so a row always covers the full test
            # set, however many batches the loader yields.
            prediction_all.append(torch.cat(pass_predictions).tolist())

# newline='' is required by the csv module; without it extra blank rows
# appear on platforms that translate line endings.
with open("./Uncertainty analysis/acidic_dropout_on_3*200.csv", "w", newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows(prediction_all)

In [12]:
# Deterministic (dropout off) predictions of the three acidic checkpoints on
# the test set, one CSV row per checkpoint, plus the true labels in one row.
prediction_all = []
labels_all = []
for i in range(1, 4):
    # map_location=device (instead of a hard-coded 'cuda:0') keeps the cell
    # usable on a CPU-only machine, consistent with how `device` is chosen.
    # NOTE(review): torch.load unpickles the file — only load trusted checkpoints.
    state_dict = torch.load('./Trained_model/acidic_ramdom_split_{}.pkl'.format(i), map_location=device)
    model.load_state_dict(state_dict)
    model.eval()  # dropout disabled

    with torch.no_grad():
        batch_predictions = []
        batch_labels = []
        for bg, label in test_loader:
            bg = bg.to(device)
            prediction = model(bg, bg.ndata['h'], bg.edata['h'])
            # reshape(-1) flattens to 1-D even for a batch of one.
            batch_predictions.append(prediction.reshape(-1).cpu())
            batch_labels.append(label.reshape(-1).cpu())
    # One row per checkpoint covering every batch of the loader.
    prediction_all.append(torch.cat(batch_predictions).tolist())
    # Labels are gathered over all batches rather than taken from whichever
    # batch the loop variable happened to hold last.
    labels_all = torch.cat(batch_labels).tolist()

# newline='' is required by the csv module; without it extra blank rows
# appear on platforms that translate line endings.
with open("./Uncertainty analysis/acidic_dropout_off_3.csv", "w", newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows(prediction_all)

with open("./Uncertainty analysis/acidic_label.csv", "w", newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows([labels_all])

In [18]:
import pandas as pd
import numpy as np

In [27]:
# True acidic pKa labels: a single header-less CSV row, one column per molecule.
label_frame = pd.read_csv('./Uncertainty analysis/acidic_label.csv', header=None)
label = label_frame.to_numpy()
label

array([[ 9.68000031, 10.22999954,  8.52000046, ...,  4.09000015,
         9.44999981,  7.88000011]])

In [28]:
# Dropout-off predictions: one row per checkpoint, one column per molecule.
off_frame = pd.read_csv('./Uncertainty analysis/acidic_dropout_off_3.csv', header=None)
off = off_frame.to_numpy()
off

array([[9.38287544, 9.8661232 , 8.43481064, ..., 4.11568451, 8.11683464,
        8.23316574],
       [9.62586975, 9.88845539, 8.48023415, ..., 4.18097878, 8.30923653,
        8.60177422],
       [9.10630608, 9.86979103, 8.43633747, ..., 4.21827698, 8.58673   ,
        8.54954243]])

In [31]:
# Absolute error of every checkpoint against the label (the single label row
# broadcasts over the checkpoint rows), then averaged over checkpoints to give
# one mean absolute error per molecule.
mae = np.abs(off - label).mean(axis=0)
mae

array([0.30831655, 0.35520967, 0.06953971, ..., 0.0816466 , 1.11239942,
       0.58149401])

In [37]:
# MC-dropout predictions: one row per stochastic pass, one column per molecule.
on_frame = pd.read_csv('./Uncertainty analysis/acidic_dropout_on_3*200.csv', header=None)
on = on_frame.to_numpy()
# Spread of the predictions across passes (population std, ddof=0) is used as
# the per-molecule uncertainty.
std = on.std(axis=0)
std

array([0.56447071, 0.48982475, 0.42498841, ..., 0.21230105, 0.51446948,
       0.37643102])

In [40]:
# Two-row table: row 0 is the per-molecule uncertainty, row 1 the per-molecule MAE.
data = np.stack([std, mae], axis=0)
data

array([[0.56447071, 0.48982475, 0.42498841, ..., 0.21230105, 0.51446948,
        0.37643102],
       [0.30831655, 0.35520967, 0.06953971, ..., 0.0816466 , 1.11239942,
        0.58149401]])

In [74]:
# Reorder the columns (molecules) by ascending uncertainty, i.e. by row 0.
order = np.argsort(data[0])
a = data[:, order]
a

array([[ 0.09633361,  0.10464576,  0.10520521, ...,  1.76425896,
         1.82101421,  1.84828022],
       [ 0.10219534,  0.51034347,  0.34387191, ...,  4.38559945,
        10.38639482,  2.23717769]])

In [75]:
# Per-molecule MAE as a plain list, ordered from least to most uncertain.
mae_list = a[1].tolist()
mae_list

[0.1021953423817951,
 0.5103434721628823,
 0.34387191136678075,
 0.421610116958618,
 0.17532626787821468,
 0.14017740885416652,
 0.15070839722951254,
 0.616608460744222,
 0.1484889189402261,
 0.10480237007141113,
 0.7699175675710043,
 0.3843410809834798,
 0.3163474400838216,
 0.18458183606465658,
 0.6108167966206873,
 0.8371573289235431,
 0.46305664380391437,
 0.1464916865030923,
 0.1037287712097168,
 0.15959040323893214,
 0.8765346209208172,
 0.6549389362335204,
 0.1664546330769857,
 0.040471434593200684,
 0.18215513229370117,
 0.1539547443389894,
 0.36162233352661133,
 0.5570180813471476,
 0.10991477966308638,
 0.0945662657419833,
 0.0724522670110066,
 0.8419400850931805,
 0.6916170120239258,
 0.43589146931966133,
 0.20729358990987143,
 0.18129571278889975,
 0.05995726585388169,
 0.43550646305084206,
 0.20903245608011867,
 0.6764523983001708,
 0.9232291777928672,
 0.3885613282521567,
 0.3167304992675781,
 0.7835613091786703,
 0.5596748193105059,
 0.3150759538014725,
 0.73190410931905

In [98]:
# Print the uncertainty-ordered MAE values one per line.
# NOTE(review): this cell's execution count is later than every other cell
# shown, and its saved output matches the basic-model mae_list rather than the
# acidic one built just above — it was evidently last run after the basic
# section. A top-to-bottom run prints the acidic values here.
for mae_value in mae_list:
    print(mae_value)

0.3320259253184001
1.6511554718017571
0.4777081807454427
0.3008320331573488
0.5112301508585614
0.08002845446268718
0.09539477030436243
0.178011894226076
1.7212216059366863
0.3755037784576419
0.13183911641438803
0.10270977020263672
0.278676748275757
0.07808065414428711
0.07788642247517889
0.2653064727783194
0.22814098993937174
0.05162890752156576
0.19341834386189719
0.4690437316894534
0.28718137741088884
0.14732853571573848
0.11589590708414743
0.30130004882812483
0.1862808863321943
0.04233980178833008
0.06594983736673991
0.5850493907928467
0.0701748530069984
0.09706751505533855
0.8165489832560219
0.08063171307245891
0.23952261606852213
0.0789941946665446
0.024687290191650984
0.09385458628336633
0.06723499298095703
0.13297446568806967
0.22886737187703451
0.4156833489735918
1.9012054602305082
0.6424246629079188
0.25297045707702637
0.4609752694765727
0.1368089516957601
0.12333138783772817
0.531986395517985
0.2291439374287926
0.14712397257486978
0.3238368034362796
0.08633025487264086
0.1343

In [79]:
# Running mean of the MAE over molecules taken in order of increasing
# uncertainty: entry k is the mean MAE of the k+1 least-uncertain molecules.
# Computed with numpy instead of a manual accumulator named `sum`, which
# rebound the builtin sum() for the rest of the kernel session.
running_mean = np.cumsum(mae_list) / np.arange(1, len(mae_list) + 1)

# Reversed so the list starts with the mean over all molecules and ends with
# the single least-uncertain molecule.
mae_list_new = running_mean[::-1].tolist()

In [80]:
# Print the reversed running-mean MAE values one per line.
for running_mae in mae_list_new:
    print(running_mae)

0.5641772580645322
0.5629443911287875
0.5556999586601332
0.5528734645674513
0.5503743724170236
0.5497009914343609
0.5490754346447743
0.5487329483780408
0.5476173671482152
0.5472130470091996
0.5472737966772281
0.5473606156153462
0.546481500330384
0.5457517750155007
0.545049864838513
0.544954135044059
0.5442996471407795
0.543728856738863
0.5434588366878489
0.5433687477448861
0.5429593713515108
0.542373522275576
0.5420778976509831
0.5418982915096586
0.5420408789571121
0.5418483584096397
0.5418195717057231
0.541742708910916
0.5412340817025354
0.541370670063314
0.5415013085734696
0.5406195172079464
0.5398462461518349
0.5384439132826505
0.5385538612455802
0.5384804910270818
0.5386211137816616
0.5374622828995235
0.5352259595313983
0.5354596910283198
0.5345195322940535
0.534363605230066
0.5344723087764724
0.528436316497841
0.5282362329691859
0.5269672212425707
0.5271070228569679
0.5272687637225122
0.5272430536732948
0.527275077468351
0.5273122367355118
0.5273408955773036
0.5273777339911737
0.5

In [81]:
# Rebind `model` to the basic pKa model (same constructor arguments as the
# acidic one) and move it to `device`. From here on the acidic model is no
# longer reachable through this name; weights are loaded from the basic
# checkpoints in the cells below.
model = Pka_basic(node_feat_size = 74,
                   edge_feat_size = 12,
                   output_size = 1,
                   num_layers= layer_num,
                   graph_feat_size=200,
                   dropout=0.2).to(device)

In [82]:
# Basic test set, un-shuffled so every pass sees the molecules in file order.
# batch_size is at least the number of test molecules (the count printed
# below), so the loader yields the whole set as a single batch.
test_loader = load_data(
    './Dataset/basic_test_0.15_smiles.txt',
    batch_size=1358,
    shuffle=False,
)
print(len(test_loader.dataset))

1266


In [83]:
import csv

In [84]:
# Monte-Carlo dropout on the basic test set: for each of the three saved
# checkpoints run N_MC_PASSES stochastic forward passes with dropout left on.
# Each pass contributes one CSV row holding one prediction per test molecule.
N_MC_PASSES = 200

prediction_all = []
for i in range(1, 4):
    # map_location=device (instead of a hard-coded 'cuda:0') keeps the cell
    # usable on a CPU-only machine, consistent with how `device` is chosen.
    # NOTE(review): torch.load unpickles the file — only load trusted checkpoints.
    state_dict = torch.load('./Trained_model/basic_ramdom_split_{}.pkl'.format(i), map_location=device)
    model.load_state_dict(state_dict)
    # train() mode is what keeps dropout stochastic at inference time.
    # NOTE(review): it also puts any normalisation layers in training mode —
    # confirm the model has none, otherwise enable only the dropout modules.
    model.train()
    with torch.no_grad():
        for epoch in range(N_MC_PASSES):
            pass_predictions = []
            for bg, _ in test_loader:
                bg = bg.to(device)
                prediction = model(bg, bg.ndata['h'], bg.edata['h'])
                # reshape(-1) flattens to 1-D even for a batch of one, where
                # squeeze() would collapse to a scalar and break writerows.
                pass_predictions.append(prediction.reshape(-1).cpu())
            # Concatenate all batches so a row always covers the full test
            # set, however many batches the loader yields.
            prediction_all.append(torch.cat(pass_predictions).tolist())

# newline='' is required by the csv module; without it extra blank rows
# appear on platforms that translate line endings.
with open("./Uncertainty analysis/basic_dropout_on_3*200.csv", "w", newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows(prediction_all)

In [85]:
# Deterministic (dropout off) predictions of the three basic checkpoints on
# the test set, one CSV row per checkpoint, plus the true labels in one row.
prediction_all = []
labels_all = []
for i in range(1, 4):
    # map_location=device (instead of a hard-coded 'cuda:0') keeps the cell
    # usable on a CPU-only machine, consistent with how `device` is chosen.
    # NOTE(review): torch.load unpickles the file — only load trusted checkpoints.
    state_dict = torch.load('./Trained_model/basic_ramdom_split_{}.pkl'.format(i), map_location=device)
    model.load_state_dict(state_dict)
    model.eval()  # dropout disabled

    with torch.no_grad():
        batch_predictions = []
        batch_labels = []
        for bg, label in test_loader:
            bg = bg.to(device)
            prediction = model(bg, bg.ndata['h'], bg.edata['h'])
            # reshape(-1) flattens to 1-D even for a batch of one.
            batch_predictions.append(prediction.reshape(-1).cpu())
            batch_labels.append(label.reshape(-1).cpu())
    # One row per checkpoint covering every batch of the loader.
    prediction_all.append(torch.cat(batch_predictions).tolist())
    # Labels are gathered over all batches rather than taken from whichever
    # batch the loop variable happened to hold last.
    labels_all = torch.cat(batch_labels).tolist()

# newline='' is required by the csv module; without it extra blank rows
# appear on platforms that translate line endings.
with open("./Uncertainty analysis/basic_dropout_off_3.csv", "w", newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows(prediction_all)

with open("./Uncertainty analysis/basic_label.csv", "w", newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows([labels_all])

In [None]:
import pandas as pd
import numpy as np

In [86]:
# True basic pKa labels: a single header-less CSV row, one column per molecule.
label_frame = pd.read_csv('./Uncertainty analysis/basic_label.csv', header=None)
label = label_frame.to_numpy()
label

array([[9.10000038, 4.78000021, 6.69999981, ..., 1.82000005, 7.69999981,
        8.75      ]])

In [87]:
# Dropout-off predictions: one row per checkpoint, one column per molecule.
off_frame = pd.read_csv('./Uncertainty analysis/basic_dropout_off_3.csv', header=None)
off = off_frame.to_numpy()
off

array([[9.31546688, 5.03594351, 6.93537188, ..., 1.80494189, 9.94443607,
        8.81002808],
       [8.98242664, 4.85555029, 6.75200129, ..., 1.57668257, 9.50246143,
        9.05798721],
       [9.13710594, 4.83638239, 6.88787603, ..., 1.56552517, 9.24804306,
        9.19177246]])

In [88]:
# Absolute error of every checkpoint against the label (the single label row
# broadcasts over the checkpoint rows), then averaged over checkpoints to give
# one mean absolute error per molecule.
mae = np.abs(off - label).mean(axis=0)
mae

array([0.12338193, 0.12929185, 0.15841659, ..., 0.17095017, 1.86498038,
       0.26992925])

In [89]:
# MC-dropout predictions: one row per stochastic pass, one column per molecule.
on_frame = pd.read_csv('./Uncertainty analysis/basic_dropout_on_3*200.csv', header=None)
on = on_frame.to_numpy()
# Spread of the predictions across passes (population std, ddof=0) is used as
# the per-molecule uncertainty.
std = on.std(axis=0)
std

array([0.46842382, 0.34069328, 0.43111008, ..., 0.38408847, 0.60782909,
       0.45655447])

In [90]:
# Two-row table: row 0 is the per-molecule uncertainty, row 1 the per-molecule MAE.
data = np.stack([std, mae], axis=0)
data

array([[0.46842382, 0.34069328, 0.43111008, ..., 0.38408847, 0.60782909,
        0.45655447],
       [0.12338193, 0.12929185, 0.15841659, ..., 0.17095017, 1.86498038,
        0.26992925]])

In [91]:
# Reorder the columns (molecules) by ascending uncertainty, i.e. by row 0.
order = np.argsort(data[0])
a = data[:, order]
a

array([[0.21937609, 0.2268993 , 0.23046697, ..., 1.31290647, 1.42793953,
        1.53604818],
       [0.33202593, 1.65115547, 0.47770818, ..., 1.03642925, 1.23565706,
        2.3875863 ]])

In [92]:
# Per-molecule MAE as a plain list, ordered from least to most uncertain.
mae_list = a[1].tolist()
mae_list

[0.3320259253184001,
 1.6511554718017571,
 0.4777081807454427,
 0.3008320331573488,
 0.5112301508585614,
 0.08002845446268718,
 0.09539477030436243,
 0.178011894226076,
 1.7212216059366863,
 0.3755037784576419,
 0.13183911641438803,
 0.10270977020263672,
 0.278676748275757,
 0.07808065414428711,
 0.07788642247517889,
 0.2653064727783194,
 0.22814098993937174,
 0.05162890752156576,
 0.19341834386189719,
 0.4690437316894534,
 0.28718137741088884,
 0.14732853571573848,
 0.11589590708414743,
 0.30130004882812483,
 0.1862808863321943,
 0.04233980178833008,
 0.06594983736673991,
 0.5850493907928467,
 0.0701748530069984,
 0.09706751505533855,
 0.8165489832560219,
 0.08063171307245891,
 0.23952261606852213,
 0.0789941946665446,
 0.024687290191650984,
 0.09385458628336633,
 0.06723499298095703,
 0.13297446568806967,
 0.22886737187703451,
 0.4156833489735918,
 1.9012054602305082,
 0.6424246629079188,
 0.25297045707702637,
 0.4609752694765727,
 0.1368089516957601,
 0.12333138783772817,
 0.5319863

In [94]:
# Running mean of the MAE over molecules taken in order of increasing
# uncertainty: entry k is the mean MAE of the k+1 least-uncertain molecules.
# Computed with numpy instead of a manual accumulator named `sum`, which
# rebound the builtin sum() for the rest of the kernel session.
running_mean = np.cumsum(mae_list) / np.arange(1, len(mae_list) + 1)

# Reversed so the list starts with the mean over all molecules and ends with
# the single least-uncertain molecule.
mae_list_new = running_mean[::-1].tolist()

In [95]:
# Print the reversed running-mean MAE values one per line.
for running_mae in mae_list_new:
    print(running_mae)

0.5481739071981998
0.5467198262587479
0.5461747809819173
0.5457866143427369
0.5450854070983503
0.5439885152967165
0.5366429793648416
0.5355824538286369
0.534562928312387
0.5342678695643482
0.5333990075693693
0.533003291795908
0.5324363333981624
0.5315035652977751
0.5310131247339844
0.5307601607741391
0.5305924213364717
0.5295858071350932
0.5263079210800417
0.5258537663972865
0.5256192890385041
0.5255056616966141
0.5233065429530905
0.5225387377008596
0.5222806458746869
0.5211229234480571
0.520935887654602
0.5207595800615609
0.5200737980969234
0.5197777224936008
0.5185265645034158
0.5183848561017738
0.518159213780789
0.5178710793389978
0.5178453565945088
0.5174863519635772
0.5173808480095529
0.5157729856177842
0.5153715475806184
0.5149927127760237
0.5146991938343466
0.5111447483640743
0.5105700885165836
0.5098246786119978
0.5089945154441379
0.5087769537194928
0.5083047193190069
0.5075599829680829
0.5074763388011538
0.5069735236246553
0.5069873003516825
0.5068681477452784
0.50676152799258