In [1]:
from My_Pka_Model import Pka_basic_view,Pka_acidic_view
import torch
from dgllife.utils import smiles_to_bigraph, CanonicalAtomFeaturizer, CanonicalBondFeaturizer

from sklearn.metrics import r2_score,mean_squared_error,mean_absolute_error

Using backend: pytorch


In [2]:
def predict(smiles,model_view):

    node_featurizer = CanonicalAtomFeaturizer(atom_data_field='h')
    edge_featurizer = CanonicalBondFeaturizer(bond_data_field='h')
    bg = smiles_to_bigraph(smiles= smiles, 
                  node_featurizer=node_featurizer,
                  edge_featurizer=edge_featurizer,canonical_atom_order= False)

    with torch.no_grad():
        model_view.eval()
        molecule_pka,atom_pka = model_view(bg,bg.ndata['h'], bg.edata['h'])
        
    return molecule_pka,atom_pka

In [3]:
acid_pred = Pka_acidic_view(node_feat_size = 74,
                            edge_feat_size = 12,
                            output_size = 1,
                            num_layers= 6,
                            graph_feat_size=200,
                            dropout=0.2)

In [None]:
acid_pred.load_state_dict(torch.load('./Trained_model/acidic_ramdom_split_{}.pkl'.format(i),map_location='cuda:0'))
print('time: '+ str(i))
with open('./Dataset/SAMPL6_acidic_smiles.txt') as f: #acidic_test_0.15_smiles.txt,SAMPL7_acidic_smiles.txt
    pred = []
    label = []
    for line in f.readlines():
        line = line.replace('\n','').split('\t')
        molecule_pka,atom_pka = predict(line[0],acid_pred)
        print(molecule_pka)
        pred.append(molecule_pka)
        label.append(float(line[1]))

print('')
print(mean_absolute_error(pred,label))
print(mean_squared_error(pred,label)**0.5)
print(r2_score(pred,label))
print('')

In [4]:
for i in range(1,4):
    acid_pred.load_state_dict(torch.load('./Trained_model/acidic_ramdom_split_{}.pkl'.format(i),map_location='cuda:0'))
    print('time: '+ str(i))
    with open('./Dataset/SAMPL6_acidic_smiles.txt') as f: #acidic_test_0.15_smiles.txt,SAMPL7_acidic_smiles.txt
        pred = []
        label = []
        for line in f.readlines():
            line = line.replace('\n','').split('\t')
            molecule_pka,atom_pka = predict(line[0],acid_pred)
            print(molecule_pka)
            pred.append(molecule_pka)
            label.append(float(line[1]))

    print('')
    print(mean_absolute_error(pred,label))
    print(mean_squared_error(pred,label)**0.5)
    print(r2_score(pred,label))
    print('')

time: 1
8.67052936553955
7.8257012367248535
11.976712226867676
3.805478572845459
9.641075134277344
9.602269172668457
12.049108505249023
9.807235717773438
9.935364723205566
5.174829483032227
7.877353191375732

0.5976347715204412
0.678635531570444
0.9210780225330559

time: 2
8.631477355957031
7.473364353179932
11.269383430480957
4.0138840675354
10.418246269226074
8.812257766723633
11.944774627685547
9.283881187438965
10.134147644042969
4.6184186935424805
8.090373992919922

0.678327671397816
0.7947522059276297
0.889540275595077

time: 3
8.37906265258789
7.537794589996338
11.740337371826172
4.198156833648682
9.795727729797363
9.419637680053711
12.454751014709473
9.472614288330078
9.965655326843262
6.481783390045166
8.317770957946777

0.6303294806046918
0.8122688633934673
0.8647461301493476



In [5]:
for i in range(1,4):
    acid_pred.load_state_dict(torch.load('./Trained_model/acidic_ramdom_split_{}.pkl'.format(i),map_location='cuda:0'))
    print('time: '+ str(i))
    with open('./Dataset/SAMPL7_acidic_smiles.txt') as f: #acidic_test_0.15_smiles.txt,SAMPL7_acidic_smiles.txt
        pred = []
        label = []
        for line in f.readlines():
            line = line.replace('\n','').split('\t')
            molecule_pka,atom_pka = predict(line[0],acid_pred)
            print(molecule_pka)
            pred.append(molecule_pka)
            label.append(float(line[1]))

    print('')
    print(mean_absolute_error(pred,label))
    print(mean_squared_error(pred,label)**0.5)
    print(r2_score(pred,label))
    print('')

time: 1
4.693657398223877
4.951440811157227
12.197287559509277
12.228243827819824
11.542144775390625
12.505785942077637
12.007244110107422
12.335575103759766
11.110367774963379
10.304190635681152
11.222630500793457
10.159642219543457
9.264741897583008
9.990362167358398
5.937761306762695
5.1273651123046875
6.235569477081299
5.721826553344727
4.819912910461426
6.274832725524902

0.9146612205505373
1.075912669082801
0.868127394696077

time: 2
4.431005001068115
4.878375053405762
10.462681770324707
10.519570350646973
9.74099349975586
11.187289237976074
10.360854148864746
11.043974876403809
10.608086585998535
10.177499771118164
11.022433280944824
10.539410591125488
9.694185256958008
11.13106918334961
6.0614800453186035
5.377483367919922
6.810568809509277
5.648723602294922
4.916687488555908
6.523675918579102

0.6165741624832154
0.7612511245617858
0.9078619721459777

time: 3
4.755572319030762
5.081221103668213
10.792909622192383
11.206064224243164
10.085659980773926
11.865121841430664
11.23481

In [6]:
for i in range(1,4):
    acid_pred.load_state_dict(torch.load('./Trained_model/acidic_ramdom_split_{}.pkl'.format(i),map_location='cuda:0'))

    with open('./Dataset/acidic_test_0.15_smiles.txt') as f: #acidic_test_0.15_smiles.txt,SAMPL6_test_acidic.txt
        pred = []
        label = []
        for line in f.readlines():
            line = line.replace('\n','').split('\t')
            molecule_pka,atom_pka = predict(line[0],acid_pred)
            #print(molecule_pka,float(line[1]))
            pred.append(molecule_pka)
            label.append(float(line[1]))

    print('time: '+ str(i))
    print(mean_absolute_error(pred,label))
    print(mean_squared_error(pred,label)**0.5)
    print(r2_score(pred,label))



time: 1
0.5631147899948855
0.9482059244366742
0.9221193673834996
time: 2
0.5616683274900125
0.9205022739301771
0.926579490756474
time: 3
0.5677474436867235
0.9351637811552465
0.9242672538599793


In [5]:
for i in range(1,6):
    acid_pred.load_state_dict(torch.load('./Trained_model/non_B_try_{}.pkl'.format(i),map_location='cuda:0'))

    with open('./Dataset/Phenylboronic_acid_smiles(22).txt') as f: #acidic_test_0.15_smiles.txt,SAMPL6_test_acidic.txt
        pred = []
        label = []
        for line in f.readlines():
            line = line.replace('\n','').split('\t')
            molecule_pka,atom_pka = predict(line[0],acid_pred)
            #print(molecule_pka,float(line[1]))
            pred.append(molecule_pka)
            label.append(float(line[1]))

    print('time: '+ str(i))
    print(mean_absolute_error(pred,label))
    print(mean_squared_error(pred,label)**0.5)
    print(r2_score(pred,label))


time: 1
0.5338740305467087
0.6763158919725007
0.6511761113450325
time: 2
0.54870530908758
0.7662336647128798
0.17656255464603998
time: 3
0.3649598251689564
0.5468604689056408
0.8273342469443705
time: 4
0.5141829187219794
0.6620796493997997
0.6481016477253337
time: 5
0.5389714934609152
0.6628636925193704
0.7451528429992658


In [7]:
base_pred = Pka_basic_view(node_feat_size = 74,
                            edge_feat_size = 12,
                            output_size = 1,
                            num_layers= 6,
                            graph_feat_size=200,
                            dropout=0.2)

In [8]:

for i in range(1,4):
    base_pred.load_state_dict(torch.load('./Trained_model/basic_ramdom_split_{}.pkl'.format(i),map_location='cuda:0'))
    print('time: '+ str(i))
    with open('./Dataset/SAMPL6_basic_smiles.txt') as f: #acidic_test_0.15_smiles.txt,SAMPL6_test_acidic.txt
        pred = []
        label = []
        for line in f.readlines():
            line = line.replace('\n','').split('\t')
            molecule_pka,atom_pka = predict(line[0],base_pred)
            print(molecule_pka)
            pred.append(molecule_pka)
            label.append(float(line[1]))

    print('')
    print(mean_absolute_error(pred,label))
    print(mean_squared_error(pred,label)**0.5)
    print(r2_score(pred,label))
    print('')

time: 1
3.8340981006622314
4.930722236633301
4.591391563415527
3.862032413482666
4.930919647216797
3.9746387004852295
3.830474376678467
3.8794164657592773
5.066303730010986
5.347182273864746
4.897919178009033
5.256861209869385
3.463160514831543
2.771059274673462
3.598081350326538
2.5035266876220703
5.473254680633545
3.509474515914917

0.5915269602669609
0.7695206414650383
0.20172800117143952

time: 2
4.233867168426514
5.548564434051514
4.558190822601318
3.1008951663970947
5.558676242828369
4.429934024810791
3.878787040710449
4.27506160736084
4.779202938079834
4.816225528717041
4.1727614402771
5.350734710693359
3.2919929027557373
2.83524489402771
3.2453174591064453
2.646540403366089
5.100139617919922
3.339419364929199

0.49314609421624084
0.5994953862508009
0.5615887032587248

time: 3
4.259359836578369
5.221678733825684
4.6791791915893555
3.76163387298584
5.221678733825684
4.260782241821289
3.6289143562316895
4.256599426269531
5.032618045806885
5.603991985321045
4.757470607757568
5.4713

In [9]:
for i in range(1,4):
    base_pred.load_state_dict(torch.load('./Trained_model/basic_ramdom_split_{}.pkl'.format(i),map_location='cuda:0'))

    with open('./Dataset/basic_test_0.15_smiles.txt') as f: #acidic_test_0.15_smiles.txt,SAMPL6_test_acidic.txt
        pred = []
        label = []
        for line in f.readlines():
            line = line.replace('\n','').split('\t')
            molecule_pka,atom_pka = predict(line[0],base_pred)
            #print(molecule_pka,float(line[1]))
            pred.append(molecule_pka)
            label.append(float(line[1]))

    print('time: '+ str(i))
    print(mean_absolute_error(pred,label))
    print(mean_squared_error(pred,label)**0.5)
    print(r2_score(pred,label))

time: 1
0.5513672611376628
0.9110421235773952
0.9080233121048138
time: 2
0.5430566594588307
0.8652425432779225
0.9147038803394035
time: 3
0.5500978914092157
0.8816145256831921
0.9138485570125634


In [10]:
for i in range(1,4):
    base_pred.load_state_dict(torch.load('./Trained_model/basic_ramdom_split_{}.pkl'.format(i),map_location='cuda:0'))

    with open('./Dataset/Jensen_basic_smiles.txt') as f: #acidic_test_0.15_smiles.txt,SAMPL6_test_acidic.txt
        pred = []
        label = []
        for line in f.readlines():
            line = line.replace('\n','').split('\t')
            molecule_pka,atom_pka = predict(line[0],base_pred)
            #print(molecule_pka,float(line[1]))
            pred.append(molecule_pka)
            label.append(float(line[1]))

    print('time: '+ str(i))
    print(mean_absolute_error(pred,label))
    print(mean_squared_error(pred,label)**0.5)
    print(r2_score(pred,label))

time: 1
0.3567237470460973
0.507098781434984
0.9366450482219731
time: 2
0.35316574573516846
0.5301852122144557
0.9289659473127811
time: 3
0.3710230671841163
0.546624469817794
0.9260769434136331


In [1]:
from My_Pka_Model import Pka_basic_view,Pka_acidic_view
import torch
from dgllife.utils import smiles_to_bigraph, CanonicalAtomFeaturizer, CanonicalBondFeaturizer

from sklearn.metrics import r2_score,mean_squared_error,mean_absolute_error

Using backend: pytorch


In [2]:
def predict(smiles,model_view):

    node_featurizer = CanonicalAtomFeaturizer(atom_data_field='h')
    edge_featurizer = CanonicalBondFeaturizer(bond_data_field='h')
    bg = smiles_to_bigraph(smiles= smiles, 
                  node_featurizer=node_featurizer,
                  edge_featurizer=edge_featurizer,canonical_atom_order= False)

    with torch.no_grad():
        model_view.eval()
        molecule_pka,atom_pka = model_view(bg,bg.ndata['h'], bg.edata['h'])
        
    return molecule_pka,atom_pka

In [3]:
acid_pred = Pka_acidic_view(node_feat_size = 74,
                            edge_feat_size = 12,
                            output_size = 1,
                            num_layers= 6,
                            graph_feat_size=200,
                            dropout=0.2)

In [4]:
acid_pred=torch.load('./Trained_model/acid_pred.pkl')

with open('./Dataset/SAMPL7_acidic_smiles.txt') as f: #acidic_test_0.15_smiles.txt,SAMPL7_acidic_smiles.txt
    pred = []
    label = []
    for line in f.readlines():
        line = line.replace('\n','').split('\t')
        molecule_pka,atom_pka = predict(line[0],acid_pred)
        print(molecule_pka)
        pred.append(molecule_pka)
        label.append(float(line[1]))

print('')
print(mean_absolute_error(pred,label))
print(mean_squared_error(pred,label)**0.5)
print(r2_score(pred,label))
print('')



4.761752128601074
5.2170305252075195
11.26727294921875
10.702266693115234
10.005017280578613
11.165238380432129
10.458869934082031
10.912881851196289
9.3477144241333
9.120635032653809
10.461565017700195
8.758251190185547
8.480949401855469
9.497883796691895
5.0338664054870605
4.833774566650391
6.3165974617004395
4.790356159210205
4.623432159423828
6.1063079833984375

0.6589761419296264
0.8533253869449401
0.8822999540969091



In [5]:
acid_pred=torch.load('./Trained_model/acid_pred.pkl')

with open('./Dataset/SAMPL6_acidic_smiles.txt') as f: #acidic_test_0.15_smiles.txt,SAMPL7_acidic_smiles.txt
    pred = []
    label = []
    for line in f.readlines():
        line = line.replace('\n','').split('\t')
        molecule_pka,atom_pka = predict(line[0],acid_pred)
        print(molecule_pka)
        pred.append(molecule_pka)
        label.append(float(line[1]))

print('')
print(mean_absolute_error(pred,label))
print(mean_squared_error(pred,label)**0.5)
print(r2_score(pred,label))
print('')



8.63145923614502
7.254913330078125
11.661334037780762
3.811230182647705
9.551641464233398
8.686821937561035
12.492609977722168
9.147997856140137
9.21206283569336
6.983104228973389
7.8885111808776855

0.6154430996287953
0.7944162676825823
0.8712716306668987



In [None]:
11

In [6]:
base_pred = Pka_basic_view(node_feat_size = 74,
                            edge_feat_size = 12,
                            output_size = 1,
                            num_layers= 6,
                            graph_feat_size=200,
                            dropout=0.2)

In [7]:
base_pred=torch.load('./Trained_model/base_pred.pkl')

with open('./Dataset/SAMPL6_basic_smiles.txt') as f: #acidic_test_0.15_smiles.txt,SAMPL6_test_acidic.txt
    pred = []
    label = []
    for line in f.readlines():
        line = line.replace('\n','').split('\t')
        molecule_pka,atom_pka = predict(line[0],base_pred)
        print(molecule_pka)
        pred.append(molecule_pka)
        label.append(float(line[1]))

print('')
print(mean_absolute_error(pred,label))
print(mean_squared_error(pred,label)**0.5)
print(r2_score(pred,label))
print('')



4.673941135406494
5.243436336517334
3.7530205249786377
2.6559715270996094
5.270466327667236
4.68306303024292
3.767454147338867
4.652645111083984
5.194212436676025
4.870110034942627
4.719622611999512
5.155691146850586
3.50388765335083
3.124019145965576
3.892946481704712
2.5847835540771484
6.120266437530518
3.9156174659729004

0.5291799237993028
0.6232898545493251
0.566707090716286



In [None]:
18*0.529 + 