In [1]:
import torch
import random
import pandas as pd
import torch
from torch_geometric.utils import from_smiles
import random
import pandas as pd
import numpy as np
from tqdm import tqdm




def split_list(input_list, ratio, random_state=None):
    """Shuffle a copy of ``input_list`` and cut it into three consecutive parts.

    The caller's list is never modified; only a shallow copy is shuffled.

    Args:
        input_list: List of items to split.
        ratio: Sequence whose first two entries are the fractions assigned to
            the first and second part. Each of those sizes is
            ``int(fraction * len(input_list))`` (truncated towards zero); the
            third part receives every remaining item, so further entries of
            ``ratio`` are not read.
        random_state: Optional seed. When given, the shuffle is driven by a
            private ``random.Random(random_state)`` generator, which makes the
            split reproducible without touching the global ``random`` state.
            When ``None`` (the default) the module-level ``random`` generator
            is used, exactly as before.

    Returns:
        Tuple ``(part_a, part_b, part_c)`` of lists.
    """
    shuffled = input_list.copy()
    n_total = len(shuffled)
    n_first = int(ratio[0] * n_total)
    n_second = int(ratio[1] * n_total)

    if random_state is None:
        random.shuffle(shuffled)
    else:
        random.Random(random_state).shuffle(shuffled)

    cut = n_first + n_second
    part_a = shuffled[:n_first]
    part_b = shuffled[n_first:cut]
    part_c = shuffled[cut:]

    return part_a, part_b, part_c

def load_dataset(filename,x_name,y_name1,y_name2):
    """Read a CSV file and turn every row into a graph with two targets.

    Args:
        filename: Path of the CSV file, read with ``pd.read_csv``.
        x_name: Column holding the SMILES string passed to ``from_smiles``.
        y_name1: Column holding the first target value.
        y_name2: Column holding the second target value.

    Returns:
        List with one graph object per CSV row, in file order. Each graph
        carries ``y`` as a float32 tensor built from the nested list
        ``[[first_target, second_target]]``.
    """
    frame = pd.read_csv(filename)

    def build_graph(row):
        # Rows are addressed by their integer label, as produced by read_csv.
        graph = from_smiles(frame[x_name][row])
        targets = [[frame[y_name1][row], frame[y_name2][row]]]
        graph.y = torch.tensor(targets, dtype=torch.float32)
        return graph

    return [build_graph(row) for row in tqdm(range(len(frame)))]

def load_splited_dataset(filename,x_name,y_name1,y_name2,ratio=(0.8,0.1,0.1)):
    """Load a CSV dataset via ``load_dataset`` and split it with ``split_list``.

    Args:
        filename, x_name, y_name1, y_name2: Forwarded unchanged to
            ``load_dataset``.
        ratio: Split fractions forwarded to ``split_list``.

    Returns:
        The three lists returned by ``split_list``.
    """
    graphs = load_dataset(filename, x_name, y_name1, y_name2)
    first_part, second_part, third_part = split_list(graphs, ratio)
    return first_part, second_part, third_part

In [2]:
import torch
from torch.nn import Linear, ReLU
from torch_geometric.nn import DenseGCNConv, global_mean_pool,dense_diff_pool

class GNN(torch.nn.Module):
    """Two stacked ``DenseGCNConv`` layers, each followed by a ReLU.

    Args:
        in_channels: Feature size of the input nodes.
        hidden_channels: Output size of the first convolution.
        out_channels: Output size of the second convolution.
        normalize: Passed as the third positional argument of both
            ``DenseGCNConv`` layers.
            NOTE(review): in the PyG API that positional slot is ``improved``,
            so this flag toggles ``improved`` rather than a normalisation
            switch - confirm the intent.
        lin: Accepted but not used by this implementation.
    """

    def __init__(self, in_channels, hidden_channels, out_channels,normalize=False,lin=True):
        super(GNN, self).__init__()

        self.convs = torch.nn.ModuleList()
        self.convs.append(DenseGCNConv(in_channels, hidden_channels,normalize))
        self.convs.append(DenseGCNConv(hidden_channels, out_channels,normalize))
        self.act=ReLU()

    def forward(self, x, adj, mask=None):
        """Apply both convolutions to a dense batch.

        Args:
            x: Dense node-feature tensor.
            adj: Dense adjacency tensor.
            mask: Optional node-validity mask. It is now forwarded to every
                convolution (previously it was accepted but silently
                dropped). With ``mask=None`` the result is unchanged.

        Returns:
            The activated output of the last convolution.
        """
        for conv in self.convs:
            x = self.act(conv(x, adj, mask))

        return x



class DiffPool(torch.nn.Module):
    """Single-level DiffPool regressor working on dense batches.

    One ``GNN`` produces per-node cluster scores, a second one produces node
    embeddings; ``dense_diff_pool`` coarsens the graph, a third ``GNN`` runs on
    the coarsened graph, and the cluster-wise mean is fed to a two-layer MLP.

    Args:
        in_channels: Feature size of the input nodes.
        hidden_channels: Width of the embeddings and of the MLP hidden layer.
        out_channels: Number of values predicted per graph.
        num_nodes: Number of clusters produced by the pooling step.
    """

    def __init__(self,in_channels, hidden_channels=32, out_channels=2,num_nodes=3):
        super().__init__()

        # Assignment and embedding networks both read the raw node features.
        self.gnn1_pool = GNN(in_channels, hidden_channels, num_nodes)
        self.gnn1_embed = GNN(in_channels, hidden_channels, hidden_channels)

        # Embedding network applied after pooling.
        self.gnn2_embed = GNN(hidden_channels, hidden_channels, hidden_channels)

        # Read-out MLP.
        self.lin1 = torch.nn.Linear(hidden_channels, hidden_channels)
        self.lin2 = torch.nn.Linear(hidden_channels, out_channels)
        self.act = ReLU()

    def forward(self, x, adj, mask=None):
        """Predict ``out_channels`` values for every graph in the dense batch.

        Args:
            x: Dense node-feature tensor.
            adj: Dense adjacency tensor.
            mask: Optional node-validity mask, handed to both first-level
                networks and to ``dense_diff_pool``.

        Returns:
            Output of the final linear layer.
        """
        assignment = self.gnn1_pool(x, adj, mask)
        node_embedding = self.gnn1_embed(x, adj, mask)

        # The last two values returned by dense_diff_pool are not used.
        pooled_x, pooled_adj, _unused_a, _unused_b = dense_diff_pool(
            node_embedding, adj, assignment, mask
        )

        cluster_embedding = self.gnn2_embed(pooled_x, pooled_adj)

        # Average over the cluster dimension to get one vector per graph.
        graph_embedding = cluster_embedding.mean(dim=1)
        hidden = self.act(self.lin1(graph_embedding))
        return self.lin2(hidden)
    

In [3]:
from torch_geometric.utils import to_dense_batch,to_dense_adj

def train(model, loader, optimizer, loss_func):
    """Train ``model`` for one pass over ``loader`` and report the training MAE.

    Every mini-batch is converted to dense form (node features and adjacency
    padded to the module-level ``max_nodes``), moved to the device the model
    lives on, and used for one optimizer step.

    Args:
        model: Module called as ``model(x, adj)``.
        loader: Iterable of mini-batches that also exposes ``.dataset``.
        optimizer: Optimizer updating ``model``'s parameters.
        loss_func: Callable ``loss_func(pred, target)`` returning the loss.

    Returns:
        Tuple ``(model, mae)``. ``mae`` is the summed absolute error divided by
        the number of targets per graph and by ``len(loader.dataset)``. It is
        accumulated batch by batch, before each optimizer step.
    """
    model.train()

    # Follow the model's device instead of assuming CUDA, so the same code
    # also runs on CPU-only machines. Parameter-free models keep the previous
    # behaviour (CUDA).
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda")

    abs_err_total = 0.0
    for data in loader:
        # NOTE(review): to_dense_batch also returns a padding mask; only the
        # dense features ([0]) are kept, so the model is called without a
        # mask - confirm this is intended.
        data.x = to_dense_batch(data.x, batch=data.batch, max_num_nodes=max_nodes)[0].float()
        data.adj = to_dense_adj(data.edge_index, batch=data.batch, max_num_nodes=max_nodes)
        data = data.to(device)

        optimizer.zero_grad()
        pred = model(data.x, data.adj)

        # Average the absolute error over however many targets are predicted
        # (this equals the former hard-coded "/2" for two targets).
        abs_err_total += torch.sum(torch.abs(pred - data.y)).item() / pred.size(-1)

        loss = loss_func(pred, data.y)
        loss.backward()
        optimizer.step()

    return model, abs_err_total / len(loader.dataset)

def test(model, loader):
    model.eval()
    mae=0
    with torch.no_grad():
        for data in loader:
            data.x=to_dense_batch(data.x,batch=data.batch,max_num_nodes=max_nodes)[0].float()
            data.adj=to_dense_adj(data.edge_index,batch=data.batch,max_num_nodes=max_nodes)
            data=data.cuda()
            out = model(data.x,data.adj)  
            mae+=torch.sum(torch.abs(out-data.y)).item()/2
        
    return mae / len(loader.dataset)

# QM9

In [4]:
from torch_geometric.seed import seed_everything

# Dense batches are padded to this many nodes per graph; train(), test() and get_S() read it as a global.
max_nodes=9
# Build one graph per CSV row from the "smiles" column, with the "homo" and "lumo" columns as targets.
QM9=load_dataset("./dataset/QM9/QM9.csv","smiles","homo","lumo")

100%|████████████████████████████████████████████████████████████████████████| 133247/133247 [01:06<00:00, 2013.99it/s]


### Exp 1

In [5]:
# Seed first, then shuffle-split 80/10/10. split_list shuffles with the global `random` module,
# so the split depends on the seed set here (assuming seed_everything seeds `random`).
seed_everything(1222)
QM9_train,QM9_val,QM9_test=split_list(QM9,(0.8,0.1,0.1))

In [6]:
from torch_geometric.loader import DenseDataLoader,DataLoader

# Training batches are reshuffled by the loader; validation and test use large, fixed-order batches.
train_loader=DataLoader(QM9_train,batch_size=128, shuffle=True)
val_loader = DataLoader(QM9_val, batch_size=1024, shuffle=False)
test_loader = DataLoader(QM9_test, batch_size=1024, shuffle=False)
# Node-feature width of the first training graph; passed to DiffPool as its input size.
num_features=QM9_train[0].num_node_features


In [7]:
from torch.nn import MSELoss
import time

# Stage 1: a freshly initialised DiffPool (default num_nodes=3 clusters) trained with Adam at lr=0.001.
diffpool=DiffPool(num_features)
optimizer=torch.optim.Adam(params=diffpool.parameters(), lr=0.001)
loss_fn=MSELoss()

# Model and loss are moved to the GPU.
diffpool=diffpool.cuda()
loss_fn=loss_fn.cuda()

epochs=50
# Per-epoch MAE histories; the follow-up stage-2 cell appends to these same lists.
train_curve=[]
val_curve=[]
test_curve=[]
for i in range(1,epochs+1):
    t_start=time.time()
    diffpool,train_mae = train(diffpool, train_loader, optimizer, loss_fn)
    val_mae=test(diffpool,val_loader)
    test_mae=test(diffpool,test_loader)
    t_end=time.time()
    print("Epoch %d"%i)
    # The reported time covers the training pass plus both evaluation passes.
    print("Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"%(train_mae,val_mae,test_mae,t_end-t_start))
    
    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.024062    Val MAE:0.017624    Test MAE:0.017495     Time:13.435047 s
Epoch 2
Train MAE:0.017118    Val MAE:0.016368    Test MAE:0.016327     Time:14.767745 s
Epoch 3
Train MAE:0.016203    Val MAE:0.015188    Test MAE:0.015100     Time:15.179350 s
Epoch 4
Train MAE:0.015326    Val MAE:0.014575    Test MAE:0.014571     Time:14.860917 s
Epoch 5
Train MAE:0.014662    Val MAE:0.014998    Test MAE:0.014989     Time:15.049303 s
Epoch 6
Train MAE:0.014148    Val MAE:0.014855    Test MAE:0.014791     Time:14.264054 s
Epoch 7
Train MAE:0.013586    Val MAE:0.012989    Test MAE:0.012967     Time:14.898383 s
Epoch 8
Train MAE:0.013336    Val MAE:0.012861    Test MAE:0.012921     Time:14.506850 s
Epoch 9
Train MAE:0.012828    Val MAE:0.012106    Test MAE:0.012167     Time:15.141315 s
Epoch 10
Train MAE:0.012647    Val MAE:0.012037    Test MAE:0.012080     Time:14.699220 s
Epoch 11
Train MAE:0.012340    Val MAE:0.011917    Test MAE:0.012031     Time:14.473908 s
Epoch 12
Train MAE:

In [9]:
# Stage 2: keep training the same model for 50 more epochs with a new Adam optimizer at
# lr=0.0005 and a smaller training batch size (32).
epochs=50
optimizer=torch.optim.Adam(params=diffpool.parameters(), lr=0.0005)
train_loader = DataLoader(QM9_train, batch_size=32, shuffle=True)
# Printed epoch numbers restart at 1, while the *_curve lists keep growing from stage 1.
for i in range(1,epochs+1):
    t_start=time.time()
    diffpool,train_mae = train(diffpool, train_loader, optimizer, loss_fn)
    val_mae=test(diffpool,val_loader)
    test_mae=test(diffpool,test_loader)
    t_end=time.time()
    print("Epoch %d"%i)
    print("Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"%(train_mae,val_mae,test_mae,t_end-t_start))
    
    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.009577    Val MAE:0.009336    Test MAE:0.009388     Time:35.865411 s
Epoch 2
Train MAE:0.009590    Val MAE:0.009116    Test MAE:0.009225     Time:35.812259 s
Epoch 3
Train MAE:0.009572    Val MAE:0.010674    Test MAE:0.010776     Time:34.016861 s
Epoch 4
Train MAE:0.009503    Val MAE:0.009131    Test MAE:0.009231     Time:34.772415 s
Epoch 5
Train MAE:0.009464    Val MAE:0.009138    Test MAE:0.009234     Time:34.533585 s
Epoch 6
Train MAE:0.009454    Val MAE:0.009601    Test MAE:0.009669     Time:32.897979 s
Epoch 7
Train MAE:0.009411    Val MAE:0.009297    Test MAE:0.009370     Time:32.506943 s
Epoch 8
Train MAE:0.009493    Val MAE:0.009485    Test MAE:0.009623     Time:34.493815 s
Epoch 9
Train MAE:0.009337    Val MAE:0.009380    Test MAE:0.009427     Time:35.315694 s
Epoch 10
Train MAE:0.009358    Val MAE:0.009143    Test MAE:0.009197     Time:35.118951 s
Epoch 11
Train MAE:0.009329    Val MAE:0.008814    Test MAE:0.008906     Time:32.725700 s
Epoch 12
Train MAE:

In [10]:
# Write the accumulated MAE curves as CSV, one row per recorded epoch (numbered from 1).
# NOTE(review): opening with "w" assumes the ./result directory already exists.
with open("./result/DIFF_QM9_1.csv","w") as fo:
    fo.write("epoch,train,val,test\n")
    for i in range(len(train_curve)):
        fo.write("%d,%f,%f,%f\n"%(i+1,train_curve[i],val_curve[i],test_curve[i]))

In [18]:
from torch_geometric.data import Batch

# Two example molecules (SMILES strings) whose assignment-network output is inspected below.
smiles_list=["C[C@H]1CCC(=O)OC1",
             "CCC1(O)CC=CC1"]

def get_S(smile_list,model):
    """Return the output of ``model.gnn1_pool`` for a list of SMILES strings.

    The molecules are converted with ``from_smiles``, collated into one batch,
    densified (padded to the module-level ``max_nodes``) and passed through the
    model's assignment network in eval mode without gradients.

    Args:
        smile_list: Iterable of SMILES strings. The loop now iterates over
            this argument; previously it read the global ``smiles_list`` and
            ignored what the caller passed in.
        model: Model exposing a ``gnn1_pool(x, adj)`` sub-module.

    Returns:
        CPU tensor holding the raw ``gnn1_pool`` output. No softmax is applied
        here, so the values are not normalised per node.
    """
    data_list = [from_smiles(smile) for smile in smile_list]
    batch = Batch.from_data_list(data_list)
    batch.x = to_dense_batch(batch.x, batch=batch.batch, max_num_nodes=max_nodes)[0].float()
    batch.adj = to_dense_adj(batch.edge_index, batch=batch.batch, max_num_nodes=max_nodes)

    # Use the model's own device; parameter-free models keep the former CUDA default.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda")
    batch = batch.to(device)

    model.eval()
    with torch.no_grad():
        S = model.gnn1_pool(batch.x, batch.adj)
    return S.cpu()

# Show the gnn1_pool output for the two molecules, using the model currently bound to `diffpool`.
get_S(smiles_list,diffpool)

tensor([[[2.9548, 0.0000, 7.0829],
         [4.6630, 0.0000, 9.3750],
         [4.0158, 0.0000, 8.3664],
         [3.6069, 0.0000, 6.2083],
         [2.7244, 0.0000, 4.3757],
         [0.2756, 0.0000, 0.3999],
         [3.8084, 0.0000, 5.0550],
         [4.2172, 0.0000, 7.2131],
         [0.0000, 0.0000, 0.0000]],

        [[2.9893, 0.0000, 7.1162],
         [4.1858, 0.0000, 8.4671],
         [5.2983, 0.0000, 8.3270],
         [2.9852, 0.0000, 3.4018],
         [4.6228, 0.0000, 7.5535],
         [4.3397, 0.0000, 7.4099],
         [4.3397, 0.0000, 7.4099],
         [4.6228, 0.0000, 7.5535],
         [0.0000, 0.0000, 0.0000]]])

### Exp 2

In [19]:
# Second repetition: new seed, new 80/10/10 shuffle-split of the same QM9 list.
seed_everything(324)
QM9_train,QM9_val,QM9_test=split_list(QM9,(0.8,0.1,0.1))

In [20]:
from torch_geometric.loader import DenseDataLoader,DataLoader

# Training batches are reshuffled by the loader; validation and test use large, fixed-order batches.
train_loader=DataLoader(QM9_train,batch_size=128, shuffle=True)
val_loader = DataLoader(QM9_val, batch_size=1024, shuffle=False)
test_loader = DataLoader(QM9_test, batch_size=1024, shuffle=False)
# Node-feature width of the first training graph; passed to DiffPool as its input size.
num_features=QM9_train[0].num_node_features

In [21]:
from torch.nn import MSELoss
import time

# Stage 1: a new DiffPool instance replaces the previous `diffpool`; Adam at lr=0.001.
diffpool=DiffPool(num_features)
optimizer=torch.optim.Adam(params=diffpool.parameters(), lr=0.001)
loss_fn=MSELoss()

# Model and loss are moved to the GPU.
diffpool=diffpool.cuda()
loss_fn=loss_fn.cuda()

epochs=50
# The curve lists are reset here, so the CSV written later only contains this experiment.
train_curve=[]
val_curve=[]
test_curve=[]
for i in range(1,epochs+1):
    t_start=time.time()
    diffpool,train_mae = train(diffpool, train_loader, optimizer, loss_fn)
    val_mae=test(diffpool,val_loader)
    test_mae=test(diffpool,test_loader)
    t_end=time.time()
    print("Epoch %d"%i)
    # The reported time covers the training pass plus both evaluation passes.
    print("Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"%(train_mae,val_mae,test_mae,t_end-t_start))
    
    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.020758    Val MAE:0.017022    Test MAE:0.016854     Time:12.850177 s
Epoch 2
Train MAE:0.016281    Val MAE:0.014884    Test MAE:0.014830     Time:12.685997 s
Epoch 3
Train MAE:0.015422    Val MAE:0.014831    Test MAE:0.014842     Time:12.650656 s
Epoch 4
Train MAE:0.014874    Val MAE:0.013747    Test MAE:0.013776     Time:13.121297 s
Epoch 5
Train MAE:0.014257    Val MAE:0.013179    Test MAE:0.013166     Time:13.513441 s
Epoch 6
Train MAE:0.013803    Val MAE:0.013720    Test MAE:0.013738     Time:13.544003 s
Epoch 7
Train MAE:0.013385    Val MAE:0.013176    Test MAE:0.013098     Time:13.957102 s
Epoch 8
Train MAE:0.013103    Val MAE:0.012766    Test MAE:0.012695     Time:13.892344 s
Epoch 9
Train MAE:0.012788    Val MAE:0.012681    Test MAE:0.012605     Time:13.700646 s
Epoch 10
Train MAE:0.012597    Val MAE:0.012140    Test MAE:0.012064     Time:13.980258 s
Epoch 11
Train MAE:0.012278    Val MAE:0.012270    Test MAE:0.012174     Time:13.884907 s
Epoch 12
Train MAE:

In [22]:
# Stage 2: 50 more epochs on the same model with a new Adam optimizer (lr=0.0005) and batch size 32.
epochs=50
optimizer=torch.optim.Adam(params=diffpool.parameters(), lr=0.0005)
train_loader = DataLoader(QM9_train, batch_size=32, shuffle=True)
# Printed epoch numbers restart at 1, while the *_curve lists keep growing from stage 1.
for i in range(1,epochs+1):
    t_start=time.time()
    diffpool,train_mae = train(diffpool, train_loader, optimizer, loss_fn)
    val_mae=test(diffpool,val_loader)
    test_mae=test(diffpool,test_loader)
    t_end=time.time()
    print("Epoch %d"%i)
    print("Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"%(train_mae,val_mae,test_mae,t_end-t_start))
    
    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.009496    Val MAE:0.010511    Test MAE:0.010354     Time:33.155694 s
Epoch 2
Train MAE:0.009405    Val MAE:0.009575    Test MAE:0.009529     Time:33.679666 s
Epoch 3
Train MAE:0.009386    Val MAE:0.008937    Test MAE:0.008816     Time:36.936462 s
Epoch 4
Train MAE:0.009333    Val MAE:0.009628    Test MAE:0.009585     Time:34.060930 s
Epoch 5
Train MAE:0.009261    Val MAE:0.010397    Test MAE:0.010372     Time:33.644606 s
Epoch 6
Train MAE:0.009232    Val MAE:0.009119    Test MAE:0.009033     Time:34.029007 s
Epoch 7
Train MAE:0.009203    Val MAE:0.010691    Test MAE:0.010613     Time:37.026620 s
Epoch 8
Train MAE:0.009144    Val MAE:0.008684    Test MAE:0.008555     Time:34.136580 s
Epoch 9
Train MAE:0.009123    Val MAE:0.009072    Test MAE:0.009019     Time:33.657645 s
Epoch 10
Train MAE:0.009033    Val MAE:0.009086    Test MAE:0.008967     Time:33.569898 s
Epoch 11
Train MAE:0.009021    Val MAE:0.009308    Test MAE:0.009210     Time:38.066563 s
Epoch 12
Train MAE:

In [23]:
# Write the accumulated MAE curves as CSV, one row per recorded epoch (numbered from 1).
# NOTE(review): opening with "w" assumes the ./result directory already exists.
with open("./result/DIFF_QM9_2.csv","w") as fo:
    fo.write("epoch,train,val,test\n")
    for i in range(len(train_curve)):
        fo.write("%d,%f,%f,%f\n"%(i+1,train_curve[i],val_curve[i],test_curve[i]))

### Exp 3

In [24]:
# Third repetition: new seed, new 80/10/10 shuffle-split of the same QM9 list.
seed_everything(10086)
QM9_train,QM9_val,QM9_test=split_list(QM9,(0.8,0.1,0.1))

In [25]:
from torch_geometric.loader import DenseDataLoader,DataLoader

# Training batches are reshuffled by the loader; validation and test use large, fixed-order batches.
train_loader=DataLoader(QM9_train,batch_size=128, shuffle=True)
val_loader = DataLoader(QM9_val, batch_size=1024, shuffle=False)
test_loader = DataLoader(QM9_test, batch_size=1024, shuffle=False)
# Node-feature width of the first training graph; passed to DiffPool as its input size.
num_features=QM9_train[0].num_node_features

In [26]:
from torch.nn import MSELoss
import time

# Stage 1: a new DiffPool instance replaces the previous `diffpool`; Adam at lr=0.001.
diffpool=DiffPool(num_features)
optimizer=torch.optim.Adam(params=diffpool.parameters(), lr=0.001)
loss_fn=MSELoss()

# Model and loss are moved to the GPU.
diffpool=diffpool.cuda()
loss_fn=loss_fn.cuda()

epochs=50
# The curve lists are reset here, so the CSV written later only contains this experiment.
train_curve=[]
val_curve=[]
test_curve=[]
for i in range(1,epochs+1):
    t_start=time.time()
    diffpool,train_mae = train(diffpool, train_loader, optimizer, loss_fn)
    val_mae=test(diffpool,val_loader)
    test_mae=test(diffpool,test_loader)
    t_end=time.time()
    print("Epoch %d"%i)
    # The reported time covers the training pass plus both evaluation passes.
    print("Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"%(train_mae,val_mae,test_mae,t_end-t_start))
    
    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.019519    Val MAE:0.016373    Test MAE:0.016314     Time:14.797408 s
Epoch 2
Train MAE:0.016010    Val MAE:0.017225    Test MAE:0.017231     Time:14.827670 s
Epoch 3
Train MAE:0.014842    Val MAE:0.015077    Test MAE:0.015091     Time:14.763676 s
Epoch 4
Train MAE:0.013773    Val MAE:0.014323    Test MAE:0.014366     Time:14.546332 s
Epoch 5
Train MAE:0.012824    Val MAE:0.012369    Test MAE:0.012437     Time:14.905575 s
Epoch 6
Train MAE:0.012430    Val MAE:0.011662    Test MAE:0.011758     Time:14.734825 s
Epoch 7
Train MAE:0.011939    Val MAE:0.011637    Test MAE:0.011764     Time:14.500009 s
Epoch 8
Train MAE:0.011771    Val MAE:0.012593    Test MAE:0.012643     Time:14.654719 s
Epoch 9
Train MAE:0.011471    Val MAE:0.011108    Test MAE:0.011232     Time:14.561376 s
Epoch 10
Train MAE:0.011291    Val MAE:0.011694    Test MAE:0.011811     Time:14.746042 s
Epoch 11
Train MAE:0.011258    Val MAE:0.011246    Test MAE:0.011298     Time:14.603122 s
Epoch 12
Train MAE:

In [27]:
# Stage 2: 50 more epochs on the same model with a new Adam optimizer (lr=0.0005) and batch size 32.
epochs=50
optimizer=torch.optim.Adam(params=diffpool.parameters(), lr=0.0005)
train_loader = DataLoader(QM9_train, batch_size=32, shuffle=True)
# Printed epoch numbers restart at 1, while the *_curve lists keep growing from stage 1.
for i in range(1,epochs+1):
    t_start=time.time()
    diffpool,train_mae = train(diffpool, train_loader, optimizer, loss_fn)
    val_mae=test(diffpool,val_loader)
    test_mae=test(diffpool,test_loader)
    t_end=time.time()
    print("Epoch %d"%i)
    print("Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"%(train_mae,val_mae,test_mae,t_end-t_start))
    
    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.009279    Val MAE:0.009110    Test MAE:0.009158     Time:36.182680 s
Epoch 2
Train MAE:0.009209    Val MAE:0.009237    Test MAE:0.009257     Time:34.176524 s
Epoch 3
Train MAE:0.009167    Val MAE:0.009030    Test MAE:0.009084     Time:33.397826 s
Epoch 4
Train MAE:0.009172    Val MAE:0.009148    Test MAE:0.009162     Time:33.767055 s
Epoch 5
Train MAE:0.009054    Val MAE:0.008797    Test MAE:0.008889     Time:36.471446 s
Epoch 6
Train MAE:0.009041    Val MAE:0.008991    Test MAE:0.008996     Time:34.438837 s
Epoch 7
Train MAE:0.008988    Val MAE:0.009556    Test MAE:0.009598     Time:33.469681 s
Epoch 8
Train MAE:0.008985    Val MAE:0.011193    Test MAE:0.011354     Time:33.669780 s
Epoch 9
Train MAE:0.008966    Val MAE:0.008746    Test MAE:0.008717     Time:36.522151 s
Epoch 10
Train MAE:0.008941    Val MAE:0.008956    Test MAE:0.009013     Time:34.098759 s
Epoch 11
Train MAE:0.008851    Val MAE:0.009463    Test MAE:0.009419     Time:33.532637 s
Epoch 12
Train MAE:

In [28]:
# Write the accumulated MAE curves as CSV, one row per recorded epoch (numbered from 1).
# NOTE(review): opening with "w" assumes the ./result directory already exists.
with open("./result/DIFF_QM9_3.csv","w") as fo:
    fo.write("epoch,train,val,test\n")
    for i in range(len(train_curve)):
        fo.write("%d,%f,%f,%f\n"%(i+1,train_curve[i],val_curve[i],test_curve[i]))

# OPV

In [4]:
from torch_geometric.seed import seed_everything

# Rebinds the global padding size read by train(), test() and get_S(); OPV uses 122 instead of 9.
max_nodes=122
# Build one graph per CSV row; note the SMILES column is named "smile" in this file.
OPV=load_dataset("./dataset/OPV/OPV.csv","smile","homo","lumo")

100%|███████████████████████████████████████████████████████████████████████████| 90823/90823 [02:05<00:00, 721.42it/s]


### Exp 1

In [5]:
# Seed first, then shuffle-split 80/10/10. split_list shuffles with the global `random` module,
# so the split depends on the seed set here (assuming seed_everything seeds `random`).
seed_everything(1222)
OPV_train,OPV_val,OPV_test=split_list(OPV,(0.8,0.1,0.1))

In [6]:
from torch_geometric.loader import DenseDataLoader,DataLoader

# Training batches are reshuffled by the loader; validation and test use large, fixed-order batches.
train_loader=DataLoader(OPV_train,batch_size=128, shuffle=True)
val_loader = DataLoader(OPV_val, batch_size=1024, shuffle=False)
test_loader = DataLoader(OPV_test, batch_size=1024, shuffle=False)
# Node-feature width of the first training graph; passed to DiffPool as its input size.
num_features=OPV_train[0].num_node_features

In [7]:
from torch.nn import MSELoss
import time

# Stage 1: a freshly initialised DiffPool with 30 clusters (num_nodes=30), trained with Adam at lr=0.001.
diffpool=DiffPool(num_features,num_nodes=30)
optimizer=torch.optim.Adam(params=diffpool.parameters(), lr=0.001)
loss_fn=MSELoss()

# Model and loss are moved to the GPU.
diffpool=diffpool.cuda()
loss_fn=loss_fn.cuda()

epochs=50
# Per-epoch MAE histories; the follow-up stage-2 cell appends to these same lists.
train_curve=[]
val_curve=[]
test_curve=[]
for i in range(1,epochs+1):
    t_start=time.time()
    diffpool,train_mae = train(diffpool, train_loader, optimizer, loss_fn)
    val_mae=test(diffpool,val_loader)
    test_mae=test(diffpool,test_loader)
    t_end=time.time()
    print("Epoch %d"%i)
    # The reported time covers the training pass plus both evaluation passes.
    print("Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"%(train_mae,val_mae,test_mae,t_end-t_start))
    
    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.561642    Val MAE:0.286996    Test MAE:0.288156     Time:12.145228 s
Epoch 2
Train MAE:0.275761    Val MAE:0.260087    Test MAE:0.259048     Time:12.548325 s
Epoch 3
Train MAE:0.245612    Val MAE:0.234242    Test MAE:0.233044     Time:13.046635 s
Epoch 4
Train MAE:0.229121    Val MAE:0.224023    Test MAE:0.221781     Time:13.293635 s
Epoch 5
Train MAE:0.222515    Val MAE:0.217101    Test MAE:0.215273     Time:13.300992 s
Epoch 6
Train MAE:0.216650    Val MAE:0.210829    Test MAE:0.208854     Time:14.225732 s
Epoch 7
Train MAE:0.211936    Val MAE:0.203942    Test MAE:0.201364     Time:14.994059 s
Epoch 8
Train MAE:0.209104    Val MAE:0.201797    Test MAE:0.200295     Time:13.871802 s
Epoch 9
Train MAE:0.206427    Val MAE:0.206001    Test MAE:0.204415     Time:14.278886 s
Epoch 10
Train MAE:0.199511    Val MAE:0.195341    Test MAE:0.193682     Time:14.581679 s
Epoch 11
Train MAE:0.195867    Val MAE:0.190640    Test MAE:0.187739     Time:14.630158 s
Epoch 12
Train MAE:

In [8]:
# Stage 2: 50 more epochs on the same model with batch size 32 and a new Adam optimizer (lr=0.0005).
train_loader=DataLoader(OPV_train,batch_size=32, shuffle=True)
optimizer=torch.optim.Adam(params=diffpool.parameters(), lr=0.0005)
epochs=50
# Printed epoch numbers restart at 1, while the *_curve lists keep growing from stage 1.
for i in range(1,epochs+1):
    t_start=time.time()
    diffpool,train_mae = train(diffpool, train_loader, optimizer, loss_fn)
    val_mae=test(diffpool,val_loader)
    test_mae=test(diffpool,test_loader)
    t_end=time.time()
    print("Epoch %d"%i)
    print("Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"%(train_mae,val_mae,test_mae,t_end-t_start))
    
    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.156132    Val MAE:0.159048    Test MAE:0.156789     Time:35.307971 s
Epoch 2
Train MAE:0.156319    Val MAE:0.203168    Test MAE:0.202529     Time:36.647931 s
Epoch 3
Train MAE:0.154858    Val MAE:0.162606    Test MAE:0.161188     Time:36.513839 s
Epoch 4
Train MAE:0.154634    Val MAE:0.149092    Test MAE:0.146278     Time:38.580723 s
Epoch 5
Train MAE:0.153544    Val MAE:0.146196    Test MAE:0.144155     Time:39.681599 s
Epoch 6
Train MAE:0.153575    Val MAE:0.147840    Test MAE:0.145802     Time:37.568303 s
Epoch 7
Train MAE:0.152136    Val MAE:0.146167    Test MAE:0.143700     Time:39.901852 s
Epoch 8
Train MAE:0.152017    Val MAE:0.156364    Test MAE:0.154919     Time:38.697060 s
Epoch 9
Train MAE:0.151107    Val MAE:0.148168    Test MAE:0.146600     Time:42.020453 s
Epoch 10
Train MAE:0.150597    Val MAE:0.145460    Test MAE:0.144012     Time:39.251281 s
Epoch 11
Train MAE:0.150362    Val MAE:0.145809    Test MAE:0.143795     Time:38.964707 s
Epoch 12
Train MAE:

In [9]:
# Write the accumulated MAE curves as CSV, one row per recorded epoch (numbered from 1).
# NOTE(review): opening with "w" assumes the ./result directory already exists.
with open("./result/DIFF_OPV_1.csv","w") as fo:
    fo.write("epoch,train,val,test\n")
    for i in range(len(train_curve)):
        fo.write("%d,%f,%f,%f\n"%(i+1,train_curve[i],val_curve[i],test_curve[i]))

### Exp 2

In [11]:
# Second repetition: new seed, new 80/10/10 shuffle-split of the same OPV list.
seed_everything(324)
OPV_train,OPV_val,OPV_test=split_list(OPV,(0.8,0.1,0.1))

In [12]:
from torch_geometric.loader import DenseDataLoader,DataLoader

# Training batches are reshuffled by the loader; validation and test use large, fixed-order batches.
train_loader=DataLoader(OPV_train,batch_size=128, shuffle=True)
val_loader = DataLoader(OPV_val, batch_size=1024, shuffle=False)
test_loader = DataLoader(OPV_test, batch_size=1024, shuffle=False)
# Node-feature width of the first training graph; passed to DiffPool as its input size.
num_features=OPV_train[0].num_node_features

In [13]:
from torch.nn import MSELoss
import time

# Stage 1: a new DiffPool instance (30 clusters) replaces the previous `diffpool`; Adam at lr=0.001.
diffpool=DiffPool(num_features,num_nodes=30)
optimizer=torch.optim.Adam(params=diffpool.parameters(), lr=0.001)
loss_fn=MSELoss()

# Model and loss are moved to the GPU.
diffpool=diffpool.cuda()
loss_fn=loss_fn.cuda()

epochs=50
# The curve lists are reset here, so the CSV written later only contains this experiment.
train_curve=[]
val_curve=[]
test_curve=[]
for i in range(1,epochs+1):
    t_start=time.time()
    diffpool,train_mae = train(diffpool, train_loader, optimizer, loss_fn)
    val_mae=test(diffpool,val_loader)
    test_mae=test(diffpool,test_loader)
    t_end=time.time()
    print("Epoch %d"%i)
    # The reported time covers the training pass plus both evaluation passes.
    print("Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"%(train_mae,val_mae,test_mae,t_end-t_start))
    
    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.691866    Val MAE:0.310651    Test MAE:0.306837     Time:12.669918 s
Epoch 2
Train MAE:0.300473    Val MAE:0.308800    Test MAE:0.305713     Time:12.999642 s
Epoch 3
Train MAE:0.293691    Val MAE:0.288932    Test MAE:0.287129     Time:13.461200 s
Epoch 4
Train MAE:0.287530    Val MAE:0.281904    Test MAE:0.282469     Time:13.925685 s
Epoch 5
Train MAE:0.275798    Val MAE:0.265980    Test MAE:0.267221     Time:14.244412 s
Epoch 6
Train MAE:0.250116    Val MAE:0.236978    Test MAE:0.238651     Time:14.254255 s
Epoch 7
Train MAE:0.234936    Val MAE:0.246464    Test MAE:0.249010     Time:14.768749 s
Epoch 8
Train MAE:0.225294    Val MAE:0.222559    Test MAE:0.224272     Time:14.776127 s
Epoch 9
Train MAE:0.219590    Val MAE:0.208398    Test MAE:0.209370     Time:14.710483 s
Epoch 10
Train MAE:0.212912    Val MAE:0.224103    Test MAE:0.224276     Time:15.035003 s
Epoch 11
Train MAE:0.211309    Val MAE:0.214227    Test MAE:0.213988     Time:15.867059 s
Epoch 12
Train MAE:

In [14]:
# Stage 2: 50 more epochs on the same model with batch size 32 and a new Adam optimizer (lr=0.0005).
train_loader=DataLoader(OPV_train,batch_size=32, shuffle=True)
optimizer=torch.optim.Adam(params=diffpool.parameters(), lr=0.0005)
epochs=50
# Printed epoch numbers restart at 1, while the *_curve lists keep growing from stage 1.
for i in range(1,epochs+1):
    t_start=time.time()
    diffpool,train_mae = train(diffpool, train_loader, optimizer, loss_fn)
    val_mae=test(diffpool,val_loader)
    test_mae=test(diffpool,test_loader)
    t_end=time.time()
    print("Epoch %d"%i)
    print("Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"%(train_mae,val_mae,test_mae,t_end-t_start))
    
    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.164559    Val MAE:0.173227    Test MAE:0.172923     Time:38.333716 s
Epoch 2
Train MAE:0.163461    Val MAE:0.159884    Test MAE:0.159415     Time:37.282444 s
Epoch 3
Train MAE:0.162690    Val MAE:0.171963    Test MAE:0.171034     Time:37.586907 s
Epoch 4
Train MAE:0.160934    Val MAE:0.158010    Test MAE:0.157462     Time:37.243931 s
Epoch 5
Train MAE:0.160546    Val MAE:0.173043    Test MAE:0.171946     Time:37.467637 s
Epoch 6
Train MAE:0.160046    Val MAE:0.161135    Test MAE:0.159806     Time:37.125564 s
Epoch 7
Train MAE:0.160006    Val MAE:0.154708    Test MAE:0.153727     Time:36.664322 s
Epoch 8
Train MAE:0.158298    Val MAE:0.161585    Test MAE:0.160698     Time:37.314878 s
Epoch 9
Train MAE:0.158092    Val MAE:0.174135    Test MAE:0.173690     Time:37.427814 s
Epoch 10
Train MAE:0.157578    Val MAE:0.152306    Test MAE:0.151207     Time:37.498954 s
Epoch 11
Train MAE:0.157108    Val MAE:0.151924    Test MAE:0.150838     Time:37.041483 s
Epoch 12
Train MAE:

In [15]:
# Write the accumulated MAE curves as CSV, one row per recorded epoch (numbered from 1).
# NOTE(review): opening with "w" assumes the ./result directory already exists.
with open("./result/DIFF_OPV_2.csv","w") as fo:
    fo.write("epoch,train,val,test\n")
    for i in range(len(train_curve)):
        fo.write("%d,%f,%f,%f\n"%(i+1,train_curve[i],val_curve[i],test_curve[i]))

### Exp 3

In [5]:
# Third repetition: new seed, new 80/10/10 shuffle-split of the same OPV list.
# NOTE(review): QM9 Exp 3 uses seed 10086; 10068 here may be a digit transposition - confirm.
seed_everything(10068)
OPV_train,OPV_val,OPV_test=split_list(OPV,(0.8,0.1,0.1))

In [6]:
from torch_geometric.loader import DenseDataLoader,DataLoader

# Training batches are reshuffled by the loader; validation and test use large, fixed-order batches.
train_loader=DataLoader(OPV_train,batch_size=128, shuffle=True)
val_loader = DataLoader(OPV_val, batch_size=1024, shuffle=False)
test_loader = DataLoader(OPV_test, batch_size=1024, shuffle=False)
# Node-feature width of the first training graph; passed to DiffPool as its input size.
num_features=OPV_train[0].num_node_features

In [7]:
from torch.nn import MSELoss
import time

# Stage 1: a new DiffPool instance (30 clusters) replaces the previous `diffpool`; Adam at lr=0.001.
diffpool=DiffPool(num_features,num_nodes=30)
optimizer=torch.optim.Adam(params=diffpool.parameters(), lr=0.001)
loss_fn=MSELoss()

# Model and loss are moved to the GPU.
diffpool=diffpool.cuda()
loss_fn=loss_fn.cuda()

epochs=50
# The curve lists are reset here, so the CSV written later only contains this experiment.
train_curve=[]
val_curve=[]
test_curve=[]
for i in range(1,epochs+1):
    t_start=time.time()
    diffpool,train_mae = train(diffpool, train_loader, optimizer, loss_fn)
    val_mae=test(diffpool,val_loader)
    test_mae=test(diffpool,test_loader)
    t_end=time.time()
    print("Epoch %d"%i)
    # The reported time covers the training pass plus both evaluation passes.
    print("Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"%(train_mae,val_mae,test_mae,t_end-t_start))
    
    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.592505    Val MAE:0.294461    Test MAE:0.293699     Time:11.263473 s
Epoch 2
Train MAE:0.287370    Val MAE:0.273597    Test MAE:0.273819     Time:11.764177 s
Epoch 3
Train MAE:0.271121    Val MAE:0.258377    Test MAE:0.259417     Time:12.042249 s
Epoch 4
Train MAE:0.259242    Val MAE:0.250881    Test MAE:0.252133     Time:11.757219 s
Epoch 5
Train MAE:0.254201    Val MAE:0.248703    Test MAE:0.249377     Time:12.081896 s
Epoch 6
Train MAE:0.241740    Val MAE:0.231490    Test MAE:0.232392     Time:12.397175 s
Epoch 7
Train MAE:0.230682    Val MAE:0.220669    Test MAE:0.221407     Time:12.595804 s
Epoch 8
Train MAE:0.220275    Val MAE:0.213086    Test MAE:0.214012     Time:12.884266 s
Epoch 9
Train MAE:0.216207    Val MAE:0.228963    Test MAE:0.230330     Time:12.908219 s
Epoch 10
Train MAE:0.213869    Val MAE:0.216919    Test MAE:0.218067     Time:13.333025 s
Epoch 11
Train MAE:0.209940    Val MAE:0.207561    Test MAE:0.207854     Time:13.374297 s
Epoch 12
Train MAE:

In [8]:
# Stage 2: 50 more epochs on the same model with batch size 32 and a new Adam optimizer (lr=0.0005).
train_loader=DataLoader(OPV_train,batch_size=32, shuffle=True)
optimizer=torch.optim.Adam(params=diffpool.parameters(), lr=0.0005)
epochs=50
# Printed epoch numbers restart at 1, while the *_curve lists keep growing from stage 1.
for i in range(1,epochs+1):
    t_start=time.time()
    diffpool,train_mae = train(diffpool, train_loader, optimizer, loss_fn)
    val_mae=test(diffpool,val_loader)
    test_mae=test(diffpool,test_loader)
    t_end=time.time()
    print("Epoch %d"%i)
    print("Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"%(train_mae,val_mae,test_mae,t_end-t_start))
    
    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.171824    Val MAE:0.167247    Test MAE:0.166558     Time:35.817431 s
Epoch 2
Train MAE:0.171199    Val MAE:0.165021    Test MAE:0.164752     Time:35.291419 s
Epoch 3
Train MAE:0.170794    Val MAE:0.163554    Test MAE:0.163574     Time:35.012333 s
Epoch 4
Train MAE:0.169550    Val MAE:0.165752    Test MAE:0.164930     Time:33.512813 s
Epoch 5
Train MAE:0.168629    Val MAE:0.163616    Test MAE:0.164026     Time:33.551322 s
Epoch 6
Train MAE:0.167865    Val MAE:0.169995    Test MAE:0.170590     Time:33.815075 s
Epoch 7
Train MAE:0.167572    Val MAE:0.165039    Test MAE:0.164134     Time:33.939149 s
Epoch 8
Train MAE:0.167552    Val MAE:0.164610    Test MAE:0.163254     Time:35.584588 s
Epoch 9
Train MAE:0.165740    Val MAE:0.160390    Test MAE:0.160343     Time:37.325290 s
Epoch 10
Train MAE:0.164947    Val MAE:0.156467    Test MAE:0.156143     Time:35.748142 s
Epoch 11
Train MAE:0.164791    Val MAE:0.161389    Test MAE:0.161092     Time:37.840164 s
Epoch 12
Train MAE:

In [9]:
# Write the accumulated MAE curves as CSV, one row per recorded epoch (numbered from 1).
# NOTE(review): opening with "w" assumes the ./result directory already exists.
with open("./result/DIFF_OPV_3.csv","w") as fo:
    fo.write("epoch,train,val,test\n")
    for i in range(len(train_curve)):
        fo.write("%d,%f,%f,%f\n"%(i+1,train_curve[i],val_curve[i],test_curve[i]))

In [11]:
from torch_geometric.data import Batch

# Two example OPV molecules (SMILES strings) whose assignment-network output is saved below.
smiles_list=["Cc1c(C)c2c(F)c(-c3nc4c(C)c5c(ncn5C)c(C)c4n3C)sc2c2scc(F)c12",
             "COC(=O)N1C(F)(F)c2cccc(-c3ccc(-c4ccc5c6ccc(-c7cccs7)cc6n(C)c5c4)s3)c2C1(F)F"]

def get_S(smile_list,model):
    """Return the output of ``model.gnn1_pool`` for a list of SMILES strings.

    The molecules are converted with ``from_smiles``, collated into one batch,
    densified (padded to the module-level ``max_nodes``) and passed through the
    model's assignment network in eval mode without gradients.

    Args:
        smile_list: Iterable of SMILES strings. The loop now iterates over
            this argument; previously it read the global ``smiles_list`` and
            ignored what the caller passed in.
        model: Model exposing a ``gnn1_pool(x, adj)`` sub-module.

    Returns:
        CPU tensor holding the raw ``gnn1_pool`` output. No softmax is applied
        here, so the values are not normalised per node.
    """
    data_list = [from_smiles(smile) for smile in smile_list]
    batch = Batch.from_data_list(data_list)
    batch.x = to_dense_batch(batch.x, batch=batch.batch, max_num_nodes=max_nodes)[0].float()
    batch.adj = to_dense_adj(batch.edge_index, batch=batch.batch, max_num_nodes=max_nodes)

    # Use the model's own device; parameter-free models keep the former CUDA default.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda")
    batch = batch.to(device)

    model.eval()
    with torch.no_grad():
        S = model.gnn1_pool(batch.x, batch.adj)
    return S.cpu()

# Convert the tensor returned by get_S to a NumPy array and store it on disk.
# NOTE(review): assumes ./saved_models exists; `diffpool` is whichever model this kernel trained last.
S=get_S(smiles_list,diffpool).numpy()
np.save("./saved_models/DIFF_OPV_S.npy",S)