In [1]:
import torch
from torch_geometric.utils import from_smiles
import random
import pandas as pd
import numpy as np
from tqdm import tqdm


def split_list(input_list, ratio, random_state=None):
    """Shuffle a sequence and cut it into three consecutive parts.

    Args:
        input_list: Sequence of items to split. It is copied into a new list
            first, so the caller's object is never reordered.
        ratio: Indexable of fractions. ``ratio[0]`` and ``ratio[1]`` are the
            shares of items placed in the first and second part; the third
            part receives whatever is left, so further entries are not read.
        random_state: Optional seed. When given, a private ``random.Random``
            performs the shuffle, making the split reproducible without
            touching the global generator. When ``None`` (default) the
            module-level ``random`` state is used, as before.

    Returns:
        Tuple ``(part_a, part_b, part_c)`` of lists.

    Raises:
        ValueError: If one of the first two fractions is negative or their
            sum exceeds 1 (which would silently produce truncated parts).
    """
    first_frac, second_frac = ratio[0], ratio[1]
    # Small tolerance so float sums such as 0.7 + 0.3 are not rejected.
    if first_frac < 0 or second_frac < 0 or first_frac + second_frac > 1 + 1e-9:
        raise ValueError(
            "ratio[0] and ratio[1] must be non-negative and sum to at most 1, got %r"
            % (ratio,)
        )

    input_list_copy = list(input_list)
    total_length = len(input_list_copy)
    a_length = int(first_frac * total_length)
    b_length = int(second_frac * total_length)

    if random_state is None:
        # Global RNG: honours any seed set by the caller beforehand.
        random.shuffle(input_list_copy)
    else:
        random.Random(random_state).shuffle(input_list_copy)

    part_a = input_list_copy[:a_length]
    part_b = input_list_copy[a_length:a_length + b_length]
    part_c = input_list_copy[a_length + b_length:]

    return part_a, part_b, part_c

def load_dataset(filename,x_name,y_name1,y_name2):
    """Read a CSV and turn every row into a graph with a two-value target.

    Args:
        filename: Path of the CSV file passed to ``pd.read_csv``.
        x_name: Column holding the SMILES strings given to ``from_smiles``.
        y_name1: Column providing the first target value.
        y_name2: Column providing the second target value.

    Returns:
        List with one object per CSV row, each being whatever ``from_smiles``
        returned with its ``y`` attribute set to a float32 tensor of shape
        ``(1, 2)`` holding ``[y_name1, y_name2]`` for that row.
    """
    frame = pd.read_csv(filename)
    # Look the three columns up once instead of once per row.
    smiles_column = frame[x_name]
    first_target = frame[y_name1]
    second_target = frame[y_name2]

    graphs = []
    for row in tqdm(range(len(frame))):
        graph = from_smiles(smiles_column[row])
        graph.y = torch.tensor(
            [[first_target[row], second_target[row]]],
            dtype=torch.float32,
        )
        graphs.append(graph)
    return graphs

def load_splited_dataset(filename,x_name,y_name1,y_name2,ratio=(0.8,0.1,0.1)):
    """Load the CSV as graphs and return them as a shuffled three-way split.

    The arguments are forwarded to ``load_dataset``; ``ratio`` is forwarded to
    ``split_list``. Returns the tuple ``(part_a, part_b, part_c)``.
    """
    part_a, part_b, part_c = split_list(
        load_dataset(filename, x_name, y_name1, y_name2),
        ratio,
    )
    return part_a, part_b, part_c

In [2]:
from torch.nn import Linear, ReLU
from torch_geometric.nn import global_add_pool, global_mean_pool, global_max_pool,GCNConv

class GCN(torch.nn.Module):
    """Four GCNConv layers, a graph-level pooling step and a two-layer head.

    Args:
        in_channels: Size of the input node features.
        hidden_channels: Width of every convolution and of the first linear
            layer.
        out_channels: Number of values predicted per graph.
        pool: ``"mean"`` selects ``global_mean_pool``, ``"add"`` selects
            ``global_add_pool``; any other value selects ``global_max_pool``.
    """

    def __init__(self, in_channels, hidden_channels=32, out_channels=2,pool="add"):
        super().__init__()

        # Widths in -> hidden -> hidden -> hidden -> hidden give four layers,
        # created in the same order as appending them one by one.
        widths = [in_channels] + [hidden_channels] * 4
        self.convs = torch.nn.ModuleList(
            GCNConv(width_in, width_out)
            for width_in, width_out in zip(widths[:-1], widths[1:])
        )

        self.lin1 = Linear(hidden_channels, hidden_channels)
        self.lin2 = Linear(hidden_channels, out_channels)

        # Unrecognised names fall through to max pooling.
        self.pool = (
            global_mean_pool if pool == "mean"
            else global_add_pool if pool == "add"
            else global_max_pool
        )

        self.act = ReLU()

    def forward(self, x, edge_index, batch):
        """Return one ``out_channels``-wide prediction per graph in ``batch``."""
        # Message passing: every convolution is followed by the activation.
        for conv in self.convs:
            x = self.act(conv(x, edge_index))

        # Collapse node embeddings into one vector per graph.
        pooled = self.pool(x, batch)

        hidden = self.act(self.lin1(pooled))
        return self.lin2(hidden)

In [3]:
def train(model, loader, optimizer, loss_func):
    """Run one optimisation pass over ``loader`` and report the training MAE.

    Batches are moved to the device the model's parameters live on, so the
    function works on CPU as well as on GPU. A model without parameters falls
    back to CUDA, matching the earlier hard-coded behaviour.

    Args:
        model: Module called as ``model(x, edge_index, batch)``.
        loader: Iterable of batches exposing ``x``, ``edge_index``, ``batch``,
            ``y`` and ``to(device)``; must also expose ``dataset`` with a
            length.
        optimizer: Optimizer stepping the model's parameters.
        loss_func: Callable ``loss_func(pred, target)`` returning a scalar
            tensor.

    Returns:
        Tuple ``(model, mae)``. ``mae`` is the summed absolute error divided by
        the number of predicted values per sample and by
        ``len(loader.dataset)``. It is accumulated while the weights are
        still being updated.
    """
    model.train()
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cuda")

    mae = 0
    for data in loader:
        # Node features are cast to float before the forward pass.
        data.x = data.x.float()
        data = data.to(device)
        optimizer.zero_grad()
        pred = model(data.x, data.edge_index, data.batch)
        # Divide by the prediction width instead of a hard-coded 2 so the
        # metric stays a per-target mean for any number of outputs.
        mae += torch.sum(torch.abs(pred - data.y)).item() / pred.shape[-1]
        loss = loss_func(pred, data.y)
        loss.backward()
        optimizer.step()

    return model, mae / len(loader.dataset)

def test(model, loader):
    model.eval()
    mae=0
    with torch.no_grad():
        for data in loader:
            #data.y=data.y.reshape(-1,1)
            data.x=data.x.float()
            data=data.cuda()
            out = model(data.x, data.edge_index, data.batch)  
            mae+=torch.sum(torch.abs(out-data.y)).item()/2
        
    return mae / len(loader.dataset)

# QM9

In [4]:
from torch_geometric.seed import seed_everything

# NOTE(review): max_nodes is assigned here but not read by any cell visible
# in this notebook — confirm whether it is still needed.
max_nodes = 9

# One graph per CSV row, with the homo/lumo columns as the two targets.
QM9 = load_dataset(
    "./dataset/QM9/QM9.csv",
    x_name="smiles",
    y_name1="homo",
    y_name2="lumo",
)

100%|████████████████████████████████████████████████████████████████████████| 133247/133247 [01:05<00:00, 2026.68it/s]


### Exp 1

In [5]:
# Seed before splitting: split_list shuffles with the global `random` state,
# which seed_everything is expected to fix (see torch_geometric.seed).
seed_everything(1222)
QM9_train, QM9_val, QM9_test = split_list(QM9, ratio=(0.8, 0.1, 0.1))

In [6]:
from torch_geometric.loader import DataLoader
from torch_geometric.data import Batch



# Input width of the model is read from the first training graph.
num_features = QM9_train[0].num_node_features

# Training batches are reshuffled; evaluation loaders keep a fixed order
# and use a larger batch size.
train_loader = DataLoader(QM9_train, batch_size=128, shuffle=True)
val_loader, test_loader = (
    DataLoader(subset, batch_size=1024, shuffle=False)
    for subset in (QM9_val, QM9_test)
)

In [7]:
from torch.nn import MSELoss
import time

# Stage 1: fresh mean-pooling GCN trained with Adam at lr=0.001.
gcn = GCN(num_features, pool="mean")
optimizer = torch.optim.Adam(gcn.parameters(), lr=0.001)
loss_fn = MSELoss()

gcn = gcn.cuda()
loss_fn = loss_fn.cuda()

epochs = 50
# Per-epoch MAE histories; the following stage appends to the same lists.
train_curve, val_curve, test_curve = [], [], []
for i in range(1, epochs + 1):
    t_start = time.time()
    gcn, train_mae = train(gcn, train_loader, optimizer, loss_fn)
    val_mae = test(gcn, val_loader)
    test_mae = test(gcn, test_loader)
    t_end = time.time()
    print("Epoch %d" % i)
    print(
        "Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"
        % (train_mae, val_mae, test_mae, t_end - t_start)
    )

    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.021878    Val MAE:0.019322    Test MAE:0.019441     Time:10.718618 s
Epoch 2
Train MAE:0.017876    Val MAE:0.016604    Test MAE:0.016770     Time:10.514676 s
Epoch 3
Train MAE:0.016340    Val MAE:0.015745    Test MAE:0.015911     Time:10.843867 s
Epoch 4
Train MAE:0.015416    Val MAE:0.014804    Test MAE:0.014944     Time:10.958769 s
Epoch 5
Train MAE:0.014600    Val MAE:0.015915    Test MAE:0.016053     Time:10.774030 s
Epoch 6
Train MAE:0.013889    Val MAE:0.013557    Test MAE:0.013656     Time:11.143261 s
Epoch 7
Train MAE:0.013475    Val MAE:0.012940    Test MAE:0.012999     Time:11.378840 s
Epoch 8
Train MAE:0.012975    Val MAE:0.013697    Test MAE:0.013748     Time:11.278141 s
Epoch 9
Train MAE:0.012648    Val MAE:0.013367    Test MAE:0.013383     Time:11.342589 s
Epoch 10
Train MAE:0.012212    Val MAE:0.012539    Test MAE:0.012540     Time:10.755606 s
Epoch 11
Train MAE:0.011805    Val MAE:0.011754    Test MAE:0.011712     Time:10.634016 s
Epoch 12
Train MAE:

In [8]:
# Stage 2: keep the current weights, but restart Adam at lr=0.0005 and
# train on smaller batches. Histories continue in the existing curve lists.
epochs = 50
optimizer = torch.optim.Adam(gcn.parameters(), lr=0.0005)
train_loader = DataLoader(QM9_train, batch_size=32, shuffle=True)
for i in range(1, epochs + 1):
    t_start = time.time()
    gcn, train_mae = train(gcn, train_loader, optimizer, loss_fn)
    val_mae = test(gcn, val_loader)
    test_mae = test(gcn, test_loader)
    t_end = time.time()
    print("Epoch %d" % i)
    print(
        "Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"
        % (train_mae, val_mae, test_mae, t_end - t_start)
    )

    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.009298    Val MAE:0.008814    Test MAE:0.008879     Time:29.248197 s
Epoch 2
Train MAE:0.009305    Val MAE:0.008974    Test MAE:0.009023     Time:28.217042 s
Epoch 3
Train MAE:0.009226    Val MAE:0.009334    Test MAE:0.009356     Time:32.587508 s
Epoch 4
Train MAE:0.009204    Val MAE:0.008933    Test MAE:0.008994     Time:29.801223 s
Epoch 5
Train MAE:0.009163    Val MAE:0.009533    Test MAE:0.009564     Time:30.150695 s
Epoch 6
Train MAE:0.009117    Val MAE:0.010174    Test MAE:0.010141     Time:28.576640 s
Epoch 7
Train MAE:0.009092    Val MAE:0.008949    Test MAE:0.008975     Time:28.574719 s
Epoch 8
Train MAE:0.009111    Val MAE:0.009324    Test MAE:0.009332     Time:28.466078 s
Epoch 9
Train MAE:0.009090    Val MAE:0.008993    Test MAE:0.009063     Time:28.483323 s
Epoch 10
Train MAE:0.009051    Val MAE:0.008843    Test MAE:0.008888     Time:29.561930 s
Epoch 11
Train MAE:0.008981    Val MAE:0.009327    Test MAE:0.009309     Time:29.306342 s
Epoch 12
Train MAE:

In [9]:
import os

# open(..., "w") does not create missing directories; make sure the target
# folder exists so the curves are not lost after a long training run.
os.makedirs("./result", exist_ok=True)

# One CSV row per epoch (1-based) with the train/val/test MAE.
with open("./result/mean_QM9_1.csv","w") as fo:
    fo.write("epoch,train,val,test\n")
    for i in range(len(train_curve)):
        fo.write("%d,%f,%f,%f\n"%(i+1,train_curve[i],val_curve[i],test_curve[i]))

### Exp 2

In [10]:
# Second repetition: a different seed yields a different shuffle in
# split_list (seed_everything is expected to seed the global `random`).
seed_everything(324)
QM9_train, QM9_val, QM9_test = split_list(QM9, ratio=(0.8, 0.1, 0.1))

In [11]:
from torch_geometric.loader import DataLoader
from torch_geometric.data import Batch



# Input width of the model is read from the first training graph.
num_features = QM9_train[0].num_node_features

# Training batches are reshuffled; evaluation loaders keep a fixed order
# and use a larger batch size.
train_loader = DataLoader(QM9_train, batch_size=128, shuffle=True)
val_loader, test_loader = (
    DataLoader(subset, batch_size=1024, shuffle=False)
    for subset in (QM9_val, QM9_test)
)

In [12]:
from torch.nn import MSELoss
import time

# Stage 1: fresh mean-pooling GCN trained with Adam at lr=0.001.
gcn = GCN(num_features, pool="mean")
optimizer = torch.optim.Adam(gcn.parameters(), lr=0.001)
loss_fn = MSELoss()

gcn = gcn.cuda()
loss_fn = loss_fn.cuda()

epochs = 50
# Per-epoch MAE histories; the following stage appends to the same lists.
train_curve, val_curve, test_curve = [], [], []
for i in range(1, epochs + 1):
    t_start = time.time()
    gcn, train_mae = train(gcn, train_loader, optimizer, loss_fn)
    val_mae = test(gcn, val_loader)
    test_mae = test(gcn, test_loader)
    t_end = time.time()
    print("Epoch %d" % i)
    print(
        "Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"
        % (train_mae, val_mae, test_mae, t_end - t_start)
    )

    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.019855    Val MAE:0.016577    Test MAE:0.016499     Time:11.201969 s
Epoch 2
Train MAE:0.015705    Val MAE:0.015731    Test MAE:0.015682     Time:11.174039 s
Epoch 3
Train MAE:0.014466    Val MAE:0.013792    Test MAE:0.013742     Time:11.391923 s
Epoch 4
Train MAE:0.013582    Val MAE:0.013081    Test MAE:0.012989     Time:11.297103 s
Epoch 5
Train MAE:0.012629    Val MAE:0.011708    Test MAE:0.011635     Time:11.032203 s
Epoch 6
Train MAE:0.011852    Val MAE:0.011391    Test MAE:0.011414     Time:10.970572 s
Epoch 7
Train MAE:0.011286    Val MAE:0.010986    Test MAE:0.010988     Time:11.110813 s
Epoch 8
Train MAE:0.011049    Val MAE:0.010471    Test MAE:0.010468     Time:11.095203 s
Epoch 9
Train MAE:0.010837    Val MAE:0.010624    Test MAE:0.010636     Time:11.174900 s
Epoch 10
Train MAE:0.010693    Val MAE:0.011011    Test MAE:0.010995     Time:10.863428 s
Epoch 11
Train MAE:0.010494    Val MAE:0.009999    Test MAE:0.009896     Time:10.781943 s
Epoch 12
Train MAE:

In [13]:
# Stage 2: keep the current weights, but restart Adam at lr=0.0005 and
# train on smaller batches. Histories continue in the existing curve lists.
epochs = 50
optimizer = torch.optim.Adam(gcn.parameters(), lr=0.0005)
train_loader = DataLoader(QM9_train, batch_size=32, shuffle=True)
for i in range(1, epochs + 1):
    t_start = time.time()
    gcn, train_mae = train(gcn, train_loader, optimizer, loss_fn)
    val_mae = test(gcn, val_loader)
    test_mae = test(gcn, test_loader)
    t_end = time.time()
    print("Epoch %d" % i)
    print(
        "Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"
        % (train_mae, val_mae, test_mae, t_end - t_start)
    )

    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.008529    Val MAE:0.008425    Test MAE:0.008393     Time:28.944042 s
Epoch 2
Train MAE:0.008872    Val MAE:0.008790    Test MAE:0.008711     Time:30.949699 s
Epoch 3
Train MAE:0.008582    Val MAE:0.008569    Test MAE:0.008474     Time:29.112366 s
Epoch 4
Train MAE:0.008492    Val MAE:0.008324    Test MAE:0.008237     Time:28.417929 s
Epoch 5
Train MAE:0.008434    Val MAE:0.008983    Test MAE:0.008903     Time:28.572825 s
Epoch 6
Train MAE:0.008460    Val MAE:0.008069    Test MAE:0.008044     Time:28.538372 s
Epoch 7
Train MAE:0.008361    Val MAE:0.009262    Test MAE:0.009236     Time:28.393016 s
Epoch 8
Train MAE:0.008319    Val MAE:0.009236    Test MAE:0.009178     Time:28.402221 s
Epoch 9
Train MAE:0.008371    Val MAE:0.008439    Test MAE:0.008349     Time:28.614672 s
Epoch 10
Train MAE:0.008309    Val MAE:0.008592    Test MAE:0.008530     Time:29.503442 s
Epoch 11
Train MAE:0.008282    Val MAE:0.008343    Test MAE:0.008239     Time:31.104370 s
Epoch 12
Train MAE:

In [14]:
import os

# open(..., "w") does not create missing directories; make sure the target
# folder exists so the curves are not lost after a long training run.
os.makedirs("./result", exist_ok=True)

# One CSV row per epoch (1-based) with the train/val/test MAE.
with open("./result/mean_QM9_2.csv","w") as fo:
    fo.write("epoch,train,val,test\n")
    for i in range(len(train_curve)):
        fo.write("%d,%f,%f,%f\n"%(i+1,train_curve[i],val_curve[i],test_curve[i]))

### Exp 3

In [15]:
# Third repetition: a different seed yields a different shuffle in
# split_list (seed_everything is expected to seed the global `random`).
seed_everything(10086)
QM9_train, QM9_val, QM9_test = split_list(QM9, ratio=(0.8, 0.1, 0.1))

In [16]:
from torch_geometric.loader import DataLoader
from torch_geometric.data import Batch



# Input width of the model is read from the first training graph.
num_features = QM9_train[0].num_node_features

# Training batches are reshuffled; evaluation loaders keep a fixed order
# and use a larger batch size.
train_loader = DataLoader(QM9_train, batch_size=128, shuffle=True)
val_loader, test_loader = (
    DataLoader(subset, batch_size=1024, shuffle=False)
    for subset in (QM9_val, QM9_test)
)

In [17]:
from torch.nn import MSELoss
import time

# Stage 1: fresh mean-pooling GCN trained with Adam at lr=0.001.
gcn = GCN(num_features, pool="mean")
optimizer = torch.optim.Adam(gcn.parameters(), lr=0.001)
loss_fn = MSELoss()

gcn = gcn.cuda()
loss_fn = loss_fn.cuda()

epochs = 50
# Per-epoch MAE histories; the following stage appends to the same lists.
train_curve, val_curve, test_curve = [], [], []
for i in range(1, epochs + 1):
    t_start = time.time()
    gcn, train_mae = train(gcn, train_loader, optimizer, loss_fn)
    val_mae = test(gcn, val_loader)
    test_mae = test(gcn, test_loader)
    t_end = time.time()
    print("Epoch %d" % i)
    print(
        "Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"
        % (train_mae, val_mae, test_mae, t_end - t_start)
    )

    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.022220    Val MAE:0.018448    Test MAE:0.018484     Time:10.789605 s
Epoch 2
Train MAE:0.017366    Val MAE:0.016668    Test MAE:0.016694     Time:10.688173 s
Epoch 3
Train MAE:0.015954    Val MAE:0.015818    Test MAE:0.015858     Time:10.750861 s
Epoch 4
Train MAE:0.015119    Val MAE:0.015028    Test MAE:0.015103     Time:10.673245 s
Epoch 5
Train MAE:0.014380    Val MAE:0.016839    Test MAE:0.016952     Time:10.563678 s
Epoch 6
Train MAE:0.013698    Val MAE:0.012850    Test MAE:0.012903     Time:10.656258 s
Epoch 7
Train MAE:0.013066    Val MAE:0.012670    Test MAE:0.012854     Time:10.564569 s
Epoch 8
Train MAE:0.012524    Val MAE:0.012044    Test MAE:0.012201     Time:10.625791 s
Epoch 9
Train MAE:0.011975    Val MAE:0.011329    Test MAE:0.011450     Time:10.580192 s
Epoch 10
Train MAE:0.011640    Val MAE:0.011220    Test MAE:0.011310     Time:10.844235 s
Epoch 11
Train MAE:0.011409    Val MAE:0.010781    Test MAE:0.010852     Time:11.190645 s
Epoch 12
Train MAE:

In [18]:
# Stage 2: keep the current weights, but restart Adam at lr=0.0005 and
# train on smaller batches. Histories continue in the existing curve lists.
epochs = 50
optimizer = torch.optim.Adam(gcn.parameters(), lr=0.0005)
train_loader = DataLoader(QM9_train, batch_size=32, shuffle=True)
for i in range(1, epochs + 1):
    t_start = time.time()
    gcn, train_mae = train(gcn, train_loader, optimizer, loss_fn)
    val_mae = test(gcn, val_loader)
    test_mae = test(gcn, test_loader)
    t_end = time.time()
    print("Epoch %d" % i)
    print(
        "Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"
        % (train_mae, val_mae, test_mae, t_end - t_start)
    )

    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.009488    Val MAE:0.009011    Test MAE:0.009081     Time:29.502970 s
Epoch 2
Train MAE:0.009478    Val MAE:0.009812    Test MAE:0.009843     Time:29.865599 s
Epoch 3
Train MAE:0.009434    Val MAE:0.009250    Test MAE:0.009335     Time:28.380757 s
Epoch 4
Train MAE:0.009409    Val MAE:0.011135    Test MAE:0.011255     Time:28.193394 s
Epoch 5
Train MAE:0.009394    Val MAE:0.010229    Test MAE:0.010347     Time:28.238023 s
Epoch 6
Train MAE:0.009378    Val MAE:0.008993    Test MAE:0.009092     Time:28.362079 s
Epoch 7
Train MAE:0.009296    Val MAE:0.009129    Test MAE:0.009203     Time:28.273390 s
Epoch 8
Train MAE:0.009248    Val MAE:0.009252    Test MAE:0.009283     Time:28.366663 s
Epoch 9
Train MAE:0.009202    Val MAE:0.008952    Test MAE:0.008966     Time:28.849077 s
Epoch 10
Train MAE:0.009200    Val MAE:0.009527    Test MAE:0.009565     Time:30.084338 s
Epoch 11
Train MAE:0.009154    Val MAE:0.009493    Test MAE:0.009500     Time:29.993149 s
Epoch 12
Train MAE:

In [19]:
import os

# open(..., "w") does not create missing directories; make sure the target
# folder exists so the curves are not lost after a long training run.
os.makedirs("./result", exist_ok=True)

# One CSV row per epoch (1-based) with the train/val/test MAE.
with open("./result/mean_QM9_3.csv","w") as fo:
    fo.write("epoch,train,val,test\n")
    for i in range(len(train_curve)):
        fo.write("%d,%f,%f,%f\n"%(i+1,train_curve[i],val_curve[i],test_curve[i]))

# OPV

In [20]:
from torch_geometric.seed import seed_everything


# NOTE(review): max_nodes is assigned here but not read by any cell visible
# in this notebook — confirm whether it is still needed.
max_nodes = 122

# One graph per CSV row, with the homo/lumo columns as the two targets.
# The SMILES column of this file is named "smile" (singular).
OPV = load_dataset(
    "./dataset/OPV/OPV.csv",
    x_name="smile",
    y_name1="homo",
    y_name2="lumo",
)

100%|███████████████████████████████████████████████████████████████████████████| 90823/90823 [02:14<00:00, 674.42it/s]


### Exp 1

In [21]:
# Seed before splitting: split_list shuffles with the global `random` state,
# which seed_everything is expected to fix (see torch_geometric.seed).
seed_everything(1222)
OPV_train, OPV_val, OPV_test = split_list(OPV, ratio=(0.8, 0.1, 0.1))

In [22]:
from torch_geometric.loader import DataLoader
from torch_geometric.data import Batch



# Input width of the model is read from the first training graph.
num_features = OPV_train[0].num_node_features

# Training batches are reshuffled; evaluation loaders keep a fixed order
# and use a larger batch size.
train_loader = DataLoader(OPV_train, batch_size=128, shuffle=True)
val_loader, test_loader = (
    DataLoader(subset, batch_size=1024, shuffle=False)
    for subset in (OPV_val, OPV_test)
)

In [23]:
from torch.nn import MSELoss
import time

# Stage 1: fresh mean-pooling GCN trained with Adam at lr=0.001.
gcn = GCN(num_features, pool="mean")
optimizer = torch.optim.Adam(gcn.parameters(), lr=0.001)
loss_fn = MSELoss()

gcn = gcn.cuda()
loss_fn = loss_fn.cuda()

epochs = 50
# Per-epoch MAE histories; the following stage appends to the same lists.
train_curve, val_curve, test_curve = [], [], []
for i in range(1, epochs + 1):
    t_start = time.time()
    gcn, train_mae = train(gcn, train_loader, optimizer, loss_fn)
    val_mae = test(gcn, val_loader)
    test_mae = test(gcn, test_loader)
    t_end = time.time()
    print("Epoch %d" % i)
    print(
        "Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"
        % (train_mae, val_mae, test_mae, t_end - t_start)
    )

    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.617878    Val MAE:0.410650    Test MAE:0.414293     Time:9.014637 s
Epoch 2
Train MAE:0.397856    Val MAE:0.372104    Test MAE:0.374599     Time:8.543613 s
Epoch 3
Train MAE:0.344203    Val MAE:0.336777    Test MAE:0.337743     Time:9.199456 s
Epoch 4
Train MAE:0.314391    Val MAE:0.299524    Test MAE:0.297646     Time:8.692860 s
Epoch 5
Train MAE:0.292180    Val MAE:0.281314    Test MAE:0.279441     Time:9.324214 s
Epoch 6
Train MAE:0.277244    Val MAE:0.279166    Test MAE:0.277726     Time:9.164810 s
Epoch 7
Train MAE:0.267320    Val MAE:0.270710    Test MAE:0.269814     Time:9.133550 s
Epoch 8
Train MAE:0.261749    Val MAE:0.256346    Test MAE:0.254779     Time:9.261894 s
Epoch 9
Train MAE:0.257879    Val MAE:0.254971    Test MAE:0.253137     Time:9.212464 s
Epoch 10
Train MAE:0.252071    Val MAE:0.248088    Test MAE:0.246633     Time:9.195877 s
Epoch 11
Train MAE:0.248755    Val MAE:0.247488    Test MAE:0.246380     Time:9.479865 s
Epoch 12
Train MAE:0.244508   

In [24]:
# Stage 2: keep the current weights, but restart Adam at lr=0.0005 and
# train on smaller batches. Histories continue in the existing curve lists.
epochs = 50
optimizer = torch.optim.Adam(gcn.parameters(), lr=0.0005)
train_loader = DataLoader(OPV_train, batch_size=32, shuffle=True)
for i in range(1, epochs + 1):
    t_start = time.time()
    gcn, train_mae = train(gcn, train_loader, optimizer, loss_fn)
    val_mae = test(gcn, val_loader)
    test_mae = test(gcn, test_loader)
    t_end = time.time()
    print("Epoch %d" % i)
    print(
        "Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"
        % (train_mae, val_mae, test_mae, t_end - t_start)
    )

    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.163403    Val MAE:0.154501    Test MAE:0.155283     Time:22.169941 s
Epoch 2
Train MAE:0.162046    Val MAE:0.159173    Test MAE:0.159379     Time:22.995635 s
Epoch 3
Train MAE:0.160329    Val MAE:0.154065    Test MAE:0.154956     Time:22.101156 s
Epoch 4
Train MAE:0.158993    Val MAE:0.171952    Test MAE:0.171043     Time:20.894102 s
Epoch 5
Train MAE:0.158320    Val MAE:0.153067    Test MAE:0.153720     Time:20.489645 s
Epoch 6
Train MAE:0.157851    Val MAE:0.156507    Test MAE:0.157219     Time:20.559094 s
Epoch 7
Train MAE:0.155752    Val MAE:0.166403    Test MAE:0.167572     Time:20.563125 s
Epoch 8
Train MAE:0.154680    Val MAE:0.154175    Test MAE:0.154656     Time:20.418101 s
Epoch 9
Train MAE:0.154003    Val MAE:0.149783    Test MAE:0.150087     Time:20.715394 s
Epoch 10
Train MAE:0.153362    Val MAE:0.149914    Test MAE:0.150393     Time:20.375087 s
Epoch 11
Train MAE:0.152497    Val MAE:0.146643    Test MAE:0.146679     Time:20.253697 s
Epoch 12
Train MAE:

In [25]:
import os

# open(..., "w") does not create missing directories; make sure the target
# folder exists so the curves are not lost after a long training run.
os.makedirs("./result", exist_ok=True)

# One CSV row per epoch (1-based) with the train/val/test MAE.
with open("./result/mean_OPV_1.csv","w") as fo:
    fo.write("epoch,train,val,test\n")
    for i in range(len(train_curve)):
        fo.write("%d,%f,%f,%f\n"%(i+1,train_curve[i],val_curve[i],test_curve[i]))

### Exp 2

In [26]:
# Second repetition: a different seed yields a different shuffle in
# split_list (seed_everything is expected to seed the global `random`).
seed_everything(324)
OPV_train, OPV_val, OPV_test = split_list(OPV, ratio=(0.8, 0.1, 0.1))

In [27]:
from torch_geometric.loader import DataLoader
from torch_geometric.data import Batch



# Input width of the model is read from the first training graph.
num_features = OPV_train[0].num_node_features

# Training batches are reshuffled; evaluation loaders keep a fixed order
# and use a larger batch size.
train_loader = DataLoader(OPV_train, batch_size=128, shuffle=True)
val_loader, test_loader = (
    DataLoader(subset, batch_size=1024, shuffle=False)
    for subset in (OPV_val, OPV_test)
)

In [28]:
from torch.nn import MSELoss
import time

# Stage 1: fresh mean-pooling GCN trained with Adam at lr=0.001.
gcn = GCN(num_features, pool="mean")
optimizer = torch.optim.Adam(gcn.parameters(), lr=0.001)
loss_fn = MSELoss()

gcn = gcn.cuda()
loss_fn = loss_fn.cuda()

epochs = 50
# Per-epoch MAE histories; the following stage appends to the same lists.
train_curve, val_curve, test_curve = [], [], []
for i in range(1, epochs + 1):
    t_start = time.time()
    gcn, train_mae = train(gcn, train_loader, optimizer, loss_fn)
    val_mae = test(gcn, val_loader)
    test_mae = test(gcn, test_loader)
    t_end = time.time()
    print("Epoch %d" % i)
    print(
        "Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"
        % (train_mae, val_mae, test_mae, t_end - t_start)
    )

    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.606974    Val MAE:0.416865    Test MAE:0.413365     Time:9.759225 s
Epoch 2
Train MAE:0.363583    Val MAE:0.319418    Test MAE:0.317395     Time:10.006884 s
Epoch 3
Train MAE:0.309395    Val MAE:0.300886    Test MAE:0.298775     Time:10.103455 s
Epoch 4
Train MAE:0.298717    Val MAE:0.290470    Test MAE:0.288762     Time:10.117355 s
Epoch 5
Train MAE:0.287308    Val MAE:0.277729    Test MAE:0.276656     Time:10.248805 s
Epoch 6
Train MAE:0.272759    Val MAE:0.277667    Test MAE:0.276274     Time:10.622792 s
Epoch 7
Train MAE:0.266658    Val MAE:0.259311    Test MAE:0.258813     Time:10.495546 s
Epoch 8
Train MAE:0.259788    Val MAE:0.251881    Test MAE:0.251654     Time:10.164132 s
Epoch 9
Train MAE:0.253826    Val MAE:0.246668    Test MAE:0.246543     Time:10.133013 s
Epoch 10
Train MAE:0.248384    Val MAE:0.246662    Test MAE:0.245906     Time:10.126017 s
Epoch 11
Train MAE:0.245474    Val MAE:0.250229    Test MAE:0.250983     Time:10.071281 s
Epoch 12
Train MAE:0

In [29]:
# Stage 2: keep the current weights, but restart Adam at lr=0.0005 and
# train on smaller batches. Histories continue in the existing curve lists.
epochs = 50
optimizer = torch.optim.Adam(gcn.parameters(), lr=0.0005)
train_loader = DataLoader(OPV_train, batch_size=32, shuffle=True)
for i in range(1, epochs + 1):
    t_start = time.time()
    gcn, train_mae = train(gcn, train_loader, optimizer, loss_fn)
    val_mae = test(gcn, val_loader)
    test_mae = test(gcn, test_loader)
    t_end = time.time()
    print("Epoch %d" % i)
    print(
        "Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"
        % (train_mae, val_mae, test_mae, t_end - t_start)
    )

    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.168070    Val MAE:0.165658    Test MAE:0.164404     Time:21.861462 s
Epoch 2
Train MAE:0.166690    Val MAE:0.173971    Test MAE:0.172313     Time:20.684032 s
Epoch 3
Train MAE:0.165919    Val MAE:0.162230    Test MAE:0.160488     Time:20.677517 s
Epoch 4
Train MAE:0.165720    Val MAE:0.179587    Test MAE:0.176719     Time:20.953110 s
Epoch 5
Train MAE:0.165351    Val MAE:0.159769    Test MAE:0.156994     Time:20.530901 s
Epoch 6
Train MAE:0.164471    Val MAE:0.175614    Test MAE:0.172630     Time:20.684026 s
Epoch 7
Train MAE:0.163499    Val MAE:0.157197    Test MAE:0.154532     Time:20.715308 s
Epoch 8
Train MAE:0.162557    Val MAE:0.164678    Test MAE:0.161436     Time:20.959920 s
Epoch 9
Train MAE:0.162837    Val MAE:0.180111    Test MAE:0.177325     Time:21.558344 s
Epoch 10
Train MAE:0.162002    Val MAE:0.157133    Test MAE:0.154310     Time:20.337162 s
Epoch 11
Train MAE:0.161739    Val MAE:0.164092    Test MAE:0.161450     Time:22.660356 s
Epoch 12
Train MAE:

In [30]:
import os

# open(..., "w") does not create missing directories; make sure the target
# folder exists so the curves are not lost after a long training run.
os.makedirs("./result", exist_ok=True)

# One CSV row per epoch (1-based) with the train/val/test MAE.
with open("./result/mean_OPV_2.csv","w") as fo:
    fo.write("epoch,train,val,test\n")
    for i in range(len(train_curve)):
        fo.write("%d,%f,%f,%f\n"%(i+1,train_curve[i],val_curve[i],test_curve[i]))

### Exp 3

In [31]:
# Third repetition: a different seed yields a different shuffle in
# split_list (seed_everything is expected to seed the global `random`).
seed_everything(10086)
OPV_train, OPV_val, OPV_test = split_list(OPV, ratio=(0.8, 0.1, 0.1))

In [32]:
from torch_geometric.loader import DataLoader
from torch_geometric.data import Batch



# Input width of the model is read from the first training graph.
num_features = OPV_train[0].num_node_features

# Training batches are reshuffled; evaluation loaders keep a fixed order
# and use a larger batch size.
train_loader = DataLoader(OPV_train, batch_size=128, shuffle=True)
val_loader, test_loader = (
    DataLoader(subset, batch_size=1024, shuffle=False)
    for subset in (OPV_val, OPV_test)
)

In [33]:
from torch.nn import MSELoss
import time

# Stage 1: fresh mean-pooling GCN trained with Adam at lr=0.001.
gcn = GCN(num_features, pool="mean")
optimizer = torch.optim.Adam(gcn.parameters(), lr=0.001)
loss_fn = MSELoss()

gcn = gcn.cuda()
loss_fn = loss_fn.cuda()

epochs = 50
# Per-epoch MAE histories; the following stage appends to the same lists.
train_curve, val_curve, test_curve = [], [], []
for i in range(1, epochs + 1):
    t_start = time.time()
    gcn, train_mae = train(gcn, train_loader, optimizer, loss_fn)
    val_mae = test(gcn, val_loader)
    test_mae = test(gcn, test_loader)
    t_end = time.time()
    print("Epoch %d" % i)
    print(
        "Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"
        % (train_mae, val_mae, test_mae, t_end - t_start)
    )

    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.638910    Val MAE:0.407820    Test MAE:0.406038     Time:9.555429 s
Epoch 2
Train MAE:0.391426    Val MAE:0.368216    Test MAE:0.366496     Time:9.969795 s
Epoch 3
Train MAE:0.339844    Val MAE:0.323985    Test MAE:0.320655     Time:10.249173 s
Epoch 4
Train MAE:0.316370    Val MAE:0.309417    Test MAE:0.305531     Time:9.811286 s
Epoch 5
Train MAE:0.305220    Val MAE:0.311211    Test MAE:0.308635     Time:9.764691 s
Epoch 6
Train MAE:0.292890    Val MAE:0.284139    Test MAE:0.282252     Time:9.749170 s
Epoch 7
Train MAE:0.279538    Val MAE:0.279531    Test MAE:0.277920     Time:9.922184 s
Epoch 8
Train MAE:0.266571    Val MAE:0.258008    Test MAE:0.257073     Time:9.703231 s
Epoch 9
Train MAE:0.257811    Val MAE:0.259623    Test MAE:0.257051     Time:9.703095 s
Epoch 10
Train MAE:0.251670    Val MAE:0.247102    Test MAE:0.246047     Time:9.828446 s
Epoch 11
Train MAE:0.245953    Val MAE:0.251718    Test MAE:0.251453     Time:9.812213 s
Epoch 12
Train MAE:0.242383  

In [34]:
# Stage 2: keep the current weights, but restart Adam at lr=0.0005 and
# train on smaller batches. Histories continue in the existing curve lists.
epochs = 50
optimizer = torch.optim.Adam(gcn.parameters(), lr=0.0005)
train_loader = DataLoader(OPV_train, batch_size=32, shuffle=True)
for i in range(1, epochs + 1):
    t_start = time.time()
    gcn, train_mae = train(gcn, train_loader, optimizer, loss_fn)
    val_mae = test(gcn, val_loader)
    test_mae = test(gcn, test_loader)
    t_end = time.time()
    print("Epoch %d" % i)
    print(
        "Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"
        % (train_mae, val_mae, test_mae, t_end - t_start)
    )

    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.187590    Val MAE:0.200097    Test MAE:0.198689     Time:21.116851 s
Epoch 2
Train MAE:0.186211    Val MAE:0.196132    Test MAE:0.195880     Time:20.391836 s
Epoch 3
Train MAE:0.183961    Val MAE:0.190634    Test MAE:0.189590     Time:21.137383 s
Epoch 4
Train MAE:0.182483    Val MAE:0.177607    Test MAE:0.177105     Time:21.189898 s
Epoch 5
Train MAE:0.181297    Val MAE:0.183417    Test MAE:0.182762     Time:21.313182 s
Epoch 6
Train MAE:0.179334    Val MAE:0.189333    Test MAE:0.188343     Time:21.836194 s
Epoch 7
Train MAE:0.177434    Val MAE:0.171648    Test MAE:0.171817     Time:22.118827 s
Epoch 8
Train MAE:0.176322    Val MAE:0.175460    Test MAE:0.175315     Time:21.206009 s
Epoch 9
Train MAE:0.174698    Val MAE:0.171557    Test MAE:0.171021     Time:21.095250 s
Epoch 10
Train MAE:0.174379    Val MAE:0.174653    Test MAE:0.174574     Time:21.538052 s
Epoch 11
Train MAE:0.172858    Val MAE:0.178985    Test MAE:0.178353     Time:22.691118 s
Epoch 12
Train MAE:

In [35]:
import os

# open(..., "w") does not create missing directories; make sure the target
# folder exists so the curves are not lost after a long training run.
os.makedirs("./result", exist_ok=True)

# One CSV row per epoch (1-based) with the train/val/test MAE.
with open("./result/mean_OPV_3.csv","w") as fo:
    fo.write("epoch,train,val,test\n")
    for i in range(len(train_curve)):
        fo.write("%d,%f,%f,%f\n"%(i+1,train_curve[i],val_curve[i],test_curve[i]))