In [4]:
import torch
from torch_geometric.utils import from_smiles
import random
import pandas as pd
import numpy as np
from tqdm import tqdm


def split_list(input_list, ratio, random_state=None):
    """Randomly partition a sequence into three lists.

    Args:
        input_list: Sequence of items to split. It is copied, never modified.
        ratio: Sequence whose first two entries are the fractions of items
            assigned to the first and second part. Any further entries are
            ignored; the third part receives everything that is left over.
        random_state: Optional seed. When given, a private
            ``random.Random(random_state)`` performs the shuffle, so the split
            is reproducible and the global RNG state is left untouched. When
            ``None`` (default) the global ``random`` module state is used.

    Returns:
        Tuple ``(part_a, part_b, part_c)`` of lists.

    Raises:
        ValueError: If either fraction is negative or the first two fractions
            add up to more than 1.
    """
    frac_a, frac_b = ratio[0], ratio[1]
    # Negative fractions would turn into negative slice bounds and silently
    # produce overlapping / wrong partitions, so reject them up front.
    if frac_a < 0 or frac_b < 0 or frac_a + frac_b > 1 + 1e-9:
        raise ValueError(
            "ratio[0] and ratio[1] must be non-negative and sum to at most 1, got %r"
            % (ratio,)
        )

    shuffled = list(input_list)
    total_length = len(shuffled)
    a_length = int(frac_a * total_length)
    b_length = int(frac_b * total_length)

    if random_state is None:
        random.shuffle(shuffled)
    else:
        random.Random(random_state).shuffle(shuffled)

    part_a = shuffled[:a_length]
    part_b = shuffled[a_length:a_length + b_length]
    part_c = shuffled[a_length + b_length:]

    return part_a, part_b, part_c

def load_dataset(filename,x_name,y_name1,y_name2):
    """Read a CSV file and turn every row into a graph with a two-value target.

    Args:
        filename: Path of the CSV file to read.
        x_name: Column holding the SMILES strings passed to ``from_smiles``.
        y_name1: Column holding the first regression target.
        y_name2: Column holding the second regression target.

    Returns:
        List with one object per CSV row, as returned by ``from_smiles``, each
        carrying a float32 ``y`` tensor of shape (1, 2) with the two targets.
    """
    table = pd.read_csv(filename)
    # Walk the three columns in lockstep instead of indexing row by row.
    rows = zip(table[x_name], table[y_name1], table[y_name2])
    graphs = []
    for smiles, target1, target2 in tqdm(rows, total=table.shape[0]):
        graph = from_smiles(smiles)
        graph.y = torch.tensor([[target1, target2]], dtype=torch.float32)
        graphs.append(graph)
    return graphs

def load_splited_dataset(filename,x_name,y_name1,y_name2,ratio=(0.8,0.1,0.1)):
    """Load a CSV dataset with ``load_dataset`` and split it with ``split_list``.

    Args:
        filename: Path of the CSV file to read.
        x_name: Column holding the SMILES strings.
        y_name1: Column holding the first regression target.
        y_name2: Column holding the second regression target.
        ratio: Split fractions forwarded unchanged to ``split_list``.

    Returns:
        Whatever ``split_list`` returns for the loaded graphs and ``ratio``.
    """
    graphs = load_dataset(filename, x_name, y_name1, y_name2)
    partitions = split_list(graphs, ratio)
    return partitions

In [5]:
from torch.nn import Linear, ReLU
from torch_geometric.nn import global_add_pool, global_mean_pool, global_max_pool,GCNConv

class GCN(torch.nn.Module):
    """Four ``GCNConv`` layers, a graph-level pooling step and a two-layer head.

    Args:
        in_channels: Width of the input node features.
        hidden_channels: Width of every convolution and of the first linear layer.
        out_channels: Number of values predicted per graph.
        pool: ``"mean"`` selects ``global_mean_pool``, ``"add"`` selects
            ``global_add_pool``; any other value selects ``global_max_pool``.
    """

    def __init__(self, in_channels, hidden_channels=32, out_channels=2,pool="add"):
        super().__init__()

        # Channel widths of the stack: in -> hidden -> hidden -> hidden -> hidden.
        widths = [in_channels] + [hidden_channels] * 4
        self.convs = torch.nn.ModuleList(
            [GCNConv(src, dst) for src, dst in zip(widths[:-1], widths[1:])]
        )

        self.lin1 = Linear(hidden_channels, hidden_channels)
        self.lin2 = Linear(hidden_channels, out_channels)

        # Unrecognised names fall through to max pooling.
        self.pool = (
            global_mean_pool if pool == "mean"
            else global_add_pool if pool == "add"
            else global_max_pool
        )

        self.act = ReLU()

    def forward(self, x, edge_index, batch):
        """Return one prediction row per graph in the batch.

        Args:
            x: Node feature tensor fed to the first convolution.
            edge_index: Edge index tensor passed to every convolution.
            batch: Node-to-graph assignment passed to the pooling function.
        """
        for conv in self.convs:
            x = self.act(conv(x, edge_index))

        pooled = self.pool(x, batch)

        hidden = self.act(self.lin1(pooled))
        return self.lin2(hidden)


In [6]:
def train(model, loader, optimizer, loss_func):
    """Run one training epoch and report the mean absolute error.

    Args:
        model: Module called as ``model(x, edge_index, batch)``.
        loader: Iterable of batches exposing ``x``, ``edge_index``, ``batch``,
            ``y`` and ``to(device)``; must also expose ``dataset`` so the
            number of samples can be read with ``len``.
        optimizer: Optimizer stepping the parameters of ``model``.
        loss_func: Callable ``loss_func(pred, target)`` returning the loss
            that is back-propagated.

    Returns:
        Tuple ``(model, mae)``. ``mae`` is the absolute error summed over the
        epoch, averaged over the predicted targets per sample and over
        ``len(loader.dataset)``.
    """
    model.train()
    # Follow the model's device rather than assuming CUDA, so the same loop
    # works for a CPU model too; fall back to CUDA for a parameterless model.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cuda")

    abs_err_sum = 0.0
    for data in loader:
        # Node features are cast to float before being fed to the model.
        data.x = data.x.float()
        data = data.to(device)
        optimizer.zero_grad()
        pred = model(data.x, data.edge_index, data.batch)
        # Average over however many targets the model predicts (previously a
        # hard-coded 2); .item() detaches the value from the autograd graph.
        abs_err_sum += torch.sum(torch.abs(pred - data.y)).item() / pred.shape[-1]
        loss = loss_func(pred, data.y)
        loss.backward()
        optimizer.step()

    return model, abs_err_sum / len(loader.dataset)

def test(model, loader):
    model.eval()
    mae=0
    with torch.no_grad():
        for data in loader:
            #data.y=data.y.reshape(-1,1)
            data.x=data.x.float()
            data=data.cuda()
            out = model(data.x, data.edge_index, data.batch)  
            mae+=torch.sum(torch.abs(out-data.y)).item()/2
        
    return mae / len(loader.dataset)

# QM9

In [4]:
from torch_geometric.seed import seed_everything

# NOTE(review): `max_nodes` is not read by any other cell visible in this notebook.
max_nodes=9
# Build one graph per row of the QM9 table, with the "homo" and "lumo" columns as targets.
QM9=load_dataset("./dataset/QM9/QM9.csv","smiles","homo","lumo")

100%|████████████████████████████████████████████████████████████████████████| 133247/133247 [01:04<00:00, 2064.76it/s]


### Exp 1

In [5]:
# Seed the run before splitting. split_list shuffles with Python's `random`
# module, so the split depends on this seed (assuming seed_everything seeds
# `random` as well as torch/numpy — see the PyG documentation).
seed_everything(1222)
# 80% train, 10% validation, remainder test.
QM9_train,QM9_val,QM9_test=split_list(QM9,(0.8,0.1,0.1))

In [6]:
from torch_geometric.loader import DataLoader
from torch_geometric.data import Batch



# Only the training loader shuffles; validation and test use large fixed-order batches.
train_loader = DataLoader(QM9_train, batch_size=128, shuffle=True)
val_loader = DataLoader(QM9_val, batch_size=1024, shuffle=False)
test_loader = DataLoader(QM9_test, batch_size=1024, shuffle=False)
# Node-feature width of the first training graph; passed to GCN as its input size.
num_features=QM9_train[0].num_node_features


In [7]:
from torch.nn import MSELoss
import time

# Phase 1: train a freshly initialised GCN with Adam (lr=0.001) and an MSE loss.
gcn=GCN(num_features)
optimizer = torch.optim.Adam(params=gcn.parameters(), lr=0.001)
loss_fn=MSELoss()

# Move the model and the loss module to the GPU.
gcn=gcn.cuda()
loss_fn=loss_fn.cuda()

epochs=50
# Per-epoch MAE histories; the phase-2 cell appends to these same lists.
train_curve=[]
val_curve=[]
test_curve=[]
for i in range(1,epochs+1):
    t_start=time.time()
    gcn,train_mae = train(gcn, train_loader, optimizer, loss_fn)
    val_mae=test(gcn,val_loader)
    test_mae=test(gcn,test_loader)
    # The reported time covers one training pass plus both evaluations.
    t_end=time.time()
    print("Epoch %d"%i)
    print("Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"%(train_mae,val_mae,test_mae,t_end-t_start))
    
    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.032048    Val MAE:0.021600    Test MAE:0.021668     Time:10.144664 s
Epoch 2
Train MAE:0.020759    Val MAE:0.019258    Test MAE:0.019362     Time:10.989016 s
Epoch 3
Train MAE:0.018648    Val MAE:0.017207    Test MAE:0.017263     Time:11.041065 s
Epoch 4
Train MAE:0.017311    Val MAE:0.015623    Test MAE:0.015724     Time:11.301169 s
Epoch 5
Train MAE:0.016102    Val MAE:0.015455    Test MAE:0.015560     Time:10.931112 s
Epoch 6
Train MAE:0.015399    Val MAE:0.015892    Test MAE:0.015985     Time:10.975624 s
Epoch 7
Train MAE:0.014817    Val MAE:0.013830    Test MAE:0.013958     Time:10.683165 s
Epoch 8
Train MAE:0.014110    Val MAE:0.015053    Test MAE:0.015136     Time:10.381951 s
Epoch 9
Train MAE:0.013694    Val MAE:0.016018    Test MAE:0.016141     Time:10.550438 s
Epoch 10
Train MAE:0.013337    Val MAE:0.012391    Test MAE:0.012448     Time:11.256263 s
Epoch 11
Train MAE:0.012955    Val MAE:0.013204    Test MAE:0.013175     Time:11.008290 s
Epoch 12
Train MAE:

In [8]:
# Phase 2: keep training the same `gcn` for 50 more epochs with a new Adam
# optimizer (lr=0.0005) and a smaller training batch size (32). The epoch
# counter restarts at 1, but the curves continue the phase-1 lists.
epochs=50
optimizer=torch.optim.Adam(params=gcn.parameters(), lr=0.0005)
train_loader = DataLoader(QM9_train, batch_size=32, shuffle=True)
for i in range(1,epochs+1):
    t_start=time.time()
    gcn,train_mae = train(gcn, train_loader, optimizer, loss_fn)
    val_mae=test(gcn,val_loader)
    test_mae=test(gcn,test_loader)
    # The reported time covers one training pass plus both evaluations.
    t_end=time.time()
    print("Epoch %d"%i)
    print("Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"%(train_mae,val_mae,test_mae,t_end-t_start))
    
    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.008903    Val MAE:0.009112    Test MAE:0.009189     Time:28.918895 s
Epoch 2
Train MAE:0.008925    Val MAE:0.008532    Test MAE:0.008614     Time:29.637086 s
Epoch 3
Train MAE:0.008810    Val MAE:0.008352    Test MAE:0.008355     Time:30.206611 s
Epoch 4
Train MAE:0.008781    Val MAE:0.008310    Test MAE:0.008335     Time:30.487760 s
Epoch 5
Train MAE:0.008741    Val MAE:0.008644    Test MAE:0.008667     Time:31.969428 s
Epoch 6
Train MAE:0.008692    Val MAE:0.012028    Test MAE:0.011903     Time:30.426583 s
Epoch 7
Train MAE:0.008715    Val MAE:0.008191    Test MAE:0.008220     Time:29.549383 s
Epoch 8
Train MAE:0.008621    Val MAE:0.009043    Test MAE:0.009073     Time:30.268893 s
Epoch 9
Train MAE:0.008584    Val MAE:0.008209    Test MAE:0.008193     Time:28.172728 s
Epoch 10
Train MAE:0.008560    Val MAE:0.008689    Test MAE:0.008817     Time:28.165795 s
Epoch 11
Train MAE:0.008524    Val MAE:0.008913    Test MAE:0.008936     Time:28.103331 s
Epoch 12
Train MAE:

In [9]:
import os

# Persist the MAE curves: one CSV row per recorded epoch, numbered from 1
# across both training phases.
result_path="./result/add_QM9_1.csv"
# Create the output directory first so a missing ./result folder cannot
# discard the curves after training has finished.
os.makedirs(os.path.dirname(result_path), exist_ok=True)
with open(result_path,"w") as fo:
    fo.write("epoch,train,val,test\n")
    for i in range(len(train_curve)):
        fo.write("%d,%f,%f,%f\n"%(i+1,train_curve[i],val_curve[i],test_curve[i]))

### Exp 2

In [10]:
# Second repetition: a different seed gives a different shuffle in split_list
# (assuming seed_everything seeds Python's `random` — see the PyG documentation).
seed_everything(324)
# 80% train, 10% validation, remainder test.
QM9_train,QM9_val,QM9_test=split_list(QM9,(0.8,0.1,0.1))

In [11]:
from torch_geometric.loader import DataLoader
from torch_geometric.data import Batch



# Only the training loader shuffles; validation and test use large fixed-order batches.
train_loader = DataLoader(QM9_train, batch_size=128, shuffle=True)
val_loader = DataLoader(QM9_val, batch_size=1024, shuffle=False)
test_loader = DataLoader(QM9_test, batch_size=1024, shuffle=False)
# Node-feature width of the first training graph; passed to GCN as its input size.
num_features=QM9_train[0].num_node_features


In [12]:
from torch.nn import MSELoss
import time

# Phase 1: train a freshly initialised GCN with Adam (lr=0.001) and an MSE loss.
gcn=GCN(num_features)
optimizer = torch.optim.Adam(params=gcn.parameters(), lr=0.001)
loss_fn=MSELoss()

# Move the model and the loss module to the GPU.
gcn=gcn.cuda()
loss_fn=loss_fn.cuda()

epochs=50
# Per-epoch MAE histories; the phase-2 cell appends to these same lists.
train_curve=[]
val_curve=[]
test_curve=[]
for i in range(1,epochs+1):
    t_start=time.time()
    gcn,train_mae = train(gcn, train_loader, optimizer, loss_fn)
    val_mae=test(gcn,val_loader)
    test_mae=test(gcn,test_loader)
    # The reported time covers one training pass plus both evaluations.
    t_end=time.time()
    print("Epoch %d"%i)
    print("Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"%(train_mae,val_mae,test_mae,t_end-t_start))
    
    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.024230    Val MAE:0.018960    Test MAE:0.018890     Time:11.190283 s
Epoch 2
Train MAE:0.018027    Val MAE:0.016971    Test MAE:0.016903     Time:10.907537 s
Epoch 3
Train MAE:0.016321    Val MAE:0.016888    Test MAE:0.016880     Time:11.001810 s
Epoch 4
Train MAE:0.015212    Val MAE:0.016265    Test MAE:0.016207     Time:10.906961 s
Epoch 5
Train MAE:0.014623    Val MAE:0.014076    Test MAE:0.013955     Time:11.001211 s
Epoch 6
Train MAE:0.014015    Val MAE:0.014332    Test MAE:0.014208     Time:10.969944 s
Epoch 7
Train MAE:0.013498    Val MAE:0.013178    Test MAE:0.013054     Time:10.719079 s
Epoch 8
Train MAE:0.013300    Val MAE:0.012449    Test MAE:0.012288     Time:10.877997 s
Epoch 9
Train MAE:0.013075    Val MAE:0.012916    Test MAE:0.012775     Time:10.923617 s
Epoch 10
Train MAE:0.012859    Val MAE:0.012776    Test MAE:0.012696     Time:10.938210 s
Epoch 11
Train MAE:0.012642    Val MAE:0.011670    Test MAE:0.011521     Time:10.981494 s
Epoch 12
Train MAE:

In [13]:
# Phase 2: keep training the same `gcn` for 50 more epochs with a new Adam
# optimizer (lr=0.0005) and a smaller training batch size (32). The epoch
# counter restarts at 1, but the curves continue the phase-1 lists.
epochs=50
optimizer=torch.optim.Adam(params=gcn.parameters(), lr=0.0005)
train_loader = DataLoader(QM9_train, batch_size=32, shuffle=True)
for i in range(1,epochs+1):
    t_start=time.time()
    gcn,train_mae = train(gcn, train_loader, optimizer, loss_fn)
    val_mae=test(gcn,val_loader)
    test_mae=test(gcn,test_loader)
    # The reported time covers one training pass plus both evaluations.
    t_end=time.time()
    print("Epoch %d"%i)
    print("Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"%(train_mae,val_mae,test_mae,t_end-t_start))
    
    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.008628    Val MAE:0.009772    Test MAE:0.009671     Time:28.126235 s
Epoch 2
Train MAE:0.008503    Val MAE:0.008456    Test MAE:0.008340     Time:28.621672 s
Epoch 3
Train MAE:0.008564    Val MAE:0.008066    Test MAE:0.008002     Time:29.464919 s
Epoch 4
Train MAE:0.008436    Val MAE:0.008345    Test MAE:0.008274     Time:29.105677 s
Epoch 5
Train MAE:0.008380    Val MAE:0.008773    Test MAE:0.008693     Time:28.268985 s
Epoch 6
Train MAE:0.008476    Val MAE:0.008121    Test MAE:0.008038     Time:27.933624 s
Epoch 7
Train MAE:0.008412    Val MAE:0.009157    Test MAE:0.009104     Time:30.227737 s
Epoch 8
Train MAE:0.008444    Val MAE:0.009291    Test MAE:0.009190     Time:31.236367 s
Epoch 9
Train MAE:0.008301    Val MAE:0.008200    Test MAE:0.008120     Time:28.104127 s
Epoch 10
Train MAE:0.008382    Val MAE:0.008334    Test MAE:0.008274     Time:31.090159 s
Epoch 11
Train MAE:0.008272    Val MAE:0.008258    Test MAE:0.008133     Time:29.598911 s
Epoch 12
Train MAE:

In [14]:
import os

# Persist the MAE curves: one CSV row per recorded epoch, numbered from 1
# across both training phases.
result_path="./result/add_QM9_2.csv"
# Create the output directory first so a missing ./result folder cannot
# discard the curves after training has finished.
os.makedirs(os.path.dirname(result_path), exist_ok=True)
with open(result_path,"w") as fo:
    fo.write("epoch,train,val,test\n")
    for i in range(len(train_curve)):
        fo.write("%d,%f,%f,%f\n"%(i+1,train_curve[i],val_curve[i],test_curve[i]))

### Exp 3

In [15]:
# Third repetition: a different seed gives a different shuffle in split_list
# (assuming seed_everything seeds Python's `random` — see the PyG documentation).
seed_everything(10086)
# 80% train, 10% validation, remainder test.
QM9_train,QM9_val,QM9_test=split_list(QM9,(0.8,0.1,0.1))

In [16]:
from torch_geometric.loader import DataLoader
from torch_geometric.data import Batch



# Only the training loader shuffles; validation and test use large fixed-order batches.
train_loader = DataLoader(QM9_train, batch_size=128, shuffle=True)
val_loader = DataLoader(QM9_val, batch_size=1024, shuffle=False)
test_loader = DataLoader(QM9_test, batch_size=1024, shuffle=False)
# Node-feature width of the first training graph; passed to GCN as its input size.
num_features=QM9_train[0].num_node_features

In [17]:
from torch.nn import MSELoss
import time

# Phase 1: train a freshly initialised GCN with Adam (lr=0.001) and an MSE loss.
gcn=GCN(num_features)
optimizer = torch.optim.Adam(params=gcn.parameters(), lr=0.001)
loss_fn=MSELoss()

# Move the model and the loss module to the GPU.
gcn=gcn.cuda()
loss_fn=loss_fn.cuda()

epochs=50
# Per-epoch MAE histories; the phase-2 cell appends to these same lists.
train_curve=[]
val_curve=[]
test_curve=[]
for i in range(1,epochs+1):
    t_start=time.time()
    gcn,train_mae = train(gcn, train_loader, optimizer, loss_fn)
    val_mae=test(gcn,val_loader)
    test_mae=test(gcn,test_loader)
    # The reported time covers one training pass plus both evaluations.
    t_end=time.time()
    print("Epoch %d"%i)
    print("Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"%(train_mae,val_mae,test_mae,t_end-t_start))
    
    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.038906    Val MAE:0.019501    Test MAE:0.019511     Time:10.660135 s
Epoch 2
Train MAE:0.018903    Val MAE:0.018323    Test MAE:0.018324     Time:10.596021 s
Epoch 3
Train MAE:0.018236    Val MAE:0.017977    Test MAE:0.017907     Time:10.534784 s
Epoch 4
Train MAE:0.017786    Val MAE:0.017808    Test MAE:0.017727     Time:10.579240 s
Epoch 5
Train MAE:0.017291    Val MAE:0.017025    Test MAE:0.016907     Time:10.564251 s
Epoch 6
Train MAE:0.016904    Val MAE:0.017248    Test MAE:0.017201     Time:10.500715 s
Epoch 7
Train MAE:0.016447    Val MAE:0.015901    Test MAE:0.015800     Time:10.547025 s
Epoch 8
Train MAE:0.016139    Val MAE:0.015664    Test MAE:0.015616     Time:10.486035 s
Epoch 9
Train MAE:0.015722    Val MAE:0.015987    Test MAE:0.016010     Time:10.938538 s
Epoch 10
Train MAE:0.015525    Val MAE:0.015217    Test MAE:0.015260     Time:11.125907 s
Epoch 11
Train MAE:0.015108    Val MAE:0.014248    Test MAE:0.014272     Time:10.845234 s
Epoch 12
Train MAE:

In [18]:
# Phase 2: keep training the same `gcn` for 50 more epochs with a new Adam
# optimizer (lr=0.0005) and a smaller training batch size (32). The epoch
# counter restarts at 1, but the curves continue the phase-1 lists.
epochs=50
optimizer=torch.optim.Adam(params=gcn.parameters(), lr=0.0005)
train_loader = DataLoader(QM9_train, batch_size=32, shuffle=True)
for i in range(1,epochs+1):
    t_start=time.time()
    gcn,train_mae = train(gcn, train_loader, optimizer, loss_fn)
    val_mae=test(gcn,val_loader)
    test_mae=test(gcn,test_loader)
    # The reported time covers one training pass plus both evaluations.
    t_end=time.time()
    print("Epoch %d"%i)
    print("Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"%(train_mae,val_mae,test_mae,t_end-t_start))
    
    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.010025    Val MAE:0.010322    Test MAE:0.010374     Time:29.782142 s
Epoch 2
Train MAE:0.009756    Val MAE:0.009345    Test MAE:0.009393     Time:29.940613 s
Epoch 3
Train MAE:0.009641    Val MAE:0.009745    Test MAE:0.009857     Time:29.658635 s
Epoch 4
Train MAE:0.009426    Val MAE:0.009764    Test MAE:0.009752     Time:29.971265 s
Epoch 5
Train MAE:0.009345    Val MAE:0.010614    Test MAE:0.010666     Time:30.551906 s
Epoch 6
Train MAE:0.009303    Val MAE:0.008996    Test MAE:0.009136     Time:30.758164 s
Epoch 7
Train MAE:0.009243    Val MAE:0.012012    Test MAE:0.012158     Time:30.849506 s
Epoch 8
Train MAE:0.009238    Val MAE:0.009364    Test MAE:0.009388     Time:30.410216 s
Epoch 9
Train MAE:0.009099    Val MAE:0.008670    Test MAE:0.008767     Time:29.675457 s
Epoch 10
Train MAE:0.009005    Val MAE:0.009370    Test MAE:0.009486     Time:29.861810 s
Epoch 11
Train MAE:0.009007    Val MAE:0.008680    Test MAE:0.008752     Time:30.004535 s
Epoch 12
Train MAE:

In [19]:
import os

# Persist the MAE curves: one CSV row per recorded epoch, numbered from 1
# across both training phases.
result_path="./result/add_QM9_3.csv"
# Create the output directory first so a missing ./result folder cannot
# discard the curves after training has finished.
os.makedirs(os.path.dirname(result_path), exist_ok=True)
with open(result_path,"w") as fo:
    fo.write("epoch,train,val,test\n")
    for i in range(len(train_curve)):
        fo.write("%d,%f,%f,%f\n"%(i+1,train_curve[i],val_curve[i],test_curve[i]))

# OPV

In [7]:
from torch_geometric.seed import seed_everything


# NOTE(review): `max_nodes` is not read by any other cell visible in this notebook.
max_nodes=122
# Build one graph per row of the OPV table, with the "homo" and "lumo" columns as targets.
# NOTE(review): the SMILES column is spelled "smile" here (QM9 uses "smiles") —
# presumably this matches the OPV CSV header; verify.
OPV=load_dataset("./dataset/OPV/OPV.csv","smile","homo","lumo")

100%|███████████████████████████████████████████████████████████████████████████| 90823/90823 [02:10<00:00, 696.74it/s]


### Exp 1

In [8]:
# Seed the run before splitting. split_list shuffles with Python's `random`
# module, so the split depends on this seed (assuming seed_everything seeds
# `random` as well as torch/numpy — see the PyG documentation).
seed_everything(1222)
# 80% train, 10% validation, remainder test.
OPV_train,OPV_val,OPV_test=split_list(OPV,(0.8,0.1,0.1))

In [9]:
from torch_geometric.loader import DataLoader
from torch_geometric.data import Batch



# Only the training loader shuffles; validation and test use large fixed-order batches.
train_loader = DataLoader(OPV_train, batch_size=128, shuffle=True)
val_loader = DataLoader(OPV_val, batch_size=1024, shuffle=False)
test_loader = DataLoader(OPV_test, batch_size=1024, shuffle=False)
# Node-feature width of the first training graph; passed to GCN as its input size.
num_features=OPV_train[0].num_node_features

In [10]:
from torch.nn import MSELoss
import time

# Phase 1: train a freshly initialised GCN with Adam (lr=0.001) and an MSE loss.
gcn=GCN(num_features)
optimizer = torch.optim.Adam(params=gcn.parameters(), lr=0.001)
loss_fn=MSELoss()

# Move the model and the loss module to the GPU.
gcn=gcn.cuda()
loss_fn=loss_fn.cuda()

epochs=50
# Per-epoch MAE histories; the phase-2 cell appends to these same lists.
train_curve=[]
val_curve=[]
test_curve=[]
for i in range(1,epochs+1):
    t_start=time.time()
    gcn,train_mae = train(gcn, train_loader, optimizer, loss_fn)
    val_mae=test(gcn,val_loader)
    test_mae=test(gcn,test_loader)
    # The reported time covers one training pass plus both evaluations.
    t_end=time.time()
    print("Epoch %d"%i)
    print("Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"%(train_mae,val_mae,test_mae,t_end-t_start))
    
    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:1.029906    Val MAE:0.840099    Test MAE:0.834244     Time:7.757654 s
Epoch 2
Train MAE:0.734528    Val MAE:0.651267    Test MAE:0.646685     Time:7.629367 s
Epoch 3
Train MAE:0.475745    Val MAE:0.354436    Test MAE:0.357872     Time:8.507131 s
Epoch 4
Train MAE:0.300074    Val MAE:0.266318    Test MAE:0.267701     Time:8.975364 s
Epoch 5
Train MAE:0.269408    Val MAE:0.260329    Test MAE:0.261090     Time:8.302126 s
Epoch 6
Train MAE:0.259677    Val MAE:0.243091    Test MAE:0.243489     Time:8.719703 s
Epoch 7
Train MAE:0.248404    Val MAE:0.240666    Test MAE:0.241590     Time:8.734623 s
Epoch 8
Train MAE:0.244723    Val MAE:0.242691    Test MAE:0.242118     Time:8.546660 s
Epoch 9
Train MAE:0.241106    Val MAE:0.236973    Test MAE:0.236846     Time:8.561647 s
Epoch 10
Train MAE:0.237861    Val MAE:0.233309    Test MAE:0.232794     Time:9.014513 s
Epoch 11
Train MAE:0.228343    Val MAE:0.256211    Test MAE:0.255089     Time:8.608917 s
Epoch 12
Train MAE:0.223991   

In [11]:
# Phase 2: keep training the same `gcn` for 50 more epochs with a new Adam
# optimizer (lr=0.0005) and a smaller training batch size (32). The epoch
# counter restarts at 1, but the curves continue the phase-1 lists.
epochs=50
optimizer=torch.optim.Adam(params=gcn.parameters(), lr=0.0005)
train_loader = DataLoader(OPV_train, batch_size=32, shuffle=True)
for i in range(1,epochs+1):
    t_start=time.time()
    gcn,train_mae = train(gcn, train_loader, optimizer, loss_fn)
    val_mae=test(gcn,val_loader)
    test_mae=test(gcn,test_loader)
    # The reported time covers one training pass plus both evaluations.
    t_end=time.time()
    print("Epoch %d"%i)
    print("Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"%(train_mae,val_mae,test_mae,t_end-t_start))
    
    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.138364    Val MAE:0.133369    Test MAE:0.132632     Time:22.488034 s
Epoch 2
Train MAE:0.136205    Val MAE:0.135285    Test MAE:0.133737     Time:22.139810 s
Epoch 3
Train MAE:0.134900    Val MAE:0.137306    Test MAE:0.135755     Time:23.335250 s
Epoch 4
Train MAE:0.133748    Val MAE:0.139698    Test MAE:0.137180     Time:21.699088 s
Epoch 5
Train MAE:0.133177    Val MAE:0.130258    Test MAE:0.128920     Time:21.515346 s
Epoch 6
Train MAE:0.131533    Val MAE:0.125676    Test MAE:0.124984     Time:21.683317 s
Epoch 7
Train MAE:0.131318    Val MAE:0.129066    Test MAE:0.128134     Time:21.656097 s
Epoch 8
Train MAE:0.130681    Val MAE:0.121209    Test MAE:0.119193     Time:22.364965 s
Epoch 9
Train MAE:0.129105    Val MAE:0.121675    Test MAE:0.120299     Time:22.866952 s
Epoch 10
Train MAE:0.128735    Val MAE:0.123574    Test MAE:0.122148     Time:22.925816 s
Epoch 11
Train MAE:0.127870    Val MAE:0.130467    Test MAE:0.129510     Time:23.018086 s
Epoch 12
Train MAE:

In [12]:
import os

# Persist the MAE curves: one CSV row per recorded epoch, numbered from 1
# across both training phases.
result_path="./result/add_OPV_1.csv"
# Create the output directory first so a missing ./result folder cannot
# discard the curves after training has finished.
os.makedirs(os.path.dirname(result_path), exist_ok=True)
with open(result_path,"w") as fo:
    fo.write("epoch,train,val,test\n")
    for i in range(len(train_curve)):
        fo.write("%d,%f,%f,%f\n"%(i+1,train_curve[i],val_curve[i],test_curve[i]))

### Exp 2

In [13]:
# Second repetition: a different seed gives a different shuffle in split_list
# (assuming seed_everything seeds Python's `random` — see the PyG documentation).
seed_everything(324)
# 80% train, 10% validation, remainder test.
OPV_train,OPV_val,OPV_test=split_list(OPV,(0.8,0.1,0.1))

In [14]:
from torch_geometric.loader import DataLoader
from torch_geometric.data import Batch



# Only the training loader shuffles; validation and test use large fixed-order batches.
train_loader = DataLoader(OPV_train, batch_size=128, shuffle=True)
val_loader = DataLoader(OPV_val, batch_size=1024, shuffle=False)
test_loader = DataLoader(OPV_test, batch_size=1024, shuffle=False)
# Node-feature width of the first training graph; passed to GCN as its input size.
num_features=OPV_train[0].num_node_features

In [15]:
from torch.nn import MSELoss
import time

# Phase 1: train a freshly initialised GCN with Adam (lr=0.001) and an MSE loss.
gcn=GCN(num_features)
optimizer = torch.optim.Adam(params=gcn.parameters(), lr=0.001)
loss_fn=MSELoss()

# Move the model and the loss module to the GPU.
gcn=gcn.cuda()
loss_fn=loss_fn.cuda()

epochs=50
# Per-epoch MAE histories; the phase-2 cell appends to these same lists.
train_curve=[]
val_curve=[]
test_curve=[]
for i in range(1,epochs+1):
    t_start=time.time()
    gcn,train_mae = train(gcn, train_loader, optimizer, loss_fn)
    val_mae=test(gcn,val_loader)
    test_mae=test(gcn,test_loader)
    # The reported time covers one training pass plus both evaluations.
    t_end=time.time()
    print("Epoch %d"%i)
    print("Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"%(train_mae,val_mae,test_mae,t_end-t_start))
    
    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.900548    Val MAE:0.698292    Test MAE:0.694440     Time:10.491447 s
Epoch 2
Train MAE:0.564872    Val MAE:0.426035    Test MAE:0.424888     Time:10.050725 s
Epoch 3
Train MAE:0.339086    Val MAE:0.295269    Test MAE:0.298256     Time:10.407543 s
Epoch 4
Train MAE:0.276643    Val MAE:0.257259    Test MAE:0.257409     Time:9.696388 s
Epoch 5
Train MAE:0.256453    Val MAE:0.241629    Test MAE:0.242629     Time:10.266613 s
Epoch 6
Train MAE:0.244935    Val MAE:0.250922    Test MAE:0.250340     Time:10.486255 s
Epoch 7
Train MAE:0.237603    Val MAE:0.281863    Test MAE:0.279414     Time:9.942109 s
Epoch 8
Train MAE:0.229916    Val MAE:0.217909    Test MAE:0.218774     Time:9.815038 s
Epoch 9
Train MAE:0.221922    Val MAE:0.214642    Test MAE:0.216563     Time:9.615483 s
Epoch 10
Train MAE:0.214932    Val MAE:0.205916    Test MAE:0.206923     Time:10.022131 s
Epoch 11
Train MAE:0.204387    Val MAE:0.207830    Test MAE:0.207158     Time:9.635051 s
Epoch 12
Train MAE:0.198

In [16]:
# Phase 2: keep training the same `gcn` for 50 more epochs with a new Adam
# optimizer (lr=0.0005) and a smaller training batch size (32). The epoch
# counter restarts at 1, but the curves continue the phase-1 lists.
epochs=50
optimizer=torch.optim.Adam(params=gcn.parameters(), lr=0.0005)
train_loader = DataLoader(OPV_train, batch_size=32, shuffle=True)
for i in range(1,epochs+1):
    t_start=time.time()
    gcn,train_mae = train(gcn, train_loader, optimizer, loss_fn)
    val_mae=test(gcn,val_loader)
    test_mae=test(gcn,test_loader)
    # The reported time covers one training pass plus both evaluations.
    t_end=time.time()
    print("Epoch %d"%i)
    print("Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"%(train_mae,val_mae,test_mae,t_end-t_start))
    
    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.115992    Val MAE:0.113842    Test MAE:0.113351     Time:22.317189 s
Epoch 2
Train MAE:0.115338    Val MAE:0.109012    Test MAE:0.109684     Time:22.224247 s
Epoch 3
Train MAE:0.114503    Val MAE:0.118269    Test MAE:0.118280     Time:22.408585 s
Epoch 4
Train MAE:0.113110    Val MAE:0.122508    Test MAE:0.122340     Time:21.814739 s
Epoch 5
Train MAE:0.111539    Val MAE:0.112248    Test MAE:0.112335     Time:22.590846 s
Epoch 6
Train MAE:0.110788    Val MAE:0.115355    Test MAE:0.115561     Time:22.297125 s
Epoch 7
Train MAE:0.110043    Val MAE:0.106424    Test MAE:0.106376     Time:21.854786 s
Epoch 8
Train MAE:0.108995    Val MAE:0.105572    Test MAE:0.105100     Time:21.760057 s
Epoch 9
Train MAE:0.108182    Val MAE:0.106712    Test MAE:0.106800     Time:22.150644 s
Epoch 10
Train MAE:0.106965    Val MAE:0.105933    Test MAE:0.105496     Time:22.740145 s
Epoch 11
Train MAE:0.106476    Val MAE:0.103346    Test MAE:0.103446     Time:21.934880 s
Epoch 12
Train MAE:

In [17]:
import os

# Persist the MAE curves: one CSV row per recorded epoch, numbered from 1
# across both training phases.
result_path="./result/add_OPV_2.csv"
# Create the output directory first so a missing ./result folder cannot
# discard the curves after training has finished.
os.makedirs(os.path.dirname(result_path), exist_ok=True)
with open(result_path,"w") as fo:
    fo.write("epoch,train,val,test\n")
    for i in range(len(train_curve)):
        fo.write("%d,%f,%f,%f\n"%(i+1,train_curve[i],val_curve[i],test_curve[i]))

### Exp 3

In [18]:
# Third repetition: a different seed gives a different shuffle in split_list
# (assuming seed_everything seeds Python's `random` — see the PyG documentation).
seed_everything(10086)
# 80% train, 10% validation, remainder test.
OPV_train,OPV_val,OPV_test=split_list(OPV,(0.8,0.1,0.1))

In [19]:
from torch_geometric.loader import DataLoader
from torch_geometric.data import Batch



# Only the training loader shuffles; validation and test use large fixed-order batches.
train_loader = DataLoader(OPV_train, batch_size=128, shuffle=True)
val_loader = DataLoader(OPV_val, batch_size=1024, shuffle=False)
test_loader = DataLoader(OPV_test, batch_size=1024, shuffle=False)
# Node-feature width of the first training graph; passed to GCN as its input size.
num_features=OPV_train[0].num_node_features

In [20]:
from torch.nn import MSELoss
import time

# Phase 1: train a freshly initialised GCN with Adam (lr=0.001) and an MSE loss.
gcn=GCN(num_features)
optimizer = torch.optim.Adam(params=gcn.parameters(), lr=0.001)
loss_fn=MSELoss()

# Move the model and the loss module to the GPU.
gcn=gcn.cuda()
loss_fn=loss_fn.cuda()

epochs=50
# Per-epoch MAE histories; the phase-2 cell appends to these same lists.
train_curve=[]
val_curve=[]
test_curve=[]
for i in range(1,epochs+1):
    t_start=time.time()
    gcn,train_mae = train(gcn, train_loader, optimizer, loss_fn)
    val_mae=test(gcn,val_loader)
    test_mae=test(gcn,test_loader)
    # The reported time covers one training pass plus both evaluations.
    t_end=time.time()
    print("Epoch %d"%i)
    print("Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"%(train_mae,val_mae,test_mae,t_end-t_start))
    
    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:1.198642    Val MAE:0.886652    Test MAE:0.891289     Time:9.821631 s
Epoch 2
Train MAE:0.825880    Val MAE:0.760654    Test MAE:0.759565     Time:9.584570 s
Epoch 3
Train MAE:0.607271    Val MAE:0.494748    Test MAE:0.489092     Time:9.598932 s
Epoch 4
Train MAE:0.404798    Val MAE:0.335594    Test MAE:0.331304     Time:9.478515 s
Epoch 5
Train MAE:0.301792    Val MAE:0.269259    Test MAE:0.266798     Time:9.993204 s
Epoch 6
Train MAE:0.266146    Val MAE:0.250712    Test MAE:0.249755     Time:9.579704 s
Epoch 7
Train MAE:0.252787    Val MAE:0.282961    Test MAE:0.280994     Time:9.583213 s
Epoch 8
Train MAE:0.245890    Val MAE:0.244339    Test MAE:0.244014     Time:9.646999 s
Epoch 9
Train MAE:0.241617    Val MAE:0.271652    Test MAE:0.270562     Time:10.115134 s
Epoch 10
Train MAE:0.241635    Val MAE:0.236415    Test MAE:0.235745     Time:10.341423 s
Epoch 11
Train MAE:0.237529    Val MAE:0.235220    Test MAE:0.235930     Time:10.755197 s
Epoch 12
Train MAE:0.228970

In [21]:
# Phase 2: keep training the same `gcn` for 50 more epochs with a new Adam
# optimizer (lr=0.0005) and a smaller training batch size (32). The epoch
# counter restarts at 1, but the curves continue the phase-1 lists.
epochs=50
optimizer=torch.optim.Adam(params=gcn.parameters(), lr=0.0005)
train_loader = DataLoader(OPV_train, batch_size=32, shuffle=True)
for i in range(1,epochs+1):
    t_start=time.time()
    gcn,train_mae = train(gcn, train_loader, optimizer, loss_fn)
    val_mae=test(gcn,val_loader)
    test_mae=test(gcn,test_loader)
    # The reported time covers one training pass plus both evaluations.
    t_end=time.time()
    print("Epoch %d"%i)
    print("Train MAE:%f    Val MAE:%f    Test MAE:%f     Time:%f s"%(train_mae,val_mae,test_mae,t_end-t_start))
    
    train_curve.append(train_mae)
    val_curve.append(val_mae)
    test_curve.append(test_mae)

Epoch 1
Train MAE:0.145122    Val MAE:0.147335    Test MAE:0.144656     Time:23.001515 s
Epoch 2
Train MAE:0.144472    Val MAE:0.148645    Test MAE:0.146776     Time:23.252312 s
Epoch 3
Train MAE:0.142782    Val MAE:0.137422    Test MAE:0.136556     Time:22.754387 s
Epoch 4
Train MAE:0.141963    Val MAE:0.137663    Test MAE:0.137343     Time:21.949644 s
Epoch 5
Train MAE:0.140854    Val MAE:0.145935    Test MAE:0.143070     Time:22.525177 s
Epoch 6
Train MAE:0.140157    Val MAE:0.138851    Test MAE:0.138248     Time:22.885174 s
Epoch 7
Train MAE:0.138364    Val MAE:0.137161    Test MAE:0.136511     Time:22.767437 s
Epoch 8
Train MAE:0.138178    Val MAE:0.138947    Test MAE:0.139461     Time:21.673560 s
Epoch 9
Train MAE:0.136628    Val MAE:0.130228    Test MAE:0.129016     Time:21.612436 s
Epoch 10
Train MAE:0.136296    Val MAE:0.133520    Test MAE:0.133687     Time:21.684922 s
Epoch 11
Train MAE:0.133859    Val MAE:0.133568    Test MAE:0.132339     Time:21.665143 s
Epoch 12
Train MAE:

In [22]:
import os

# Persist the MAE curves: one CSV row per recorded epoch, numbered from 1
# across both training phases.
result_path="./result/add_OPV_3.csv"
# Create the output directory first so a missing ./result folder cannot
# discard the curves after training has finished.
os.makedirs(os.path.dirname(result_path), exist_ok=True)
with open(result_path,"w") as fo:
    fo.write("epoch,train,val,test\n")
    for i in range(len(train_curve)):
        fo.write("%d,%f,%f,%f\n"%(i+1,train_curve[i],val_curve[i],test_curve[i]))