In [1]:
import copy
import os.path as osp
import os
import torch
import torch.nn.functional as F
from tqdm import tqdm
import numpy as np
from time import *

from torch_geometric.datasets import Reddit
from torch_geometric.loader import NeighborLoader
from torch_geometric.nn import SAGEConv

# Download (on first use) and load the Reddit dataset into <parent of cwd>/data/Reddit.
path = osp.join(osp.dirname(osp.realpath('./')), 'data', 'Reddit')
dataset = Reddit(path)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Only the attributes named here ('x' and 'y') are moved to the device; the
# training/evaluation code moves each mini-batch's edge_index itself.
data = dataset[0].to(device, 'x', 'y')

BS=4096
kwargs = {'batch_size': BS, 'num_workers': 6, 'persistent_workers': True}
# Training draws its seed nodes in a fresh random order on every pass.
train_loader = NeighborLoader(data, input_nodes=data.train_mask,
                              num_neighbors=[25, 10], shuffle=True, **kwargs)
# Evaluation only accumulates correct/total counts over every seed node, so the
# visiting order is irrelevant and shuffling is switched off.
test_loader = NeighborLoader(data, input_nodes=data.test_mask,
                              num_neighbors=[25, 10], shuffle=False, **kwargs)
val_loader = NeighborLoader(data, input_nodes=data.val_mask,
                              num_neighbors=[25, 10], shuffle=False, **kwargs)

In [2]:
class SAGE(torch.nn.Module):
    """Stack of ``SAGEConv`` layers with ReLU and dropout between them.

    Args:
        in_channels: size of each input node feature vector.
        hidden_channels: output size of every layer except the last one.
        out_channels: output size of the last layer.
        num_layers: total number of ``SAGEConv`` layers. When ``None`` (the
            default) the module-level global ``NC`` is read instead, which is
            how the depth was chosen before this parameter existed. Values
            below 2 still produce two layers (one input, one output layer).
    """

    def __init__(self, in_channels, hidden_channels, out_channels, num_layers=None):
        super().__init__()
        if num_layers is None:
            # Backward-compatible fallback: existing callers set the global NC
            # before constructing the model and pass only three arguments.
            num_layers = NC
        self.convs = torch.nn.ModuleList()
        self.convs.append(SAGEConv(in_channels, hidden_channels))
        for _ in range(num_layers - 2):
            self.convs.append(SAGEConv(hidden_channels, hidden_channels))
        self.convs.append(SAGEConv(hidden_channels, out_channels))

    def forward(self, x, edge_index):
        """Apply every layer in order and return the last layer's raw output.

        ReLU (in place) and dropout with p=0.5 follow every layer except the
        last; dropout is only active while the module is in training mode.
        """
        last = len(self.convs) - 1
        for i, conv in enumerate(self.convs):
            x = conv(x, edge_index)
            if i < last:
                x = x.relu_()
                x = F.dropout(x, p=0.5, training=self.training)
        return x
    
def train(epoch):
    """Run one optimisation pass over ``train_loader`` with the global model.

    Relies on the module-level ``model``, ``optimizer``, ``train_loader`` and
    ``device``. For every mini-batch only the first ``batch.batch_size`` rows
    of the model output and of ``batch.y`` enter the loss and the accuracy
    count.

    Args:
        epoch: accepted for the caller's convenience; not used by the body.

    Returns:
        ``(mean_loss, accuracy)`` where both are averaged over the number of
        examples counted via ``batch.batch_size``.
    """
    model.train()

    loss_sum = 0
    hits = 0
    seen = 0
    for batch in train_loader:
        n_seed = batch.batch_size
        optimizer.zero_grad()
        # edge_index is moved to the device per batch before the forward pass.
        logits = model(batch.x, batch.edge_index.to(device))[:n_seed]
        target = batch.y[:n_seed]
        loss = F.cross_entropy(logits, target)
        loss.backward()
        optimizer.step()

        # Weight the batch-mean loss by the batch size so the final division
        # yields a per-example mean even when the last batch is smaller.
        loss_sum += float(loss) * n_seed
        hits += int((logits.argmax(dim=-1) == target).sum())
        seen += n_seed

    return loss_sum / seen, hits / seen

@torch.no_grad()
def test1():
    model.eval()
    #pbar = tqdm(total=int(len(train_loader.dataset)+len(val_loader.dataset)+len(test_loader.dataset)))
    #pbar.set_description(f'Epoch {epoch:02d} Evaluation: ')
    total_correct_train = total_examples_train = 0
    total_correct_val = total_examples_val = 0
    total_correct_test = total_examples_test = 0
    for batch in train_loader:
        y = batch.y[:batch.batch_size]
        y_hat = model(batch.x, batch.edge_index.to(device))[:batch.batch_size]
        total_correct_train += int((y_hat.argmax(dim=-1) == y).sum())
        total_examples_train += batch.batch_size
        #pbar.update(batch.batch_size)  
    for batch in val_loader:
        y = batch.y[:batch.batch_size]
        y_hat = model(batch.x, batch.edge_index.to(device))[:batch.batch_size]
        total_correct_val += int((y_hat.argmax(dim=-1) == y).sum())
        total_examples_val += batch.batch_size
        #pbar.update(batch.batch_size) 
    for batch in test_loader:
        y = batch.y[:batch.batch_size]
        y_hat = model(batch.x, batch.edge_index.to(device))[:batch.batch_size]
        total_correct_test += int((y_hat.argmax(dim=-1) == y).sum())
        total_examples_test += batch.batch_size
        #pbar.update(batch.batch_size)
    #pbar.close()
    train_acc = total_correct_train / total_examples_train
    val_acc =  total_correct_val / total_examples_val
    test_acc = total_correct_test / total_examples_test
    return  train_acc, val_acc, test_acc

In [3]:
def run_gnn(total_epoch):
    """Train for ``total_epoch`` epochs, evaluate once, and print a summary line.

    Calls the module-level ``train`` once per epoch (with a 1-based epoch
    number) and ``test1`` once after the last epoch. The printed line also
    reads the module-level ``HC`` and ``NC``. The reported total time spans
    both the training epochs and the final evaluation.

    Returns:
        ``(loss_epoch, acc_epoch, time_epoch)``: three NumPy arrays of length
        ``total_epoch`` holding the per-epoch values returned by ``train`` and
        the wall-clock seconds each ``train`` call took.
    """
    loss_epoch, acc_epoch, time_epoch = (np.zeros(total_epoch) for _ in range(3))
    begin_tt = time()
    for idx in range(total_epoch):
        tick = time()
        epoch_loss, epoch_acc = train(idx + 1)
        time_epoch[idx] = time() - tick
        loss_epoch[idx] = epoch_loss
        acc_epoch[idx] = epoch_acc
    train_acc, val_acc, test_acc = test1()
    run_tt = time() - begin_tt
    print(f'HC: {HC:02d}, NC: {NC:02d},  Time {run_tt:.4f}, Train: {train_acc:.4f}, Val: {val_acc:.4f}, '
      f'Test: {test_acc:.4f}')
    return loss_epoch, acc_epoch, time_epoch

In [4]:
# Hyper-parameter sweep, repetition 1: train one SAGE model for every
# (hidden width, depth) pair, record its training curves and save it to disk.
rep_time = 1  # repetition index; only used in the checkpoint file name
LR=0.003
epochs=30
HC_list=[16, 32, 128]  # hidden channel widths to sweep
NC_list=[6, 10, 14]    # numbers of SAGEConv layers to sweep
# Per-epoch training curves, indexed [width index, depth index, epoch].
loss_train = np.zeros((len(HC_list),len(NC_list),epochs))
acc_train  = np.zeros((len(HC_list),len(NC_list),epochs))
time_train  = np.zeros((len(HC_list),len(NC_list),epochs))
# Create the checkpoint directory up front so the first torch.save cannot fail
# on a missing folder after a full training run has already been paid for.
save_dir = osp.join(os.path.abspath(''), 'trained_model', 'Sage1')
os.makedirs(save_dir, exist_ok=True)
# HC and NC are deliberately module-level loop variables: SAGE.__init__ reads
# NC to decide its depth and run_gnn prints both.
for hhh, HC in enumerate(HC_list):
    for nnn, NC in enumerate(NC_list):
        model = SAGE(dataset.num_features, HC, dataset.num_classes).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=LR)
        loss_train[hhh,nnn,:], acc_train[hhh,nnn,:],time_train[hhh,nnn,:] =run_gnn(epochs)
        path = osp.join(save_dir, 'SAGE_HC'+str(HC)+'_NC'+str(NC)+'_EP'+str(epochs)+'_v'+str(rep_time)+'.pt')
        # NOTE(review): this pickles the whole module object, so loading it
        # later needs the SAGE class to be importable under the same name.
        torch.save(model, path)

HC: 16, NC: 06,  Time 120.7811, Train: 0.8863, Val: 0.8952, Test: 0.8946
HC: 16, NC: 10,  Time 112.0392, Train: 0.6103, Val: 0.6507, Test: 0.6345
HC: 16, NC: 14,  Time 112.8161, Train: 0.0260, Val: 0.0271, Test: 0.0248
HC: 32, NC: 06,  Time 112.6944, Train: 0.9196, Val: 0.9264, Test: 0.9257
HC: 32, NC: 10,  Time 112.3423, Train: 0.8527, Val: 0.8549, Test: 0.8581
HC: 32, NC: 14,  Time 110.6557, Train: 0.0606, Val: 0.0537, Test: 0.0538
HC: 128, NC: 06,  Time 111.9009, Train: 0.9632, Val: 0.9580, Test: 0.9576
HC: 128, NC: 10,  Time 132.7848, Train: 0.9193, Val: 0.9232, Test: 0.9238
HC: 128, NC: 14,  Time 168.1328, Train: 0.2191, Val: 0.2856, Test: 0.2816


In [5]:
# Hyper-parameter sweep, repetition 2: train one SAGE model for every
# (hidden width, depth) pair, record its training curves and save it to disk.
rep_time = 2  # repetition index; only used in the checkpoint file name
LR=0.003
epochs=30
HC_list=[16, 32, 128]  # hidden channel widths to sweep
NC_list=[6, 10, 14]    # numbers of SAGEConv layers to sweep
# Per-epoch training curves, indexed [width index, depth index, epoch].
loss_train = np.zeros((len(HC_list),len(NC_list),epochs))
acc_train  = np.zeros((len(HC_list),len(NC_list),epochs))
time_train  = np.zeros((len(HC_list),len(NC_list),epochs))
# Create the checkpoint directory up front so the first torch.save cannot fail
# on a missing folder after a full training run has already been paid for.
save_dir = osp.join(os.path.abspath(''), 'trained_model', 'Sage1')
os.makedirs(save_dir, exist_ok=True)
# HC and NC are deliberately module-level loop variables: SAGE.__init__ reads
# NC to decide its depth and run_gnn prints both.
for hhh, HC in enumerate(HC_list):
    for nnn, NC in enumerate(NC_list):
        model = SAGE(dataset.num_features, HC, dataset.num_classes).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=LR)
        loss_train[hhh,nnn,:], acc_train[hhh,nnn,:],time_train[hhh,nnn,:] =run_gnn(epochs)
        path = osp.join(save_dir, 'SAGE_HC'+str(HC)+'_NC'+str(NC)+'_EP'+str(epochs)+'_v'+str(rep_time)+'.pt')
        # NOTE(review): this pickles the whole module object, so loading it
        # later needs the SAGE class to be importable under the same name.
        torch.save(model, path)

HC: 16, NC: 06,  Time 113.0997, Train: 0.8898, Val: 0.9006, Test: 0.8981
HC: 16, NC: 10,  Time 112.1445, Train: 0.5806, Val: 0.6144, Test: 0.6082
HC: 16, NC: 14,  Time 111.4994, Train: 0.0785, Val: 0.0778, Test: 0.0720
HC: 32, NC: 06,  Time 112.5869, Train: 0.9206, Val: 0.9267, Test: 0.9259
HC: 32, NC: 10,  Time 113.2091, Train: 0.7894, Val: 0.8113, Test: 0.8105
HC: 32, NC: 14,  Time 112.7193, Train: 0.1325, Val: 0.1122, Test: 0.1051
HC: 128, NC: 06,  Time 110.4762, Train: 0.9634, Val: 0.9581, Test: 0.9573
HC: 128, NC: 10,  Time 133.5865, Train: 0.9290, Val: 0.9325, Test: 0.9322
HC: 128, NC: 14,  Time 168.2834, Train: 0.1347, Val: 0.1239, Test: 0.1219


In [6]:
# Hyper-parameter sweep, repetition 3: train one SAGE model for every
# (hidden width, depth) pair, record its training curves and save it to disk.
rep_time = 3  # repetition index; only used in the checkpoint file name
LR=0.003
epochs=30
HC_list=[16, 32, 128]  # hidden channel widths to sweep
NC_list=[6, 10, 14]    # numbers of SAGEConv layers to sweep
# Per-epoch training curves, indexed [width index, depth index, epoch].
loss_train = np.zeros((len(HC_list),len(NC_list),epochs))
acc_train  = np.zeros((len(HC_list),len(NC_list),epochs))
time_train  = np.zeros((len(HC_list),len(NC_list),epochs))
# Create the checkpoint directory up front so the first torch.save cannot fail
# on a missing folder after a full training run has already been paid for.
save_dir = osp.join(os.path.abspath(''), 'trained_model', 'Sage1')
os.makedirs(save_dir, exist_ok=True)
# HC and NC are deliberately module-level loop variables: SAGE.__init__ reads
# NC to decide its depth and run_gnn prints both.
for hhh, HC in enumerate(HC_list):
    for nnn, NC in enumerate(NC_list):
        model = SAGE(dataset.num_features, HC, dataset.num_classes).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=LR)
        loss_train[hhh,nnn,:], acc_train[hhh,nnn,:],time_train[hhh,nnn,:] =run_gnn(epochs)
        path = osp.join(save_dir, 'SAGE_HC'+str(HC)+'_NC'+str(NC)+'_EP'+str(epochs)+'_v'+str(rep_time)+'.pt')
        # NOTE(review): this pickles the whole module object, so loading it
        # later needs the SAGE class to be importable under the same name.
        torch.save(model, path)

HC: 16, NC: 06,  Time 111.9119, Train: 0.8953, Val: 0.9036, Test: 0.9032
HC: 16, NC: 10,  Time 112.9148, Train: 0.7142, Val: 0.6817, Test: 0.6751
HC: 16, NC: 14,  Time 112.7656, Train: 0.0218, Val: 0.0212, Test: 0.0208
HC: 32, NC: 06,  Time 112.6068, Train: 0.9221, Val: 0.9269, Test: 0.9272
HC: 32, NC: 10,  Time 111.8981, Train: 0.8164, Val: 0.8191, Test: 0.8195
HC: 32, NC: 14,  Time 111.8104, Train: 0.0930, Val: 0.0917, Test: 0.0856
HC: 128, NC: 06,  Time 110.0512, Train: 0.9642, Val: 0.9590, Test: 0.9582
HC: 128, NC: 10,  Time 132.0968, Train: 0.9202, Val: 0.9249, Test: 0.9239
HC: 128, NC: 14,  Time 167.7234, Train: 0.1546, Val: 0.1339, Test: 0.1310
