In [1]:
import copy
import os.path as osp
import os
import torch
import torch.nn.functional as F
from tqdm import tqdm
import numpy as np
from time import *

from torch_geometric.datasets import Reddit
from torch_geometric.loader import NeighborLoader
from torch_geometric.nn import SAGEConv

# Download (if not already cached under `path`) and load the Reddit dataset.
path = osp.join(osp.dirname(osp.realpath('./')), 'data', 'Reddit')
dataset = Reddit(path)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Only the `x` and `y` attributes are named in the device transfer; the
# training/evaluation code moves `edge_index` to the device batch by batch.
data = dataset[0].to(device, 'x', 'y')

# Settings shared by all three mini-batch loaders.
BS=4096
kwargs = {'batch_size': BS, 'num_workers': 6, 'persistent_workers': True}

# Training batches are drawn in random order.
train_loader = NeighborLoader(data, input_nodes=data.train_mask,
                              num_neighbors=[25, 10], shuffle=True, **kwargs)
# Evaluation only accumulates counts over the whole split, so the order in
# which seed nodes are visited is irrelevant and shuffling is switched off.
test_loader = NeighborLoader(data, input_nodes=data.test_mask,
                             num_neighbors=[25, 10], shuffle=False, **kwargs)
val_loader = NeighborLoader(data, input_nodes=data.val_mask,
                            num_neighbors=[25, 10], shuffle=False, **kwargs)

In [2]:
class pSAGE(torch.nn.Module):
    """Stack of `SAGEConv` layers in which every interior layer is applied twice.

    The stack holds one input layer (`in_channels -> hidden_channels`),
    `num_layers - 2` interior layers (`hidden_channels -> hidden_channels`)
    and one output layer (`hidden_channels -> out_channels`).

    Args:
        in_channels: Size of each input node feature vector.
        hidden_channels: Width of every hidden representation.
        out_channels: Size of the output produced per node.
        num_layers: Total number of `SAGEConv` modules to create (>= 2).
            When omitted, the notebook-level global `NC` is used, which is
            how the existing sweep cells select the depth.

    Raises:
        ValueError: If the resolved layer count is smaller than 2. The stack
            always contains an input and an output layer, so a smaller
            request cannot be honoured.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, num_layers=None):
        if num_layers is None:
            # Backward-compatible fallback for callers that still configure
            # the depth through the global `NC`.
            num_layers = NC
        if num_layers < 2:
            raise ValueError(
                f'num_layers must be at least 2 (input + output layer), got {num_layers}')

        super().__init__()
        self.convs = torch.nn.ModuleList()
        self.convs.append(SAGEConv(in_channels, hidden_channels))
        for _ in range(num_layers - 2):
            self.convs.append(SAGEConv(hidden_channels, hidden_channels))
        self.convs.append(SAGEConv(hidden_channels, out_channels))

    def forward(self, x, edge_index):
        """Run the stack on node features `x` using connectivity `edge_index`.

        Every layer except the last is followed by an in-place ReLU and
        dropout with p=0.5 (active only in training mode). Interior layers
        (neither first nor last) are invoked twice in a row, reusing the same
        module and therefore the same weights for both passes. The last
        layer's output is returned without activation or dropout.

        NOTE(review): the repeated pass through interior layers is preserved
        exactly as found; confirm that this weight sharing is intentional.
        """
        last = len(self.convs) - 1
        for i, conv in enumerate(self.convs):
            if i == last:
                # Output layer: no activation, no dropout.
                return conv(x, edge_index)
            # First layer runs once, interior layers run twice.
            passes = 2 if i > 0 else 1
            for _ in range(passes):
                x = conv(x, edge_index)
                x = x.relu_()
                x = F.dropout(x, p=0.5, training=self.training)
        return x
    
def train(epoch):
    """Run one optimisation pass over `train_loader`.

    Relies on the notebook globals `model`, `optimizer`, `train_loader` and
    `device`. Loss and accuracy are computed on the first `batch.batch_size`
    rows of each batch only.

    Args:
        epoch: Epoch number supplied by the caller; the body does not use it.

    Returns:
        Tuple `(mean_loss, accuracy)` where both values are averaged over all
        examples counted during the pass.
    """
    model.train()

    loss_sum, hits, seen = 0, 0, 0
    for batch in train_loader:
        n_seed = batch.batch_size

        optimizer.zero_grad()
        target = batch.y[:n_seed]
        logits = model(batch.x, batch.edge_index.to(device))[:n_seed]
        loss = F.cross_entropy(logits, target)
        loss.backward()
        optimizer.step()

        # Weight the batch loss by its size so the final mean is per example.
        loss_sum += float(loss) * n_seed
        hits += int((logits.argmax(dim=-1) == target).sum())
        seen += n_seed

    return loss_sum / seen, hits / seen

@torch.no_grad()
def test1():
    """Measure classification accuracy on the train, validation and test loaders.

    Relies on the notebook globals `model`, `device`, `train_loader`,
    `val_loader` and `test_loader`. The model is switched to eval mode and
    the loaders are processed in the order train, validation, test.

    Returns:
        Tuple `(train_acc, val_acc, test_acc)` of correct/total fractions.
    """
    model.eval()

    def tally(loader):
        # Count correct predictions and examples over one loader, looking
        # only at the first `batch.batch_size` rows of every batch.
        hits = seen = 0
        for batch in loader:
            n_seed = batch.batch_size
            target = batch.y[:n_seed]
            logits = model(batch.x, batch.edge_index.to(device))[:n_seed]
            hits += int((logits.argmax(dim=-1) == target).sum())
            seen += n_seed
        return hits, seen

    counts = [tally(loader) for loader in (train_loader, val_loader, test_loader)]
    train_acc, val_acc, test_acc = (hits / seen for hits, seen in counts)
    return train_acc, val_acc, test_acc

In [3]:
def run_gnn(total_epoch):
    """Train for `total_epoch` epochs, evaluate once, and print a summary line.

    Calls `train` once per epoch (epoch numbers start at 1), then `test1`
    once. The printed line reports the notebook globals `HC` and `NC`, the
    elapsed time of the whole call (training plus the final evaluation) and
    the three accuracies.

    Args:
        total_epoch: Number of training epochs to run.

    Returns:
        Tuple `(loss_epoch, acc_epoch, time_epoch)` of NumPy arrays with one
        entry per epoch: training loss, training accuracy, and the duration
        of the `train` call.
    """
    losses = np.zeros(total_epoch)
    accuracies = np.zeros(total_epoch)
    durations = np.zeros(total_epoch)

    run_start = time()
    for idx in range(total_epoch):
        epoch_start = time()
        losses[idx], accuracies[idx] = train(idx + 1)
        durations[idx] = time() - epoch_start

    train_acc, val_acc, test_acc = test1()
    run_tt = time() - run_start
    print(f'HC: {HC:02d}, NC: {NC:02d},  Time {run_tt:.4f}, Train: {train_acc:.4f}, Val: {val_acc:.4f}, '
      f'Test: {test_acc:.4f}')
    return losses, accuracies, durations

In [4]:
# Sweep configuration: learning rate, epochs per run, and the 1-based index
# of the repetition this cell performs.
LR = 0.003
epochs = 30
rep_time = 1
# Defaults for the width/depth globals; the loops below overwrite them.
HC = 16
NC = 4
HC_list = [16, 32, 128]
NC_list = [4, 6, 8]

# Per-epoch histories indexed as [width, depth, repetition, epoch].
loss_train = np.zeros((len(HC_list), len(NC_list), rep_time, epochs))
acc_train = np.zeros((len(HC_list), len(NC_list), rep_time, epochs))
time_train = np.zeros((len(HC_list), len(NC_list), rep_time, epochs))

# torch.save does not create missing directories, so make sure the
# checkpoint folder exists before the first model is written.
save_dir = osp.join(os.path.abspath(''), 'trained_model', 'PSage')
os.makedirs(save_dir, exist_ok=True)

rep_slot = rep_time - 1  # position of this repetition on the third axis
# HC and NC are (re)bound as globals by the loops: pSAGE reads NC when it is
# constructed and run_gnn prints both.
for hhh, HC in enumerate(HC_list):
    for nnn, NC in enumerate(NC_list):
        # A fresh model and optimizer for every (width, depth) combination.
        model = pSAGE(dataset.num_features, HC, dataset.num_classes).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=LR)
        (loss_train[hhh, nnn, rep_slot, :],
         acc_train[hhh, nnn, rep_slot, :],
         time_train[hhh, nnn, rep_slot, :]) = run_gnn(epochs)
        path = osp.join(save_dir, f'PSAGE_HC{HC}_NC{NC}_EP{epochs}_v{rep_time}.pt')
        # NOTE: this stores the whole module object rather than a state_dict.
        torch.save(model, path)

HC: 16, NC: 04,  Time 129.8561, Train: 0.8933, Val: 0.9012, Test: 0.8998
HC: 16, NC: 06,  Time 124.9941, Train: 0.5424, Val: 0.5408, Test: 0.5275
HC: 16, NC: 08,  Time 120.4273, Train: 0.0764, Val: 0.1569, Test: 0.1560
HC: 32, NC: 04,  Time 120.8244, Train: 0.9180, Val: 0.9242, Test: 0.9241
HC: 32, NC: 06,  Time 119.9794, Train: 0.8179, Val: 0.8324, Test: 0.8303
HC: 32, NC: 08,  Time 122.1780, Train: 0.0522, Val: 0.0509, Test: 0.0446
HC: 128, NC: 04,  Time 124.8450, Train: 0.9600, Val: 0.9545, Test: 0.9555
HC: 128, NC: 06,  Time 149.3690, Train: 0.9252, Val: 0.9284, Test: 0.9296
HC: 128, NC: 08,  Time 186.5160, Train: 0.3682, Val: 0.3662, Test: 0.3635


In [5]:
rep_time = 2

# The history arrays are indexed as [width, depth, repetition, epoch].
# Assigning to `[hhh, nnn, :]` would broadcast the new run over every
# existing repetition slot and overwrite earlier repetitions, so grow the
# repetition axis (zero-filled) until it has a slot for this run and write
# into that slot only.
loss_train, acc_train, time_train = (
    np.pad(hist, ((0, 0), (0, 0), (0, max(0, rep_time - hist.shape[2])), (0, 0)))
    for hist in (loss_train, acc_train, time_train)
)
rep_slot = rep_time - 1

# torch.save does not create missing directories, so make sure the
# checkpoint folder exists before the first model is written.
save_dir = osp.join(os.path.abspath(''), 'trained_model', 'PSage')
os.makedirs(save_dir, exist_ok=True)

# HC and NC are (re)bound as globals by the loops: pSAGE reads NC when it is
# constructed and run_gnn prints both.
for hhh, HC in enumerate(HC_list):
    for nnn, NC in enumerate(NC_list):
        # A fresh model and optimizer for every (width, depth) combination.
        model = pSAGE(dataset.num_features, HC, dataset.num_classes).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=LR)
        (loss_train[hhh, nnn, rep_slot, :],
         acc_train[hhh, nnn, rep_slot, :],
         time_train[hhh, nnn, rep_slot, :]) = run_gnn(epochs)
        path = osp.join(save_dir, f'PSAGE_HC{HC}_NC{NC}_EP{epochs}_v{rep_time}.pt')
        torch.save(model, path)

HC: 16, NC: 04,  Time 123.0118, Train: 0.8946, Val: 0.9032, Test: 0.9012
HC: 16, NC: 06,  Time 120.8202, Train: 0.6949, Val: 0.7066, Test: 0.7035
HC: 16, NC: 08,  Time 119.1872, Train: 0.0802, Val: 0.1966, Test: 0.1984
HC: 32, NC: 04,  Time 119.8309, Train: 0.9191, Val: 0.9248, Test: 0.9251
HC: 32, NC: 06,  Time 119.8482, Train: 0.7355, Val: 0.7047, Test: 0.7108
HC: 32, NC: 08,  Time 114.8595, Train: 0.0549, Val: 0.0527, Test: 0.0513
HC: 128, NC: 04,  Time 113.4160, Train: 0.9606, Val: 0.9554, Test: 0.9570
HC: 128, NC: 06,  Time 145.7305, Train: 0.9213, Val: 0.9287, Test: 0.9277
HC: 128, NC: 08,  Time 185.6597, Train: 0.2295, Val: 0.2190, Test: 0.2152


In [None]:
rep_time = 3

# The history arrays are indexed as [width, depth, repetition, epoch].
# Assigning to `[hhh, nnn, :]` would broadcast the new run over every
# existing repetition slot and overwrite earlier repetitions, so grow the
# repetition axis (zero-filled) until it has a slot for this run and write
# into that slot only.
loss_train, acc_train, time_train = (
    np.pad(hist, ((0, 0), (0, 0), (0, max(0, rep_time - hist.shape[2])), (0, 0)))
    for hist in (loss_train, acc_train, time_train)
)
rep_slot = rep_time - 1

# torch.save does not create missing directories, so make sure the
# checkpoint folder exists before the first model is written.
save_dir = osp.join(os.path.abspath(''), 'trained_model', 'PSage')
os.makedirs(save_dir, exist_ok=True)

# HC and NC are (re)bound as globals by the loops: pSAGE reads NC when it is
# constructed and run_gnn prints both.
for hhh, HC in enumerate(HC_list):
    for nnn, NC in enumerate(NC_list):
        # A fresh model and optimizer for every (width, depth) combination.
        model = pSAGE(dataset.num_features, HC, dataset.num_classes).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=LR)
        (loss_train[hhh, nnn, rep_slot, :],
         acc_train[hhh, nnn, rep_slot, :],
         time_train[hhh, nnn, rep_slot, :]) = run_gnn(epochs)
        path = osp.join(save_dir, f'PSAGE_HC{HC}_NC{NC}_EP{epochs}_v{rep_time}.pt')
        torch.save(model, path)

HC: 16, NC: 04,  Time 113.2835, Train: 0.8979, Val: 0.9071, Test: 0.9058
HC: 16, NC: 06,  Time 114.1406, Train: 0.7560, Val: 0.7834, Test: 0.7735
HC: 16, NC: 08,  Time 113.5848, Train: 0.0497, Val: 0.0399, Test: 0.0445
HC: 32, NC: 04,  Time 111.1936, Train: 0.9166, Val: 0.9230, Test: 0.9230
HC: 32, NC: 06,  Time 113.4288, Train: 0.7038, Val: 0.7011, Test: 0.7027
HC: 32, NC: 08,  Time 113.6899, Train: 0.0671, Val: 0.0566, Test: 0.0528
HC: 128, NC: 04,  Time 116.0682, Train: 0.9602, Val: 0.9562, Test: 0.9557
HC: 128, NC: 06,  Time 148.3772, Train: 0.9201, Val: 0.9255, Test: 0.9247


In [5]:
#path = osp.join(os.path.abspath(''), 'trained_model', 'PSage')
#np.save(path+'/loss_train.npy',loss_train)
#np.save(path+'/acc_train.npy',acc_train)