In [1]:

import networkx as nx
from networkx import ego_graph

import torch.optim as optim
import argparse
import numpy as np
import torch
import torch.nn.functional as F

import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, SAGEConv

from ogb.nodeproppred import PygNodePropPredDataset, Evaluator

#from logger import Logger
from torch_geometric.datasets import TUDataset
from torch_geometric.datasets import Planetoid
from torch_geometric.loader import DataLoader

In [2]:
dataset = Planetoid(root='/tmp/PubMed', name='PubMed',transform=T.ToSparseTensor())
data = dataset[0]
#data.adj_t = data.adj_t.to_symmetric()
#data.adj_t = data.adj_t.to_symmetric()
print(data)
#split_idx = dataset.get_idx_split()
#train_idx = split_idx['train'].to(device)

Data(x=[19717, 500], y=[19717], train_mask=[19717], val_mask=[19717], test_mask=[19717], adj_t=[19717, 19717, nnz=88648])


In [3]:
train_index = np.where(data.train_mask)[0]
print(len(train_index))
valid_index = np.where(data.val_mask)[0]
print(len(valid_index))
test_index = np.where(data.test_mask)[0]
print(len(test_index))

60
500
1000


# GSAGE

In [3]:
import torch


class Logger(object):
    """Accumulates per-epoch (train, valid, test) triples for several runs and prints summaries.

    Results are multiplied by 100 when printed, so callers are expected to log
    fractions rather than percentages.
    """

    # Row labels of the across-run summary, in column order of the summary table.
    _SUMMARY_LABELS = ('Highest Train', 'Highest Valid', '  Final Train', '   Final Test')

    def __init__(self, runs, info=None):
        # `info` is only stored; nothing in this class reads it back.
        self.info = info
        # One independent history list per run.
        self.results = [[] for _ in range(runs)]

    def add_result(self, run, result):
        """Append one 3-element result to the history of run index `run` (0-based)."""
        assert len(result) == 3
        assert 0 <= run < len(self.results)
        self.results[run].append(result)

    def print_statistics(self, run=None):
        """Print the summary of a single run, or of all runs when `run` is None.

        "Final" train/test values are those of the epoch with the highest
        validation score (first such epoch, as returned by ``argmax``).
        """
        if run is None:
            self._print_all_runs()
        else:
            self._print_single_run(run)

    def _print_single_run(self, run):
        # Rows are epochs; columns are train, valid, test.
        scores = 100 * torch.tensor(self.results[run])
        best_epoch = scores[:, 1].argmax().item()
        report = (
            f'Run {run + 1:02d}:',
            f'Highest Train: {scores[:, 0].max():.2f}',
            f'Highest Valid: {scores[:, 1].max():.2f}',
            f'  Final Train: {scores[best_epoch, 0]:.2f}',
            f'   Final Test: {scores[best_epoch, 2]:.2f}',
        )
        for line in report:
            print(line)

    def _print_all_runs(self):
        # Requires every run to hold the same number of epochs so the nested
        # lists form a rectangular tensor.
        scores = 100 * torch.tensor(self.results)

        per_run_best = []
        for run_scores in scores:
            best_epoch = run_scores[:, 1].argmax()
            per_run_best.append((
                run_scores[:, 0].max().item(),
                run_scores[:, 1].max().item(),
                run_scores[best_epoch, 0].item(),
                run_scores[best_epoch, 2].item(),
            ))

        summary = torch.tensor(per_run_best)

        print('All runs:')
        for column, label in enumerate(self._SUMMARY_LABELS):
            values = summary[:, column]
            print(f'{label}: {values.mean():.2f} ± {values.std():.2f}')

In [145]:
class SAGE(torch.nn.Module):
    """Stack of SAGEConv layers that outputs per-node log-probabilities.

    Every layer except the last is followed by BatchNorm1d, ReLU and dropout.
    The stack always contains at least two SAGEConv layers: the input layer,
    ``num_layers - 2`` hidden layers, and the output layer.

    `heads` is stored on the instance but not used by this class.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout,heads):
        super().__init__()

        self.convs = torch.nn.ModuleList()
        self.bns = torch.nn.ModuleList()

        # Input widths of all layers that are followed by a batch norm:
        # the input layer first, then the hidden-to-hidden layers.
        normed_layer_inputs = [in_channels] + [hidden_channels] * (num_layers - 2)
        for width_in in normed_layer_inputs:
            self.convs.append(SAGEConv(width_in, hidden_channels))
            self.bns.append(torch.nn.BatchNorm1d(hidden_channels))

        # Output layer: no batch norm / activation after it.
        self.convs.append(SAGEConv(hidden_channels, out_channels))

        self.dropout = dropout
        self.heads=heads

    def reset_parameters(self):
        """Re-initialise every convolution and every batch-norm layer."""
        for layer in list(self.convs) + list(self.bns):
            layer.reset_parameters()

    def forward(self, x, adj_t):
        """Apply the layer stack to `x` using adjacency `adj_t`; returns log-softmax over the last dim."""
        # self.bns has exactly one entry per non-final convolution.
        for conv, norm in zip(self.convs[:-1], self.bns):
            x = F.relu(norm(conv(x, adj_t)))
            x = F.dropout(x, p=self.dropout, training=self.training)
        logits = self.convs[-1](x, adj_t)
        return logits.log_softmax(dim=-1)


def train(model, data, train_idx, optimizer):
    """Run one full-batch optimisation step and return the loss as a Python float.

    The model is evaluated on all of ``data.x`` / ``data.adj_t``; only the rows
    selected by `train_idx` contribute to the NLL loss, so `model` is expected
    to return log-probabilities.
    """
    model.train()
    optimizer.zero_grad()

    # Forward over every node, then keep just the training rows.
    train_log_probs = model(data.x, data.adj_t)[train_idx]
    train_targets = data.y.squeeze()[train_idx]

    loss = F.nll_loss(train_log_probs, train_targets)
    loss.backward()
    optimizer.step()

    return loss.item()


def ACC(Prediction, Label):
    """Return the fraction of positions where flattened `Prediction` equals `Label`.

    `Prediction` is flattened with ``view(-1)`` before the element-wise
    comparison; the denominator is ``len(Label)``.
    """
    matches = Prediction.view(-1).eq(Label)
    return matches.sum().item() / len(Label)

@torch.no_grad()
def test(model, data, train_idx,valid_idx,test_idx):
    """Evaluate `model` on the whole graph and return (train, valid, test) accuracy.

    The predicted class of each node is the argmax over the last dimension of
    the model output; accuracy per split is computed by ``ACC``.
    """
    model.eval()

    scores = model(data.x, data.adj_t)
    predicted = scores.argmax(dim=-1, keepdim=True).view(-1)

    # Same argument order as before: labels first, predictions second.
    return tuple(
        ACC(data.y[split], predicted[split])
        for split in (train_idx, valid_idx, test_idx)
    )

class objectview(object):
    """Expose the keys of a dict as attributes (``view.lr`` instead of ``d['lr']``).

    The instance uses `d` itself as its ``__dict__``, so later changes to the
    dict are visible through the view and vice versa.
    """

    def __init__(self, d):
        self.__dict__ = d

    def __repr__(self):
        # Show the wrapped settings instead of the default
        # "<__main__.objectview object at 0x...>", which hides the configuration.
        fields = ', '.join(f'{key}={value!r}' for key, value in self.__dict__.items())
        return f'{type(self).__name__}({fields})'

In [10]:
def main(seed=None):
    """Train and evaluate the SAGE model on PubMed over several independent runs.

    Args:
        seed: optional integer passed to ``torch.manual_seed`` before the first
            run so that weight initialisation and dropout are repeatable.
            ``None`` (the default) leaves the global RNG untouched.

    Prints the configuration, per-epoch metrics (every ``log_steps`` epochs),
    a summary after each run and a summary across all runs.
    """
    # Only hidden_channels, num_layers, dropout, heads, runs, lr, epochs and
    # log_steps are read below. The other keys are informational; in particular
    # weight_decay is NOT passed to the optimizer.
    args={'model_type': 'SAGE', 'dataset': 'PubMed', 'num_layers': 2, 'heads': 1, 
         'batch_size': 32, 'hidden_channels': 16, 'dropout': 0.5, 'epochs': 100, 
         'opt': 'adam', 'opt_scheduler': 'none', 'opt_restart': 0,'runs':10, 'log_steps':1,
         'weight_decay': 5e-4, 'lr': 0.01}

    args = objectview(args)
    # Print the settings themselves; printing the wrapper only shows its address.
    print(vars(args))

    if seed is not None:
        torch.manual_seed(seed)

    # Load the dataset with x, y, train/val/test masks and the sparse adjacency.
    # Extra features can be supplied by replacing data.x with a new feature tensor.
    dataset = Planetoid(root='/tmp/PubMed', name='PubMed',transform=T.ToSparseTensor())
    data = dataset[0]
    data.adj_t = data.adj_t.to_symmetric()
    
    # Boolean masks -> integer index arrays for each split.
    train_idx = np.where(data.train_mask)[0]
    valid_idx = np.where(data.val_mask)[0]
    test_idx = np.where(data.test_mask)[0]
    
    model = SAGE(data.num_features, args.hidden_channels,dataset.num_classes, args.num_layers,
                    args.dropout,args.heads)

    logger = Logger(args.runs, args)

    for run in range(args.runs):
        # Fresh weights and a fresh optimizer for every run.
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        for epoch in range(1, 1 + args.epochs):
            loss = train(model, data, train_idx, optimizer)
            result = test(model, data, train_idx,valid_idx,test_idx)
            logger.add_result(run, result)

            if epoch % args.log_steps == 0:
                train_acc, valid_acc, test_acc = result
                print(f'Run: {run + 1:02d}, '
                      f'Epoch: {epoch:02d}, '
                      f'Loss: {loss:.4f}, '
                      f'Train: {100 * train_acc:.2f}%, '
                      f'Valid: {100 * valid_acc:.2f}% '
                      f'Test: {100 * test_acc:.2f}%')

        logger.print_statistics(run)
    logger.print_statistics()


if __name__ == "__main__":
    main()

<__main__.objectview object at 0x15dff41f0>
Run: 01, Epoch: 01, Loss: 1.1315, Train: 61.67%, Valid: 58.20% Test: 57.10%
Run: 01, Epoch: 02, Loss: 0.5678, Train: 63.33%, Valid: 58.80% Test: 57.10%
Run: 01, Epoch: 03, Loss: 0.4080, Train: 65.00%, Valid: 61.00% Test: 59.40%
Run: 01, Epoch: 04, Loss: 0.3180, Train: 66.67%, Valid: 63.20% Test: 61.10%
Run: 01, Epoch: 05, Loss: 0.2252, Train: 71.67%, Valid: 62.80% Test: 62.40%
Run: 01, Epoch: 06, Loss: 0.2026, Train: 83.33%, Valid: 62.60% Test: 63.30%
Run: 01, Epoch: 07, Loss: 0.1549, Train: 90.00%, Valid: 64.00% Test: 64.80%
Run: 01, Epoch: 08, Loss: 0.1158, Train: 95.00%, Valid: 65.60% Test: 66.80%
Run: 01, Epoch: 09, Loss: 0.1462, Train: 96.67%, Valid: 69.40% Test: 68.20%
Run: 01, Epoch: 10, Loss: 0.0747, Train: 96.67%, Valid: 70.00% Test: 69.40%
Run: 01, Epoch: 11, Loss: 0.0925, Train: 98.33%, Valid: 71.00% Test: 71.00%
Run: 01, Epoch: 12, Loss: 0.0722, Train: 98.33%, Valid: 71.60% Test: 71.80%
Run: 01, Epoch: 13, Loss: 0.0627, Train: 98.

Run: 02, Epoch: 06, Loss: 0.2346, Train: 95.00%, Valid: 58.00% Test: 55.40%
Run: 02, Epoch: 07, Loss: 0.1690, Train: 98.33%, Valid: 65.60% Test: 65.90%
Run: 02, Epoch: 08, Loss: 0.1183, Train: 98.33%, Valid: 70.60% Test: 70.50%
Run: 02, Epoch: 09, Loss: 0.0918, Train: 98.33%, Valid: 71.80% Test: 72.00%
Run: 02, Epoch: 10, Loss: 0.1222, Train: 98.33%, Valid: 72.80% Test: 73.30%
Run: 02, Epoch: 11, Loss: 0.0967, Train: 100.00%, Valid: 73.20% Test: 73.50%
Run: 02, Epoch: 12, Loss: 0.0640, Train: 100.00%, Valid: 73.60% Test: 73.40%
Run: 02, Epoch: 13, Loss: 0.0446, Train: 100.00%, Valid: 73.60% Test: 73.30%
Run: 02, Epoch: 14, Loss: 0.0502, Train: 100.00%, Valid: 73.40% Test: 73.80%
Run: 02, Epoch: 15, Loss: 0.0519, Train: 100.00%, Valid: 73.20% Test: 73.50%
Run: 02, Epoch: 16, Loss: 0.0323, Train: 100.00%, Valid: 72.60% Test: 73.50%
Run: 02, Epoch: 17, Loss: 0.0314, Train: 100.00%, Valid: 72.60% Test: 73.40%
Run: 02, Epoch: 18, Loss: 0.0248, Train: 100.00%, Valid: 72.60% Test: 73.70%
Run:

Run: 03, Epoch: 12, Loss: 0.0844, Train: 93.33%, Valid: 54.60% Test: 53.00%
Run: 03, Epoch: 13, Loss: 0.0569, Train: 96.67%, Valid: 56.60% Test: 53.70%
Run: 03, Epoch: 14, Loss: 0.0391, Train: 98.33%, Valid: 58.00% Test: 55.00%
Run: 03, Epoch: 15, Loss: 0.0565, Train: 100.00%, Valid: 59.80% Test: 56.90%
Run: 03, Epoch: 16, Loss: 0.0268, Train: 100.00%, Valid: 61.60% Test: 58.70%
Run: 03, Epoch: 17, Loss: 0.0573, Train: 100.00%, Valid: 62.60% Test: 60.00%
Run: 03, Epoch: 18, Loss: 0.0449, Train: 100.00%, Valid: 63.40% Test: 61.30%
Run: 03, Epoch: 19, Loss: 0.0163, Train: 100.00%, Valid: 64.60% Test: 62.70%
Run: 03, Epoch: 20, Loss: 0.0192, Train: 100.00%, Valid: 65.40% Test: 63.70%
Run: 03, Epoch: 21, Loss: 0.0303, Train: 100.00%, Valid: 67.60% Test: 65.40%
Run: 03, Epoch: 22, Loss: 0.0151, Train: 100.00%, Valid: 68.20% Test: 66.60%
Run: 03, Epoch: 23, Loss: 0.0384, Train: 100.00%, Valid: 69.00% Test: 68.50%
Run: 03, Epoch: 24, Loss: 0.0516, Train: 100.00%, Valid: 69.40% Test: 68.90%
Ru

Run: 04, Epoch: 18, Loss: 0.0556, Train: 100.00%, Valid: 73.20% Test: 73.10%
Run: 04, Epoch: 19, Loss: 0.0317, Train: 100.00%, Valid: 73.80% Test: 72.90%
Run: 04, Epoch: 20, Loss: 0.0481, Train: 100.00%, Valid: 74.20% Test: 73.10%
Run: 04, Epoch: 21, Loss: 0.0319, Train: 100.00%, Valid: 73.80% Test: 73.40%
Run: 04, Epoch: 22, Loss: 0.0314, Train: 100.00%, Valid: 73.40% Test: 73.60%
Run: 04, Epoch: 23, Loss: 0.0210, Train: 100.00%, Valid: 73.60% Test: 73.10%
Run: 04, Epoch: 24, Loss: 0.0124, Train: 100.00%, Valid: 73.40% Test: 73.00%
Run: 04, Epoch: 25, Loss: 0.0244, Train: 100.00%, Valid: 73.80% Test: 72.70%
Run: 04, Epoch: 26, Loss: 0.0063, Train: 100.00%, Valid: 73.60% Test: 72.50%
Run: 04, Epoch: 27, Loss: 0.0123, Train: 100.00%, Valid: 73.80% Test: 72.20%
Run: 04, Epoch: 28, Loss: 0.0161, Train: 100.00%, Valid: 74.20% Test: 72.10%
Run: 04, Epoch: 29, Loss: 0.0155, Train: 100.00%, Valid: 74.20% Test: 71.70%
Run: 04, Epoch: 30, Loss: 0.0242, Train: 100.00%, Valid: 74.40% Test: 71.30%

Run: 05, Epoch: 24, Loss: 0.0144, Train: 100.00%, Valid: 67.40% Test: 68.50%
Run: 05, Epoch: 25, Loss: 0.0203, Train: 100.00%, Valid: 67.60% Test: 68.80%
Run: 05, Epoch: 26, Loss: 0.0191, Train: 100.00%, Valid: 68.40% Test: 69.10%
Run: 05, Epoch: 27, Loss: 0.0168, Train: 100.00%, Valid: 69.40% Test: 68.90%
Run: 05, Epoch: 28, Loss: 0.0070, Train: 100.00%, Valid: 69.60% Test: 68.90%
Run: 05, Epoch: 29, Loss: 0.0067, Train: 100.00%, Valid: 69.60% Test: 69.10%
Run: 05, Epoch: 30, Loss: 0.0040, Train: 100.00%, Valid: 70.80% Test: 70.00%
Run: 05, Epoch: 31, Loss: 0.0258, Train: 100.00%, Valid: 71.20% Test: 69.80%
Run: 05, Epoch: 32, Loss: 0.0159, Train: 100.00%, Valid: 71.00% Test: 69.90%
Run: 05, Epoch: 33, Loss: 0.0092, Train: 100.00%, Valid: 71.40% Test: 70.00%
Run: 05, Epoch: 34, Loss: 0.0082, Train: 100.00%, Valid: 70.60% Test: 69.90%
Run: 05, Epoch: 35, Loss: 0.0040, Train: 100.00%, Valid: 70.80% Test: 70.20%
Run: 05, Epoch: 36, Loss: 0.0116, Train: 100.00%, Valid: 71.40% Test: 70.70%

Run: 06, Epoch: 30, Loss: 0.0493, Train: 100.00%, Valid: 63.00% Test: 61.40%
Run: 06, Epoch: 31, Loss: 0.0169, Train: 100.00%, Valid: 62.80% Test: 61.80%
Run: 06, Epoch: 32, Loss: 0.0224, Train: 100.00%, Valid: 62.80% Test: 62.30%
Run: 06, Epoch: 33, Loss: 0.0057, Train: 100.00%, Valid: 63.60% Test: 62.90%
Run: 06, Epoch: 34, Loss: 0.0261, Train: 100.00%, Valid: 64.20% Test: 63.20%
Run: 06, Epoch: 35, Loss: 0.0184, Train: 100.00%, Valid: 64.60% Test: 63.50%
Run: 06, Epoch: 36, Loss: 0.0173, Train: 100.00%, Valid: 64.40% Test: 63.80%
Run: 06, Epoch: 37, Loss: 0.0064, Train: 100.00%, Valid: 64.60% Test: 64.00%
Run: 06, Epoch: 38, Loss: 0.0054, Train: 100.00%, Valid: 66.00% Test: 64.80%
Run: 06, Epoch: 39, Loss: 0.0044, Train: 100.00%, Valid: 66.40% Test: 65.30%
Run: 06, Epoch: 40, Loss: 0.0135, Train: 100.00%, Valid: 66.60% Test: 65.90%
Run: 06, Epoch: 41, Loss: 0.0033, Train: 100.00%, Valid: 67.00% Test: 65.90%
Run: 06, Epoch: 42, Loss: 0.0299, Train: 100.00%, Valid: 67.60% Test: 66.00%

Run: 07, Epoch: 36, Loss: 0.0110, Train: 100.00%, Valid: 74.80% Test: 71.80%
Run: 07, Epoch: 37, Loss: 0.0213, Train: 100.00%, Valid: 74.40% Test: 71.90%
Run: 07, Epoch: 38, Loss: 0.0275, Train: 100.00%, Valid: 74.20% Test: 71.90%
Run: 07, Epoch: 39, Loss: 0.0110, Train: 100.00%, Valid: 74.60% Test: 71.80%
Run: 07, Epoch: 40, Loss: 0.0345, Train: 100.00%, Valid: 74.60% Test: 71.50%
Run: 07, Epoch: 41, Loss: 0.0029, Train: 100.00%, Valid: 74.40% Test: 71.60%
Run: 07, Epoch: 42, Loss: 0.0065, Train: 100.00%, Valid: 74.60% Test: 71.50%
Run: 07, Epoch: 43, Loss: 0.0012, Train: 100.00%, Valid: 74.40% Test: 71.30%
Run: 07, Epoch: 44, Loss: 0.0057, Train: 100.00%, Valid: 74.20% Test: 71.40%
Run: 07, Epoch: 45, Loss: 0.0015, Train: 100.00%, Valid: 73.80% Test: 71.00%
Run: 07, Epoch: 46, Loss: 0.0218, Train: 100.00%, Valid: 74.00% Test: 70.90%
Run: 07, Epoch: 47, Loss: 0.0022, Train: 100.00%, Valid: 74.00% Test: 70.80%
Run: 07, Epoch: 48, Loss: 0.0093, Train: 100.00%, Valid: 74.00% Test: 70.90%

Run: 08, Epoch: 42, Loss: 0.0024, Train: 100.00%, Valid: 69.00% Test: 69.60%
Run: 08, Epoch: 43, Loss: 0.0388, Train: 100.00%, Valid: 69.00% Test: 69.80%
Run: 08, Epoch: 44, Loss: 0.0067, Train: 100.00%, Valid: 69.20% Test: 69.90%
Run: 08, Epoch: 45, Loss: 0.0108, Train: 100.00%, Valid: 69.60% Test: 70.00%
Run: 08, Epoch: 46, Loss: 0.0219, Train: 100.00%, Valid: 69.60% Test: 70.10%
Run: 08, Epoch: 47, Loss: 0.0219, Train: 100.00%, Valid: 69.60% Test: 70.00%
Run: 08, Epoch: 48, Loss: 0.0018, Train: 100.00%, Valid: 69.60% Test: 70.00%
Run: 08, Epoch: 49, Loss: 0.0064, Train: 100.00%, Valid: 69.80% Test: 69.90%
Run: 08, Epoch: 50, Loss: 0.0093, Train: 100.00%, Valid: 70.20% Test: 70.40%
Run: 08, Epoch: 51, Loss: 0.0135, Train: 100.00%, Valid: 70.00% Test: 70.60%
Run: 08, Epoch: 52, Loss: 0.0060, Train: 100.00%, Valid: 69.80% Test: 70.70%
Run: 08, Epoch: 53, Loss: 0.0030, Train: 100.00%, Valid: 69.80% Test: 71.00%
Run: 08, Epoch: 54, Loss: 0.0022, Train: 100.00%, Valid: 70.20% Test: 71.20%

Run: 09, Epoch: 48, Loss: 0.0018, Train: 100.00%, Valid: 73.00% Test: 71.30%
Run: 09, Epoch: 49, Loss: 0.0300, Train: 100.00%, Valid: 73.20% Test: 71.30%
Run: 09, Epoch: 50, Loss: 0.0031, Train: 100.00%, Valid: 73.40% Test: 71.40%
Run: 09, Epoch: 51, Loss: 0.0094, Train: 100.00%, Valid: 73.40% Test: 71.40%
Run: 09, Epoch: 52, Loss: 0.0039, Train: 100.00%, Valid: 73.40% Test: 71.30%
Run: 09, Epoch: 53, Loss: 0.0066, Train: 100.00%, Valid: 73.40% Test: 71.30%
Run: 09, Epoch: 54, Loss: 0.0022, Train: 100.00%, Valid: 73.60% Test: 71.50%
Run: 09, Epoch: 55, Loss: 0.0049, Train: 100.00%, Valid: 73.60% Test: 71.40%
Run: 09, Epoch: 56, Loss: 0.0086, Train: 100.00%, Valid: 73.60% Test: 71.50%
Run: 09, Epoch: 57, Loss: 0.0054, Train: 100.00%, Valid: 73.60% Test: 71.40%
Run: 09, Epoch: 58, Loss: 0.0086, Train: 100.00%, Valid: 73.60% Test: 71.30%
Run: 09, Epoch: 59, Loss: 0.0100, Train: 100.00%, Valid: 73.40% Test: 71.30%
Run: 09, Epoch: 60, Loss: 0.0209, Train: 100.00%, Valid: 73.40% Test: 71.30%

Run: 10, Epoch: 55, Loss: 0.0057, Train: 100.00%, Valid: 68.60% Test: 67.50%
Run: 10, Epoch: 56, Loss: 0.0145, Train: 100.00%, Valid: 69.00% Test: 67.40%
Run: 10, Epoch: 57, Loss: 0.0018, Train: 100.00%, Valid: 69.00% Test: 67.20%
Run: 10, Epoch: 58, Loss: 0.0185, Train: 100.00%, Valid: 69.40% Test: 67.40%
Run: 10, Epoch: 59, Loss: 0.0466, Train: 100.00%, Valid: 69.20% Test: 67.20%
Run: 10, Epoch: 60, Loss: 0.0043, Train: 100.00%, Valid: 69.00% Test: 67.10%
Run: 10, Epoch: 61, Loss: 0.0020, Train: 100.00%, Valid: 69.00% Test: 67.20%
Run: 10, Epoch: 62, Loss: 0.0105, Train: 100.00%, Valid: 69.00% Test: 67.10%
Run: 10, Epoch: 63, Loss: 0.0113, Train: 100.00%, Valid: 69.00% Test: 67.30%
Run: 10, Epoch: 64, Loss: 0.0022, Train: 100.00%, Valid: 68.80% Test: 67.40%
Run: 10, Epoch: 65, Loss: 0.0319, Train: 100.00%, Valid: 68.80% Test: 67.30%
Run: 10, Epoch: 66, Loss: 0.0473, Train: 100.00%, Valid: 69.20% Test: 67.20%
Run: 10, Epoch: 67, Loss: 0.0031, Train: 100.00%, Valid: 69.00% Test: 67.10%

# WISE EMBEDDING

In [63]:
dataset = Planetoid(root='/tmp/PubMed', name='PubMed',transform=T.ToSparseTensor())
data = dataset[0]
print(data)

Data(x=[19717, 500], y=[19717], train_mask=[19717], val_mask=[19717], test_mask=[19717], adj_t=[19717, 19717, nnz=88648])


In [64]:
import pandas as pd
Domain_Fec=pd.DataFrame(data.x.numpy())
#label=pd.DataFrame(data.y.numpy(),columns =['class'])
#Data=pd.concat([Domain_Fec,label], axis=1)
Domain_Fec.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,490,491,492,493,494,495,496,497,498,499
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004999,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016434,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.104636,0.0,0.0,0.0,0.035178,0.0,0.0,0.019555,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.008582,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.007356,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [65]:
Domain_Fec[Domain_Fec != 0] = 1.0

In [66]:
label=pd.DataFrame(data.y.numpy(),columns =['class'])
Data=pd.concat([Domain_Fec,label], axis=1)
Data.head(20)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,491,492,493,494,495,496,497,498,499,class
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
2,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
5,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
7,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
9,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1


In [67]:
Number_nodes=len(data.y)
fe_len=len(data.x[0])
catagories=Data['class'].to_numpy()
# One frame of binarised features per class label 0..max(label), label column removed.
data_by_class = {cls: Data.loc[Data['class'] == cls].drop(['class'], axis=1) for cls in range(max(catagories) + 1)}
# Per class: column-wise maximum of the features (vectorised instead of one
# Python-level max() per column).
basis = [df.max(axis=0).tolist() for df in data_by_class.values()]
# Per class: 1 for each feature whose count of entries equal to 1 reaches
# int(10% of the class's row count), otherwise 0.
sel_basis = [((df == 1).sum(axis=0) >= int(len(df.index) * 0.1)).astype(int).tolist()
             for df in data_by_class.values()]
feature_names = list(range(fe_len))

In [92]:
dataset = Planetoid(root='/tmp/PubMed', name='PubMed',transform=T.ToSparseTensor())
data = dataset[0]
Domain=pd.DataFrame(data.x.numpy())
#label=pd.DataFrame(data.y.numpy(),columns =['class'])
Domain=pd.concat([Domain,label], axis=1)
Domain.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,491,492,493,494,495,496,497,498,499,class
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004999,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016434,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
2,0.104636,0.0,0.0,0.0,0.035178,0.0,0.0,0.019555,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.008582,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.007356,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1


In [93]:
Number_nodes=len(data.y)
fe_len=len(data.x[0])
catagories=data.y.numpy()
ddata_by_class = {cls: Domain.loc[Domain['class'] == cls].drop(['class'], axis=1) for cls in range(max(catagories) + 1)}

In [94]:
import numpy as np
def Average(lst):
    """Return the arithmetic mean of `lst`, computed with ``np.average``."""
    return np.average(lst)

# Per class: the mean of every feature column over the rows of that class.
# Note that this rebinds `sel_basis`.
sel_basis = [
    [Average(list(class_frame[column].to_numpy())) for column in range(len(class_frame.columns))]
    for class_frame in ddata_by_class.values()
]

In [95]:
feature_names = [ii for ii in range(fe_len)]

# Loop-invariant work, done once instead of once per node:
#  - the feature columns as a float64 matrix (replaces a pandas .loc lookup per row)
#  - one reference vector per class label 0..max(label)
node_features = Domain[feature_names].to_numpy(dtype=np.float64)
class_vectors = [np.array(sel_basis[j]) for j in range(max(catagories)+1)]
# Denominator of the progress percentage; clamped so a single node does not divide by zero.
progress_den = max(Number_nodes - 1, 1)

# Euc_Fec[i][j] = Euclidean distance between node i's features and class j's reference vector.
Euc_Fec=[]
for i in range(Number_nodes):
    print("\rProcessing file {} ({}%)".format(i, 100*i//progress_den), end='', flush=True)
    node_vector = node_features[i]
    Euc_Fec.append([np.linalg.norm(node_vector - class_vector) for class_vector in class_vectors])

Processing file 19716 (100%)

In [96]:
print(Euc_Fec[1])

[0.2859691472918565, 0.2451948567242483, 0.26428544455036057]


In [102]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
# NOTE(review): `SFec` is not assigned in any cell visible in this notebook, so on a
# fresh kernel this line raises NameError unless it is defined elsewhere — confirm
# where it is supposed to come from.
S_Fec=scaler.fit_transform(SFec)

In [143]:
# Tensor versions of the hand-built feature sets.
Euc_fe=torch.tensor(Euc_Fec)
# NOTE(review): `Fec` is not assigned in any cell visible in this notebook — confirm its source.
Inc_fe=torch.tensor(Fec)
sel_fe=torch.tensor(S_Fec)
# Inc_fe and sel_fe are not used by the active lines below; sel_fe appears only in
# the commented-out concatenation.
#CC_domain=torch.cat((sel_fe,Euc_fe), 1).float()
#topo_fe=torch.cat((topo_betti0,topo_betti1),1)
# Node features used downstream: the per-class Euclidean distances only, as float32.
CC_domain=torch.tensor(Euc_Fec).float()
print(CC_domain)
CC_domain.type()

tensor([[0.2535, 0.2234, 0.2358],
        [0.2860, 0.2452, 0.2643],
        [0.2814, 0.3077, 0.3154],
        ...,
        [0.3384, 0.3121, 0.3134],
        [0.4211, 0.4479, 0.4445],
        [0.3380, 0.3139, 0.3143]])


'torch.FloatTensor'

# W-GSAGE

In [144]:
data.x=CC_domain
print(data)

Data(x=[19717, 3], y=[19717], train_mask=[19717], val_mask=[19717], test_mask=[19717], adj_t=[19717, 19717, nnz=88648], topo=[19717, 42])


In [147]:
def main(seed=None):
    """Train and evaluate the SAGE model on the notebook-level `data` over several runs.

    Unlike a self-contained entry point, this function reads the module-level
    `data` and `dataset` objects (it does not load them) and replaces
    ``data.adj_t`` with its symmetrised version in place.

    Args:
        seed: optional integer passed to ``torch.manual_seed`` before the first
            run so that weight initialisation and dropout are repeatable.
            ``None`` (the default) leaves the global RNG untouched.

    Prints the configuration, a summary after each run and a summary across
    all runs; per-epoch metrics are recorded but not printed.
    """
    # Only hidden_channels, num_layers, dropout, heads, runs, lr and epochs are
    # read below. The other keys are informational; in particular weight_decay
    # is NOT passed to the optimizer.
    args={'model_type': 'SAGE', 'dataset': 'PubMed', 'num_layers': 2, 'heads': 8, 
         'batch_size': 32, 'hidden_channels': 16, 'dropout': 0.5, 'epochs': 200, 
         'opt': 'adam', 'opt_scheduler': 'none', 'opt_restart': 0,'runs':10, 'log_steps':1,
         'weight_decay': 5e-4, 'lr': 0.01}

    args = objectview(args)
    # Print the settings themselves; printing the wrapper only shows its address.
    print(vars(args))

    if seed is not None:
        torch.manual_seed(seed)

    # Uses the notebook-level `data`, whose x has been replaced by custom features.
    data.adj_t = data.adj_t.to_symmetric()
    
    # Boolean masks -> integer index arrays for each split.
    train_idx = np.where(data.train_mask)[0]
    valid_idx = np.where(data.val_mask)[0]
    test_idx = np.where(data.test_mask)[0]
    
    model = SAGE(data.num_features, args.hidden_channels,
                    dataset.num_classes, args.num_layers,
                    args.dropout, args.heads)

    logger = Logger(args.runs, args)

    for run in range(args.runs):
        # Fresh weights and a fresh optimizer for every run.
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        for epoch in range(1, 1 + args.epochs):
            loss = train(model, data, train_idx, optimizer)
            result = test(model, data, train_idx,valid_idx,test_idx)
            logger.add_result(run, result)

        logger.print_statistics(run)
    logger.print_statistics()


if __name__ == "__main__":
    main()

<__main__.objectview object at 0x15e475510>
Run 01:
Highest Train: 95.00
Highest Valid: 81.80
  Final Train: 90.00
   Final Test: 77.30
Run 02:
Highest Train: 91.67
Highest Valid: 78.20
  Final Train: 86.67
   Final Test: 74.30
Run 03:
Highest Train: 90.00
Highest Valid: 76.80
  Final Train: 90.00
   Final Test: 73.50
Run 04:
Highest Train: 90.00
Highest Valid: 76.00
  Final Train: 83.33
   Final Test: 73.60
Run 05:
Highest Train: 91.67
Highest Valid: 78.00
  Final Train: 91.67
   Final Test: 72.70
Run 06:
Highest Train: 80.00
Highest Valid: 68.20
  Final Train: 80.00
   Final Test: 66.40
Run 07:
Highest Train: 91.67
Highest Valid: 80.40
  Final Train: 86.67
   Final Test: 76.70
Run 08:
Highest Train: 93.33
Highest Valid: 75.20
  Final Train: 83.33
   Final Test: 73.10
Run 09:
Highest Train: 93.33
Highest Valid: 77.60
  Final Train: 81.67
   Final Test: 74.40
Run 10:
Highest Train: 88.33
Highest Valid: 74.40
  Final Train: 86.67
   Final Test: 70.20
All runs:
Highest Train: 90.50 ± 4.1

# TOPOLOGICAL ENCODING 

In [111]:
dataset = Planetoid(root='/tmp/PubMed', name='PubMed')
data = dataset[0]
print(data)

Data(x=[19717, 500], edge_index=[2, 88648], y=[19717], train_mask=[19717], val_mask=[19717], test_mask=[19717])


In [112]:
print(data.edge_index.numpy())

[[ 1378  1544  6092 ... 12278  4284 16030]
 [    0     0     0 ... 19714 19715 19716]]


In [None]:
Edge_idx=data.edge_index.numpy()
Node=range(Number_nodes)
# Pair row 0 with row 1 of the edge index, column by column, into 2-tuples.
Edgelist=[(first, second) for first, second in zip(Edge_idx[0], Edge_idx[1])]

In [None]:
# Directed graph: every 2-tuple in Edgelist is added as one directed edge.
G = nx.DiGraph()

# Register all node ids 0..Number_nodes-1 up front, independently of the edge list.
G.add_nodes_from(Node)

# Add all edges in a single call; each entry of Edgelist is a 2-tuple of node ids.
G.add_edges_from(Edgelist)


In [None]:
print(G.number_of_edges())

In [None]:
def Topological_Feature_subLevel(adj,filtration_fun, Filtration):
    """Betti-0 and Betti-1 numbers of the sub-level sets of a node function.

    For every threshold in `Filtration`, the nodes whose `filtration_fun` value
    is <= the threshold are kept, `adj` is restricted to those nodes, and the
    restricted matrix is passed to ``pyflagser.flagser_unweighted``. The first
    two entries of the returned ``"betti"`` list are recorded; a threshold
    with no active node records 0 for both.

    Returns:
        (betti_0, betti_1): two lists with one entry per threshold.
    """
    node_values = np.array(filtration_fun)
    betti_0, betti_1 = [], []
    for threshold in Filtration:
        active_nodes = np.unique(np.where(node_values <= threshold)[0])
        if len(active_nodes) == 0:
            betti_0.append(0)
            betti_1.append(0)
            continue
        # Restrict rows first, then columns, to the active nodes.
        sub_adj = adj[active_nodes, :][:, active_nodes]
        flag_result = pyflagser.flagser_unweighted(sub_adj, min_dimension=0, max_dimension=2, directed=False, coeff=2, approximation=None)
        betti_numbers = flag_result["betti"]
        betti_0.append(betti_numbers[0])
        betti_1.append(betti_numbers[1])
    return betti_0, betti_1

In [None]:
def Degree_list(Graph):
    """Return the degree of every node of `Graph`, in ``Graph.nodes`` order, as a NumPy array."""
    return np.array(list(map(Graph.degree, Graph.nodes)))

In [None]:
degree_list=Degree_list(G)
# One pass: sorted distinct degrees together with how many nodes have each degree
# (replaces a full scan of degree_list per distinct degree).
unique_list, degree_counts = np.unique(degree_list, return_counts=True)
for d, count in zip(unique_list, degree_counts):
    print(int(d)," | ",int(count),'\n')

In [None]:
import pyflagser
# Degree thresholds at which the sub-level filtration is evaluated.
Node_fil=[0,2,4,6,8,10,12,14,16,18,20,22,24,30,34]
# Per node: Betti-0 / Betti-1 sequences (one value per threshold) and [node count, edge count]
# of the node's ego graph.
topo_betti_0=[]
topo_betti_1=[]
Node_Edge=[]
for i in range(Number_nodes):
    # In-place progress line; the percentage divides by Number_nodes-1.
    print("\rProcessing file {} ({}%)".format(i, 100*i//(Number_nodes-1)), end='', flush=True)
    # Ego graph of node i requested with radius=2, center=False, undirected=True.
    subgraph=ego_graph(G, i, radius=2, center=False, undirected=True, distance=None)
    # Filtration function: degree of each node inside the ego graph.
    filt=Degree_list(subgraph)
    A_sub = nx.to_numpy_array(subgraph)# adjacency matrix of subgraph
    fe=Topological_Feature_subLevel(A_sub,filt,Node_fil)
    topo_betti_0.append(fe[0])
    topo_betti_1.append(fe[1])
    Node_Edge.append([subgraph.number_of_nodes(),subgraph.number_of_edges()])

In [None]:
print( Node_Edge)

In [5]:
import pandas as pd
data = pd.read_csv('Feature_Pubmed.csv')
data.head()

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,7,8,...,11.1,12.1,13.1,14.1,15.1,16.1,17.1,18.1,19.1,20.1
0,0,72,87,94,94,96,96,96,91,91,...,0,0,0,0,0,1,4,4,7,11
1,1,20,25,28,25,25,21,21,21,21,...,1,1,1,1,1,5,5,5,5,5
2,2,42,43,44,44,44,44,44,44,44,...,0,0,0,0,0,0,0,0,0,0
3,3,10,14,15,12,12,12,12,12,12,...,0,0,0,0,0,0,0,0,0,0
4,4,6,29,30,31,30,30,30,30,30,...,0,0,0,0,0,0,0,1,0,0


In [6]:
Data1=data.drop(['Unnamed: 0'], axis=1)
Data1.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,11.1,12.1,13.1,14.1,15.1,16.1,17.1,18.1,19.1,20.1
0,72,87,94,94,96,96,96,91,91,91,...,0,0,0,0,0,1,4,4,7,11
1,20,25,28,25,25,21,21,21,21,21,...,1,1,1,1,1,5,5,5,5,5
2,42,43,44,44,44,44,44,44,44,44,...,0,0,0,0,0,0,0,0,0,0
3,10,14,15,12,12,12,12,12,12,12,...,0,0,0,0,0,0,0,0,0,0
4,6,29,30,31,30,30,30,30,30,30,...,0,0,0,0,0,0,0,1,0,0


In [7]:
Topo_fe=torch.tensor(Data1.values).float()

In [118]:
print(len(topo_fe[0]))

42


In [None]:
from sklearn.preprocessing import MinMaxScaler
# X0 is assigned but not used in this cell.
X0=[]
scaler = MinMaxScaler()

# Fit the scaler to the topological features and rescale them.
# NOTE(review): this reads lowercase `topo_fe`, which is not assigned in any cell
# visible before this point (the CSV-derived tensor is named `Topo_fe`) — confirm
# which variable is intended.
topo_fe=scaler.fit_transform(topo_fe)
print(topo_fe[0])
topo_fe=np.array(topo_fe)

In [None]:
print(len(Topo_fe))

In [125]:
dataset = Planetoid(root='/tmp/PubMed', name='PubMed',transform=T.ToSparseTensor())
data = dataset[0]
print(data)

Data(x=[19717, 500], y=[19717], train_mask=[19717], val_mask=[19717], test_mask=[19717], adj_t=[19717, 19717, nnz=88648])


In [None]:
topo_betti0=torch.tensor(topo_betti_0).float()
topo_betti1=torch.tensor(topo_betti_1).float()
NodeEdge=torch.tensor(Node_Edge).float()

In [126]:
data.x=CC_domain
#data.topo=torch.tensor(topo_fe).float()
data.topo=Topo_fe
print(data)

Data(x=[19717, 3], y=[19717], train_mask=[19717], val_mask=[19717], test_mask=[19717], adj_t=[19717, 19717, nnz=88648], topo=[19717, 42])


# TOPO-W-GSAGE

In [8]:
class SAGE(torch.nn.Module):
    """Stack of SAGEConv layers that outputs raw (un-normalised) per-node scores.

    Every layer except the last is followed by BatchNorm1d, ReLU and dropout.
    The stack always contains at least two SAGEConv layers: the input layer,
    ``num_layers - 2`` hidden layers, and the output layer. No softmax is
    applied to the output.

    `heads` is stored on the instance but not used by this class.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout,heads):
        super().__init__()

        self.convs = torch.nn.ModuleList()
        self.bns = torch.nn.ModuleList()

        # Input widths of all layers that are followed by a batch norm:
        # the input layer first, then the hidden-to-hidden layers.
        normed_layer_inputs = [in_channels] + [hidden_channels] * (num_layers - 2)
        for width_in in normed_layer_inputs:
            self.convs.append(SAGEConv(width_in, hidden_channels))
            self.bns.append(torch.nn.BatchNorm1d(hidden_channels))

        # Output layer: no batch norm / activation after it.
        self.convs.append(SAGEConv(hidden_channels, out_channels))

        self.dropout = dropout
        self.heads=heads

    def reset_parameters(self):
        """Re-initialise every convolution and every batch-norm layer."""
        for layer in list(self.convs) + list(self.bns):
            layer.reset_parameters()

    def forward(self, x, adj_t):
        """Apply the layer stack to `x` using adjacency `adj_t`; returns the last layer's output as is."""
        # self.bns has exactly one entry per non-final convolution.
        for conv, norm in zip(self.convs[:-1], self.bns):
            x = F.relu(norm(conv(x, adj_t)))
            x = F.dropout(x, p=self.dropout, training=self.training)
        return self.convs[-1](x, adj_t)

class MLP(torch.nn.Module):
    """Multi-layer perceptron that outputs raw (un-normalised) scores.

    Every linear layer except the last is followed by BatchNorm1d, a sigmoid
    and dropout. The network always contains at least two linear layers: the
    input layer, ``num_layers - 2`` hidden layers, and the output layer.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout):
        super(MLP, self).__init__()

        self.lins = torch.nn.ModuleList()
        self.lins.append(torch.nn.Linear(in_channels, hidden_channels))
        self.bns = torch.nn.ModuleList()
        self.bns.append(torch.nn.BatchNorm1d(hidden_channels))
        for _ in range(num_layers - 2):
            self.lins.append(torch.nn.Linear(hidden_channels, hidden_channels))
            self.bns.append(torch.nn.BatchNorm1d(hidden_channels))
        self.lins.append(torch.nn.Linear(hidden_channels, out_channels))

        self.dropout = dropout

    def reset_parameters(self):
        """Re-initialise every linear and every batch-norm layer.

        Named like ``SAGE.reset_parameters`` so all models in this notebook
        can be reset through the same method name.
        """
        for lin in self.lins:
            lin.reset_parameters()
        for bn in self.bns:
            bn.reset_parameters()

    # Original method name, kept as an alias so existing callers keep working.
    reset_parameters_mlp = reset_parameters

    def forward(self, x):
        """Apply the layer stack to `x`; returns the last linear layer's output as is."""
        for i, lin in enumerate(self.lins[:-1]):
            x = lin(x)
            x = self.bns[i](x)
            x=F.sigmoid(x)
            x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.lins[-1](x)
        return x
    
class MLP2(torch.nn.Module):
    """Feed-forward classifier head that returns log-probabilities.

    Each hidden stage is ``Linear -> BatchNorm1d -> sigmoid -> dropout``; the
    output of the final ``Linear`` layer is passed through ``log_softmax``
    over the last dimension.

    Args:
        in_channels: Size of each input feature vector.
        hidden_channels: Width of every hidden layer.
        out_channels: Number of outputs (classes) of the final linear layer.
        num_layers: Requested number of linear layers. ``num_layers - 2``
            extra hidden layers are inserted between the first and the last
            one, so any value below 2 still produces two linear layers.
        dropout: Dropout probability applied after each hidden activation.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout):
        super().__init__()

        self.lins = torch.nn.ModuleList()
        self.lins.append(torch.nn.Linear(in_channels, hidden_channels))
        self.bns = torch.nn.ModuleList()
        self.bns.append(torch.nn.BatchNorm1d(hidden_channels))
        for _ in range(num_layers - 2):
            self.lins.append(torch.nn.Linear(hidden_channels, hidden_channels))
            self.bns.append(torch.nn.BatchNorm1d(hidden_channels))
        self.lins.append(torch.nn.Linear(hidden_channels, out_channels))

        self.dropout = dropout

    def reset_parameters(self):
        """Re-initialise all linear layers, then all batch-norm layers."""
        for lin in self.lins:
            lin.reset_parameters()
        for bn in self.bns:
            bn.reset_parameters()

    def reset_parameters_mlp2(self):
        """Backward-compatible name for :meth:`reset_parameters`."""
        self.reset_parameters()

    def forward(self, x):
        """Return per-class log-probabilities for the input batch ``x``."""
        # There is exactly one batch-norm layer per hidden linear layer, so the
        # two lists can be walked in lockstep.
        for lin, bn in zip(self.lins[:-1], self.bns):
            # Sigmoid (not ReLU) is the hidden activation. torch.sigmoid is
            # used instead of F.sigmoid, which emits a deprecation warning on
            # older PyTorch releases.
            x = torch.sigmoid(bn(lin(x)))
            x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.lins[-1](x)
        return torch.log_softmax(x, dim=-1)
    

def train(model, mlp_model, mlp_2, data, train_idx, optimizer, optimizer_mlp, optimizer_mlp2):
    """Run one joint optimisation step over the three networks.

    ``model`` is evaluated on the whole graph and its output is restricted to
    ``train_idx``; ``mlp_model`` is evaluated on the ``train_idx`` rows of
    ``data.topo``. The two outputs are concatenated along dimension 1, passed
    through ``mlp_2`` and scored with the negative log-likelihood loss against
    ``data.y`` at ``train_idx``.

    Args:
        model: Network called as ``model(data.x, data.adj_t)``.
        mlp_model: Network applied to the topological features.
        mlp_2: Network applied to the concatenated outputs; its result is fed
            to ``F.nll_loss``.
        data: Object providing ``x``, ``adj_t``, ``topo`` and ``y``.
        train_idx: Indices of the training nodes.
        optimizer: Optimizer holding the parameters of ``model``.
        optimizer_mlp: Optimizer holding the parameters of ``mlp_model``.
        optimizer_mlp2: Optimizer holding the parameters of ``mlp_2``.

    Returns:
        float: The loss value computed before the parameter update.
    """
    for network in (model, mlp_model, mlp_2):
        network.train()
    for opt in (optimizer, optimizer_mlp, optimizer_mlp2):
        opt.zero_grad()

    graph_part = model(data.x, data.adj_t)[train_idx]
    topo_part = mlp_model(data.topo[train_idx])
    log_probs = mlp_2(torch.cat((graph_part, topo_part), dim=1))
    targets = data.y.squeeze()[train_idx]

    objective = F.nll_loss(log_probs, targets)
    objective.backward()

    # All gradients come from the single backward pass above.
    for opt in (optimizer_mlp2, optimizer, optimizer_mlp):
        opt.step()

    return objective.item()


def ACC(Prediction, Label):
    """Return the fraction of positions where ``Prediction`` equals ``Label``.

    Both tensors are flattened before they are compared. Without that, a
    column-shaped tensor (``[N, 1]``) compared with a flat one (``[N]``) is
    broadcast into an ``[N, N]`` grid and the number of "correct" entries is
    over-counted.

    Args:
        Prediction: Tensor of predicted class ids.
        Label: Tensor of reference class ids with the same number of elements.

    Returns:
        float: ``correct / total``, or ``0.0`` when there are no elements.

    Raises:
        ValueError: If the two tensors hold a different number of elements.
    """
    predicted = Prediction.reshape(-1)
    expected = Label.reshape(-1)
    total = expected.numel()
    if predicted.numel() != total:
        raise ValueError(
            f"Prediction has {predicted.numel()} elements but Label has {total}"
        )
    if total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0
    correct = predicted.eq(expected).sum().item()
    return correct / total



@torch.no_grad()
def test(model, mlp_model, mlp_2, data, train_idx, valid_idx, test_idx):
    """Evaluate the three-network pipeline on the train/valid/test indices.

    All networks are switched to eval mode and run on every node: ``model`` on
    ``(data.x, data.adj_t)`` and ``mlp_model`` on ``data.topo``. Their outputs
    are concatenated along dimension 1 and passed through ``mlp_2``; the
    arg-max over the last dimension is the predicted class.

    Returns:
        tuple: ``(train_acc, valid_acc, test_acc)`` as computed by ``ACC``.
    """
    for network in (model, mlp_model, mlp_2):
        network.eval()

    fused = torch.cat((model(data.x, data.adj_t), mlp_model(data.topo)), dim=1)
    predicted = mlp_2(fused).argmax(dim=-1).view(-1)

    # ACC receives the reference labels first and the predictions second.
    return tuple(
        ACC(data.y[split], predicted[split])
        for split in (train_idx, valid_idx, test_idx)
    )

class objectview(object):
    """Expose the entries of a dict as attributes (``view.key`` for ``d['key']``).

    The instance adopts ``d`` itself as its ``__dict__`` (no copy is made), so
    attribute writes on the view show up in ``d`` and vice versa.
    """

    def __init__(self, d):
        self.__dict__ = d

    def __repr__(self):
        # Show the wrapped settings instead of the default object address.
        fields = ", ".join(f"{key}={value!r}" for key, value in self.__dict__.items())
        return f"{type(self).__name__}({fields})"

In [142]:
def main(epochs=200, verbose=False):
    """Train and evaluate the SAGE + topology-MLP + fusion-MLP pipeline.

    Uses the notebook-level ``data`` and ``dataset`` objects; ``data.topo``
    must already be attached. For every run the three networks are
    re-initialised, trained for ``epochs`` epochs and evaluated after each
    epoch. Per-run and overall statistics are printed through ``Logger``.

    Args:
        epochs: Number of training epochs per run.
        verbose: When True, print loss and train/valid/test accuracy every
            ``log_steps`` epochs. Off by default so that only the summary
            statistics are printed.
    """
    # NOTE(review): 'model_type', 'dataset', 'batch_size', 'opt',
    # 'opt_scheduler', 'opt_restart' and 'weight_decay' are not read in this
    # function (the Adam optimizers below are built without weight decay);
    # they are only handed to Logger as part of `args`. Confirm that is intended.
    args={'model_type': 'GCN', 'dataset': 'cora', 'num_layers': 2, 'heads': 8, 
         'batch_size': 32, 'hidden_channels': 16, 'dropout': 0.5, 'epochs': epochs, 
         'opt': 'adam', 'opt_scheduler': 'none', 'opt_restart': 0,'runs':10, 'log_steps':1,
         'weight_decay': 5e-4, 'lr': 0.01,'hidden_channels_mlp': 20,'dropout_mlp': 0.5,'num_layers_mlp': 3}

    args = objectview(args)
    # Print the settings themselves rather than the wrapper object's address.
    print(vars(args))

    topo_features = data.topo
    # Rebinds the adjacency on the notebook-level `data` object.
    data.adj_t = data.adj_t.to_symmetric()

    train_idx = np.where(data.train_mask)[0]
    valid_idx = np.where(data.val_mask)[0]
    test_idx = np.where(data.test_mask)[0]

    # Output widths of the two encoders; the fusion MLP consumes their
    # concatenation, so its input width is derived instead of hard-coded.
    sage_out_channels = 20
    topo_out_channels = 10

    model = SAGE(data.num_features, args.hidden_channels, sage_out_channels,
                 args.num_layers, args.dropout, args.heads)
    mlp_model = MLP(topo_features.size(-1), args.hidden_channels_mlp, topo_out_channels,
                    args.num_layers_mlp, args.dropout_mlp)
    mlp_2 = MLP2(sage_out_channels + topo_out_channels, 100, dataset.num_classes, 3, 0.3)

    logger = Logger(args.runs, args)

    for run in range(args.runs):
        # Fresh weights and fresh optimizer state for every run.
        model.reset_parameters()
        mlp_model.reset_parameters_mlp()
        mlp_2.reset_parameters_mlp2()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        optimizer_mlp = torch.optim.Adam(mlp_model.parameters(), lr=0.001)
        optimizer_mlp2 = torch.optim.Adam(mlp_2.parameters(), lr=0.01)
        for epoch in range(1, 1 + args.epochs):
            loss = train(model, mlp_model, mlp_2, data, train_idx,
                         optimizer, optimizer_mlp, optimizer_mlp2)
            result = test(model, mlp_model, mlp_2, data, train_idx, valid_idx, test_idx)
            logger.add_result(run, result)

            if verbose and epoch % args.log_steps == 0:
                train_acc, valid_acc, test_acc = result
                print(f'Run: {run + 1:02d}, '
                      f'Epoch: {epoch:02d}, '
                      f'Loss: {loss:.4f}, '
                      f'Train: {100 * train_acc:.2f}%, '
                      f'Valid: {100 * valid_acc:.2f}% '
                      f'Test: {100 * test_acc:.2f}%')

        logger.print_statistics(run)
    logger.print_statistics()


if __name__ == "__main__":
    main()

<__main__.objectview object at 0x2ab20a2c0>
Run 01:
Highest Train: 86.67
Highest Valid: 79.00
  Final Train: 86.67
   Final Test: 77.00
Run 02:
Highest Train: 90.00
Highest Valid: 77.40
  Final Train: 88.33
   Final Test: 74.60
Run 03:
Highest Train: 96.67
Highest Valid: 77.00
  Final Train: 96.67
   Final Test: 70.00
Run 04:
Highest Train: 95.00
Highest Valid: 74.40
  Final Train: 90.00
   Final Test: 71.20
Run 05:
Highest Train: 90.00
Highest Valid: 81.80
  Final Train: 86.67
   Final Test: 77.60
Run 06:
Highest Train: 91.67
Highest Valid: 73.20
  Final Train: 81.67
   Final Test: 72.30
Run 07:
Highest Train: 91.67
Highest Valid: 78.00
  Final Train: 83.33
   Final Test: 76.70
Run 08:
Highest Train: 91.67
Highest Valid: 75.80
  Final Train: 91.67
   Final Test: 74.10
Run 09:
Highest Train: 93.33
Highest Valid: 75.80
  Final Train: 86.67
   Final Test: 74.70
Run 10:
Highest Train: 86.67
Highest Valid: 76.20
  Final Train: 86.67
   Final Test: 71.70
All runs:
Highest Train: 91.33 ± 3.2

# Topo-GSAGE

In [12]:
# Reload PubMed; the ToSparseTensor transform stores the adjacency in `data.adj_t`.
dataset = Planetoid(root='/tmp/PubMed', name='PubMed',transform=T.ToSparseTensor())
data = dataset[0]
# `Topo_fe` is not created in this cell, so check that it lines up with the
# nodes (one row per node) before attaching it.
assert Topo_fe.shape[0] == data.x.size(0), (
    f"Topo_fe has {Topo_fe.shape[0]} rows but the graph has {data.x.size(0)} nodes"
)
data.topo=Topo_fe
print(data)

Data(x=[19717, 500], y=[19717], train_mask=[19717], val_mask=[19717], test_mask=[19717], adj_t=[19717, 19717, nnz=88648], topo=[19717, 42])


In [14]:
def main(epochs=100, verbose=False):
    """Train and evaluate the SAGE + topology-MLP + fusion-MLP pipeline.

    Uses the notebook-level ``data`` and ``dataset`` objects; ``data.topo``
    must already be attached. For every run the three networks are
    re-initialised, trained for ``epochs`` epochs and evaluated after each
    epoch. Per-run and overall statistics are printed through ``Logger``.

    Args:
        epochs: Number of training epochs per run.
        verbose: When True, print loss and train/valid/test accuracy every
            ``log_steps`` epochs. Off by default so that only the summary
            statistics are printed.
    """
    # NOTE(review): 'model_type', 'dataset', 'batch_size', 'opt',
    # 'opt_scheduler', 'opt_restart' and 'weight_decay' are not read in this
    # function (the Adam optimizers below are built without weight decay);
    # they are only handed to Logger as part of `args`. Confirm that is intended.
    args={'model_type': 'GCN', 'dataset': 'cora', 'num_layers': 2, 'heads': 8, 
         'batch_size': 32, 'hidden_channels': 16, 'dropout': 0.5, 'epochs': epochs, 
         'opt': 'adam', 'opt_scheduler': 'none', 'opt_restart': 0,'runs':10, 'log_steps':1,
         'weight_decay': 5e-4, 'lr': 0.01,'hidden_channels_mlp': 20,'dropout_mlp': 0.5,'num_layers_mlp': 3}

    args = objectview(args)
    # Print the settings themselves rather than the wrapper object's address.
    print(vars(args))

    topo_features = data.topo
    # Rebinds the adjacency on the notebook-level `data` object.
    data.adj_t = data.adj_t.to_symmetric()

    train_idx = np.where(data.train_mask)[0]
    valid_idx = np.where(data.val_mask)[0]
    test_idx = np.where(data.test_mask)[0]

    # Output widths of the two encoders; the fusion MLP consumes their
    # concatenation, so its input width is derived instead of hard-coded.
    sage_out_channels = 20
    topo_out_channels = 10

    model = SAGE(data.num_features, args.hidden_channels, sage_out_channels,
                 args.num_layers, args.dropout, args.heads)
    mlp_model = MLP(topo_features.size(-1), args.hidden_channels_mlp, topo_out_channels,
                    args.num_layers_mlp, args.dropout_mlp)
    mlp_2 = MLP2(sage_out_channels + topo_out_channels, 100, dataset.num_classes, 3, 0.3)

    logger = Logger(args.runs, args)

    for run in range(args.runs):
        # Fresh weights and fresh optimizer state for every run.
        model.reset_parameters()
        mlp_model.reset_parameters_mlp()
        mlp_2.reset_parameters_mlp2()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        optimizer_mlp = torch.optim.Adam(mlp_model.parameters(), lr=0.001)
        optimizer_mlp2 = torch.optim.Adam(mlp_2.parameters(), lr=0.01)
        for epoch in range(1, 1 + args.epochs):
            loss = train(model, mlp_model, mlp_2, data, train_idx,
                         optimizer, optimizer_mlp, optimizer_mlp2)
            result = test(model, mlp_model, mlp_2, data, train_idx, valid_idx, test_idx)
            logger.add_result(run, result)

            if verbose and epoch % args.log_steps == 0:
                train_acc, valid_acc, test_acc = result
                print(f'Run: {run + 1:02d}, '
                      f'Epoch: {epoch:02d}, '
                      f'Loss: {loss:.4f}, '
                      f'Train: {100 * train_acc:.2f}%, '
                      f'Valid: {100 * valid_acc:.2f}% '
                      f'Test: {100 * test_acc:.2f}%')

        logger.print_statistics(run)
    logger.print_statistics()


if __name__ == "__main__":
    main()

<__main__.objectview object at 0x148958b20>
Run 01:
Highest Train: 100.00
Highest Valid: 79.60
  Final Train: 100.00
   Final Test: 76.40
Run 02:
Highest Train: 100.00
Highest Valid: 78.80
  Final Train: 100.00
   Final Test: 76.20
Run 03:
Highest Train: 100.00
Highest Valid: 78.00
  Final Train: 100.00
   Final Test: 75.70
Run 04:
Highest Train: 100.00
Highest Valid: 78.00
  Final Train: 100.00
   Final Test: 74.50
Run 05:
Highest Train: 100.00
Highest Valid: 79.80
  Final Train: 100.00
   Final Test: 74.30
Run 06:
Highest Train: 100.00
Highest Valid: 77.80
  Final Train: 100.00
   Final Test: 73.80
Run 07:
Highest Train: 100.00
Highest Valid: 78.80
  Final Train: 100.00
   Final Test: 74.50
Run 08:
Highest Train: 100.00
Highest Valid: 79.00
  Final Train: 100.00
   Final Test: 76.70
Run 09:
Highest Train: 100.00
Highest Valid: 78.20
  Final Train: 100.00
   Final Test: 75.80
Run 10:
Highest Train: 100.00
Highest Valid: 76.80
  Final Train: 100.00
   Final Test: 75.80
All runs:
Highes