In [1]:

import networkx as nx
from networkx import ego_graph

import torch.optim as optim
import argparse
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F

import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, SAGEConv, GATConv

from ogb.nodeproppred import PygNodePropPredDataset, Evaluator

#from logger import Logger
from torch_geometric.datasets import TUDataset
from torch_geometric.datasets import Planetoid
from torch_geometric.loader import DataLoader

In [2]:
dataset = Planetoid(root='/tmp/PubMed', name='PubMed',transform=T.ToSparseTensor())
data = dataset[0]
#data.adj_t = data.adj_t.to_symmetric()
#data.adj_t = data.adj_t.to_symmetric()
print(data)
#split_idx = dataset.get_idx_split()
#train_idx = split_idx['train'].to(device)

Data(x=[19717, 500], y=[19717], train_mask=[19717], val_mask=[19717], test_mask=[19717], adj_t=[19717, 19717, nnz=88648])


In [3]:
train_index = np.where(data.train_mask)[0]
print(len(train_index))
valid_index = np.where(data.val_mask)[0]
print(len(valid_index))
test_index = np.where(data.test_mask)[0]
print(len(test_index))

60
500
1000


# GAT with Original domain Feature

In [3]:
import torch


class Logger(object):
    def __init__(self, runs, info=None):
        self.info = info
        self.results = [[] for _ in range(runs)]

    def add_result(self, run, result):
        assert len(result) == 3
        assert run >= 0 and run < len(self.results)
        self.results[run].append(result)

    def print_statistics(self, run=None):
        if run is not None:
            result = 100 * torch.tensor(self.results[run])
            argmax = result[:, 1].argmax().item()
            print(f'Run {run + 1:02d}:')
            print(f'Highest Train: {result[:, 0].max():.2f}')
            print(f'Highest Valid: {result[:, 1].max():.2f}')
            print(f'  Final Train: {result[argmax, 0]:.2f}')
            print(f'   Final Test: {result[argmax, 2]:.2f}')
        else:
            result = 100 * torch.tensor(self.results)

            best_results = []
            for r in result:
                train1 = r[:, 0].max().item()
                valid = r[:, 1].max().item()
                train2 = r[r[:, 1].argmax(), 0].item()
                test = r[r[:, 1].argmax(), 2].item()
                best_results.append((train1, valid, train2, test))

            best_result = torch.tensor(best_results)

            print(f'All runs:')
            r = best_result[:, 0]
            print(f'Highest Train: {r.mean():.2f} ± {r.std():.2f}')
            r = best_result[:, 1]
            print(f'Highest Valid: {r.mean():.2f} ± {r.std():.2f}')
            r = best_result[:, 2]
            print(f'  Final Train: {r.mean():.2f} ± {r.std():.2f}')
            r = best_result[:, 3]
            print(f'   Final Test: {r.mean():.2f} ± {r.std():.2f}')

In [5]:
class GAT(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout,heads):
        super(GAT, self).__init__()

        self.convs = torch.nn.ModuleList()
        self.convs.append(GATConv(in_channels, hidden_channels))
        self.bns = torch.nn.ModuleList()
        self.bns.append(torch.nn.BatchNorm1d(hidden_channels))
        for _ in range(num_layers - 2):
            self.convs.append(GATConv(hidden_channels, hidden_channels))
            self.bns.append(torch.nn.BatchNorm1d(hidden_channels))
        self.convs.append(GATConv(hidden_channels, out_channels))

        self.dropout = dropout
        self.heads=heads

    def reset_parameters(self):
        for conv in self.convs:
            conv.reset_parameters()
        for bn in self.bns:
            bn.reset_parameters()

    def forward(self, x, adj_t):
        for i, conv in enumerate(self.convs[:-1]):
            x = conv(x, adj_t)
            x = self.bns[i](x)
            x = F.relu(x)
            #x=F.softmax(x)
            x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.convs[-1](x, adj_t)
        return x.log_softmax(dim=-1)


def train(model, data, train_idx, optimizer):
    model.train()

    optimizer.zero_grad()
    out = model(data.x, data.adj_t)[train_idx]
    #print(len(out))
    #print(data.y.squeeze(1)[train_idx])
    loss = F.nll_loss(out, data.y.squeeze()[train_idx])
    loss.backward()
    optimizer.step()

    return loss.item()


def ACC(Prediction, Label):
    correct = Prediction.view(-1).eq(Label).sum().item()
    total=len(Label)
    return correct / total

@torch.no_grad()
def test(model, data, train_idx,valid_idx,test_idx):
    model.eval()

    out = model(data.x, data.adj_t)
    y_pred = out.argmax(dim=-1, keepdim=True)
    y_pred=y_pred.view(-1)
    train_acc=ACC(data.y[train_idx],y_pred[train_idx])
    valid_acc=ACC(data.y[valid_idx],y_pred[valid_idx])
    test_acc =ACC(data.y[test_idx],y_pred[test_idx])
    return train_acc, valid_acc, test_acc

class objectview(object):
    def __init__(self, d):
        self.__dict__ = d

In [24]:
def main():
    args={'model_type': 'GCN', 'dataset': 'cora', 'num_layers': 2, 'heads': 8, 
         'batch_size': 32, 'hidden_channels': 16, 'dropout': 0.6, 'epochs': 50, 
         'opt': 'adam', 'opt_scheduler': 'none', 'opt_restart': 0,'runs':10, 'log_steps':1,
         'weight_decay': 5e-4, 'lr': 0.01}

    args = objectview(args)
    print(args)
    # call the dataset here with x,y,train_mask,test_mask,Val_mask, and Adj
    # To add extra feature we can simply update data.x=new fev tensor or we can add new feature
    dataset = Planetoid(root='/tmp/PubMed', name='PubMed',transform=T.ToSparseTensor())
    data = dataset[0]
    data.adj_t = data.adj_t.to_symmetric()
    
    train_idx = np.where(data.train_mask)[0]
    valid_idx = np.where(data.val_mask)[0]
    test_idx = np.where(data.test_mask)[0]
    
    model = GAT(data.num_features, args.hidden_channels,dataset.num_classes, args.num_layers,
                    args.dropout,args.heads)

    logger = Logger(args.runs, args)

    for run in range(args.runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        for epoch in range(1, 1 + args.epochs):
            loss = train(model, data, train_idx, optimizer)
            result = test(model, data, train_idx,valid_idx,test_idx)
            logger.add_result(run, result)

            if epoch % args.log_steps == 0:
                train_acc, valid_acc, test_acc = result
                #print(f'Run: {run + 1:02d}, 'f'Epoch: {epoch:02d}, 'f'Loss: {loss:.4f}, 'f'Train: {100 * train_acc:.2f}%, '
                 #     f'Valid: {100 * valid_acc:.2f}% '
                  #    f'Test: {100 * test_acc:.2f}%')

        logger.print_statistics(run)
    logger.print_statistics()


if __name__ == "__main__":
    main()

<__main__.objectview object at 0x14f9c3f40>
Run 01:
Highest Train: 100.00
Highest Valid: 76.60
  Final Train: 100.00
   Final Test: 74.00
Run 02:
Highest Train: 100.00
Highest Valid: 75.00
  Final Train: 100.00
   Final Test: 72.40
Run 03:
Highest Train: 100.00
Highest Valid: 77.00
  Final Train: 100.00
   Final Test: 76.00
Run 04:
Highest Train: 100.00
Highest Valid: 78.60
  Final Train: 100.00
   Final Test: 75.50
Run 05:
Highest Train: 100.00
Highest Valid: 72.20
  Final Train: 100.00
   Final Test: 71.30
Run 06:
Highest Train: 100.00
Highest Valid: 76.20
  Final Train: 100.00
   Final Test: 74.00
Run 07:
Highest Train: 100.00
Highest Valid: 77.40
  Final Train: 100.00
   Final Test: 75.60
Run 08:
Highest Train: 100.00
Highest Valid: 77.40
  Final Train: 100.00
   Final Test: 76.00
Run 09:
Highest Train: 100.00
Highest Valid: 75.60
  Final Train: 100.00
   Final Test: 74.30
Run 10:
Highest Train: 100.00
Highest Valid: 74.00
  Final Train: 100.00
   Final Test: 73.30
All runs:
Highes

# WISE embedding

In [9]:
dataset = Planetoid(root='/tmp/PubMed', name='PubMed',transform=T.ToSparseTensor())
data = dataset[0]
Domain=pd.DataFrame(data.x.numpy())
label=pd.DataFrame(data.y.numpy(),columns =['class'])
Domain=pd.concat([Domain,label], axis=1)
Domain.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,491,492,493,494,495,496,497,498,499,class
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004999,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016434,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
2,0.104636,0.0,0.0,0.0,0.035178,0.0,0.0,0.019555,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.008582,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.007356,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1


In [10]:
Number_nodes=len(data.y)
fe_len=len(data.x[0])
catagories=data.y.numpy()
ddata_by_class = {cls: Domain.loc[Domain['class'] == cls].drop(['class'], axis=1) for cls in range(max(catagories) + 1)}

In [11]:
import numpy as np
def Average(lst):
    # average function
    avg = np.average(lst)
    return(avg)
sel_basis = [[Average(list(df[i].to_numpy())) for i in range(len(df.columns))]for df in ddata_by_class.values()]

In [12]:
feature_names = [ii for ii in range(fe_len)]
Euc_Fec=[]
for i in range(Number_nodes):
    print("\rProcessing file {} ({}%)".format(i, 100*i//(Number_nodes-1)), end='', flush=True)
    vec=[]
    f=Domain.loc[i, feature_names].values.flatten().tolist()
    for j in range(max(catagories)+1):
        vec.append(np.linalg.norm(np.array(f) - np.array(sel_basis[j])))
    f.clear()
    Euc_Fec.append(vec)

Processing file 19716 (100%)

In [13]:
print(Euc_Fec[1])

[0.2859691472918565, 0.2451948567242483, 0.26428544455036057]


In [14]:
#Euc_fe=torch.tensor(Euc_Fec)
#Inc_fe=torch.tensor(Fec)
#sel_fe=torch.tensor(S_Fec)
#CC_domain=torch.cat((sel_fe,Euc_fe), 1).float()
#topo_fe=torch.cat((topo_betti0,topo_betti1),1)
CC_domain=torch.tensor(Euc_Fec).float()
print(CC_domain)
CC_domain.type()

tensor([[0.2535, 0.2234, 0.2358],
        [0.2860, 0.2452, 0.2643],
        [0.2814, 0.3077, 0.3154],
        ...,
        [0.3384, 0.3121, 0.3134],
        [0.4211, 0.4479, 0.4445],
        [0.3380, 0.3139, 0.3143]])


'torch.FloatTensor'

# WISE GAT

In [26]:
data.x=CC_domain
print(data)

Data(x=[19717, 3], y=[19717], train_mask=[19717], val_mask=[19717], test_mask=[19717], adj_t=[19717, 19717, nnz=88648])


In [22]:
def main():
    args={'model_type': 'GCN', 'dataset': 'cora', 'num_layers': 2, 'heads': 8, 
         'batch_size': 32, 'hidden_channels': 16, 'dropout': 0.5, 'epochs': 200, 
         'opt': 'adam', 'opt_scheduler': 'none', 'opt_restart': 0,'runs':10, 'log_steps':1,
         'weight_decay': 5e-4, 'lr': 0.01}

    args = objectview(args)
    print(args)
    # call the dataset here with x,y,train_mask,test_mask,Val_mask, and Adj
    # To add extra feature we can simply update data.x=new fev tensor or we can add new feature
    #dataset = Planetoid(root='/tmp/cora', name='Cora',transform=T.ToSparseTensor())
    #data = dataset[0]
    data.adj_t = data.adj_t.to_symmetric()
    
    train_idx = np.where(data.train_mask)[0]
    valid_idx = np.where(data.val_mask)[0]
    test_idx = np.where(data.test_mask)[0]
    
    model = GAT(data.num_features, args.hidden_channels,
                    dataset.num_classes, args.num_layers,
                    args.dropout, args.heads)

    logger = Logger(args.runs, args)

    for run in range(args.runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        for epoch in range(1, 1 + args.epochs):
            loss = train(model, data, train_idx, optimizer)
            result = test(model, data, train_idx,valid_idx,test_idx)
            logger.add_result(run, result)

            if epoch % args.log_steps == 0:
                train_acc, valid_acc, test_acc = result
                #print(f'Run: {run + 1:02d}, 'f'Epoch: {epoch:02d}, 'f'Loss: {loss:.4f}, 'f'Train: {100 * train_acc:.2f}%, '
                 #     f'Valid: {100 * valid_acc:.2f}% '
                  #    f'Test: {100 * test_acc:.2f}%')

        logger.print_statistics(run)
    logger.print_statistics()


if __name__ == "__main__":
    main()


<__main__.objectview object at 0x14d6b99c0>
Run 01:
Highest Train: 88.33
Highest Valid: 78.20
  Final Train: 78.33
   Final Test: 75.50
Run 02:
Highest Train: 91.67
Highest Valid: 73.60
  Final Train: 83.33
   Final Test: 70.90
Run 03:
Highest Train: 91.67
Highest Valid: 82.80
  Final Train: 86.67
   Final Test: 79.90
Run 04:
Highest Train: 78.33
Highest Valid: 73.00
  Final Train: 78.33
   Final Test: 71.50
Run 05:
Highest Train: 90.00
Highest Valid: 81.20
  Final Train: 86.67
   Final Test: 75.90
Run 06:
Highest Train: 85.00
Highest Valid: 81.20
  Final Train: 76.67
   Final Test: 76.00
Run 07:
Highest Train: 88.33
Highest Valid: 78.80
  Final Train: 85.00
   Final Test: 77.60
Run 08:
Highest Train: 88.33
Highest Valid: 81.40
  Final Train: 86.67
   Final Test: 77.90
Run 09:
Highest Train: 86.67
Highest Valid: 75.40
  Final Train: 80.00
   Final Test: 72.30
Run 10:
Highest Train: 83.33
Highest Valid: 78.00
  Final Train: 81.67
   Final Test: 75.90
All runs:
Highest Train: 87.17 ± 4.0

# Topological Encoding

In [111]:
dataset = Planetoid(root='/tmp/PubMed', name='PubMed')
data = dataset[0]
print(data)

Data(x=[19717, 500], edge_index=[2, 88648], y=[19717], train_mask=[19717], val_mask=[19717], test_mask=[19717])


In [112]:
print(data.edge_index.numpy())

[[ 1378  1544  6092 ... 12278  4284 16030]
 [    0     0     0 ... 19714 19715 19716]]


In [None]:
Edge_idx=data.edge_index.numpy()
Node=range(Number_nodes)
Edgelist=[]
for i in range(len(Edge_idx[1])):
    Edgelist.append((Edge_idx[0][i],Edge_idx[1][i]))
#print(Edgelist)

In [None]:
# a "plain" graph is undirected
G = nx.DiGraph()

# give each a node a 'name', which is a letter in this case.
#G.add_node('a')

# the add_nodes_from method allows adding nodes from a sequence, in this case a list
#nodes_to_add = ['b', 'c', 'd']
G.add_nodes_from(Node)

# add edge from 'a' to 'b'
# since this graph is undirected, the order doesn't matter here
#G.add_edge('a', 'b')

# just like add_nodes_from, we can add edges from a sequence
# edges should be specified as 2-tuples
#edges_to_add = [('a', 'c'), ('b', 'c'), ('c', 'd')]
G.add_edges_from(Edgelist)


In [None]:
print(G.number_of_edges())

In [None]:
def Topological_Feature_subLevel(adj,filtration_fun, Filtration):
        betti_0=[]
        betti_1=[]
        for p in range(len(Filtration)):
            n_active = np.where(np.array(filtration_fun) <= Filtration[p])[0].tolist()
            Active_node=np.unique(n_active)
            if (len(Active_node)==0):
                betti_0.append(0)
                betti_1.append(0)
            else:
                b=adj[Active_node,:][:,Active_node]
                my_flag=pyflagser.flagser_unweighted(b, min_dimension=0, max_dimension=2, directed=False, coeff=2, approximation=None)
                x = my_flag["betti"]
                betti_0.append(x[0])
                betti_1.append(x[1])
            n_active.clear()
        return betti_0,betti_1

In [None]:
def Degree_list(Graph):
    degree_list = [Graph.degree(node) for node in Graph.nodes]
    return np.array(degree_list)

In [None]:
degree_list=Degree_list(G)
unique_list=np.unique(degree_list)
for d in unique_list:
    count=0
    for i in range(len(degree_list)):
        if degree_list[i]==d:
            count=count+1
    print(int(d)," | ",count,'\n')

In [None]:
import pyflagser
Node_fil=[0,2,4,6,8,10,12,14,16,18,20,22,24,30,34]
topo_betti_0=[]
topo_betti_1=[]
Node_Edge=[]
for i in range(Number_nodes):
    print("\rProcessing file {} ({}%)".format(i, 100*i//(Number_nodes-1)), end='', flush=True)
    subgraph=ego_graph(G, i, radius=2, center=False, undirected=True, distance=None)
    filt=Degree_list(subgraph)
    A_sub = nx.to_numpy_array(subgraph)# adjacency matrix of subgraph
    fe=Topological_Feature_subLevel(A_sub,filt,Node_fil)
    topo_betti_0.append(fe[0])
    topo_betti_1.append(fe[1])
    Node_Edge.append([subgraph.number_of_nodes(),subgraph.number_of_edges()])
    #topo_with_NE.app

In [None]:
print( Node_Edge)

In [15]:
data = pd.read_csv('Feature_pubmed_topo.csv')
data.head()

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,7,8,...,5.1,6.1,7.1,8.1,9.1,10.1,11.1,12.1,13.1,14.1
0,0,0,72,87,94,94,96,96,96,91,...,0,0,0,0,0,0,0,0,0,0
1,1,0,20,25,28,25,25,21,21,21,...,0,0,0,0,0,0,0,1,1,1
2,2,0,42,43,44,44,44,44,44,44,...,0,0,0,0,0,0,0,0,0,0
3,3,0,10,14,15,12,12,12,12,12,...,0,0,0,0,0,0,0,0,0,0
4,4,0,6,29,30,31,30,30,30,30,...,0,0,0,0,0,0,0,0,0,0


In [16]:
Data1=data.drop(['Unnamed: 0'], axis=1)
Data1.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,5.1,6.1,7.1,8.1,9.1,10.1,11.1,12.1,13.1,14.1
0,0,72,87,94,94,96,96,96,91,91,...,0,0,0,0,0,0,0,0,0,0
1,0,20,25,28,25,25,21,21,21,21,...,0,0,0,0,0,0,0,1,1,1
2,0,42,43,44,44,44,44,44,44,44,...,0,0,0,0,0,0,0,0,0,0
3,0,10,14,15,12,12,12,12,12,12,...,0,0,0,0,0,0,0,0,0,0
4,0,6,29,30,31,30,30,30,30,30,...,0,0,0,0,0,0,0,0,0,0


In [17]:
Topo_fe=torch.tensor(Data1.values).float()

In [118]:
print(len(topo_fe[0]))

42


In [None]:
from sklearn.preprocessing import MinMaxScaler
#for i in range(300):
X0=[]
scaler = MinMaxScaler()

# Fit scaler to data and transform data
topo_fe=scaler.fit_transform(topo_fe)
print(topo_fe[0])
topo_fe=np.array(topo_fe)

In [None]:
print(len(Topo_fe))

In [18]:
dataset = Planetoid(root='/tmp/PubMed', name='PubMed',transform=T.ToSparseTensor())
data = dataset[0]
print(data)

Data(x=[19717, 500], y=[19717], train_mask=[19717], val_mask=[19717], test_mask=[19717], adj_t=[19717, 19717, nnz=88648])


# TOPO-W-GAT

In [19]:
data.x=CC_domain
#data.topo=torch.tensor(topo_fe).float()
data.topo=Topo_fe
print(data)

Data(x=[19717, 3], y=[19717], train_mask=[19717], val_mask=[19717], test_mask=[19717], adj_t=[19717, 19717, nnz=88648], topo=[19717, 30])


In [None]:
print(len(topo_fe[0]))

In [20]:
class GAT(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout,heads):
        super(GAT, self).__init__()

        self.convs = torch.nn.ModuleList()
        self.convs.append(GATConv(in_channels, hidden_channels))
        self.bns = torch.nn.ModuleList()
        self.bns.append(torch.nn.BatchNorm1d(hidden_channels))
        for _ in range(num_layers - 2):
            self.convs.append(GATConv(hidden_channels, hidden_channels))
            self.bns.append(torch.nn.BatchNorm1d(hidden_channels))
        self.convs.append(GATConv(hidden_channels, out_channels))

        self.dropout = dropout
        self.heads=heads

    def reset_parameters(self):
        for conv in self.convs:
            conv.reset_parameters()
        for bn in self.bns:
            bn.reset_parameters()

    def forward(self, x, adj_t):
        for i, conv in enumerate(self.convs[:-1]):
            x = conv(x, adj_t)
            x = self.bns[i](x)
            x = F.relu(x)
            #x=F.softmax(x)
            x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.convs[-1](x, adj_t)
        #return x.log_softmax(dim=-1)
        return x

class MLP(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout):
        super(MLP, self).__init__()

        self.lins = torch.nn.ModuleList()
        self.lins.append(torch.nn.Linear(in_channels, hidden_channels))
        self.bns = torch.nn.ModuleList()
        self.bns.append(torch.nn.BatchNorm1d(hidden_channels))
        for _ in range(num_layers - 2):
            self.lins.append(torch.nn.Linear(hidden_channels, hidden_channels))
            self.bns.append(torch.nn.BatchNorm1d(hidden_channels))
        self.lins.append(torch.nn.Linear(hidden_channels, out_channels))

        self.dropout = dropout

    def reset_parameters_mlp(self):
        for lin in self.lins:
            lin.reset_parameters()
        for bn in self.bns:
            bn.reset_parameters()

    def forward(self, x):
        for i, lin in enumerate(self.lins[:-1]):
            x = lin(x)
            x = self.bns[i](x)
            #x = F.relu(x)
            x=F.sigmoid(x)
            x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.lins[-1](x)
        #return torch.log_softmax(x, dim=-1)
        return x
    
class MLP2(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout):
        super(MLP2, self).__init__()

        self.lins = torch.nn.ModuleList()
        self.lins.append(torch.nn.Linear(in_channels, hidden_channels))
        self.bns = torch.nn.ModuleList()
        self.bns.append(torch.nn.BatchNorm1d(hidden_channels))
        for _ in range(num_layers - 2):
            self.lins.append(torch.nn.Linear(hidden_channels, hidden_channels))
            self.bns.append(torch.nn.BatchNorm1d(hidden_channels))
        self.lins.append(torch.nn.Linear(hidden_channels, out_channels))

        self.dropout = dropout

    def reset_parameters_mlp2(self):
        for lin in self.lins:
            lin.reset_parameters()
        for bn in self.bns:
            bn.reset_parameters()

    def forward(self, x):
        for i, lin in enumerate(self.lins[:-1]):
            x = lin(x)
            x = self.bns[i](x)
            #x = F.relu(x)
            x=F.sigmoid(x)
            x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.lins[-1](x)
        return torch.log_softmax(x, dim=-1)
    

def train(model,mlp_model,mlp_2,data, train_idx, optimizer,optimizer_mlp,optimizer_mlp2):
    model.train()
    mlp_model.train()
    mlp_2.train()
    optimizer.zero_grad()
    optimizer_mlp.zero_grad()
    optimizer_mlp2.zero_grad()
    gcn_embedding = model(data.x, data.adj_t)[train_idx]
    #print(gcn_embedding)
    mlp_embedding = mlp_model(data.topo[train_idx])
    #print(mlp_embedding)
    combined_embedding = torch.cat((gcn_embedding, mlp_embedding), dim=1)
    #print(combined_embedding)
    mlp_emb = mlp_2(combined_embedding)
    #print(mlp_emb)
    loss = F.nll_loss(mlp_emb, data.y.squeeze()[train_idx])
    #loss = F.nll_loss(combined_embedding, data.y.squeeze()[train_idx])
    loss.backward()
    optimizer_mlp2.step()
    optimizer.step()
    optimizer_mlp.step()
    

    return loss.item()


def ACC(Prediction, Label):
    correct = Prediction.view(-1).eq(Label).sum().item()
    total=len(Label)
    return correct / total



@torch.no_grad()
def test(model,mlp_model,mlp_2,data, train_idx,valid_idx,test_idx):
    model.eval()
    mlp_model.eval()
    mlp_2.eval()

    gcn_out = model(data.x, data.adj_t)
    #print(gcn_out[0])
    mlp_out=mlp_model(data.topo)
    #print(mlp_out)
    #out=torch.cat((gcn_out,mlp_out),dim=1)
    Com=torch.cat((gcn_out,mlp_out),dim=1)
    out=mlp_2(Com)
    y_pred = out.argmax(dim=-1, keepdim=True)
    #print(y_pred[0])
    y_pred=y_pred.view(-1)
    train_acc=ACC(data.y[train_idx],y_pred[train_idx])
    valid_acc=ACC(data.y[valid_idx],y_pred[valid_idx])
    test_acc =ACC(data.y[test_idx],y_pred[test_idx])
    return train_acc, valid_acc, test_acc

class objectview(object):
    def __init__(self, d):
        self.__dict__ = d

In [26]:
def main():
    args={'model_type': 'GCN', 'dataset': 'cora', 'num_layers': 2, 'heads': 8, 
         'batch_size': 32, 'hidden_channels': 32, 'dropout': 0.5, 'epochs': 200, 
         'opt': 'adam', 'opt_scheduler': 'none', 'opt_restart': 0,'runs':10, 'log_steps':1,
         'weight_decay': 5e-4, 'lr': 0.01,'hidden_channels_mlp': 50,'dropout_mlp': 0.5,'num_layers_mlp': 3}

    args = objectview(args)
    print(args)
    # call the dataset here with x,y,train_mask,test_mask,Val_mask, and Adj
    # To add extra feature we can simply update data.x=new fev tensor or we can add new feature
    #dataset = Planetoid(root='/tmp/cora', name='Cora',transform=T.ToSparseTensor())
    #data = dataset[0]
    X = data.topo
    y_true = data.y
    data.adj_t = data.adj_t.to_symmetric()
    
    train_idx = np.where(data.train_mask)[0]
    valid_idx = np.where(data.val_mask)[0]
    test_idx = np.where(data.test_mask)[0]
    
    model = GAT(data.num_features, args.hidden_channels,10, args.num_layers,args.dropout,args.heads)
    mlp_model = MLP(X.size(-1), args.hidden_channels_mlp, 5,args.num_layers_mlp, args.dropout_mlp)
    #print(mlp_model.parameters())
    mlp_2 = MLP2(15, 100, dataset.num_classes,4, 0.3)

    logger = Logger(args.runs, args)

    for run in range(args.runs):
        model.reset_parameters()
        mlp_model.reset_parameters_mlp()
        mlp_2.reset_parameters_mlp2()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        optimizer_mlp=torch.optim.Adam(mlp_model.parameters(), lr=0.001)
        optimizer_mlp2=torch.optim.Adam(mlp_2.parameters(), lr=0.001)
        for epoch in range(1, 1 + args.epochs):
            loss = train(model,mlp_model,mlp_2,data, train_idx, optimizer,optimizer_mlp,optimizer_mlp2)
            result = test(model,mlp_model,mlp_2,data, train_idx,valid_idx,test_idx)
            logger.add_result(run, result)

            if epoch % args.log_steps == 0:
                train_acc, valid_acc, test_acc = result
                #print(f'Run: {run + 1:02d}, 'f'Epoch: {epoch:02d}, 'f'Loss: {loss:.4f}, 'f'Train: {100 * train_acc:.2f}%, '
                      #f'Valid: {100 * valid_acc:.2f}% '
                      #f'Test: {100 * test_acc:.2f}%')

        logger.print_statistics(run)
    logger.print_statistics()


if __name__ == "__main__":
    main()

<__main__.objectview object at 0x296639b10>
Run 01:
Highest Train: 91.67
Highest Valid: 77.80
  Final Train: 88.33
   Final Test: 74.00
Run 02:
Highest Train: 95.00
Highest Valid: 76.60
  Final Train: 90.00
   Final Test: 73.90
Run 03:
Highest Train: 88.33
Highest Valid: 80.60
  Final Train: 88.33
   Final Test: 79.80
Run 04:
Highest Train: 91.67
Highest Valid: 80.60
  Final Train: 91.67
   Final Test: 79.40
Run 05:
Highest Train: 81.67
Highest Valid: 76.60
  Final Train: 75.00
   Final Test: 73.80
Run 06:
Highest Train: 88.33
Highest Valid: 81.00
  Final Train: 88.33
   Final Test: 78.80
Run 07:
Highest Train: 91.67
Highest Valid: 81.40
  Final Train: 88.33
   Final Test: 78.70
Run 08:
Highest Train: 91.67
Highest Valid: 79.00
  Final Train: 85.00
   Final Test: 77.10
Run 09:
Highest Train: 75.00
Highest Valid: 77.00
  Final Train: 75.00
   Final Test: 75.80
Run 10:
Highest Train: 91.67
Highest Valid: 76.60
  Final Train: 90.00
   Final Test: 74.30
All runs:
Highest Train: 88.67 ± 5.9

In [27]:
dataset = Planetoid(root='/tmp/PubMed', name='PubMed',transform=T.ToSparseTensor())
data = dataset[0]
data.topo=Topo_fe
print(data)

Data(x=[19717, 500], y=[19717], train_mask=[19717], val_mask=[19717], test_mask=[19717], adj_t=[19717, 19717, nnz=88648], topo=[19717, 30])


In [28]:
def main():
    args={'model_type': 'GCN', 'dataset': 'cora', 'num_layers': 2, 'heads': 8, 
         'batch_size': 32, 'hidden_channels': 32, 'dropout': 0.5, 'epochs': 200, 
         'opt': 'adam', 'opt_scheduler': 'none', 'opt_restart': 0,'runs':10, 'log_steps':1,
         'weight_decay': 5e-4, 'lr': 0.01,'hidden_channels_mlp': 50,'dropout_mlp': 0.5,'num_layers_mlp': 3}

    args = objectview(args)
    print(args)
    # call the dataset here with x,y,train_mask,test_mask,Val_mask, and Adj
    # To add extra feature we can simply update data.x=new fev tensor or we can add new feature
    #dataset = Planetoid(root='/tmp/cora', name='Cora',transform=T.ToSparseTensor())
    #data = dataset[0]
    X = data.topo
    y_true = data.y
    data.adj_t = data.adj_t.to_symmetric()
    
    train_idx = np.where(data.train_mask)[0]
    valid_idx = np.where(data.val_mask)[0]
    test_idx = np.where(data.test_mask)[0]
    
    model = GAT(data.num_features, args.hidden_channels,10, args.num_layers,args.dropout,args.heads)
    mlp_model = MLP(X.size(-1), args.hidden_channels_mlp, 5,args.num_layers_mlp, args.dropout_mlp)
    #print(mlp_model.parameters())
    mlp_2 = MLP2(15, 100, dataset.num_classes,4, 0.3)

    logger = Logger(args.runs, args)

    for run in range(args.runs):
        model.reset_parameters()
        mlp_model.reset_parameters_mlp()
        mlp_2.reset_parameters_mlp2()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        optimizer_mlp=torch.optim.Adam(mlp_model.parameters(), lr=0.001)
        optimizer_mlp2=torch.optim.Adam(mlp_2.parameters(), lr=0.001)
        for epoch in range(1, 1 + args.epochs):
            loss = train(model,mlp_model,mlp_2,data, train_idx, optimizer,optimizer_mlp,optimizer_mlp2)
            result = test(model,mlp_model,mlp_2,data, train_idx,valid_idx,test_idx)
            logger.add_result(run, result)

            if epoch % args.log_steps == 0:
                train_acc, valid_acc, test_acc = result
                #print(f'Run: {run + 1:02d}, 'f'Epoch: {epoch:02d}, 'f'Loss: {loss:.4f}, 'f'Train: {100 * train_acc:.2f}%, '
                      #f'Valid: {100 * valid_acc:.2f}% '
                      #f'Test: {100 * test_acc:.2f}%')

        logger.print_statistics(run)
    logger.print_statistics()


if __name__ == "__main__":
    main()

<__main__.objectview object at 0x287f4abc0>
Run 01:
Highest Train: 100.00
Highest Valid: 76.20
  Final Train: 100.00
   Final Test: 75.60
Run 02:
Highest Train: 100.00
Highest Valid: 77.20
  Final Train: 100.00
   Final Test: 75.20
Run 03:
Highest Train: 100.00
Highest Valid: 77.40
  Final Train: 100.00
   Final Test: 75.00
Run 04:
Highest Train: 100.00
Highest Valid: 77.60
  Final Train: 100.00
   Final Test: 76.30
Run 05:
Highest Train: 100.00
Highest Valid: 77.00
  Final Train: 100.00
   Final Test: 75.70
Run 06:
Highest Train: 100.00
Highest Valid: 75.80
  Final Train: 100.00
   Final Test: 74.90
Run 07:
Highest Train: 100.00
Highest Valid: 77.60
  Final Train: 100.00
   Final Test: 76.80
Run 08:
Highest Train: 100.00
Highest Valid: 76.80
  Final Train: 100.00
   Final Test: 76.60
Run 09:
Highest Train: 100.00
Highest Valid: 77.00
  Final Train: 100.00
   Final Test: 75.60
Run 10:
Highest Train: 100.00
Highest Valid: 76.00
  Final Train: 100.00
   Final Test: 75.90
All runs:
Highes