In [1]:
import numpy as np
import os.path as osp
import torch
import torch.nn.functional as F
from torch_geometric.nn import SplineConv
from torch_geometric.data import Data
from random import shuffle, randint
import networkx as nx
import matplotlib.pyplot as plt
import random

In [2]:
labels = []
N = 1000 
nodes = range(0, N)
node_features = []
edge_features = []

for node in nodes:
  
  #spammer 
  if random.random() > 0.5:
    #more likely to have many connections with a maximum of 1/5 of the nodes in the graph 
    nb_nbrs = int(random.random() * (N/5))
    #more likely to have sent many bytes
    node_features.append((random.random()+1) / 2.)
    #more likely to have a high trust value 
    edge_features += [(random.random()+2)/3.] * nb_nbrs
    #associate a label 
    labels.append(1)
    
  #non-spammer 
  else:
    #at most connected to 10 nbrs 
    nb_nbrs = int(random.random() * 10 + 1)
    #associate more bytes and random bytes 
    node_features.append(random.random())
    edge_features += [random.random()] * nb_nbrs
    labels.append(0)
  
  #connect to some random nodes 
  nbrs = np.random.choice(nodes, size = nb_nbrs)
  nbrs = nbrs.reshape((1, nb_nbrs))
  
  #add the edges of nbrs 
  node_edges = np.concatenate([np.ones((1, nb_nbrs), dtype = np.int32) * node, nbrs], axis = 0)
  
  #add the overall edges 
  if node == 0:
    edges = node_edges
  else:
    edges = np.concatenate([edges, node_edges], axis = 1)

In [3]:
x = torch.tensor(np.expand_dims(node_features, 1), dtype=torch.float)
y = torch.tensor(labels, dtype=torch.long)

edge_index = torch.tensor(edges, dtype=torch.long)
edge_attr = torch.tensor(np.expand_dims(edge_features, 1), dtype=torch.float)

data = Data(x = x, edge_index=edge_index, y =y, edge_attr=edge_attr )
print(data)

Data(edge_attr=[52577, 1], edge_index=[2, 52577], x=[1000, 1], y=[1000])


In [15]:
data.train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
data.train_mask[:int(0.8 * data.num_nodes)] = 1 #train only on the 80% nodes
data.test_mask = torch.zeros(data.num_nodes, dtype=torch.bool) #test on 20 % nodes 
data.test_mask[- int(0.2 * data.num_nodes):] = 1

## SplineConv

In [16]:
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = SplineConv(1, 16, dim=1, kernel_size=5)
        self.conv2 = SplineConv(16, 32, dim=1, kernel_size=5)
        self.conv3 = SplineConv(32, 64, dim=1, kernel_size=7)
        self.conv4 = SplineConv(64, 128, dim=1, kernel_size=7)
        self.conv5 = SplineConv(128, 128, dim=1, kernel_size=11)
        self.conv6 = SplineConv(128, 2, dim=1, kernel_size=11)

    def forward(self):
        x, edge_index, edge_attr = data.x, data.edge_index, data.edge_attr
        x = F.elu(self.conv1(x, edge_index, edge_attr))
        x = self.conv2(x, edge_index, edge_attr)
        x = F.elu(self.conv3(x, edge_index, edge_attr))
        x = self.conv4(x, edge_index, edge_attr)
        x = F.elu(self.conv5(x, edge_index, edge_attr))
        x = self.conv6(x, edge_index, edge_attr)
        x = F.dropout(x, training = self.training)
        return F.log_softmax(x, dim=1)

In [17]:
def evaluate_loss(mode = 'train'):
  
  #use masking for loss evaluation 
  if mode == 'train':
    loss = F.nll_loss(model()[data.train_mask], data.y[data.train_mask])
  else:
    loss = F.nll_loss(model()[data.test_mask], data.y[data.test_mask])
  return loss

def train():
  #training 
  model.train()
  optimizer.zero_grad()
  loss = evaluate_loss()
  loss.backward()
  optimizer.step()
  return loss.detach().cpu().numpy() 

def test():
  #testing 
  model.eval()
  logits, accs = model(), []
  loss = evaluate_loss(mode = 'test').detach().cpu().numpy() 

  for _, mask in data('train_mask', 'test_mask'):
      pred = logits[mask].max(1)[1]
      acc = pred.eq(data.y[mask]).sum().item() / mask.sum().item()
      accs.append(acc)
  return [loss] + accs

In [18]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model, data = Net().to(device), data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [19]:
losses = []
for epoch in range(1, 200):
  train_loss = train()
  log = 'Epoch: {:03d}, train_loss: {:.3f}, test_loss:{:.3f}, train_acc: {:.2f}, test_acc: {:.2f}'
  test_loss = test()[0]
  losses.append([train_loss,test_loss])
  print(log.format(epoch, train_loss, *test()))

Epoch: 001, train_loss: 0.693, test_loss:0.693, train_acc: 0.52, test_acc: 0.49
Epoch: 002, train_loss: 0.693, test_loss:0.693, train_acc: 0.52, test_acc: 0.49
Epoch: 003, train_loss: 0.692, test_loss:0.693, train_acc: 0.52, test_acc: 0.49
Epoch: 004, train_loss: 0.693, test_loss:0.694, train_acc: 0.52, test_acc: 0.49
Epoch: 005, train_loss: 0.692, test_loss:0.694, train_acc: 0.52, test_acc: 0.49
Epoch: 006, train_loss: 0.693, test_loss:0.693, train_acc: 0.52, test_acc: 0.49
Epoch: 007, train_loss: 0.691, test_loss:0.693, train_acc: 0.52, test_acc: 0.49
Epoch: 008, train_loss: 0.690, test_loss:0.693, train_acc: 0.52, test_acc: 0.49
Epoch: 009, train_loss: 0.692, test_loss:0.693, train_acc: 0.52, test_acc: 0.49
Epoch: 010, train_loss: 0.690, test_loss:0.692, train_acc: 0.52, test_acc: 0.49
Epoch: 011, train_loss: 0.690, test_loss:0.690, train_acc: 0.52, test_acc: 0.49
Epoch: 012, train_loss: 0.688, test_loss:0.688, train_acc: 0.52, test_acc: 0.49
Epoch: 013, train_loss: 0.687, test_loss

Epoch: 105, train_loss: 0.572, test_loss:0.556, train_acc: 0.72, test_acc: 0.69
Epoch: 106, train_loss: 0.572, test_loss:0.557, train_acc: 0.73, test_acc: 0.70
Epoch: 107, train_loss: 0.583, test_loss:0.558, train_acc: 0.73, test_acc: 0.70
Epoch: 108, train_loss: 0.590, test_loss:0.557, train_acc: 0.73, test_acc: 0.70
Epoch: 109, train_loss: 0.589, test_loss:0.554, train_acc: 0.73, test_acc: 0.70
Epoch: 110, train_loss: 0.578, test_loss:0.551, train_acc: 0.72, test_acc: 0.70
Epoch: 111, train_loss: 0.573, test_loss:0.550, train_acc: 0.72, test_acc: 0.70
Epoch: 112, train_loss: 0.582, test_loss:0.549, train_acc: 0.73, test_acc: 0.70
Epoch: 113, train_loss: 0.572, test_loss:0.549, train_acc: 0.73, test_acc: 0.70
Epoch: 114, train_loss: 0.589, test_loss:0.549, train_acc: 0.73, test_acc: 0.71
Epoch: 115, train_loss: 0.580, test_loss:0.547, train_acc: 0.74, test_acc: 0.71
Epoch: 116, train_loss: 0.570, test_loss:0.544, train_acc: 0.73, test_acc: 0.70
Epoch: 117, train_loss: 0.569, test_loss