In [1]:
from tqdm import tqdm
import datetime
import numpy as np
import torch
from torch.utils.data import DataLoader
#from models import GNN
from dataset import MineDataset
#from pytorch_net.util import Batch
import sys
import dgl
import torch.nn as nn
import torch.nn.functional as F
import itertools
import numpy as np
import scipy.sparse as sp
from matplotlib import style
style.use('dark_background')

data_dir = "data"
# Seed every RNG this notebook draws from. NumPy's global RNG drives the edge
# permutation and negative sampling further down; torch's RNG drives the
# model's weight initialisation. torch.manual_seed stays last so the cell
# still displays the torch Generator.
np.random.seed(0)
torch.manual_seed(0)

<torch._C.Generator at 0x20b98a53870>

In [2]:
# Build the dataset, then merge the graphs yielded by dataset[:20] into a
# single batched graph.
dataset = MineDataset(data_dir=data_dir, input_steps=3, output_steps=3)
first_graphs = list(dataset[:20])
bhg = dgl.batch(first_graphs)

In [3]:
#HOW TO HAVE BOTH
# Keep only the 'electric' edges between 'neuron_basic' nodes of the batched graph.
g = dgl.edge_type_subgraph(bhg, [('neuron_basic', 'electric', 'neuron_basic'),])

#g = dataset[0]
# Split edge set for training and testing
u, v = g.edges()

eids = np.arange(g.number_of_edges())
eids = np.random.permutation(eids)
test_size = int(len(eids) * 0.1)  # hold out 10% of the positive edges
train_size = g.number_of_edges() - test_size
test_pos_u, test_pos_v = u[eids[:test_size]], v[eids[:test_size]]
train_pos_u, train_pos_v = u[eids[test_size:]], v[eids[test_size:]]

# Find all negative edges and split them for training and testing
#am I removing something here? due to lack of edge possibly?
num_nodes = g.number_of_nodes()
adj = sp.coo_matrix((np.ones(len(u)), (u.numpy(), v.numpy())), shape=(num_nodes, num_nodes))
#print(adj.todense().shape)
# A (u, v) pair is a negative candidate only when it carries no edge at all and
# is not a self-pair. Test the edge count for exact zero instead of computing
# `1 - adj - I`: the COO -> dense conversion sums duplicate (u, v) entries, so
# parallel edges (count > 1) and existing self-loops would otherwise produce
# non-zero values and be sampled as "negative" edges.
adj_dense = adj.toarray()
adj_neg = (adj_dense == 0) & ~np.eye(num_nodes, dtype=bool)
neg_u, neg_v = np.where(adj_neg)
# NOTE(review): g comes from a batch of graphs, so these candidates include
# node pairs that belong to different batched graphs -- confirm that is intended.

# Draw as many negatives as there are positive edges (sampled with replacement).
neg_eids = np.random.choice(len(neg_u), g.number_of_edges()) #sampling
test_neg_u, test_neg_v = neg_u[neg_eids[:test_size]], neg_v[neg_eids[:test_size]]
train_neg_u, train_neg_v = neg_u[neg_eids[test_size:]], neg_v[neg_eids[test_size:]]

#remove edges in test set from original graph
train_g = dgl.remove_edges(g, eids[:test_size])

# Positive / negative edge sets as graphs over the same node set, so the node
# embeddings computed on train_g can be scored on each of them.
train_pos_g = dgl.graph((train_pos_u, train_pos_v), num_nodes=num_nodes)
train_neg_g = dgl.graph((train_neg_u, train_neg_v), num_nodes=num_nodes)

test_pos_g = dgl.graph((test_pos_u, test_pos_v), num_nodes=num_nodes)
test_neg_g = dgl.graph((test_neg_u, test_neg_v), num_nodes=num_nodes)


In [14]:
# ----------- 2. create model -------------- #
# build a two-layer GraphSAGE model
from dgl.nn import SAGEConv
class GraphSAGE(nn.Module):
    """Stack of ``SAGEConv`` layers using the 'mean' aggregator.

    Parameters
    ----------
    in_feats : int
        Width of the input node features fed to the first layer.
    h_feats : int
        Output width of every layer (and input width of every layer after the first).
    layers : int, default 2
        Number of ``SAGEConv`` layers; must be at least 1.

    Raises
    ------
    ValueError
        If ``layers`` is smaller than 1.
    """

    def __init__(self, in_feats, h_feats, layers=2):
        super(GraphSAGE, self).__init__()
        # With zero layers forward() would have nothing to return, so fail
        # early with a clear message instead of at the first forward pass.
        if layers < 1:
            raise ValueError("GraphSAGE needs at least one layer, got {}".format(layers))
        self.layers = layers
        for i in range(self.layers):
            # Registered as conv0, conv1, ... via setattr so nn.Module tracks
            # each layer's parameters under those attribute names.
            setattr(self,"conv{}".format(i),SAGEConv(in_feats if i == 0 else h_feats, h_feats, 'mean'))

    def forward(self, g, in_feat):
        """Run ``in_feat`` through every layer on graph ``g``.

        ReLU is applied between layers but not after the last one, so the
        returned embeddings are unbounded.
        """
        h = in_feat
        last = self.layers - 1
        for i in range(self.layers):
            h = getattr(self,"conv{}".format(i))(g, h)
            if i != last:
                h = F.relu(h)
        return h
    
import dgl.function as fn

class DotPredictor(nn.Module):
    """Edge scorer: dot product of the source and destination node features."""

    def forward(self, g, h):
        """Return one score per edge of ``g`` given node features ``h``."""
        with g.local_scope():
            g.ndata['h'] = h
            # Writes a new edge feature 'score' holding the dot product of the
            # source node's 'h' and the destination node's 'h'.
            dot_product = fn.u_dot_v('h', 'h', 'score')
            g.apply_edges(dot_product)
            edge_scores = g.edata['score']
            #print(g.edata['score']) #only works with input/output = 1???
            # u_dot_v yields a 1-element vector per edge; keep only that element.
            return edge_scores[:, 0]
        
# Input width is the product of dimensions 1 and 2 of the 'feats' node tensor;
# hidden width is 16.
feat_shape = train_g.ndata['feats'].shape
model = GraphSAGE(feat_shape[2] * feat_shape[1], 16)
# You can replace DotPredictor with MLPPredictor.
#pred = MLPPredictor(16)
pred = DotPredictor()

def compute_loss(pos_score, neg_score):
    """Binary cross-entropy (on logits) for link prediction.

    Parameters
    ----------
    pos_score : torch.Tensor
        Raw scores (logits) of edges that exist; target label 1.
    neg_score : torch.Tensor
        Raw scores (logits) of sampled non-edges; target label 0.

    Returns
    -------
    torch.Tensor
        Scalar mean loss over all positive and negative scores.
    """
    scores = torch.cat([pos_score, neg_score])
    # Build the targets from the score tensors themselves so they share the
    # scores' device and dtype (plain torch.ones/zeros would always be CPU float32).
    labels = torch.cat([torch.ones_like(pos_score), torch.zeros_like(neg_score)])
    return F.binary_cross_entropy_with_logits(scores, labels)

def compute_auc(pos_score, neg_score):
    """ROC AUC of the edge scores against their positive/negative labels.

    Parameters
    ----------
    pos_score : torch.Tensor
        Scores of edges that exist (label 1).
    neg_score : torch.Tensor
        Scores of sampled non-edges (label 0).

    Returns
    -------
    float
        Value returned by ``sklearn.metrics.roc_auc_score``.
    """
    # Imported here so the function does not depend on a later notebook cell
    # having already imported roc_auc_score into the global namespace.
    from sklearn.metrics import roc_auc_score

    # detach().cpu() lets this accept tensors that still carry autograd
    # history or live on an accelerator; .numpy() alone raises for both.
    scores = torch.cat([pos_score, neg_score]).detach().cpu().numpy()
    labels = torch.cat(
        [torch.ones(pos_score.shape[0]), torch.zeros(neg_score.shape[0])]).numpy()
    return roc_auc_score(labels, scores)

def compute_acc(pos_score, neg_score):
    """Fraction of edges classified correctly at a 0.5 probability threshold.

    Parameters
    ----------
    pos_score : torch.Tensor
        Raw scores (logits) of edges that exist (label 1).
    neg_score : torch.Tensor
        Raw scores (logits) of sampled non-edges (label 0).

    Returns
    -------
    numpy.float64
        Mean of the per-edge "prediction equals label" indicator.
    """
    # detach().cpu() lets this accept tensors that still carry autograd
    # history or live on an accelerator; .numpy() alone raises for both.
    scores = torch.cat([pos_score, neg_score]).detach().cpu().numpy()
    labels = np.concatenate(
        [np.ones(pos_score.shape[0]), np.zeros(neg_score.shape[0])])
    # sigmoid(s) > 0.5 exactly when s > 0, so threshold the logit directly;
    # this avoids overflow in exp(-s) for large negative scores.
    predictions = scores > 0
    return (predictions == labels).mean()

In [17]:
# ----------- 3. set up loss and optimizer -------------- #
# Imported once up front rather than on every logging step inside the loop.
from sklearn.metrics import roc_auc_score

epochs = 10000
log_every = 100  # print loss / test metrics every this many epochs
# The loss itself is computed inside the training loop below.
optimizer = torch.optim.Adam(itertools.chain(model.parameters(), pred.parameters()), lr=0.001)
#scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs)

# Flatten everything after the node axis into one feature vector per node.
# The result does not change between epochs, so compute it once.
#squeezing out timesteps????
node_feats = train_g.ndata['feats'].reshape(train_g.ndata['feats'].shape[0], -1)

# ----------- 4. training -------------------------------- #
# NOTE(review): all_logits is never appended to in this cell; kept only in
# case a later cell refers to it.
all_logits = []
for e in range(epochs):
    # forward
    h = model(train_g, node_feats)
    pos_score = pred(train_pos_g, h)
    neg_score = pred(train_neg_g, h)
    loss = compute_loss(pos_score, neg_score)

    # backward
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    #scheduler.step()
    if e % log_every == 0:
        print('In epoch {}, loss: {}'.format(e, loss))
        # Test metrics use the same embeddings h that produced the loss above.
        with torch.no_grad():
            pos_score = pred(test_pos_g, h)
            neg_score = pred(test_neg_g, h)
            print('AUC', compute_auc(pos_score, neg_score))
            print('ACC', compute_acc(pos_score, neg_score))
# ----------- 5. check results ------------------------ #
with torch.no_grad():
    # Recompute the embeddings: the h left over from the loop was produced
    # before the final optimizer step (and does not exist if epochs == 0).
    h = model(train_g, node_feats)
    pos_score = pred(test_pos_g, h)
    neg_score = pred(test_neg_g, h)
    print('AUC', compute_auc(pos_score, neg_score))


# Thumbnail Courtesy: Link Prediction with Neo4j, Mark Needham
# sphinx_gallery_thumbnail_path = '_static/blitz_4_link_predict.png'

In epoch 0, loss: 0.567095160484314
AUC 0.7211243998628258
ACC 0.6425925925925926
In epoch 100, loss: 0.5665022134780884
AUC 0.7210515260631002
ACC 0.6412037037037037
In epoch 200, loss: 0.5662574768066406
AUC 0.7214681927297667
ACC 0.6435185185185185
In epoch 300, loss: 0.5660574436187744
AUC 0.7215685013717421
ACC 0.6425925925925926
In epoch 400, loss: 0.5658435225486755
AUC 0.7216756687242799
ACC 0.6453703703703704
In epoch 500, loss: 0.5656073093414307
AUC 0.7217245370370371
ACC 0.6449074074074074
In epoch 600, loss: 0.5657152533531189
AUC 0.7221317729766804
ACC 0.6439814814814815
In epoch 700, loss: 0.5654144287109375
AUC 0.7231605795610425
ACC 0.6430555555555556
In epoch 800, loss: 0.5648577213287354
AUC 0.7219834533607681
ACC 0.6439814814814815
In epoch 900, loss: 0.5661309361457825
AUC 0.7202276234567901
ACC 0.6407407407407407
In epoch 1000, loss: 0.5658532381057739
AUC 0.7246369170096022
ACC 0.6439814814814815
In epoch 1100, loss: 0.5659124851226807
AUC 0.725807184499314
ACC 0

KeyboardInterrupt: 

In [11]:
# Convert the raw edge scores (logits) to probabilities with the built-in,
# numerically stable sigmoid instead of spelling out 1 / (1 + exp(-x)).
print(torch.sigmoid(pos_score))

tensor([0.0728, 1.0000, 0.0046,  ..., 1.0000, 1.0000, 0.9998],
       grad_fn=<MulBackward0>)
