In [2]:
!pip install dgl

Collecting dgl
[?25l  Downloading https://files.pythonhosted.org/packages/71/c4/ce24841375cf4393787dbf9a645e271c19a03d2d9a0e5770b08ba76bcfde/dgl-0.6.1-cp37-cp37m-manylinux1_x86_64.whl (4.4MB)
[K     |████████████████████████████████| 4.4MB 29.4MB/s 
Installing collected packages: dgl
Successfully installed dgl-0.6.1


In [3]:
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F
import itertools
import numpy as np
import scipy.sparse as sp

DGL backend not selected or invalid.  Assuming PyTorch for now.


Setting the default backend to "pytorch". You can change it in the ~/.dgl/config.json file or export the DGLBACKEND environment variable.  Valid options are: pytorch, mxnet, tensorflow (all lowercase)


Using backend: pytorch


In [4]:
import dgl
import pandas as pd
import torch
import torch.nn.functional as F

def load_zachery():
    nodes_data = pd.read_csv('/content/drive/MyDrive/data/nodes.csv')
    edges_data = pd.read_csv('/content/drive/MyDrive/data/edges.csv')
    src = edges_data['Src'].to_numpy()
    dst = edges_data['Dst'].to_numpy()
    g = dgl.graph((src, dst))
    club = nodes_data['Club'].to_list()
    # Convert to categorical integer values with 0 for 'Mr. Hi', 1 for 'Officer'.
    club = torch.tensor([c == 'Officer' for c in club]).long()
    # We can also convert it to one-hot encoding.
    club_onehot = F.one_hot(club)
    g.ndata.update({'club' : club, 'club_onehot' : club_onehot})
    return g


In [20]:
# from tutorial_utils import load_zachery

# ----------- 0. load graph -------------- #
g = load_zachery()
print(g)

# ----------- 1. node features -------------- #
node_embed = nn.Embedding(g.number_of_nodes(), 5)  
inputs = node_embed.weight                         
nn.init.xavier_uniform_(inputs)

Graph(num_nodes=34, num_edges=156,
      ndata_schemes={'club': Scheme(shape=(), dtype=torch.int64), 'club_onehot': Scheme(shape=(2,), dtype=torch.int64)}
      edata_schemes={})


Parameter containing:
tensor([[ 2.8252e-01,  2.4512e-02, -2.1755e-01, -1.9689e-02,  1.2318e-01],
        [-1.9171e-06,  1.6957e-01,  3.0073e-01,  1.4830e-01, -2.0591e-01],
        [ 2.7520e-01,  3.4707e-01,  3.3099e-02,  1.3570e-01,  1.8273e-01],
        [ 2.9553e-02, -3.2353e-01,  2.7862e-01, -1.9205e-01, -1.7681e-01],
        [-2.0779e-01,  2.1187e-01, -1.8658e-01, -1.3235e-01, -2.2849e-01],
        [ 2.9765e-01,  3.1790e-01, -9.3611e-02,  1.2384e-01,  1.4338e-01],
        [ 2.9307e-01, -1.1837e-01,  3.6452e-01, -2.7733e-01, -1.4249e-01],
        [-1.3736e-01,  3.7447e-01,  1.8029e-01,  3.1552e-01, -1.6521e-01],
        [-6.3813e-03,  1.2031e-01,  2.1149e-02, -2.7937e-01,  1.8282e-01],
        [-5.8726e-03, -1.4529e-01, -3.2130e-01,  2.6642e-01, -3.8183e-01],
        [ 1.8649e-01, -2.4855e-01, -1.1350e-01, -1.1749e-01, -3.6301e-01],
        [ 3.5097e-01, -3.1557e-01,  2.7449e-03, -2.9346e-01, -2.1572e-01],
        [ 1.2653e-01, -1.1478e-01, -3.6342e-01,  2.8542e-01, -3.2778e-01],
   

In [9]:
u, v = g.edges()
eids = np.arange(g.number_of_edges())
eids = np.random.permutation(eids)
test_pos_u, test_pos_v = u[eids[:50]], v[eids[:50]]
train_pos_u, train_pos_v = u[eids[50:]], v[eids[50:]]

In [11]:
adj = sp.coo_matrix((np.ones(len(u)), (u.numpy(), v.numpy())))
adj_neg = 1 - adj.todense() - np.eye(34)
neg_u, neg_v = np.where(adj_neg != 0)
neg_eids = np.random.choice(len(neg_u), 200)
test_neg_u, test_neg_v = neg_u[neg_eids[:50]], neg_v[neg_eids[:50]]
train_neg_u, train_neg_v = neg_u[neg_eids[50:]], neg_v[neg_eids[50:]]

In [13]:
train_u = torch.cat([torch.as_tensor(train_pos_u), torch.as_tensor(train_neg_u)])
train_v = torch.cat([torch.as_tensor(train_pos_v), torch.as_tensor(train_neg_v)])
train_label = torch.cat([torch.zeros(len(train_pos_u)), torch.ones(len(train_neg_u))])

test_u = torch.cat([torch.as_tensor(test_pos_u), torch.as_tensor(test_neg_u)])
test_v = torch.cat([torch.as_tensor(test_pos_v), torch.as_tensor(test_neg_v)])
test_label = torch.cat([torch.zeros(len(test_pos_u)), torch.ones(len(test_neg_u))])

In [15]:
from dgl.nn import SAGEConv

class GraphSAGE(nn.Module):
    def __init__(self, in_feats, h_feats):
        super(GraphSAGE, self).__init__()
        self.conv1 = SAGEConv(in_feats, h_feats, 'mean')
        self.conv2 = SAGEConv(h_feats, h_feats, 'mean')
    
    def forward(self, g, in_feat):
        h = self.conv1(g, in_feat)
        h = F.relu(h)
        h = self.conv2(g, h)
        return h
    
net = GraphSAGE(5, 16)

In [17]:

optimizer = torch.optim.Adam(itertools.chain(net.parameters(), node_embed.parameters()), lr=0.01)


all_logits = []
for e in range(100):
    
    logits = net(g, inputs)
    pred = torch.sigmoid((logits[train_u] * logits[train_v]).sum(dim=1))
    
 
    loss = F.binary_cross_entropy(pred, train_label)
    

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    all_logits.append(logits.detach())
    
    if e % 5 == 0:
        print('In epoch {}, loss: {}'.format(e, loss))

In epoch 0, loss: 0.0010626292787492275
In epoch 5, loss: 0.0002646318753249943
In epoch 10, loss: 7.40094983484596e-05
In epoch 15, loss: 2.759516610240098e-05
In epoch 20, loss: 1.2568804777401965e-05
In epoch 25, loss: 5.9403901104815304e-06
In epoch 30, loss: 3.7221120692265686e-06
In epoch 35, loss: 2.658270204847213e-06
In epoch 40, loss: 2.1073765310575254e-06
In epoch 45, loss: 1.7776803815650055e-06
In epoch 50, loss: 1.5453106243512593e-06
In epoch 55, loss: 1.363699425382947e-06
In epoch 60, loss: 1.2251631460458157e-06
In epoch 65, loss: 1.1199222171853762e-06
In epoch 70, loss: 1.03843058241182e-06
In epoch 75, loss: 9.755655128174112e-07
In epoch 80, loss: 9.241093721357174e-07
In epoch 85, loss: 8.773099580139387e-07
In epoch 90, loss: 8.384268426198105e-07
In epoch 95, loss: 8.002422760000627e-07


In [21]:
pred = torch.sigmoid((logits[test_u] * logits[test_v]).sum(dim=1))
print('Accuracy', ((pred >= 0.5) == test_label).sum().item() / len(pred))

Accuracy 0.85
