In [1]:
!pip install dgl

Collecting dgl
  Downloading dgl-0.6.1-cp37-cp37m-manylinux1_x86_64.whl (4.4 MB)
[K     |████████████████████████████████| 4.4 MB 33.9 MB/s 
Installing collected packages: dgl
Successfully installed dgl-0.6.1


In [2]:
import dgl
import dgl.nn as dglnn
import dgl.function as fn

import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np

Setting the default backend to "pytorch". You can change it in the ~/.dgl/config.json file or export the DGLBACKEND environment variable.  Valid options are: pytorch, mxnet, tensorflow (all lowercase)


DGL backend not selected or invalid.  Assuming PyTorch for now.
Using backend: pytorch


In [3]:
def trace(x, name):
  print(f'Shape of {name}: {x.shape}')

In [4]:
n_users = 1000
n_items = 500
n_follows = 3000
n_clicks = 5000
n_dislikes = 500
n_hetero_features = 10
n_user_classes = 5
n_max_clicks = 10

follow_src = np.random.randint(0, n_users, n_follows)
follow_dst = np.random.randint(0, n_users, n_follows)
click_src = np.random.randint(0, n_users, n_clicks)
click_dst = np.random.randint(0, n_items, n_clicks)
dislike_src = np.random.randint(0, n_users, n_dislikes)
dislike_dst = np.random.randint(0, n_items, n_dislikes)

In [5]:
hetero_graph = dgl.heterograph({
    ('user', 'click', 'item') : (click_src, click_dst),
    ('item', 'clicked_by', 'user'): (click_dst, click_src)
}, )

In [6]:
print(f'Total number of nodes {hetero_graph.num_nodes()}')
print(f'Total number of edges: {hetero_graph.num_edges()}')

Total number of nodes 1500
Total number of edges: 10000


In [7]:
# Node features
hetero_graph.nodes['user'].data['features'] = torch.randn(n_users, n_hetero_features)

# item features
hetero_graph.nodes['item'].data['features'] = torch.randn(n_items, n_hetero_features)

# Train mask
hetero_graph.nodes['user'].data['train_mask'] = torch.zeros(n_users, dtype=torch.bool).bernoulli(0.6)
hetero_graph.edges['click'].data['train_mask'] = torch.zeros(n_clicks, dtype=torch.bool).bernoulli(0.6)

In [9]:
class HeteroDotProductPredictor(nn.Module):
  def forward(self, graph, h, etype):
    # h contains the node representations for each node type computed from
    # the GNN defined in the previous section (Section 5.1).
    with graph.local_scope():
      graph.ndata['h'] = h
      graph.apply_edges(fn.u_dot_v('h', 'h', 'score'), etype=etype)
      return graph.edges[etype].data['score']

In [27]:
def construct_negative_graph(graph, k, mask, etype):
  utype, _, vtype = etype
  src, dst = graph.edges(etype=etype)
  src = src[mask]
  neg_src = src.repeat_interleave(k)
  neg_dst = torch.randint(0, graph.num_nodes(vtype), (len(src) * k,))
  return dgl.heterograph(
    {etype: (neg_src, neg_dst)},
    num_nodes_dict={ntype: graph.num_nodes(ntype) for ntype in graph.ntypes})

In [19]:
class RGCN(nn.Module):
  def __init__(self, in_feats, hid_feats, out_feats, rel_names):
    super().__init__()

    self.conv1 = dglnn.HeteroGraphConv({
        rel: dglnn.GraphConv(in_feats, hid_feats)
        for rel in rel_names}, aggregate='sum')
    self.conv2 = dglnn.HeteroGraphConv({
        rel: dglnn.GraphConv(hid_feats, out_feats)
        for rel in rel_names}, aggregate='sum')

  def forward(self, graph, inputs):
    # inputs are features of nodes
    h = self.conv1(graph, inputs)
    h = {k: F.relu(v) for k, v in h.items()}
    h = self.conv2(graph, h)
    return h

In [35]:
class Model(nn.Module):
  def __init__(self, in_features, hidden_features, out_features, rel_names):
    super().__init__()
    self.sage = RGCN(in_features, hidden_features, out_features, rel_names)
    self.pred = HeteroDotProductPredictor()
  def forward(self, g, neg_g, x, etype):
    h = self.sage(g, x)
    return self.pred(g, h, etype), self.pred(neg_g, h, etype)

In [36]:
def compute_loss(pos_score, neg_score):
    # Margin loss
    
    print(pos_score.unsqueeze(1).shape)
    n_edges = pos_score.shape[0]
    print(neg_score.view(n_edges, -1).shape)

    return (1 - pos_score.unsqueeze(1) + neg_score.view(n_edges, -1)).clamp(min=0).mean()

k = 5
model = Model(10, 20, 5, hetero_graph.etypes)
user_feats = hetero_graph.nodes['user'].data['features']
trace(user_feats,'user_feats')
item_feats = hetero_graph.nodes['item'].data['features']
trace(item_feats,'item_feats')
node_features = {'user': user_feats, 'item': item_feats}
opt = torch.optim.Adam(model.parameters())
for epoch in range(10):
  train_mask = hetero_graph.edges['click'].data['train_mask']
  negative_graph = construct_negative_graph(hetero_graph, k, train_mask, ('user', 'click', 'item'))
  pos_score, neg_score = model(hetero_graph, negative_graph, node_features, ('user', 'click', 'item'))
  break
  trace(pos_score, 'pos_score')
  trace(neg_score, 'neg_score')
  loss = compute_loss(pos_score[train_mask], neg_score)
  opt.zero_grad()
  loss.backward()
  opt.step()
  print(loss.item())

Shape of user_feats: torch.Size([1000, 10])
Shape of item_feats: torch.Size([500, 10])
torch.Size([1000, 5])
torch.Size([500, 5])


In [37]:
hetero_graph

Graph(num_nodes={'item': 500, 'user': 1000},
      num_edges={('item', 'clicked_by', 'user'): 5000, ('user', 'click', 'item'): 5000},
      metagraph=[('item', 'user', 'clicked_by'), ('user', 'item', 'click')])