In [2]:
import dgl
from dgl import nn as gnn
import dgl.function as gf

import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np
import scipy

Using backend: pytorch


# Manipulate features in a heterogeneous graph

In [3]:
# Edge lists keyed by canonical edge type (source type, relation, destination type).
# Each value is a (source ids, destination ids) pair of int64 tensors.
drug_to_drug = (torch.tensor([0, 1]), torch.tensor([1, 2]))
drug_to_gene = (torch.tensor([0, 1]), torch.tensor([2, 3]))
drug_to_disease = (torch.tensor([0]), torch.tensor([2]))

data = {
    ('drug', 'interacts', 'drug'): drug_to_drug,
    ('drug', 'interacts', 'gene'): drug_to_gene,
    ('drug', 'treats', 'disease'): drug_to_disease,
}
data.keys()

dict_keys([('drug', 'interacts', 'drug'), ('drug', 'interacts', 'gene'), ('drug', 'treats', 'disease')])

In [4]:
# Build the heterograph from the edge dictionary. No node counts are passed;
# the repr below reports 3 disease / 3 drug / 4 gene nodes, i.e. the largest
# id used for each node type plus one.
g = dgl.heterograph(data)
g

Graph(num_nodes={'disease': 3, 'drug': 3, 'gene': 4},
      num_edges={('drug', 'interacts', 'drug'): 2, ('drug', 'interacts', 'gene'): 2, ('drug', 'treats', 'disease'): 1},
      metagraph=[('drug', 'drug', 'interacts'), ('drug', 'gene', 'interacts'), ('drug', 'disease', 'treats')])

In [5]:
# `etypes` lists bare relation names and can repeat ('interacts' appears twice below);
# `canonical_etypes` gives the unambiguous (src type, relation, dst type) triples.
g.ntypes, g.etypes, g.canonical_etypes

(['disease', 'drug', 'gene'],
 ['interacts', 'interacts', 'treats'],
 [('drug', 'interacts', 'drug'),
  ('drug', 'interacts', 'gene'),
  ('drug', 'treats', 'disease')])

In [6]:
# Feature storage for all node types and all edge types; everything is still empty here.
g.ndata, g.edata

({'disease': {}, 'drug': {}, 'gene': {}},
 {('drug', 'interacts', 'drug'): {}, ('drug', 'interacts', 'gene'): {}, ('drug', 'treats', 'disease'): {}})

In [7]:
### Set a node feature going node type -> feature name.
# Both statements store the same tensor under 'a' on the 'drug' nodes;
# they are two equivalent spellings (item assignment vs. dict-style update).
g.nodes['drug'].data['a'] = torch.tensor([2., 3, 5])
g.nodes['drug'].data.update({'a': torch.tensor([2., 3, 5])})

In [8]:
# Features currently stored on the 'drug' nodes.
g.nodes['drug'].data

{'a': tensor([2., 3., 5.])}

In [9]:
### g.ndata cannot be indexed by node type to view that type's features:
### g.ndata['drug'] yields {} (see output). Index by feature name first,
### then by node type, e.g. g.ndata['a']['drug'].
g.ndata['drug']

{}

In [10]:
### Set a feature going feature name -> node type.
# The assigned value is a dict {node type: tensor}. The second assignment adds
# 'h' for the 'gene' nodes and leaves the 'drug' entry in place (the next cell
# still reads the zeros back from 'drug').
g.ndata['h'] = {'drug': torch.zeros(3, 2)}
g.ndata['h'] = {'gene': torch.ones(4, 2)}

In [11]:
## Read features going feature name -> node type.
print(g.ndata['h']['drug'])
print(g.ndata['a']['drug'])

tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])
tensor([2., 3., 5.])


In [12]:
### Read the same features going node type -> feature name.
print(g.nodes['drug'].data['h'])
print(g.nodes['drug'].data['a'])
# Class of the per-type view object returned by g.nodes[...].
type(g.nodes['drug'])

tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])
tensor([2., 3., 5.])


dgl.view.NodeSpace

In [13]:
## Get the ids of all nodes of a given node type.
g.nodes('drug')

tensor([0, 1, 2])

In [14]:
# Canonical edge types, listed again because the next cell slices this list.
g.canonical_etypes

[('drug', 'interacts', 'drug'),
 ('drug', 'interacts', 'gene'),
 ('drug', 'treats', 'disease')]

In [15]:
#### Subgraph by edge type.
# Keeps only the first two canonical edge types, i.e. the two 'interacts'
# relations according to the listing in the previous cell.
eg = dgl.edge_type_subgraph(g, g.canonical_etypes[:2])

In [16]:
# The repr below no longer lists 'disease' nodes or the 'treats' relation.
eg

Graph(num_nodes={'drug': 3, 'gene': 4},
      num_edges={('drug', 'interacts', 'drug'): 2, ('drug', 'interacts', 'gene'): 2},
      metagraph=[('drug', 'drug', 'interacts'), ('drug', 'gene', 'interacts')])

# Synthetic dataset 

In [17]:
# Number of nodes per node type and number of edges per relation.
n_users = 1000
n_items = 3000
n_follows = 5000
n_clicks = 7000
n_dislike = 10000

# Feature widths for node types and edge types.
user_feat_dim = 20
item_feat_dim = 30
click_feat_dim = 25
follow_feat_dim = 45
dislike_feat_dim = 55

# Random endpoints for every edge, drawn uniformly from the valid id range.
# NOTE: no RNG seed is set in this cell, so each run samples different edges.
src_follow = torch.randint(0, n_users, (n_follows,))
dst_follow = torch.randint(0, n_users, (n_follows,))
src_click = torch.randint(0, n_users, (n_clicks,))
dst_click = torch.randint(0, n_items, (n_clicks,))
src_dislike = torch.randint(0, n_users, (n_dislike,))
dst_dislike = torch.randint(0, n_items, (n_dislike,))

# Every relation is stored together with an explicit reverse relation
# (same edges with source and destination swapped).
forward_edges = {
    ('user', 'follow', 'user'): (src_follow, dst_follow),
    ('user', 'click', 'item'): (src_click, dst_click),
    ('user', 'dislike', 'item'): (src_dislike, dst_dislike),
}
reverse_edges = {
    ('user', 'followed_by', 'user'): (dst_follow, src_follow),
    ('item', 'clicked_by', 'user'): (dst_click, src_click),
    ('item', 'disliked_by', 'user'): (dst_dislike, src_dislike),
}
data = {**forward_edges, **reverse_edges}

g = dgl.heterograph(data)

In [18]:
####CREATE FEATURES FOR NODES AND EDGES
def random_feats(name, dim, node = True, graph = None):
    """Attach a random, trainable 'feats' matrix to one node type or edge type.

    Args:
        name: node type (when ``node`` is true) or edge type (otherwise).
        dim: number of feature columns to generate.
        node: if true write to ``graph.nodes[name]``, else to ``graph.edges[name]``.
        graph: graph to modify. Defaults to the notebook-level ``g``, looked up
            at call time, which keeps the original two/three-argument calls working.

    The feature is a standard-normal tensor with one row per node (or edge) of
    the given type, wrapped in ``nn.Parameter`` and stored under the key 'feats'.
    """
    # Resolve the target explicitly so the function does not silently depend on
    # whatever `g` happens to be bound to when a caller wants another graph.
    target = g if graph is None else graph
    if node:
        target.nodes[name].data['feats'] = nn.Parameter(torch.randn(target.num_nodes(name), dim))
    else:
        target.edges[name].data['feats'] = nn.Parameter(torch.randn(target.num_edges(name), dim))

## Open question: should the 'follow' and 'followed_by' edge types share the same features?
## Open question: should an edge type carry a single feature shared by all of its edges?
# Only node features are created here: one random 'feats' matrix per node type.
for name, dim in zip(['user', 'item'], [user_feat_dim, item_feat_dim]):
    random_feats(name, dim, node = True )


# Node classification with HeteroGraphConv

In [19]:
####### HETEROGRAPHCONV

In [21]:
def reset_params(net):
    """Fill every parameter of ``net`` with ones, in place.

    Gives two modules of the same architecture identical weights so that
    their outputs can be compared directly. Returns nothing.
    """
    for weight in net.parameters():
        nn.init.constant_(weight, 1.0)

In [22]:
### GraphConv layer per relation, using the library's built-in HeteroGraphConv.
# Each relation's GraphConv consumes features of the relation's SOURCE node type,
# so its input size is looked up from that type instead of relying on a
# hand-written list being aligned with the order of g.etypes.
in_dim_by_ntype = {'user': user_feat_dim, 'item': item_feat_dim}
# allow_zero_in_degree=True matches the custom layer built later in the notebook:
# the random graph can contain destination nodes with no incoming edge for a
# relation, which GraphConv otherwise rejects with an error.
he_conv = gnn.HeteroGraphConv(
    mods = {etype: gnn.GraphConv(in_dim_by_ntype[src_type], 15, allow_zero_in_degree = True)
            for src_type, etype, _ in g.canonical_etypes},
    aggregate = 'sum')
reset_params(he_conv)

In [23]:
# Sanity check: nn.init.ones_ fills a (so far uninitialised) parameter with ones
# in place and hands the same parameter back.
a = nn.init.ones_(nn.Parameter(torch.Tensor(3, 4)))
a

Parameter containing:
tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]], requires_grad=True)

In [24]:
# Feed only the 'item' features to the built-in layer. The result is a dict
# keyed by node type; with this input it contains just 'user' (see output).
# NOTE: `a` rebinds the name used for the demo Parameter in the previous cell.
inputs = {'item': g.nodes['item'].data['feats']}
a  = he_conv(g, inputs)

a.keys()

dict_keys(['user'])

In [25]:
class HeteGraphConv(nn.Module):
    """Hand-written per-relation graph convolution with sum aggregation.

    ``rel_mods`` maps an edge-type name to the module applied on that
    relation's subgraph; the modules are registered through ``nn.ModuleDict``.
    """

    def __init__(self, rel_mods):
        super().__init__()
        self.rel_mods = nn.ModuleDict(rel_mods)

    def forward(self, g, inputs):
        """Run each relation's module and sum the results per destination type.

        ``inputs`` maps node type -> feature tensor. A relation is skipped when
        its source node type has no entry in ``inputs``. The returned dict only
        contains destination types that received at least one result.
        """
        per_dst = {ntype: [] for ntype in g.dsttypes}

        for etype, conv in self.rel_mods.items():
            rel_graph = g[etype]
            src_type = rel_graph.srctypes[0]
            if src_type not in inputs.keys():
                continue
            dst_type = rel_graph.dsttypes[0]
            per_dst[dst_type].append(conv(rel_graph, inputs[src_type]))

        summed = {}
        for ntype, messages in per_dst.items():
            if len(messages) > 0:
                # Stack along a new leading axis, then reduce it: element-wise sum.
                summed[ntype] = torch.stack(messages, dim = 0).sum(0)
        return summed

In [26]:
### Same per-relation GraphConv setup, wrapped in the hand-written HeteGraphConv.
# Each relation's GraphConv consumes features of the relation's SOURCE node type,
# so its input size is looked up from that type instead of relying on a
# hand-written list being aligned with the order of g.etypes.
in_dim_by_ntype = {'user': user_feat_dim, 'item': item_feat_dim}
he_conv1 = HeteGraphConv(
    rel_mods = {etype: gnn.GraphConv(in_dim_by_ntype[src_type], 15, allow_zero_in_degree = True)
                for src_type, etype, _ in g.canonical_etypes})
reset_params(he_conv1)

In [27]:
# Feed the custom layer its own input dict. `inputs1` was previously built but
# never used, because the call passed `inputs` from an earlier cell instead.
inputs1 = {'item': g.nodes['item'].data['feats']}
a1 = he_conv1(g, inputs1)

a1.keys()

dict_keys(['user'])

In [28]:
# Exact element-wise comparison of the custom layer's 'user' output with the built-in layer's.
(a1['user'] == a['user']).all()

tensor(True)

In [411]:
########### RELGRAPH
### RelGraph conv is actually HeteroGraphConv with a learnable matrix decomposition,
# which can be done for each relation at each layer separately.
# The aggregation of the node itself can be done outside of the relational aggregation.

# Edge classification

In [41]:
# Slice out every relation going from 'user' to 'item'. The repr below shows
# them merged into a single 'click+dislike' edge type with 17000 edges.
g1 = g['user', :, 'item']; g1

Graph(num_nodes={'user': 1000, 'item': 3000},
      num_edges={('user', 'click+dislike', 'item'): 17000},
      metagraph=[('user', 'item', 'click+dislike')])

# AIFB dataset - heterogeneous graph handled as a homogeneous one

In [126]:
# Load the AIFB dataset through DGL's contrib loader (the log below shows it
# being downloaded on first use).
# NOTE: this rebinds `data`, which earlier held the synthetic edge dictionary.
from dgl.contrib.data import load_data
data = load_data('aifb')

  return f(*args, **kwds)


Downloading /home/phan.huy.hoang/.dgl/aifb.tgz from https://data.dgl.ai/dataset/aifb.tgz...
Loading dataset aifb
Number of nodes:  8285
Number of edges:  66371
Number of relations:  91
Number of classes:  4
removing nodes that are more than 3 hops away


In [152]:
# Open question: why does num_rels (91 below) differ from the largest
# edge-type id + 1 (89 below)?
data.labels.shape, data.edge_dst.shape, data.edge_src.shape, max(data.edge_type)+1, data.num_rels

((8285, 1), (65439,), (65439,), 89, 91)

In [149]:
### Open question: what does edge_norm represent?
# NOTE(review): one float per edge; values such as 0.037 (about 1/27) suggest a
# per-edge normalisation constant. Confirm against the DGL documentation.
data.edge_norm

array([1.        , 1.        , 1.        , ..., 0.03703704, 0.03703704,
       1.        ], dtype=float32)

In [155]:
###### CREATE GRAPH
# Single-type graph built from the AIFB edge endpoints only; edge types are
# not attached here.
# NOTE: this rebinds `g`, which previously referred to the synthetic heterograph.
aifb_endpoints = (data.edge_src, data.edge_dst)
g = dgl.graph(aifb_endpoints)

<bound method DGLHeteroGraph.num_nodes of Graph(num_nodes=8285, num_edges=65439,
      ndata_schemes={}
      edata_schemes={})>