### Graphs, Nodes, and edges


In [None]:
import dgl
import torch as th
# edges 0->1, 0->2, 0->3, 1->3
u, v = th.tensor([0, 0, 0, 1]), th.tensor([1, 2, 3, 3])
g = dgl.graph((u, v))
print(g) # number of nodes are inferred from the max node IDs in the given edges
# Node IDs
print(g.nodes())
# Edge end nodes
print(g.edges())
# Edge end nodes and edge IDs
print(g.edges(form='all'))
# If the node with the largest ID is isolated (meaning no edges),
# then one needs to explicitly set the number of nodes
g = dgl.graph((u, v), num_nodes=8)

In [2]:
bg = dgl.to_bidirected(g)
bg.edges()

(tensor([0, 0, 0, 1, 1, 2, 3, 3]), tensor([1, 2, 3, 0, 3, 0, 0, 1]))

In [3]:
edges = th.tensor([2, 5, 3]), th.tensor([3, 5, 0])  # edges 2->3, 5->5, 3->0
g64 = dgl.graph(edges)  # DGL uses int64 by default
print(g64.idtype)
g32 = dgl.graph(edges, idtype=th.int32)  # create a int32 graph
g32.idtype
g64_2 = g32.long()  # convert to int64
g64_2.idtype
g32_2 = g64.int()  # convert to int32
g32_2.idtype

torch.int64


torch.int32

### Node and Edge Features

In [4]:
import dgl
import torch as th
g = dgl.graph(([0, 0, 1, 5], [1, 2, 2, 0])) # 6 nodes, 4 edges
print(g)
g.ndata['x'] = th.ones(g.num_nodes(), 3)               # node feature of length 3
g.edata['x'] = th.ones(g.num_edges(), dtype=th.int32)  # scalar integer feature
print(g)
# different names can have different shapes
g.ndata['y'] = th.randn(g.num_nodes(), 5)
print(g.ndata['x'][1])                  # get node 1's feature
print(g.edata['x'][th.tensor([0, 3])])  # get features of edge 0 and 3

Graph(num_nodes=6, num_edges=4,
      ndata_schemes={}
      edata_schemes={})
Graph(num_nodes=6, num_edges=4,
      ndata_schemes={'x': Scheme(shape=(3,), dtype=torch.float32)}
      edata_schemes={'x': Scheme(shape=(), dtype=torch.int32)})
tensor([1., 1., 1.])
tensor([1, 1], dtype=torch.int32)


In [5]:
# edges 0->1, 0->2, 0->3, 1->3
edges = th.tensor([0, 0, 0, 1]), th.tensor([1, 2, 3, 3])
weights = th.tensor([0.1, 0.6, 0.9, 0.7])  # weight of each edge
g = dgl.graph(edges)
g.edata['w'] = weights  # give it a name 'w'
g

Graph(num_nodes=4, num_edges=4,
      ndata_schemes={}
      edata_schemes={'w': Scheme(shape=(), dtype=torch.float32)})

### Creating Graphs from External Sources

In [8]:
import dgl
import torch as th
import scipy.sparse as sp
spmat = sp.rand(100, 100, density=0.05) # 5% nonzero entries
print(dgl.from_scipy(spmat))                   # from SciPy

import networkx as nx
nx_g = nx.path_graph(5) # a chain 0-1-2-3-4
print(dgl.from_networkx(nx_g)) # from networkx

Graph(num_nodes=100, num_edges=500,
      ndata_schemes={}
      edata_schemes={})
Graph(num_nodes=5, num_edges=8,
      ndata_schemes={}
      edata_schemes={})


In [9]:
nxg = nx.DiGraph([(2, 1), (1, 2), (2, 3), (0, 0)])
dgl.from_networkx(nxg)

Graph(num_nodes=4, num_edges=4,
      ndata_schemes={}
      edata_schemes={})

### Heterogeneous Graphs

In [11]:
import dgl
import torch as th
# Create a heterograph with 3 node types and 3 edges types.
graph_data = {
   ('drug', 'interacts', 'drug'): (th.tensor([0, 1]), th.tensor([1, 2])),
   ('drug', 'interacts', 'gene'): (th.tensor([0, 1]), th.tensor([2, 3])),
   ('drug', 'treats', 'disease'): (th.tensor([1]), th.tensor([2]))
}
g = dgl.heterograph(graph_data)
print(g.ntypes)
print(g.etypes)
print(g.canonical_etypes)

['disease', 'drug', 'gene']
['interacts', 'interacts', 'treats']
[('drug', 'interacts', 'drug'), ('drug', 'interacts', 'gene'), ('drug', 'treats', 'disease')]


In [None]:
# A homogeneous graph
dgl.heterograph({('node_type', 'edge_type', 'node_type'): (u, v)})
# A bipartite graph
dgl.heterograph({('source_type', 'edge_type', 'destination_type'): (u, v)})

In [13]:
print(g)
print(g.metagraph().edges())

Graph(num_nodes={'disease': 3, 'drug': 3, 'gene': 4},
      num_edges={('drug', 'interacts', 'drug'): 2, ('drug', 'interacts', 'gene'): 2, ('drug', 'treats', 'disease'): 1},
      metagraph=[('drug', 'drug', 'interacts'), ('drug', 'gene', 'interacts'), ('drug', 'disease', 'treats')])
[('drug', 'drug'), ('drug', 'gene'), ('drug', 'disease')]


In [17]:
# Get the number of all nodes in the graph
print(g.num_nodes())
# Get the number of drug nodes
print(g.num_nodes('drug'))
print(g.nodes('drug'))
# Nodes of different types have separate IDs,
# hence not well-defined without a type specified
print(g.nodes())


10
3
tensor([0, 1, 2])


DGLError: Node type name must be specified if there are more than one node types.

In [19]:
# Set/get feature 'hv' for nodes of type 'drug'
g.nodes['drug'].data['hv'] = th.ones(3, 1)
print(g.nodes['drug'].data['hv'])
# Set/get feature 'he' for edge of type 'treats'
g.edges['treats'].data['he'] = th.zeros(1, 1)
print(g.edges['treats'].data['he'])

tensor([[1.],
        [1.],
        [1.]])
tensor([[0.]])


In [21]:
g = dgl.heterograph({
   ('drug', 'interacts', 'drug'): (th.tensor([0, 1]), th.tensor([1, 2])),
   ('drug', 'is similar', 'drug'): (th.tensor([0, 1]), th.tensor([2, 3]))
})
print(g.nodes())
# To set/get feature with a single type, no need to use the new syntax
g.ndata['hv'] = th.ones(4, 1)

tensor([0, 1, 2, 3])


In [22]:
g = dgl.heterograph({
   ('drug', 'interacts', 'drug'): (th.tensor([0, 1]), th.tensor([1, 2])),
   ('drug', 'interacts', 'gene'): (th.tensor([0, 1]), th.tensor([2, 3])),
   ('drug', 'treats', 'disease'): (th.tensor([1]), th.tensor([2]))
})
g.nodes['drug'].data['hv'] = th.ones(3, 1)
# Retain relations ('drug', 'interacts', 'drug') and ('drug', 'treats', 'disease')
# All nodes for 'drug' and 'disease' will be retained
eg = dgl.edge_type_subgraph(g, [('drug', 'interacts', 'drug'),
                                ('drug', 'treats', 'disease')])
print(eg)
# The associated features will be copied as well
print(eg.nodes['drug'].data['hv'])

Graph(num_nodes={'disease': 3, 'drug': 3},
      num_edges={('drug', 'interacts', 'drug'): 2, ('drug', 'treats', 'disease'): 1},
      metagraph=[('drug', 'drug', 'interacts'), ('drug', 'disease', 'treats')])
tensor([[1.],
        [1.],
        [1.]])


In [25]:
g = dgl.heterograph({
   ('drug', 'interacts', 'drug'): (th.tensor([0, 1]), th.tensor([1, 2])),
   ('drug', 'treats', 'disease'): (th.tensor([1]), th.tensor([2]))})
g.nodes['drug'].data['hv'] = th.zeros(3, 1)
g.nodes['disease'].data['hv'] = th.ones(3, 1)
g.edges['interacts'].data['he'] = th.zeros(2, 1)
g.edges['treats'].data['he'] = th.zeros(1, 2)
# By default, it does not merge any features
hg = dgl.to_homogeneous(g)
print('hv' in hg.ndata)
# Copy node features
hg = dgl.to_homogeneous(g, ndata=['hv'])
print(hg.ndata['hv'])
# Copy edge features
# For feature copy, it expects features to have
# the same size and dtype across node/edge types
hg = dgl.to_homogeneous(g, edata=['he'])


False
tensor([[1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.]])


DGLError: Cannot concatenate column he with shape Scheme(shape=(2,), dtype=torch.float32) and shape Scheme(shape=(1,), dtype=torch.float32)

In [26]:
# Order of node types in the heterograph
print(g.ntypes)
# Original node types
print(hg.ndata[dgl.NTYPE])
# Original type-specific node IDs
print(hg.ndata[dgl.NID])
# Order of edge types in the heterograph
print(g.etypes)
# Original edge types
print(hg.edata[dgl.ETYPE])
# Original type-specific edge IDs
print(hg.edata[dgl.EID])

['disease', 'drug']
tensor([0, 0, 0, 1, 1, 1])
tensor([0, 1, 2, 0, 1, 2])
['interacts', 'treats']
tensor([0, 0, 1])
tensor([0, 1, 0])


In [27]:
g = dgl.heterograph({
   ('drug', 'interacts', 'drug'): (th.tensor([0, 1]), th.tensor([1, 2])),
   ('drug', 'interacts', 'gene'): (th.tensor([0, 1]), th.tensor([2, 3])),
   ('drug', 'treats', 'disease'): (th.tensor([1]), th.tensor([2]))
})
sub_g = dgl.edge_type_subgraph(g, [('drug', 'interacts', 'drug'),
                                   ('drug', 'interacts', 'gene')])
h_sub_g = dgl.to_homogeneous(sub_g)
h_sub_g

Graph(num_nodes=7, num_edges=4,
      ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64), '_TYPE': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64), '_TYPE': Scheme(shape=(), dtype=torch.int64)})