# StellarGraph example: Load CORA via NetworkX

Import NetworkX and StellarGraph:

In [1]:
import networkx as nx
import pandas as pd
import numpy as np
import os

import stellargraph as sg

Load the CORA network:

In [2]:
# Directory holding the CORA files read in the following cells
# ("cora.cites" and "cora.content"); "~" is expanded to the user's home directory.
data_dir = os.path.expanduser("~/data/cora")

In [3]:
# Read the tab-separated, header-less citation file as a two-column edge
# list, then give every edge the same "label" value.
edgelist = pd.read_csv(
    os.path.join(data_dir, "cora.cites"),
    sep="\t",
    header=None,
    names=["target", "source"],
)
edgelist["label"] = "cites"

In [4]:
# Column names: 1433 feature columns "w_0" ... "w_1432" followed by "subject".
feature_names = list(map("w_{}".format, range(1433)))
column_names = feature_names + ["subject"]
# NOTE(review): presumably the file has one more column than `column_names`
# (a leading paper ID), in which case pandas uses it as the index - confirm.
node_data = pd.read_csv(
    os.path.join(data_dir, "cora.content"),
    sep="\t",
    header=None,
    names=column_names,
)

Create a StellarGraph object via a NetworkX graph of the data:

In [5]:
def create_stellargraph_nx(edge_data, node_data, feature_names):
    """Build a StellarGraph by way of an intermediate NetworkX graph.

    Args:
        edge_data: Edge DataFrame passed to ``nx.from_pandas_edgelist``; its
            "label" column is kept as an edge attribute.
        node_data: Node DataFrame containing the feature columns.
        feature_names: Columns of ``node_data`` to use as node features.

    Returns:
        The ``sg.StellarGraph`` wrapping the NetworkX graph.
    """
    nx_graph = nx.from_pandas_edgelist(edge_data, edge_attr="label")
    # Every node receives the same "label" attribute value, "paper".
    nx.set_node_attributes(nx_graph, "paper", "label")
    return sg.StellarGraph(nx_graph, node_features=node_data[feature_names])

Time the creation of the NetworkX-based StellarGraph object:

In [6]:
%timeit create_stellargraph_nx(edgelist, node_data, feature_names)

77.3 ms ± 363 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [7]:
# Build one NetworkX-backed graph instance to reuse in the sampling benchmarks that follow.
G_nx = create_stellargraph_nx(edgelist, node_data, feature_names)

Time the sampling of node features:

In [8]:
# Benchmark sizes: how many distinct nodes are drawn per run (Nsamp) and how
# many times each timed query loop is repeated (Nloop).
Nsamp, Nloop = 500, 10

In [9]:
def sample_features_stellargraph_nx(graph, nodes, *, n_samples=None, n_loops=None):
    """Repeatedly fetch the features of one random sample of nodes.

    A single sample of distinct node IDs is drawn, then
    ``graph.get_feature_for_nodes`` is called on that same sample
    ``n_loops`` times. The fetched features are discarded.

    Args:
        graph: Object providing ``nodes()`` and ``get_feature_for_nodes(ids)``.
        nodes: Node IDs to sample from, or ``None`` to use ``graph.nodes()``.
        n_samples: Number of distinct nodes to draw; defaults to the
            notebook-level ``Nsamp``.
        n_loops: Number of feature queries to issue; defaults to the
            notebook-level ``Nloop``.

    Raises:
        ValueError: From ``np.random.choice`` when fewer than ``n_samples``
            nodes are available (sampling is without replacement).
    """
    if n_samples is None:
        n_samples = Nsamp
    if n_loops is None:
        n_loops = Nloop
    if nodes is None:
        nodes = graph.nodes()
    if not isinstance(nodes, list):
        # np.random.choice needs a concrete sequence, not a node view/iterable.
        nodes = list(nodes)
    sample_ids = np.random.choice(nodes, n_samples, replace=False)
    for _ in range(n_loops):
        graph.get_feature_for_nodes(sample_ids)

In [10]:
%timeit sample_features_stellargraph_nx(G_nx, G_nx.nodes())

97.5 ms ± 1.17 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


Now time the querying of node neighbours:

In [11]:
def sample_neighbours_stellargraph_nx(graph, nodes, *, n_loops=None):
    """Call ``graph.neighbors`` for every node, repeated ``n_loops`` times.

    The value returned by ``graph.neighbors`` is discarded without being
    iterated, so only the cost of the call itself is exercised.

    Args:
        graph: Object providing ``nodes()`` and ``neighbors(node_id)``.
        nodes: Node IDs to query, or ``None`` to use ``graph.nodes()``.
        n_loops: Number of passes over ``nodes``; defaults to the
            notebook-level ``Nloop``.
    """
    if n_loops is None:
        n_loops = Nloop
    if nodes is None:
        nodes = graph.nodes()
    for _ in range(n_loops):
        for node_id in nodes:
            graph.neighbors(node_id)

In [12]:
%timeit sample_neighbours_stellargraph_nx(G_nx, G_nx.nodes())

6.25 ms ± 121 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


Repeat the measurement, this time converting the raw NetworkX neighbours into a usable list:

In [13]:
def sample_neighbours_list_stellargraph_nx(graph, nodes, *, n_loops=None):
    """Materialise the neighbours of every node as a list, ``n_loops`` times.

    Unlike a bare ``graph.neighbors`` call, wrapping the result in ``list``
    forces it to be fully iterated. The resulting lists are discarded.

    Args:
        graph: Object providing ``nodes()`` and ``neighbors(node_id)``.
        nodes: Node IDs to query, or ``None`` to use ``graph.nodes()``.
        n_loops: Number of passes over ``nodes``; defaults to the
            notebook-level ``Nloop``.
    """
    if n_loops is None:
        n_loops = Nloop
    if nodes is None:
        nodes = graph.nodes()
    for _ in range(n_loops):
        for node_id in nodes:
            list(graph.neighbors(node_id))

In [14]:
%timeit sample_neighbours_list_stellargraph_nx(G_nx, G_nx.nodes())

12.4 ms ± 67.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


Now time feature sampling of neighbours:

In [15]:
def sample_neighbour_features_stellargraph_nx(graph, nodes, *, n_samples=None, n_loops=None):
    """Fetch the features of the neighbours of a random sample of nodes.

    One sample of distinct node IDs is drawn; then, ``n_loops`` times, each
    sampled node's neighbours are listed and passed to
    ``graph.get_feature_for_nodes``. The fetched features are discarded.

    Args:
        graph: Object providing ``nodes()``, ``neighbors(node_id)`` and
            ``get_feature_for_nodes(ids)``.
        nodes: Node IDs to sample from, or ``None`` to use ``graph.nodes()``.
        n_samples: Number of distinct nodes to draw; defaults to the
            notebook-level ``Nsamp``.
        n_loops: Number of passes over the sample; defaults to the
            notebook-level ``Nloop``.

    Raises:
        ValueError: From ``np.random.choice`` when fewer than ``n_samples``
            nodes are available (sampling is without replacement).
    """
    if n_samples is None:
        n_samples = Nsamp
    if n_loops is None:
        n_loops = Nloop
    if nodes is None:
        nodes = graph.nodes()
    if not isinstance(nodes, list):
        # np.random.choice needs a concrete sequence, not a node view/iterable.
        nodes = list(nodes)
    sample_ids = np.random.choice(nodes, n_samples, replace=False)
    for _ in range(n_loops):
        for node_id in sample_ids:
            neighbours = list(graph.neighbors(node_id))
            graph.get_feature_for_nodes(neighbours)

In [16]:
%timeit sample_neighbour_features_stellargraph_nx(G_nx, G_nx.nodes())

387 ms ± 10.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


Now look at the new homogeneous StellarGraph:

In [17]:
from stellargraph.core.graph_homogeneous import HomogeneousStellarGraph

In [18]:
def create_stellargraph_h(edge_data, node_data, feature_names):
    """Build a ``HomogeneousStellarGraph`` directly from the pandas data.

    Args:
        edge_data: Edge data, passed through as the first constructor argument.
        node_data: Node data, passed through as the second constructor argument.
        feature_names: Passed to the constructor as ``node_features``.

    Returns:
        The newly constructed ``HomogeneousStellarGraph``.
    """
    return HomogeneousStellarGraph(
        edge_data,
        node_data,
        node_features=feature_names,
    )

Time the creation of the homogeneous StellarGraph object:

In [19]:
%timeit create_stellargraph_h(edgelist, node_data, feature_names)

28.6 ms ± 278 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [20]:
# Build one homogeneous graph instance to reuse in the sampling benchmarks that follow.
G_h = create_stellargraph_h(edgelist, node_data, feature_names)

Time the sampling of node features:

In [21]:
def sample_features_stellargraph_h(graph, nodes, *, n_samples=None, n_loops=None):
    """Repeatedly fetch the features of one random sample of nodes.

    A single sample of distinct node IDs is drawn, then
    ``graph.node_features`` is called on that same sample ``n_loops`` times.
    The fetched features are discarded.

    Args:
        graph: Object providing ``nodes()`` and ``node_features(ids)``.
        nodes: Node IDs to sample from, or ``None`` to use ``graph.nodes()``.
        n_samples: Number of distinct nodes to draw; defaults to the
            notebook-level ``Nsamp``.
        n_loops: Number of feature queries to issue; defaults to the
            notebook-level ``Nloop``.

    Raises:
        ValueError: From ``np.random.choice`` when fewer than ``n_samples``
            nodes are available (sampling is without replacement).
    """
    if n_samples is None:
        n_samples = Nsamp
    if n_loops is None:
        n_loops = Nloop
    if nodes is None:
        nodes = graph.nodes()
    if not isinstance(nodes, list):
        # np.random.choice needs a concrete sequence, not a node view/iterable.
        nodes = list(nodes)
    sample_ids = np.random.choice(nodes, n_samples, replace=False)
    for _ in range(n_loops):
        graph.node_features(sample_ids)

In [22]:
%timeit sample_features_stellargraph_h(G_h, G_h.nodes())

86.6 ms ± 477 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


Time the querying of node neighbours:

In [23]:
def sample_neighbours_stellargraph_h(graph, nodes, *, n_loops=None):
    """Call ``graph.neighbour_nodes`` for every node, ``n_loops`` times.

    The returned neighbours are discarded.

    Args:
        graph: Object providing ``nodes()`` and ``neighbour_nodes(node_id)``.
        nodes: Node IDs to query, or ``None`` to use ``graph.nodes()``.
        n_loops: Number of passes over ``nodes``; defaults to the
            notebook-level ``Nloop``.
    """
    if n_loops is None:
        n_loops = Nloop
    if nodes is None:
        nodes = graph.nodes()
    for _ in range(n_loops):
        for node_id in nodes:
            graph.neighbour_nodes(node_id)

In [24]:
%timeit sample_neighbours_stellargraph_h(G_h, G_h.nodes())

20.5 ms ± 648 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


Now time feature sampling of neighbours:

In [25]:
def sample_neighbour_features_stellargraph_h(graph, nodes, *, n_samples=None, n_loops=None):
    """Fetch the features of the neighbours of a random sample of nodes.

    One sample of distinct node IDs is drawn; then, ``n_loops`` times, each
    sampled node's neighbours are looked up with ``graph.neighbour_nodes``
    and passed unchanged to ``graph.node_features``. The fetched features
    are discarded.

    Args:
        graph: Object providing ``nodes()``, ``neighbour_nodes(node_id)`` and
            ``node_features(ids)``.
        nodes: Node IDs to sample from, or ``None`` to use ``graph.nodes()``.
        n_samples: Number of distinct nodes to draw; defaults to the
            notebook-level ``Nsamp``.
        n_loops: Number of passes over the sample; defaults to the
            notebook-level ``Nloop``.

    Raises:
        ValueError: From ``np.random.choice`` when fewer than ``n_samples``
            nodes are available (sampling is without replacement).
    """
    if n_samples is None:
        n_samples = Nsamp
    if n_loops is None:
        n_loops = Nloop
    if nodes is None:
        nodes = graph.nodes()
    if not isinstance(nodes, list):
        # np.random.choice needs a concrete sequence, not a node view/iterable.
        nodes = list(nodes)
    sample_ids = np.random.choice(nodes, n_samples, replace=False)
    for _ in range(n_loops):
        for node_id in sample_ids:
            neighbours = graph.neighbour_nodes(node_id)
            graph.node_features(neighbours)

In [26]:
%timeit sample_neighbour_features_stellargraph_h(G_h, G_h.nodes())

353 ms ± 22.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
