## 🏃 Getting Started

In [None]:
# imports
from approve.models import HeteroAPPR
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import os
import torch
from torch_geometric.data import HeteroData
from torch_geometric.utils.convert import to_networkx

To demonstrate our heterogeneous personalized PageRank algorithm, consider the following toy model of a citation network consisting of three papers and two venues.

In [None]:
# Toy citation network with three papers and two venues.
hetero_data = HeteroData()

# Citation edges, one column per (citing paper, cited paper) pair:
# 1 -> 0, 2 -> 0 and 2 -> 1.
cites = torch.tensor([[1, 2, 2], [0, 0, 1]])
hetero_data['paper', 'cites', 'paper'].edge_index = cites

# Publication edges, one column per (venue, paper) pair: 0 -> 0 and 1 -> 1.
publishes = torch.tensor([[0, 1], [0, 1]])
hetero_data['venue', 'publishes', 'paper'].edge_index = publishes

# Reverse publication edges: the same pairs with the two rows swapped.
hetero_data['paper', 'rev_publishes', 'venue'].edge_index = publishes[[1, 0]]

# No node features are assigned in this cell, so the node counts are
# declared explicitly.
hetero_data['paper'].num_nodes = 3
hetero_data['venue'].num_nodes = 2

In [None]:
# Flatten the heterogeneous graph into a single graph that networkx can draw.
data = hetero_data.to_homogeneous()
g = to_networkx(data)

# Homogeneous node ids as used below: 0-2 are the papers, 3-4 the venues.
pos = {0: [0, 1], 1: [0, 0], 2: [1, 1], 3: [-1, 1], 4: [-1, 0]}
# Show each node's index within its own node type rather than its
# homogeneous id.
labels = {0: 0, 1: 1, 2: 2, 3: 0, 4: 1}

# set figure size
plt.figure(dpi=144, figsize=(6, 3))

# draw nodes (the `label` values become the legend entries)
nx.draw_networkx_nodes(g, pos, label='paper', nodelist=[0, 1, 2],
                       node_color='tab:blue', edgecolors='black')
nx.draw_networkx_nodes(g, pos, label='venue', nodelist=[3, 4],
                       node_color='tab:purple', edgecolors='black')

# draw labels
nx.draw_networkx_labels(g, pos, labels, font_color='whitesmoke')

# draw edges: citations in blue, (reverse) publication edges in purple
nx.draw_networkx_edges(g, pos, edgelist=[(1, 0), (2, 0), (2, 1)],
                       edge_color="tab:blue")
nx.draw_networkx_edges(g, pos, edgelist=[(3, 0), (4, 1), (0, 3), (1, 4)],
                       edge_color="tab:purple")

# display legend
legend = plt.legend(loc='center left')
for legend_handle in legend.legend_handles:
    # Fix the legend marker size through the public collection setter
    # instead of writing to a private attribute.
    legend_handle.set_sizes([100])

# remove axis
plt.axis('off')

# save figure (no extension given, so matplotlib's default format is used)
plt.savefig(os.path.join(os.getcwd(), 'citation_network'),
            bbox_inches="tight")

Paper `0` is cited by the other two papers and published by venue `0`, while paper `1` is cited by paper `2` and published by venue `1`.

To compute the type-level PageRank score of each node, we initially assign uniform scores to all nodes of a given type. Since there are three papers and two venues, we assign each paper a third of the total `'paper'` score and each venue half of the total `'venue'` score.

In [None]:
# Start from a uniform distribution within each node type: every node gets an
# equal share (1 / number of nodes of that type) of the type's total score.
# The counts are read from `num_nodes` so they are not hard-coded twice.
for node_type in hetero_data.node_types:
    num_nodes = hetero_data[node_type].num_nodes
    hetero_data[node_type].x = torch.full((num_nodes, 1), 1 / num_nodes)

Furthermore, we need to add self-loops to `'paper'` nodes, and a special edge from paper `2` (which is unpublished) to a special `'venue'` node. The addition of the self-loops and special edge prevents the scores for each node type from leaking.

The `approve.models.HeteroAPPR` model takes care of all these considerations and can be used to compute the score of each node as follows.

In [None]:
# Compute the per-node scores with the heterogeneous personalized PageRank
# model.
# NOTE(review): K is presumably the number of propagation steps -- confirm
# against the HeteroAPPR documentation.
model = HeteroAPPR(K=30)

initial_scores = hetero_data.x_dict
edges = hetero_data.edge_index_dict
output = model(initial_scores, edge_index_dict=edges)

# Bare trailing expression so that the notebook displays the result.
output

Unsurprisingly, paper `0` is the most important paper, since it is cited by the other two papers. Venues `0` and `1` have comparable scores; venue `0`'s score is slightly larger than venue `1`'s score, because venue `0` publishes a higher-ranked paper than the paper published by venue `1`. Venue `2`, the special `'venue'` node, has a comparatively low score because it relates to the lowest-ranked paper.

In [None]:
# Collect the model's results in a fresh graph object for plotting.
hetero_data_update = HeteroData()

# Scores returned by the model, keyed by node type.
for ntype, scores in output.items():
    hetero_data_update[ntype].x = scores

# NOTE(review): `_norm` is a private attribute of the model. Entry 0 is used
# here as a mapping from edge type to edge index and entry 3 as a mapping from
# node type to a `special` value -- confirm this layout against HeteroAPPR.
for etype, index in model._norm[0].items():
    hetero_data_update[etype].edge_index = index

for ntype, marker in model._norm[3].items():
    hetero_data_update[ntype].special = marker

In [None]:
# Flatten the updated heterogeneous graph into a single graph that networkx
# can draw.
data = hetero_data_update.to_homogeneous()
g = to_networkx(data)

# Homogeneous node ids as used below: 0-2 are the papers, 3-5 the venues;
# node 5 is the additional special venue node.
pos = {0: [0, 1], 1: [0, 0], 2: [1, 1], 3: [-1, 1], 4: [-1, 0], 5: [1, 0]}
# Show each node's index within its own node type rather than its
# homogeneous id.
labels = {0: 0, 1: 1, 2: 2, 3: 0, 4: 1, 5: 2}

# set figure size
plt.figure(dpi=144, figsize=(6, 3))

# draw nodes
# Node 5 is drawn first as a white, red-rimmed marker so that it sits
# underneath the regular venue marker and highlights the special node.
nx.draw_networkx_nodes(g, pos, nodelist=[5],
                       node_color='white', node_size=500, edgecolors='tab:red')
# The `label` values become the legend entries.
nx.draw_networkx_nodes(g, pos, label='paper', nodelist=[0, 1, 2],
                       node_color='tab:blue', edgecolors='black')
nx.draw_networkx_nodes(g, pos, label='venue', nodelist=[3, 4, 5],
                       node_color='tab:purple', edgecolors='black')

# draw labels
nx.draw_networkx_labels(g, pos, labels, font_color='whitesmoke')

# draw edges: citations in blue, (reverse) publication edges in purple
nx.draw_networkx_edges(g, pos, edgelist=[(1, 0), (2, 0), (2, 1)],
                       edge_color="tab:blue")
nx.draw_networkx_edges(g, pos, edgelist=[(3, 0), (4, 1), (0, 3), (1, 4)],
                       edge_color="tab:purple")
# Red marks the added edges: paper self-loops and the edges between paper 2
# and the special venue node 5.
nx.draw_networkx_edges(g, pos,
                       edgelist=[(0, 0), (1, 1), (2, 2), (2, 5), (5, 2)],
                       edge_color="tab:red")

# display legend
legend = plt.legend(loc='center left')
for legend_handle in legend.legend_handles:
    # Fix the legend marker size through the public collection setter
    # instead of writing to a private attribute.
    legend_handle.set_sizes([100])

# remove axis
plt.axis('off')

# save figure (no extension given, so matplotlib's default format is used)
plt.savefig(os.path.join(os.getcwd(), 'citation_network_updated'),
            bbox_inches="tight")