In [71]:
import numpy as np
import scipy as sp
import torch

from numba import jit

from numba import jit


from ogb.lsc import MAG240MDataset
from ogb.nodeproppred import NodePropPredDataset
from tqdm.auto import tqdm

from cogdl.data import Graph


In [50]:


@jit(nopython=True, fastmath=True)
def random_walk_step(transition_matrix: np.ndarray, energy_vector: np.ndarray) -> np.ndarray:
    energy_vector = np.dot(transition_matrix, energy_vector)  # Power method
    energy_vector = energy_vector / np.linalg.norm(energy_vector)  # Normalize
    return energy_vector


@jit(nopython=True, fastmath=True)
def embed_point_using_random_walk(transition_matrix: np.ndarray, starting_point: int, n_steps: int):
    r = np.zeros(transition_matrix.shape[0])
    r[starting_point] = 1
    for _ in range(n_steps):
        r = random_walk_step(transition_matrix, r)
    return r


@jit(nopython=True, fastmath=True)
def compute_point_embeddings(transition_matrix: np.ndarray, point_indices: np.ndarray, n_steps: int, embeddings: np.ndarray):
    for point_index in point_indices:
        r = np.zeros(transition_matrix.shape[0])
        r[point_index] = 1
        for _ in range(n_steps):
            r = random_walk_step(transition_matrix, r)
        embeddings[point_index] = r

In [9]:
data_set = NodePropPredDataset(name="ogbn-products", root="../data")

In [11]:
splitted_idx = data_set.get_idx_split()

In [12]:
graph, y = data_set[0]

In [41]:
n_points = y.shape[0]

In [42]:
n_points

2449029

In [16]:
graph["node_feat"].shape

(2449029, 100)

In [37]:
row_indices, col_indices = graph["edge_index"][0], graph["edge_index"][1]


(123718280,)

In [58]:
entry_vals = np.ones_like(row)
adj_mat = sp.sparse.csr_matrix((entry_vals, (row_indices, col_indices)), shape=(n_points, n_points), dtype="float64")

In [59]:
np.unique(adj_mat.diagonal())

array([0., 2.])

In [60]:
adj_mat

<2449029x2449029 sparse matrix of type '<class 'numpy.float64'>'
	with 123718152 stored elements in Compressed Sparse Row format>

In [67]:
sums = adj_mat.sum(axis=1, dtype="float64").squeeze()

In [70]:
sums.shape

(1, 2449029)

In [69]:
adj_mat.data /= sums

ValueError: operands could not be broadcast together with shapes (123718152,) (1,2449029) (123718152,) 

In [None]:
transition_matrix = (S / S.sum(axis=1).reshape(-1, 1)).T

In [30]:
np.stack([row, col], axis=0)

(2, 123718280)

In [23]:
np.max(row)

2449028

In [24]:
np.max(col)

2449028

In [26]:
edge_index = torch.stack([torch.from_numpy(row), torch.from_numpy(col)], dim=0)

In [27]:
edge_index.shape

torch.Size([2, 123718280])

In [28]:
edge_index

tensor([[      0,  152857,       0,  ..., 2449028,   53324, 2449028],
        [ 152857,       0,   32104,  ...,  162836, 2449028,   53324]])