In [9]:
import numpy as np
import os
import sys
import urllib.request
import tempfile
import tarfile
from pathlib import Path
import dgl
import torch


In [4]:
actor = dgl.data.ActorDataset(force_reload=True)
chameleon = dgl.data.ChameleonDataset(force_reload=True)
squirrel = dgl.data.SquirrelDataset(force_reload=True)

Done saving data into cached files.
Done saving data into cached files.
Done saving data into cached files.


In [6]:
A = actor[0].adjacency_matrix().to_dense()

In [12]:
def reciprocity(g):
    """
    Compute the reciprocity of a directed graph.
    The reciprocity of a directed graph is defined as the ratio of the number of edges
    that are reciprocated to the total number of edges in the graph.
    """
    num_edges = g.number_of_edges()
    if num_edges == 0:
        return 0.0
    A = g.adjacency_matrix().to_dense()
    
    A_T = A.T
    
    
    num_reciprocated_edges = torch.sum(A * A_T).item()/2
    
    
    return num_reciprocated_edges / num_edges


def degree_corr(g):
    in_degrees = g.in_degrees()
    out_degrees = g.out_degrees()
    
    #compute the correlation between in-degrees and out-degrees
    corr = np.corrcoef(in_degrees.numpy(), out_degrees.numpy())[0, 1]
    return corr

reciprocity_actor = reciprocity(actor[0])
reciprocity_chameleon = reciprocity(chameleon[0])
reciprocity_squirrel = reciprocity(squirrel[0])

print(f"Reciprocity of Actor dataset: {reciprocity_actor}")
print(f"Reciprocity of Chameleon dataset: {reciprocity_chameleon}")
print(f"Reciprocity of Squirrel dataset: {reciprocity_squirrel}")

degree_corr_actor = degree_corr(actor[0])
degree_corr_chameleon = degree_corr(chameleon[0])
degree_corr_squirrel = degree_corr(squirrel[0])
print(f"Degree correlation of Actor dataset: {degree_corr_actor}")
print(f"Degree correlation of Chameleon dataset: {degree_corr_chameleon}")
print(f"Degree correlation of Squirrel dataset: {degree_corr_squirrel}")

Reciprocity of Actor dataset: 0.09923332634542242
Reciprocity of Chameleon dataset: 0.13032879975623943
Reciprocity of Squirrel dataset: 0.08591579791130173
Degree correlation of Actor dataset: 0.2130348499690641
Degree correlation of Chameleon dataset: 0.3529220360075925
Degree correlation of Squirrel dataset: 0.33288354086164884


In [4]:
_DATA_URLS = {
    "squirrel_filtered":
        "https://github.com/yandex-research/heterophilous-graphs/raw/main/data/squirrel_filtered_directed.npz",
    "chameleon_filtered":
        "https://github.com/yandex-research/heterophilous-graphs/raw/main/data/chameleon_filtered_directed.npz",
}

In [6]:
name = "squirrel_filtered"
raw_dir = "~/.dgl/filtered_heterophily"
name = name.lower()
if name not in _DATA_URLS:
    raise ValueError(f"Unknown filtered dataset: {name}")

root = Path(raw_dir).expanduser()
root.mkdir(parents=True, exist_ok=True)
local_file = root / f"{name}.npz"

# download once
if not local_file.exists():
    print(f"→ downloading {name}...")
    urllib.request.urlretrieve(_DATA_URLS[name], local_file)

In [7]:
data = np.load(local_file, allow_pickle=True)

In [17]:
num_nodes = data['node_features'].shape[0]

In [35]:
u,v = data['edges'].T

In [36]:
#to numpy matrix
A = np.zeros((num_nodes, num_nodes), dtype=np.float32)
for i in range(len(u)):
    A[u[i], v[i]] = 1

In [37]:
(A.T==A).mean()  # check if the graph is undirected

0.9884987622753089

In [38]:
np.triu(A).sum(),np.tril(A).sum()  # check if the graph is directed

(36163.0, 29695.0)