In [36]:
from torch_geometric.datasets import WikipediaNetwork
from torch_geometric.transforms import NormalizeFeatures

# Load the 'chameleon' graph of the WikipediaNetwork collection under data/,
# attaching the NormalizeFeatures transform to the dataset.
dataset = WikipediaNetwork(
    root='data/',
    name='chameleon',
    transform=NormalizeFeatures(),
)

# Index 0 is the graph object that every later cell works with.
data = dataset[0]
print(data)


Data(x=[2277, 2325], edge_index=[2, 36101], y=[2277], train_mask=[2277, 10], val_mask=[2277, 10], test_mask=[2277, 10])


In [4]:
# Headline numbers for the loaded graph, one per output line.
print(
    f'Dataset: {dataset.name}',
    f"Data has {data.num_nodes} nodes, {data.num_edges} edges",
    f"Number of node features: {data.num_node_features}",
    f"Number of classes: {dataset.num_classes}",
    sep="\n",
)


Dataset: chameleon
Data has 2277 nodes, 36101 edges
Number of node features: 2325
Number of classes: 5


## Explore statistics

In [5]:
# Structural yes/no checks on the graph; each check is called right before
# its line is printed, in the order listed.
for caption, predicate in (
    ("Contains isolated nodes:", data.has_isolated_nodes),
    ("Contains self-loops:", data.has_self_loops),
    ("Is directed:", data.is_directed),
):
    print(caption, predicate())

Contains isolated nodes: False
Contains self-loops: True
Is directed: True


In [6]:
# Tensor shapes of the two core attributes of the graph.
print(
    f"Node feature matrix shape: {data.x.shape}",
    f"Edge index shape: {data.edge_index.shape}",
    sep="\n",
)

# Summary statistics taken over every entry of the feature matrix at once
# (no per-node or per-feature reduction).
print(
    "Node feature statistics:",
    f"Min feature value: {data.x.min().item():.4f}",
    f"Max feature value: {data.x.max().item():.4f}",
    f"Mean feature value: {data.x.mean().item():.4f}",
    f"Std deviation of feature values: {data.x.std().item():.4f}",
    sep="\n",
)

Node feature matrix shape: torch.Size([2277, 2325])
Edge index shape: torch.Size([2, 36101])
Node feature statistics:
Min feature value: 0.0000
Max feature value: 1.0000
Mean feature value: 0.0004
Std deviation of feature values: 0.0099


In [38]:
import matplotlib.pyplot as plt
import seaborn as sns

labels = data.y
# Entry k of the bincount result is the number of nodes labelled k.
label_counts = labels.bincount()
print("Label distribution:")

# For each class: absolute node count first, then its share of all nodes.
for class_id, nodes_in_class in enumerate(label_counts.tolist()):
    share = (nodes_in_class / data.num_nodes) * 100
    print(f"Class {class_id}: {nodes_in_class} nodes")
    print(f"Class {class_id}: {share:.2f}%")



Label distribution:
Class 0: 456 nodes
Class 0: 20.03%
Class 1: 460 nodes
Class 1: 20.20%
Class 2: 453 nodes
Class 2: 19.89%
Class 3: 521 nodes
Class 3: 22.88%
Class 4: 387 nodes
Class 4: 17.00%


## Features

In [42]:
# Edge homophily: the fraction of edges whose two endpoints share a label.
edge_index = data.edge_index
y = data.y

# Row 0 / row 1 of edge_index hold the two endpoint ids of every edge.
row, col = edge_index
total_edges = edge_index.shape[1]

# Count the edges whose endpoint labels agree.
same_label = y[row].eq(y[col]).sum().item()

homophily = same_label / total_edges
homophily

0.23500734051688318

### Homophily: Cora vs. Chameleon

In [43]:
import torch
from torch_geometric.datasets import Planetoid, WikipediaNetwork
from torch_geometric.transforms import NormalizeFeatures

def compute_homophily(data) -> float:
    """Return the edge homophily ratio of a graph.

    The ratio is the fraction of edges whose two endpoints carry the same
    class label. Every column of ``data.edge_index`` counts as one edge.

    Args:
        data: Object exposing ``edge_index`` (two rows of node ids, one
            column per edge) and ``y`` (labels indexable by node id).

    Returns:
        ``same-label edges / total edges`` as a float, or ``nan`` when the
        graph has no edges (the ratio is undefined in that case).
    """
    edge_index = data.edge_index
    total_edges = edge_index.size(1)
    if total_edges == 0:
        # Without this guard an edgeless graph raises ZeroDivisionError below.
        return float("nan")

    row, col = edge_index
    y = data.y
    # Compare the labels of both endpoints of every edge and count matches.
    same_label = (y[row] == y[col]).sum().item()
    return same_label / total_edges

# Cora: load the dataset, take its graph at index 0, and score its homophily.
cora_dataset = Planetoid(
    root='data/Cora',
    name='Cora',
    transform=NormalizeFeatures(),
)
cora_data = cora_dataset[0]
cora_homophily = compute_homophily(cora_data)

# Chameleon: same steps, loaded after Cora so any downloads keep their order.
chameleon_dataset = WikipediaNetwork(
    root='data/Chameleon',
    name='chameleon',
    transform=NormalizeFeatures(),
)
chameleon_data = chameleon_dataset[0]
chameleon_homophily = compute_homophily(chameleon_data)

# Side-by-side report of the two ratios.
print(f"Cora Homophily:      {cora_homophily:.4f}")
print(f"Chameleon Homophily: {chameleon_homophily:.4f}")


Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!
Downloading https://raw.githubusercontent.com/graphdml-uiuc-jlu/geom-gcn/f1fc0d14b3b019c562737240d06ec83b07d16a8f/new_data/chameleon/out1_node_feature_label.txt
Downloading https://raw.githubusercontent.com/graphdml-uiuc-jlu/geom-gcn/f1fc0d14b3b019c562737240d06ec83b07d16a8f/new_data/chameleon/out1_graph_edges.txt
Downloading https://

Cora Homophily:      0.8100
Chameleon Homophily: 0.2350


Done!


## Run some algorithms
### GCN

**TODO**

1. Two more datasets
2. Two attack models
3. GCN