Merge branch 'master' into triplet_sampling
rusty1s committed Nov 23, 2022
2 parents fd6ec66 + 4c1c66f commit d344788
Showing 25 changed files with 1,312 additions and 222 deletions.
6 changes: 5 additions & 1 deletion CHANGELOG.md
@@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
## [2.2.0] - 2022-MM-DD
### Added
- Added triplet sampling in `LinkNeighborLoader` ([#6004](https://github.com/pyg-team/pytorch_geometric/pull/6004))
- Added `FusedAggregation` of simple scatter reductions ([#6036](https://github.com/pyg-team/pytorch_geometric/pull/6036))
- Added `HeteroData` support for `to_captum_model` and added `to_captum_input` ([#5934](https://github.com/pyg-team/pytorch_geometric/pull/5934))
- Added `HeteroData` support in `RandomNodeLoader` ([#6007](https://github.com/pyg-team/pytorch_geometric/pull/6007))
- Added bipartite `GraphSAGE` example ([#5834](https://github.com/pyg-team/pytorch_geometric/pull/5834))
- Added `LRGBDataset` to include 5 datasets from the [Long Range Graph Benchmark](https://openreview.net/pdf?id=in7XC5RcjEn) ([#5935](https://github.com/pyg-team/pytorch_geometric/pull/5935))
- Added a warning for invalid node and edge type names in `HeteroData` ([#5990](https://github.com/pyg-team/pytorch_geometric/pull/5990))
- Added PyTorch 1.13 support ([#5975](https://github.com/pyg-team/pytorch_geometric/pull/5975))
@@ -15,7 +19,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Add `to_fixed_size` graph transformer ([#5939](https://github.com/pyg-team/pytorch_geometric/pull/5939))
- Add support for symbolic tracing of `SchNet` model ([#5938](https://github.com/pyg-team/pytorch_geometric/pull/5938))
- Add support for customizable interaction graph in `SchNet` model ([#5919](https://github.com/pyg-team/pytorch_geometric/pull/5919))
-- Started adding `torch.sparse` support to PyG ([#5906](https://github.com/pyg-team/pytorch_geometric/pull/5906), [#5944](https://github.com/pyg-team/pytorch_geometric/pull/5944))
+- Started adding `torch.sparse` support to PyG ([#5906](https://github.com/pyg-team/pytorch_geometric/pull/5906), [#5944](https://github.com/pyg-team/pytorch_geometric/pull/5944), [#6003](https://github.com/pyg-team/pytorch_geometric/pull/6003))
- Added `HydroNet` water cluster dataset ([#5537](https://github.com/pyg-team/pytorch_geometric/pull/5537), [#5902](https://github.com/pyg-team/pytorch_geometric/pull/5902), [#5903](https://github.com/pyg-team/pytorch_geometric/pull/5903))
- Added explainability support for heterogeneous GNNs ([#5886](https://github.com/pyg-team/pytorch_geometric/pull/5886))
- Added `SparseTensor` support to `SuperGATConv` ([#5888](https://github.com/pyg-team/pytorch_geometric/pull/5888))
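To tie the `FusedAggregation` entry above to concrete usage, here is a minimal sketch (not part of this diff) that mirrors the call pattern of `test/nn/aggr/test_fused.py` shown further down; the example tensors and the choice of reductions are illustrative only:

import torch

from torch_geometric.nn.aggr.fused import FusedAggregation
from torch_geometric.nn.resolver import aggregation_resolver

x = torch.randn(10, 16)              # 10 rows of 16 features
index = torch.randint(0, 4, (10, ))  # assigns each row to one of 4 groups

aggrs = [aggregation_resolver(aggr) for aggr in ['sum', 'mean', 'max', 'std']]
fused_aggr = FusedAggregation(aggrs)

# One fused call instead of four separate scatter reductions; the per-reduction
# outputs are concatenated along the feature dimension:
out = fused_aggr(x, index, dim_size=4)  # shape: [4, 16 * len(aggrs)]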
10 changes: 5 additions & 5 deletions examples/captum_explainability.py
@@ -7,7 +7,7 @@

import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid
-from torch_geometric.nn import Explainer, GCNConv, to_captum
+from torch_geometric.nn import Explainer, GCNConv, to_captum_model

dataset = 'Cora'
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'Planetoid')
@@ -49,7 +49,7 @@ def forward(self, x, edge_index):

# Captum assumes that for all given input tensors, dimension 0 is
# equal to the number of samples. Therefore, we use unsqueeze(0).
-captum_model = to_captum(model, mask_type='edge', output_idx=output_idx)
+captum_model = to_captum_model(model, mask_type='edge', output_idx=output_idx)
edge_mask = torch.ones(data.num_edges, requires_grad=True, device=device)

ig = IntegratedGradients(captum_model)
@@ -69,7 +69,7 @@ def forward(self, x, edge_index):
# Node explainability
# ===================

-captum_model = to_captum(model, mask_type='node', output_idx=output_idx)
+captum_model = to_captum_model(model, mask_type='node', output_idx=output_idx)

ig = IntegratedGradients(captum_model)
ig_attr_node = ig.attribute(data.x.unsqueeze(0), target=target,
@@ -88,8 +88,8 @@ def forward(self, x, edge_index):
# Node and edge explainability
# ============================

-captum_model = to_captum(model, mask_type='node_and_edge',
-                         output_idx=output_idx)
+captum_model = to_captum_model(model, mask_type='node_and_edge',
+                               output_idx=output_idx)

ig = IntegratedGradients(captum_model)
ig_attr_node, ig_attr_edge = ig.attribute(
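For context on the `to_captum` -> `to_captum_model` rename above, a minimal sketch of the edge-mask attribution call that the hunks elide (`model`, `data`, `device`, `output_idx`, and `target` are defined earlier in the example; passing the node features and `edge_index` through Captum's `additional_forward_args` is an assumption about the elided code):

from captum.attr import IntegratedGradients

captum_model = to_captum_model(model, mask_type='edge', output_idx=output_idx)
edge_mask = torch.ones(data.num_edges, requires_grad=True, device=device)

ig = IntegratedGradients(captum_model)
# Captum treats dimension 0 of every input tensor as the sample dimension,
# hence the unsqueeze(0) on the mask:
ig_attr_edge = ig.attribute(
    edge_mask.unsqueeze(0),
    target=target,
    additional_forward_args=(data.x, data.edge_index),
    internal_batch_size=1,
)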
163 changes: 163 additions & 0 deletions examples/hetero/bipartite_sage.py
@@ -0,0 +1,163 @@
import os.path as osp

import torch
import torch.nn.functional as F
from torch.nn import Embedding, Linear

import torch_geometric.transforms as T
from torch_geometric.datasets import MovieLens
from torch_geometric.nn import SAGEConv
from torch_geometric.nn.conv.gcn_conv import gcn_norm

path = osp.join(osp.dirname(osp.realpath(__file__)), '../../data/MovieLens')
dataset = MovieLens(path, model_name='all-MiniLM-L6-v2')
data = dataset[0]
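# Users come without raw input features, so we store plain node indices here
# and learn a user embedding inside the model (see `Model.user_emb` below):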
data['user'].x = torch.arange(data['user'].num_nodes)
data['user', 'movie'].edge_label = data['user', 'movie'].edge_label.float()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data = data.to(device)

# Add a reverse ('movie', 'rev_rates', 'user') relation for message passing:
data = T.ToUndirected()(data)
del data['movie', 'rev_rates', 'user'].edge_label # Remove "reverse" label.

# Perform a link-level split into training, validation, and test edges:
train_data, val_data, test_data = T.RandomLinkSplit(
    num_val=0.1,
    num_test=0.1,
    neg_sampling_ratio=0.0,
    edge_types=[('user', 'rates', 'movie')],
    rev_edge_types=[('movie', 'rev_rates', 'user')],
)(data)

# Generate the co-occurrence matrix of movies<>movies:
metapath = [('movie', 'rev_rates', 'user'), ('user', 'rates', 'movie')]
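# `AddMetaPaths` composes these two relations into a new
# ('movie', 'metapath_0', 'movie') edge type connecting movies rated by the
# same user: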
train_data = T.AddMetaPaths(metapaths=[metapath])(train_data)

# Apply GCN normalization to the metapath edges and only keep edges whose
# normalized weight exceeds a small threshold, i.e., the strongest
# movie<>movie co-occurrences:
_, edge_weight = gcn_norm(
    train_data['movie', 'movie'].edge_index,
    num_nodes=train_data['movie'].num_nodes,
    add_self_loops=False,
)
edge_index = train_data['movie', 'movie'].edge_index[:, edge_weight > 0.002]

train_data['movie', 'metapath_0', 'movie'].edge_index = edge_index
val_data['movie', 'metapath_0', 'movie'].edge_index = edge_index
test_data['movie', 'metapath_0', 'movie'].edge_index = edge_index


class MovieGNNEncoder(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels):
        super().__init__()

        self.conv1 = SAGEConv(-1, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        x = self.conv2(x, edge_index).relu()
        return self.lin(x)


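# The user encoder aggregates movie information into user embeddings in two
# rounds: `conv1` refines movie features over the metapath graph, `conv2`
# aggregates raw movie features into users, and `conv3` aggregates the refined
# movie features into the user representation obtained from `conv2`.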
class UserGNNEncoder(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = SAGEConv((-1, -1), hidden_channels)
        self.conv2 = SAGEConv((-1, -1), hidden_channels)
        self.conv3 = SAGEConv((-1, -1), hidden_channels)
        self.lin = Linear(hidden_channels, out_channels)

    def forward(self, x_dict, edge_index_dict):
        movie_x = self.conv1(
            x_dict['movie'],
            edge_index_dict[('movie', 'metapath_0', 'movie')],
        ).relu()

        user_x = self.conv2(
            (x_dict['movie'], x_dict['user']),
            edge_index_dict[('movie', 'rev_rates', 'user')],
        ).relu()

        user_x = self.conv3(
            (movie_x, user_x),
            edge_index_dict[('movie', 'rev_rates', 'user')],
        ).relu()

        return self.lin(user_x)


class EdgeDecoder(torch.nn.Module):
    def __init__(self, hidden_channels):
        super().__init__()
        self.lin1 = Linear(2 * hidden_channels, hidden_channels)
        self.lin2 = Linear(hidden_channels, 1)

    def forward(self, z_src, z_dst, edge_label_index):
        row, col = edge_label_index
        z = torch.cat([z_src[row], z_dst[col]], dim=-1)

        z = self.lin1(z).relu()
        z = self.lin2(z)
        return z.view(-1)


class Model(torch.nn.Module):
    def __init__(self, num_users, hidden_channels, out_channels):
        super().__init__()
        self.user_emb = Embedding(num_users, hidden_channels)
        self.user_encoder = UserGNNEncoder(hidden_channels, out_channels)
        self.movie_encoder = MovieGNNEncoder(hidden_channels, out_channels)
        self.decoder = EdgeDecoder(hidden_channels)

    def forward(self, x_dict, edge_index_dict, edge_label_index):
        z_dict = {}
        x_dict['user'] = self.user_emb(x_dict['user'])
        z_dict['user'] = self.user_encoder(x_dict, edge_index_dict)
        z_dict['movie'] = self.movie_encoder(
            x_dict['movie'],
            edge_index_dict[('movie', 'metapath_0', 'movie')],
        )
        return self.decoder(z_dict['user'], z_dict['movie'], edge_label_index)


model = Model(data['user'].num_nodes, hidden_channels=64, out_channels=64)
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0003)


def train():
    model.train()
    optimizer.zero_grad()
    out = model(
        train_data.x_dict,
        train_data.edge_index_dict,
        train_data['user', 'movie'].edge_label_index,
    )
    loss = F.mse_loss(out, train_data['user', 'movie'].edge_label)
    loss.backward()
    optimizer.step()
    return float(loss)


@torch.no_grad()
def test(data):
    model.eval()
    out = model(
        data.x_dict,
        data.edge_index_dict,
        data['user', 'movie'].edge_label_index,
    ).clamp(min=0, max=5)
    rmse = F.mse_loss(out, data['user', 'movie'].edge_label).sqrt()
    return float(rmse)


for epoch in range(1, 701):
    loss = train()
    train_rmse = test(train_data)
    val_rmse = test(val_data)
    test_rmse = test(test_data)
    print(f'Epoch: {epoch:04d}, Loss: {loss:.4f}, Train: {train_rmse:.4f}, '
          f'Val: {val_rmse:.4f}, Test: {test_rmse:.4f}')
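

# A hypothetical inference helper, not part of the committed example: it pairs
# a single user with every movie, scores the pairs with the trained model, and
# returns the top-k predicted ratings.  The name `recommend` and its defaults
# are introduced here for illustration; the (row 0: user, row 1: movie) layout
# of `edge_label_index` matches its use in `train()` and `test()` above.
@torch.no_grad()
def recommend(data, user_id, k=10):
    model.eval()
    num_movies = data['movie'].num_nodes
    edge_label_index = torch.stack([
        torch.full((num_movies, ), user_id, device=device),
        torch.arange(num_movies, device=device),
    ], dim=0)
    pred = model(data.x_dict, data.edge_index_dict, edge_label_index)
    return pred.clamp(min=0, max=5).topk(k)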
8 changes: 4 additions & 4 deletions examples/ogbn_proteins_deepgcn.py
@@ -5,7 +5,7 @@
from torch_scatter import scatter
from tqdm import tqdm

-from torch_geometric.loader import RandomNodeSampler
+from torch_geometric.loader import RandomNodeLoader
from torch_geometric.nn import DeepGCNLayer, GENConv

dataset = PygNodePropPredDataset('ogbn-proteins', root='../data')
@@ -24,9 +24,9 @@
mask[splitted_idx[split]] = True
data[f'{split}_mask'] = mask

-train_loader = RandomNodeSampler(data, num_parts=40, shuffle=True,
-                                 num_workers=5)
-test_loader = RandomNodeSampler(data, num_parts=5, num_workers=5)
+train_loader = RandomNodeLoader(data, num_parts=40, shuffle=True,
+                                num_workers=5)
+test_loader = RandomNodeLoader(data, num_parts=5, num_workers=5)


class DeeperGCN(torch.nn.Module):
8 changes: 4 additions & 4 deletions examples/rev_gnn.py
@@ -13,7 +13,7 @@
from tqdm import tqdm

import torch_geometric.transforms as T
-from torch_geometric.loader import RandomNodeSampler
+from torch_geometric.loader import RandomNodeLoader
from torch_geometric.nn import GroupAddRev, SAGEConv
from torch_geometric.utils import index_to_mask

@@ -91,11 +91,11 @@ def forward(self, x, edge_index):
for split in ['train', 'valid', 'test']:
    data[f'{split}_mask'] = index_to_mask(split_idx[split], data.y.shape[0])

-train_loader = RandomNodeSampler(data, num_parts=10, shuffle=True,
-                                 num_workers=5)
+train_loader = RandomNodeLoader(data, num_parts=10, shuffle=True,
+                                num_workers=5)
# Increase the num_parts of the test loader if you cannot fit
# the full batch graph into your GPU:
-test_loader = RandomNodeSampler(data, num_parts=1, num_workers=5)
+test_loader = RandomNodeLoader(data, num_parts=1, num_workers=5)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = RevGNN(
52 changes: 52 additions & 0 deletions test/loader/test_random_node_loader.py
@@ -0,0 +1,52 @@
import torch

from torch_geometric.data import Data, HeteroData
from torch_geometric.loader import RandomNodeLoader
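# `RandomNodeLoader` (the successor of `RandomNodeSampler`) partitions the node
# set into `num_parts` random parts and returns the subgraph induced by each
# part as a batch.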


def get_edge_index(num_src_nodes, num_dst_nodes, num_edges):
    row = torch.randint(num_src_nodes, (num_edges, ), dtype=torch.long)
    col = torch.randint(num_dst_nodes, (num_edges, ), dtype=torch.long)
    return torch.stack([row, col], dim=0)


def test_random_node_loader():
    data = Data()
    data.x = torch.randn(100, 128)
    data.node_id = torch.arange(100)
    data.edge_index = get_edge_index(100, 100, 500)
    data.edge_attr = torch.randn(500, 32)

    loader = RandomNodeLoader(data, num_parts=4, shuffle=True)
    assert len(loader) == 4

    for batch in loader:
        assert len(batch) == 4
        assert batch.node_id.min() >= 0
        assert batch.node_id.max() < 100
        assert batch.edge_index.size(1) == batch.edge_attr.size(0)
        assert torch.allclose(batch.x, data.x[batch.node_id])
        batch.validate()


def test_heterogeneous_random_node_loader():
    data = HeteroData()
    data['paper'].x = torch.randn(100, 128)
    data['paper'].node_id = torch.arange(100)
    data['author'].x = torch.randn(200, 128)
    data['author'].node_id = torch.arange(200)
    data['paper', 'author'].edge_index = get_edge_index(100, 200, 500)
    data['paper', 'author'].edge_attr = torch.randn(500, 32)
    data['author', 'paper'].edge_index = get_edge_index(200, 100, 400)
    data['author', 'paper'].edge_attr = torch.randn(400, 32)
    data['paper', 'paper'].edge_index = get_edge_index(100, 100, 600)
    data['paper', 'paper'].edge_attr = torch.randn(600, 32)

    loader = RandomNodeLoader(data, num_parts=4, shuffle=True)
    assert len(loader) == 4

    for batch in loader:
        assert len(batch) == 4
        assert batch.node_types == data.node_types
        assert batch.edge_types == data.edge_types
        batch.validate()
67 changes: 67 additions & 0 deletions test/nn/aggr/test_fused.py
@@ -0,0 +1,67 @@
import pytest
import torch

from torch_geometric.nn.aggr.fused import FusedAggregation
from torch_geometric.nn.resolver import aggregation_resolver


@pytest.mark.parametrize('aggrs', [
    ['sum', 'mean', 'min', 'max', 'mul', 'var', 'std'],
    ['sum', 'min', 'max', 'mul', 'var', 'std'],
    ['min', 'max', 'mul', 'var', 'std'],
    ['mean', 'min', 'max', 'mul', 'var', 'std'],
    ['sum', 'min', 'max', 'mul', 'std'],
    ['mean', 'min', 'max', 'mul', 'std'],
    ['min', 'max', 'mul', 'std'],
])
def test_fused_aggregation(aggrs):
    aggrs = [aggregation_resolver(aggr) for aggr in aggrs]

    x = torch.randn(6, 1)
    y = x.clone()
    index = torch.tensor([0, 0, 1, 1, 1, 3])

    x.requires_grad_(True)
    y.requires_grad_(True)

    aggr = FusedAggregation(aggrs)
    assert str(aggr) == 'FusedAggregation()'
    out = aggr(x, index)

    expected = torch.cat([aggr(y, index) for aggr in aggrs], dim=-1)
    assert torch.allclose(out, expected)

    out.mean().backward()
    assert x.grad is not None
    expected.mean().backward()
    assert y.grad is not None
    assert torch.allclose(x.grad, y.grad)


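# Rough CUDA micro-benchmark (requires a GPU): compares concatenating the
# outputs of the individual aggregations against a single fused call.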
if __name__ == '__main__':
    import time

    x = torch.randn(50000, 64, device='cuda')
    index = torch.randint(1000, (x.size(0), ), device='cuda')

    aggrs = ['sum', 'mean', 'max', 'std']
    aggrs = [aggregation_resolver(aggr) for aggr in aggrs]
    fused_aggr = FusedAggregation(aggrs)

    num_warmups, num_steps = (500, 1000)

    for i in range(num_warmups + num_steps):
        if i == num_warmups:
            torch.cuda.synchronize()
            t = time.perf_counter()
        torch.cat([aggr(x, index, dim_size=1000) for aggr in aggrs], dim=-1)
    torch.cuda.synchronize()
    print(f'Vanilla implementation: {time.perf_counter() - t:.4f} seconds')

    for i in range(num_warmups + num_steps):
        if i == num_warmups:
            torch.cuda.synchronize()
            t = time.perf_counter()
        fused_aggr(x, index, dim_size=1000)
    torch.cuda.synchronize()
    print(f'Fused implementation: {time.perf_counter() - t:.4f} seconds')
