In [1]:
import torch
from torch_geometric.datasets import DBLP
from torch_geometric.utils import to_torch_sparse_tensor

import torch_geometric.transforms as T
import torch.nn.functional as F
import graph_polluters
import matplotlib.pyplot as plt
from copy import deepcopy

path = './data/dblp'
# We initialize conference node features with a single one-vector as feature:
dataset = DBLP(path, transform=T.Constant(node_types='conference'))
data = dataset[0]
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Sparsify the graph

In [2]:
data = graph_polluters.remove_edges(data, 0.5)

## Get conferences where authors have had papers in

In [3]:
author_to_paper = to_torch_sparse_tensor(data['author', 'to', 'paper'].edge_index, size=(4057,14328)).detach().clone()

paper_to_conference = to_torch_sparse_tensor(data['paper', 'to', 'conference'].edge_index, size=(14328,20)).detach().clone()

In [4]:
author_to_conference = author_to_paper @ paper_to_conference

  author_to_conference = author_to_paper @ paper_to_conference


In [9]:
torch.mm(author_to_paper, paper_to_conference).to_dense()

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 4.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 1., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 1., 0., 0.]])

In [6]:
author_to_conference

tensor(indices=tensor([[   1,    1,    2,  ..., 4052, 4054, 4056],
                       [   2,    9,   15,  ...,   17,   17,   17]]),
       values=tensor([4., 1., 1.,  ..., 1., 1., 1.]),
       size=(4057, 20), nnz=3524, layout=torch.sparse_coo)

In [21]:
a = torch.as_tensor([1,2,3])
b = ['a', 'b', 'c']

In [25]:
a.unsqueeze(-1)

tensor([[1],
        [2],
        [3]])

## Use cosine similarity to tell wheter to add new edge

In [11]:
def make_similar_edges(adjacency_matrix, min_similarity):
    adjacency_matrix = adjacency_matrix.to(device)

    indices = torch.combinations(torch.arange(adjacency_matrix.shape[0])).to(device)
    first_embeddings = adjacency_matrix[indices[:,0]]
    second_embeddings = adjacency_matrix[indices[:,1]]
    similarities = torch.nn.functional.cosine_similarity(first_embeddings, second_embeddings, dim=1)
    return indices[similarities > min_similarity].T

In [12]:
new_edges = make_similar_edges(author_to_conference, min_similarity=0.99)
new_edges.shape

NotImplementedError: Could not run 'aten::index.Tensor' with arguments from the 'SparseCUDA' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'aten::index.Tensor' is only available for these backends: [CPU, CUDA, HIP, MPS, IPU, XPU, HPU, VE, MTIA, PrivateUse1, PrivateUse2, PrivateUse3, Meta, FPGA, ORT, Vulkan, Metal, QuantizedCPU, QuantizedCUDA, QuantizedHIP, QuantizedMPS, QuantizedIPU, QuantizedXPU, QuantizedHPU, QuantizedVE, QuantizedMTIA, QuantizedPrivateUse1, QuantizedPrivateUse2, QuantizedPrivateUse3, QuantizedMeta, CustomRNGKeyId, MkldnnCPU, SparseCsrCPU, SparseCsrCUDA, BackendSelect, Python, FuncTorchDynamicLayerBackMode, Functionalize, Named, Conjugate, Negative, ZeroTensor, ADInplaceOrView, AutogradOther, AutogradCPU, AutogradCUDA, AutogradHIP, AutogradXLA, AutogradMPS, AutogradIPU, AutogradXPU, AutogradHPU, AutogradVE, AutogradLazy, AutogradMTIA, AutogradPrivateUse1, AutogradPrivateUse2, AutogradPrivateUse3, AutogradMeta, AutogradNestedTensor, Tracer, AutocastCPU, AutocastCUDA, FuncTorchBatched, BatchedNestedTensor, FuncTorchVmapMode, Batched, VmapMode, FuncTorchGradWrapper, PythonTLSSnapshot, FuncTorchDynamicLayerFrontMode, PreDispatch, PythonDispatcher].

Undefined: registered at aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp:21592 [default backend kernel]
CPU: registered at aten/src/ATen/RegisterCPU.cpp:31357 [kernel]
CUDA: registered at aten/src/ATen/RegisterCUDA.cpp:44411 [kernel]
HIP: registered at aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp:21592 [default backend kernel]
MPS: registered at aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp:21592 [default backend kernel]
IPU: registered at aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp:21592 [default backend kernel]
XPU: registered at aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp:21592 [default backend kernel]
HPU: registered at aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp:21592 [default backend kernel]
VE: registered at aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp:21592 [default backend kernel]
MTIA: registered at aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp:21592 [default backend kernel]
PrivateUse1: registered at aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp:21592 [default backend kernel]
PrivateUse2: registered at aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp:21592 [default backend kernel]
PrivateUse3: registered at aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp:21592 [default backend kernel]
Meta: registered at /dev/null:488 [kernel]
FPGA: registered at aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp:21592 [default backend kernel]
ORT: registered at aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp:21592 [default backend kernel]
Vulkan: registered at aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp:21592 [default backend kernel]
Metal: registered at aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp:21592 [default backend kernel]
QuantizedCPU: registered at aten/src/ATen/RegisterQuantizedCPU.cpp:944 [kernel]
QuantizedCUDA: registered at aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp:21592 [default backend kernel]
QuantizedHIP: registered at aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp:21592 [default backend kernel]
QuantizedMPS: registered at aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp:21592 [default backend kernel]
QuantizedIPU: registered at aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp:21592 [default backend kernel]
QuantizedXPU: registered at aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp:21592 [default backend kernel]
QuantizedHPU: registered at aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp:21592 [default backend kernel]
QuantizedVE: registered at aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp:21592 [default backend kernel]
QuantizedMTIA: registered at aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp:21592 [default backend kernel]
QuantizedPrivateUse1: registered at aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp:21592 [default backend kernel]
QuantizedPrivateUse2: registered at aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp:21592 [default backend kernel]
QuantizedPrivateUse3: registered at aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp:21592 [default backend kernel]
QuantizedMeta: registered at aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp:21592 [default backend kernel]
CustomRNGKeyId: registered at aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp:21592 [default backend kernel]
MkldnnCPU: registered at aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp:21592 [default backend kernel]
SparseCsrCPU: registered at aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp:21592 [default backend kernel]
SparseCsrCUDA: registered at aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp:21592 [default backend kernel]
BackendSelect: fallthrough registered at ../aten/src/ATen/core/BackendSelectFallbackKernel.cpp:3 [backend fallback]
Python: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:154 [backend fallback]
FuncTorchDynamicLayerBackMode: registered at ../aten/src/ATen/functorch/DynamicLayer.cpp:498 [backend fallback]
Functionalize: registered at ../aten/src/ATen/FunctionalizeFallbackKernel.cpp:324 [backend fallback]
Named: registered at ../aten/src/ATen/core/NamedRegistrations.cpp:7 [backend fallback]
Conjugate: registered at ../aten/src/ATen/ConjugateFallback.cpp:17 [backend fallback]
Negative: registered at ../aten/src/ATen/native/NegateFallback.cpp:19 [backend fallback]
ZeroTensor: registered at ../aten/src/ATen/ZeroTensorFallback.cpp:86 [backend fallback]
ADInplaceOrView: fallthrough registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:86 [backend fallback]
AutogradOther: registered at ../torch/csrc/autograd/generated/VariableType_1.cpp:16254 [autograd kernel]
AutogradCPU: registered at ../torch/csrc/autograd/generated/VariableType_1.cpp:16254 [autograd kernel]
AutogradCUDA: registered at ../torch/csrc/autograd/generated/VariableType_1.cpp:16254 [autograd kernel]
AutogradHIP: registered at ../torch/csrc/autograd/generated/VariableType_1.cpp:16254 [autograd kernel]
AutogradXLA: registered at ../torch/csrc/autograd/generated/VariableType_1.cpp:16254 [autograd kernel]
AutogradMPS: registered at ../torch/csrc/autograd/generated/VariableType_1.cpp:16254 [autograd kernel]
AutogradIPU: registered at ../torch/csrc/autograd/generated/VariableType_1.cpp:16254 [autograd kernel]
AutogradXPU: registered at ../torch/csrc/autograd/generated/VariableType_1.cpp:16254 [autograd kernel]
AutogradHPU: registered at ../torch/csrc/autograd/generated/VariableType_1.cpp:16254 [autograd kernel]
AutogradVE: registered at ../torch/csrc/autograd/generated/VariableType_1.cpp:16254 [autograd kernel]
AutogradLazy: registered at ../torch/csrc/autograd/generated/VariableType_1.cpp:16254 [autograd kernel]
AutogradMTIA: registered at ../torch/csrc/autograd/generated/VariableType_1.cpp:16254 [autograd kernel]
AutogradPrivateUse1: registered at ../torch/csrc/autograd/generated/VariableType_1.cpp:16254 [autograd kernel]
AutogradPrivateUse2: registered at ../torch/csrc/autograd/generated/VariableType_1.cpp:16254 [autograd kernel]
AutogradPrivateUse3: registered at ../torch/csrc/autograd/generated/VariableType_1.cpp:16254 [autograd kernel]
AutogradMeta: registered at ../torch/csrc/autograd/generated/VariableType_1.cpp:16254 [autograd kernel]
AutogradNestedTensor: registered at ../torch/csrc/autograd/generated/VariableType_1.cpp:16254 [autograd kernel]
Tracer: registered at ../torch/csrc/autograd/generated/TraceType_1.cpp:16002 [kernel]
AutocastCPU: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:378 [backend fallback]
AutocastCUDA: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:244 [backend fallback]
FuncTorchBatched: registered at ../aten/src/ATen/functorch/BatchRulesScatterOps.cpp:1242 [kernel]
BatchedNestedTensor: registered at ../aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:746 [backend fallback]
FuncTorchVmapMode: fallthrough registered at ../aten/src/ATen/functorch/VmapModeRegistrations.cpp:28 [backend fallback]
Batched: registered at ../aten/src/ATen/LegacyBatchingRegistrations.cpp:1075 [backend fallback]
VmapMode: fallthrough registered at ../aten/src/ATen/VmapModeRegistrations.cpp:33 [backend fallback]
FuncTorchGradWrapper: registered at ../aten/src/ATen/functorch/TensorWrapper.cpp:203 [backend fallback]
PythonTLSSnapshot: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:162 [backend fallback]
FuncTorchDynamicLayerFrontMode: registered at ../aten/src/ATen/functorch/DynamicLayer.cpp:494 [backend fallback]
PreDispatch: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:166 [backend fallback]
PythonDispatcher: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:158 [backend fallback]


In [24]:
data['author', 'to', 'author'].edge_index = new_edges

In [25]:
data

HeteroData(
  author={
    x=[4057, 334],
    y=[4057],
    train_mask=[4057],
    val_mask=[4057],
    test_mask=[4057],
  },
  paper={ x=[14328, 4231] },
  term={ x=[7723, 50] },
  conference={
    num_nodes=20,
    x=[20, 1],
  },
  (author, to, paper)={ edge_index=[2, 9823] },
  (paper, to, author)={
    edge_index=[2, 9823],
    edge_label=[9823],
    edge_label_index=[2, 9823],
  },
  (paper, to, term)={
    edge_index=[2, 42905],
    edge_label=[42905],
    edge_label_index=[2, 42905],
  },
  (paper, to, conference)={
    edge_index=[2, 7164],
    edge_label=[7164],
    edge_label_index=[2, 7164],
  },
  (term, to, paper)={ edge_index=[2, 42905] },
  (conference, to, paper)={ edge_index=[2, 7164] },
  (author, to, author)={ edge_index=[2, 102906] }
)

In [26]:
graph_learning.set_seed()
data_copy = deepcopy(data)
data_copy.to(device)
model, optimizer = graph_learning.init_parameters(data_copy, device)
train_accs, val_accs, test_accs = [],[],[]
for epoch in range(1, 100):
    loss = graph_learning.train_epoch(data=data_copy, model=model, optimizer=optimizer)
    train_acc, val_acc, test_acc = graph_learning.test_epoch(data = data_copy, model=model)
    train_accs.append(train_acc)
    val_accs.append(val_acc)
    test_accs.append(test_acc)
best_epoch = max(enumerate(val_accs),key=lambda x: x[1])[0]
train_acc, val_acc, test_acc = train_accs[best_epoch], val_accs[best_epoch], test_accs[best_epoch]
print(f'Enhanced: Best Epoch: {best_epoch} Train: {train_acc:.4f}, '
        f'Val: {val_acc:.4f}, Test: {test_acc:.4f}')

Enhanced: Best Epoch: 11 Train: 0.9750, Val: 0.8250, Test: 0.8293
