In [1]:
# !nvidia-smi

In [2]:
# -*- coding: utf-8 -*-
#
#    Copyright (C) 2021-2029 by
#    Mahmood Amintoosi <m.amintoosi@gmail.com>
#    All rights reserved.
#    BSD license.
from itertools import combinations, chain

In [3]:
%%time
!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-1.9.0+cu102.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-1.9.0+cu102.html
!pip install -q torch-geometric
# !pip install -q torch-scatter
# !pip install -q torch-sparse 

CPU times: user 415 ms, sys: 66.3 ms, total: 482 ms
Wall time: 10.7 s


In [4]:
# !pip show torch

In [27]:
import argparse
import os.path as osp
from tqdm import tqdm
from sklearn.cluster import KMeans

import torch
from torch.nn import ReLU
import torch.nn.functional as F

import torch_geometric.transforms as T
# from torch_geometric.datasets import OGB_MAG
from torch_geometric.datasets import DBLP
from torch_geometric.loader import NeighborLoader, HGTLoader
from torch_geometric.nn import Sequential, SAGEConv, Linear, to_hetero, HeteroConv

# Root directory handed to the DBLP dataset class.
# path = '../data/DBLP/'
path = '/mnt/c/temp/working/data/DBLP/'
dataset = DBLP(path)
data = dataset[0]  # first (index 0) graph object of the dataset

# Conference nodes receive a constant one-dimensional feature of ones.
data['conference'].x = torch.ones((data['conference'].num_nodes, 1))

# Input nodes for the loaders: authors selected by the train / validation masks.
train_input_nodes = ('author', data['author'].train_mask)
val_input_nodes = ('author', data['author'].val_mask)

# Options shared by every NeighborLoader created in this notebook.
kwargs = dict(batch_size=64, num_workers=2, persistent_workers=True)

# Both loaders use num_neighbors=[10, 10]
# (presumably 10 sampled neighbours per hop over two hops — see NeighborLoader docs).
train_loader = NeighborLoader(
    data,
    num_neighbors=[10, 10],
    shuffle=False,
    input_nodes=train_input_nodes,
    **kwargs
)
val_loader = NeighborLoader(
    data,
    num_neighbors=[10, 10],
    input_nodes=val_input_nodes,
    **kwargs
)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# r_list is the list of relations (edge types) that the network will consider.
class HeteroGNN(torch.nn.Module):
    """Heterogeneous GraphSAGE network restricted to a chosen set of relations.

    Every layer is a ``HeteroConv`` holding one lazily-initialised
    ``SAGEConv((-1, -1), hidden_channels)`` per edge type in ``r_list``. A final
    ``Linear`` layer maps the 'author' embeddings to ``out_channels`` outputs.

    Args:
        r_list: Iterable of edge types, e.g. ``data.metadata()[1]`` or a subset
            of it. It should contain at least one relation whose destination
            is 'author'; otherwise the final linear layer receives the
            un-embedded author features.
        hidden_channels: Output size of every ``SAGEConv``.
        out_channels: Output size of the final linear layer.
        num_layers: Number of stacked ``HeteroConv`` layers.
    """

    def __init__(self, r_list, hidden_channels, out_channels, num_layers):
        super().__init__()

        self.convs = torch.nn.ModuleList()
        for _ in range(num_layers):
            conv = HeteroConv({
                edge_type: SAGEConv((-1, -1), hidden_channels)
                for edge_type in r_list
                # e.g. metadata[1][:2] would select only the first two relations
            })
            self.convs.append(conv)

        self.lin = Linear(hidden_channels, out_channels)

    def forward(self, x_dict, edge_index_dict):
        """Compute author outputs from per-type features and edge indices.

        Args:
            x_dict: Mapping from node type to its feature tensor.
            edge_index_dict: Mapping from edge type to its edge index tensor.

        Returns:
            Output of the final linear layer applied to ``x_dict['author']``.
        """
        for conv in self.convs:
            updated = conv(x_dict, edge_index_dict)
            updated = {key: F.leaky_relu(x) for key, x in updated.items()}
            # HeteroConv returns entries only for node types that are the
            # destination of one of its relations (see the PyG HeteroConv
            # docs). Replacing x_dict outright would drop every other node
            # type, so with a relation subset the next layer could no longer
            # look up its source features. Carry untouched types forward and
            # overwrite only the updated ones. When every node type is a
            # destination this is identical to plain replacement.
            x_dict = {**x_dict, **updated}
        return self.lin(x_dict['author'])


# Baseline model built on every relation listed in the graph metadata,
# placed on the selected device.
model = HeteroGNN(
    r_list=data.metadata()[1],
    hidden_channels=64,
    out_channels=4,
    num_layers=2,
).to(device)

@torch.no_grad()
def init_params():
    """Materialise the model's lazy parameters with one forward pass.

    Reads the module-level ``train_loader``, ``model`` and ``device``: the first
    batch of the loader is moved to the device and pushed through the model.
    Gradients are disabled and the model output is discarded.
    """
    print("In init, train_loader:", train_loader)
    first_batch = next(iter(train_loader)).to(device)
    model(first_batch.x_dict, first_batch.edge_index_dict)


def train():
    """Run one training epoch over the module-level ``train_loader``.

    Uses the module-level ``model``, ``optimizer``, ``train_loader`` and
    ``device``. For each batch only the first ``batch['author'].batch_size``
    author rows enter the loss (presumably the sampled input authors of the
    batch — confirm against the NeighborLoader docs).

    Returns:
        float: Cross-entropy loss averaged over all examples seen in the epoch.

    Raises:
        ZeroDivisionError: If the loader yields no examples.
    """
    model.train()
    total_examples = 0
    total_loss = 0.0
    for batch in train_loader:
        optimizer.zero_grad()
        batch = batch.to(device)

        batch_size = batch['author'].batch_size
        out = model(batch.x_dict, batch.edge_index_dict)
        loss = F.cross_entropy(out[:batch_size], batch['author'].y[:batch_size])
        loss.backward()
        optimizer.step()

        # Weight each batch loss by its size so the epoch mean is per example.
        total_examples += batch_size
        total_loss += loss.item() * batch_size

    return total_loss / total_examples


@torch.no_grad()
def test(loader):
    """Return the accuracy of the module-level ``model`` over ``loader``.

    Args:
        loader: Iterable of batches exposing ``x_dict``, ``edge_index_dict`` and
            an 'author' store with ``batch_size`` and ``y``.

    Returns:
        float: Correct predictions divided by examples evaluated, counting only
        the first ``batch_size`` author rows of every batch.
    """
    model.eval()

    n_correct = 0
    n_seen = 0
    for batch in tqdm(loader):
        batch = batch.to(device)
        n_rows = batch['author'].batch_size

        logits = model(batch.x_dict, batch.edge_index_dict)
        predicted = logits.argmax(dim=-1)[:n_rows]
        target = batch['author'].y[:n_rows]

        n_seen += n_rows
        n_correct += int((predicted == target).sum())

    return n_correct / n_seen

In [6]:
def powerset(iterable):
    """Lazily yield every subset of ``iterable`` as a tuple, smallest first.

    powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)
    """
    items = list(iterable)
    subsets_by_size = (combinations(items, size) for size in range(len(items) + 1))
    return chain.from_iterable(subsets_by_size)

In [7]:
# Edge types of the graph (second element of the metadata tuple).
all_relations = data.metadata()[1]
# Every subset of relation positions, as tuples of indices into all_relations.
num_relations = len(all_relations)
indices = list(powerset(range(num_relations)))
# powerset yields the empty subset first; drop it.
indices.pop(0)
# indices

()

In [8]:
# Pull one mini-batch from the training loader and display it to inspect the
# sampled sub-graph (node stores and edge indices per type).
batch = next(iter(train_loader))
batch

HeteroData(
  [1mauthor[0m={
    x=[175, 334],
    y=[175],
    train_mask=[175],
    val_mask=[175],
    test_mask=[175],
    batch_size=64
  },
  [1mpaper[0m={ x=[334, 4231] },
  [1mterm[0m={ x=[941, 50] },
  [1mconference[0m={
    num_nodes=19,
    x=[19, 1]
  },
  [1m(author, to, paper)[0m={ edge_index=[2, 519] },
  [1m(paper, to, author)[0m={ edge_index=[2, 334] },
  [1m(paper, to, term)[0m={ edge_index=[2, 0] },
  [1m(paper, to, conference)[0m={ edge_index=[2, 0] },
  [1m(term, to, paper)[0m={ edge_index=[2, 2019] },
  [1m(conference, to, paper)[0m={ edge_index=[2, 334] }
)

In [9]:
# for idx in indices:       
#     r_idx = list(idx)
#     r_list = [all_relations[x] for x in r_idx]
#     for item in r_list:
#         if 'author' in item:
#             print('Hast')
#     print(r_list)

In [10]:
# indices[-1]
# Display the node types present in the graph.
data.node_types

['author', 'paper', 'term', 'conference']

In [32]:
%%time
# with tqdm(total=len(indices)) as progress_bar:
for idx in indices:
# idx = indices[-3]
    r_idx = list(idx)
    r_list = [all_relations[x] for x in r_idx]
    # print('r_list', r_list)
    node_list = []
    author_in_r_list = False
    if ('author', 'to', 'paper') in r_list and ('paper', 'to', 'author') in r_list:
        author_in_r_list = True
    if not author_in_r_list:
        continue
        
    for items in r_list:
        for item in items:
            if item not in node_list and item != 'to':
                node_list.append(item)
        # if 'author' in items:
        #     author_in_r_list = True
            # break
    print('Node_list', node_list)        

    model = HeteroGNN(r_list, hidden_channels=64, out_channels=4,
                    num_layers=2)
    model = model.to(device)

    # train_loader = HGTLoader(
    #     data,
    #     # Sample 64 nodes per type and per iteration for 4 iterations
    #     # num_samples={key: [64] * 4 for key in data.node_types},
    #     num_samples={key: [16] * 2 for key in node_list},
    #     # Use a batch size of 128 for sampling training nodes of type paper
    #     batch_size=32,
    #     input_nodes=train_input_nodes
    # )

    train_loader = NeighborLoader(data, num_neighbors=[10] * 2, shuffle=True,
                                input_nodes=train_input_nodes, **kwargs)
    init_params()  # Initialize parameters.
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    for epoch in range(1, 11):
        loss = train()
        
    val_acc = test(val_loader)
    print(f'idx: {idx}, Epoch: {epoch:02d}, Loss: {loss:.4f}, Val: {val_acc:.4f}')

r_list [('author', 'to', 'paper')]
r_list [('paper', 'to', 'author')]
r_list [('paper', 'to', 'term')]
r_list [('paper', 'to', 'conference')]
r_list [('term', 'to', 'paper')]
r_list [('conference', 'to', 'paper')]
r_list [('author', 'to', 'paper'), ('paper', 'to', 'author')]
Node_list ['author', 'paper']
In init, train_loader: NeighborLoader()


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/mahmood/anaconda3/lib/python3.7/site-packages/IPython/core/magics/execution.py", line 1321, in time
    exec(code, glob, local_ns)
  File "<timed exec>", line 43, in <module>
  File "<ipython-input-27-26c7be3a7093>", line 85, in train
    out = model(batch.x_dict, batch.edge_index_dict)
  File "/home/mahmood/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
    return forward_call(*input, **kwargs)
  File "<ipython-input-27-26c7be3a7093>", line 54, in forward
    x_dict = conv(x_dict, edge_index_dict)
  File "/home/mahmood/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/mahmood/anaconda3/lib/python3.7/site-packages/torch_geometric/nn/conv/hetero_conv.py", line 95, in forward
    **kwargs)
  File "/home/mahmood/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_im

TypeError: object of type 'NoneType' has no len()

In [12]:
# Display the full HeteroData graph object loaded from the dataset.
data

HeteroData(
  [1mauthor[0m={
    x=[4057, 334],
    y=[4057],
    train_mask=[4057],
    val_mask=[4057],
    test_mask=[4057]
  },
  [1mpaper[0m={ x=[14328, 4231] },
  [1mterm[0m={ x=[7723, 50] },
  [1mconference[0m={
    num_nodes=20,
    x=[20, 1]
  },
  [1m(author, to, paper)[0m={ edge_index=[2, 19645] },
  [1m(paper, to, author)[0m={ edge_index=[2, 19645] },
  [1m(paper, to, term)[0m={ edge_index=[2, 85810] },
  [1m(paper, to, conference)[0m={ edge_index=[2, 14328] },
  [1m(term, to, paper)[0m={ edge_index=[2, 85810] },
  [1m(conference, to, paper)[0m={ edge_index=[2, 14328] }
)

In [31]:
# Draw a single mini-batch from the current train_loader.
# NOTE(review): the saved output of this cell is `IndexError: _Map_base::at`;
# the cause is not visible from this notebook — TODO investigate the loader
# configuration that was active when the cell was run.
sampled_hetero_data = next(iter(train_loader))

IndexError: _Map_base::at

In [13]:
# https://pytorch-geometric.readthedocs.io/en/latest/notes/heterogeneous.html
# https://github.com/pyg-team/pytorch_geometric/blob/master/examples/hetero/to_hetero_mag.py