In [1]:
# !nvidia-smi

In [7]:
# -*- coding: utf-8 -*-
#
#    Copyright (C) 2021-2029 by
#    Mahmood Amintoosi <m.amintoosi@gmail.com>
#    All rights reserved.
#    BSD license.
from itertools import combinations, chain

In [2]:
%%time
!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-1.9.0+cu102.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-1.9.0+cu102.html
!pip install -q torch-geometric
# !pip install -q torch-scatter
# !pip install -q torch-sparse 

CPU times: user 240 ms, sys: 118 ms, total: 358 ms
Wall time: 8.68 s


In [3]:
# !pip show torch

In [36]:
import argparse
import os.path as osp
from tqdm import tqdm
from sklearn.cluster import KMeans

import torch
from torch.nn import ReLU
import torch.nn.functional as F

import torch_geometric.transforms as T
# from torch_geometric.datasets import OGB_MAG
from torch_geometric.datasets import DBLP
from torch_geometric.loader import NeighborLoader, HGTLoader
from torch_geometric.nn import Sequential, SAGEConv, Linear, to_hetero, HeteroConv

# Root directory handed to the DBLP dataset class.
# NOTE(review): the active path is absolute and machine-specific; the
# commented-out relative path is the portable alternative.
# path = '../data/DBLP/'
path = '/mnt/c/temp/working/data/DBLP/'
dataset = DBLP(path)
# The first dataset item is the heterogeneous graph used by every later cell.
data = dataset[0]

# Give every conference node a constant one-dimensional feature (a column of
# ones) -- presumably because this node type ships without features; confirm.
data['conference'].x = torch.ones(data['conference'].num_nodes, 1)

# Input ("seed") nodes for the loaders: authors selected by the train / val masks.
train_input_nodes = ('author', data['author'].train_mask)
val_input_nodes = ('author', data['author'].val_mask)
# Loader options shared by the training and validation loaders.
kwargs = {'batch_size': 64, 'num_workers': 2, 'persistent_workers': True}

# num_neighbors has two entries of 10 -- presumably up to 10 sampled neighbours
# per hop for two hops; confirm against the NeighborLoader documentation.
# NOTE(review): shuffle=False keeps the training batches in a fixed order;
# confirm this is intentional.
train_loader = NeighborLoader(data, num_neighbors=[10] * 2, shuffle=False,
                              input_nodes=train_input_nodes, **kwargs)

# Validation loader: same sampling configuration, seeded from validation authors.
val_loader = NeighborLoader(data, num_neighbors=[10] * 2,
                            input_nodes=val_input_nodes, **kwargs)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# r_list is the list of relations (edge types) that the network will take into account.
class HeteroGNN(torch.nn.Module):
    """Stacked heterogeneous SAGE convolutions with a linear head on authors.

    Args:
        r_list: Edge types (relations) for which every layer builds its own
            ``SAGEConv``.
        hidden_channels: Output width of each ``SAGEConv``.
        out_channels: Output width of the final linear layer.
        num_layers: Number of ``HeteroConv`` layers to stack.
    """

    def __init__(self, r_list, hidden_channels, out_channels, num_layers):
        super().__init__()

        # Each layer owns one SAGEConv per relation in r_list. The (-1, -1)
        # input size is filled in lazily on the first forward pass.
        # r_list may also be a subset of metadata[1], e.g. only the first two
        # relations.
        layers = []
        for _layer in range(num_layers):
            per_relation = {}
            for relation in r_list:
                per_relation[relation] = SAGEConv((-1, -1), hidden_channels)
            layers.append(HeteroConv(per_relation))
        self.convs = torch.nn.ModuleList(layers)

        self.lin = Linear(hidden_channels, out_channels)

    def forward(self, x_dict, edge_index_dict):
        """Apply every layer with leaky-ReLU, then the linear head on 'author'.

        Args:
            x_dict: Mapping from node type to that type's feature tensor.
            edge_index_dict: Mapping from edge type to its edge index tensor.

        Returns:
            The output of ``self.lin`` applied to the 'author' representations.
        """
        hidden = x_dict
        for layer in self.convs:
            hidden = layer(hidden, edge_index_dict)
            activated = {}
            for node_type, features in hidden.items():
                activated[node_type] = F.leaky_relu(features)
            hidden = activated
        return self.lin(hidden['author'])


# Baseline model built over every relation reported by the graph's metadata,
# placed on the selected device.
model = HeteroGNN(
    r_list=data.metadata()[1],
    hidden_channels=64,
    out_channels=4,
    num_layers=2,
).to(device)

@torch.no_grad()
def init_params():
    """Initialise the model's lazy parameters with a single forward pass.

    Takes the first batch from the global ``train_loader``, moves it to the
    global ``device`` and runs it through the global ``model``. The output is
    discarded; gradients are disabled by the decorator.
    """
    first_batch = next(iter(train_loader)).to(device)
    model(first_batch.x_dict, first_batch.edge_index_dict)


def train():
    """Run one training epoch over the global ``train_loader``.

    Uses the global ``model``, ``optimizer`` and ``device``. The first batch of
    the epoch is printed once so its structure can be inspected.

    Returns:
        The cross-entropy loss averaged over all scored author nodes
        (each batch's loss is weighted by its ``batch_size``).
    """
    model.train()
    loss_sum = 0
    seen = 0
    for step, batch in enumerate(tqdm(train_loader)):
        optimizer.zero_grad()
        batch = batch.to(device)
        if step == 0:
            print(batch)

        # Only the first `batch_size` author rows of the batch are scored.
        n_scored = batch['author'].batch_size
        logits = model(batch.x_dict, batch.edge_index_dict)
        targets = batch['author'].y[:n_scored]
        loss = F.cross_entropy(logits[:n_scored], targets)
        loss.backward()
        optimizer.step()

        seen += n_scored
        loss_sum += loss.item() * n_scored

    return loss_sum / seen


@torch.no_grad()
def test(loader):
    """Measure the accuracy of the global ``model`` on the batches of ``loader``.

    Args:
        loader: Iterable of mini-batches exposing ``x_dict``,
            ``edge_index_dict`` and an 'author' store with ``y`` and
            ``batch_size``.

    Returns:
        Fraction of scored author nodes whose arg-max prediction equals the
        label.
    """
    model.eval()

    hits = 0
    seen = 0
    for batch in tqdm(loader):
        batch = batch.to(device)
        n_scored = batch['author'].batch_size

        logits = model(batch.x_dict, batch.edge_index_dict)
        # Only the first `batch_size` author rows of the batch are scored.
        predicted = logits.argmax(dim=-1)[:n_scored]
        expected = batch['author'].y[:n_scored]

        hits += int((predicted == expected).sum())
        seen += n_scored

    return hits / seen

In [8]:
def powerset(iterable):
    """Return an iterator over every subset of ``iterable`` as a tuple.

    Subsets are produced in order of increasing size, starting with the empty
    tuple, e.g. powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)
    """
    pool = list(iterable)
    subset_sizes = range(len(pool) + 1)
    combos_by_size = (combinations(pool, size) for size in subset_sizes)
    return chain.from_iterable(combos_by_size)

In [20]:
# Every edge type (relation) reported by the graph's metadata.
all_relations = data.metadata()[1]
# All subsets of relation positions; these are used later as indices for the
# columns.
indices = [*powerset(range(len(all_relations)))]
# powerset() yields the empty subset first -- drop it.
indices.pop(0)
# indices

In [26]:
# Draw a single mini-batch from the current train_loader and display its
# structure (node stores and edge index sizes) as the cell output.
batch = next(iter(train_loader))
batch

HeteroData(
  [1mauthor[0m={
    x=[146, 334],
    y=[146],
    train_mask=[146],
    val_mask=[146],
    test_mask=[146],
    batch_size=64
  },
  [1mpaper[0m={ x=[235, 4231] },
  [1mterm[0m={ x=[692, 50] },
  [1mconference[0m={
    num_nodes=19,
    x=[19, 1]
  },
  [1m(author, to, paper)[0m={ edge_index=[2, 376] },
  [1m(paper, to, author)[0m={ edge_index=[2, 236] },
  [1m(paper, to, term)[0m={ edge_index=[2, 0] },
  [1m(paper, to, conference)[0m={ edge_index=[2, 0] },
  [1m(term, to, paper)[0m={ edge_index=[2, 1416] },
  [1m(conference, to, paper)[0m={ edge_index=[2, 235] }
)

In [25]:
# for idx in indices:       
#     r_idx = list(idx)
#     r_list = [all_relations[x] for x in r_idx]
#     for item in r_list:
#         if 'author' in item:
#             print('Hast')
#     print(r_list)

In [33]:
# indices[-1]
# Display the node types of the graph as the cell output.
data.node_types

['author', 'paper', 'term', 'conference']

In [43]:
%%time
# Single-configuration run: the sweep over every relation subset is currently
# disabled, so only the last subset in `indices` is trained and evaluated.
# with tqdm(total=len(indices)) as progress_bar:
# for idx in indices:
idx = indices[-1]
r_idx = list(idx)
r_list = [all_relations[x] for x in r_idx]

# Collect, in first-seen order, the node types touched by the selected
# relations. Each relation is a (source, relation_name, destination) triple, so
# the node types are read from the first and last positions instead of being
# inferred by filtering out the literal relation name 'to' (which would break
# for any relation that is not named 'to').
node_list = []
author_in_r_list = False
for src_type, _, dst_type in r_list:
    for node_type in (src_type, dst_type):
        if node_type not in node_list:
            node_list.append(node_type)
    if 'author' in (src_type, dst_type):
        author_in_r_list = True
        # break
print('Node_list', node_list)
# Guard for the (disabled) sweep: skip subsets that never involve authors.
# if not author_in_r_list:
#     continue

# Fresh model restricted to the selected relations; rebinds the global `model`
# used by init_params(), train() and test().
model = HeteroGNN(r_list, hidden_channels=64, out_channels=4,
                  num_layers=2)
model = model.to(device)

# Rebinds the global `train_loader` used by init_params() and train().
train_loader = HGTLoader(
    data,
    # Sample 64 nodes per node type and per iteration for 4 iterations,
    # restricted to the node types that occur in r_list.
    # num_samples={key: [64] * 4 for key in data.node_types},
    num_samples={key: [64] * 4 for key in node_list},
    # Use a batch size of 32 for sampling the training input nodes (authors).
    batch_size=32,
    input_nodes=train_input_nodes
)

# train_loader = NeighborLoader(data, num_neighbors=[10] * 2, shuffle=True,
#                             input_nodes=train_input_nodes, **kwargs)
init_params()  # Initialize parameters.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Two training epochs; only the loss of the last epoch is reported below.
for epoch in range(1, 3):
    loss = train()

# Validation still uses the NeighborLoader-based `val_loader` built earlier.
val_acc = test(val_loader)
print(f'idx: {idx}, Epoch: {epoch:02d}, Loss: {loss:.4f}, Val: {val_acc:.4f}')

  0%|          | 0/13 [00:00<?, ?it/s]

Node_list ['author', 'paper', 'term', 'conference']
HeteroData(
  [1mauthor[0m={
    x=[132, 334],
    y=[132],
    train_mask=[132],
    val_mask=[132],
    test_mask=[132],
    batch_size=32
  },
  [1mpaper[0m={ x=[256, 4231] },
  [1mterm[0m={ x=[192, 50] },
  [1mconference[0m={
    num_nodes=19,
    x=[19, 1]
  },
  [1m(author, to, paper)[0m={ edge_index=[2, 367] },
  [1m(paper, to, author)[0m={ edge_index=[2, 360] },
  [1m(paper, to, term)[0m={ edge_index=[2, 347] },
  [1m(paper, to, conference)[0m={ edge_index=[2, 16] },
  [1m(term, to, paper)[0m={ edge_index=[2, 834] },
  [1m(conference, to, paper)[0m={ edge_index=[2, 256] }
)


100%|██████████| 13/13 [00:00<00:00, 20.01it/s]
 15%|█▌        | 2/13 [00:00<00:00, 17.05it/s]

HeteroData(
  [1mauthor[0m={
    x=[138, 334],
    y=[138],
    train_mask=[138],
    val_mask=[138],
    test_mask=[138],
    batch_size=32
  },
  [1mpaper[0m={ x=[256, 4231] },
  [1mterm[0m={ x=[192, 50] },
  [1mconference[0m={
    num_nodes=19,
    x=[19, 1]
  },
  [1m(author, to, paper)[0m={ edge_index=[2, 387] },
  [1m(paper, to, author)[0m={ edge_index=[2, 377] },
  [1m(paper, to, term)[0m={ edge_index=[2, 306] },
  [1m(paper, to, conference)[0m={ edge_index=[2, 15] },
  [1m(term, to, paper)[0m={ edge_index=[2, 831] },
  [1m(conference, to, paper)[0m={ edge_index=[2, 256] }
)


100%|██████████| 13/13 [00:00<00:00, 15.00it/s]
100%|██████████| 7/7 [00:00<00:00, 25.30it/s]

idx: (0, 1, 2, 3, 4, 5), Epoch: 02, Loss: 0.8722, Val: 0.6475
CPU times: user 6.31 s, sys: 122 ms, total: 6.43 s
Wall time: 1.95 s





In [None]:
# Display a summary of the full graph object as the cell output.
data

In [None]:
# https://pytorch-geometric.readthedocs.io/en/latest/notes/heterogeneous.html
# https://github.com/pyg-team/pytorch_geometric/blob/master/examples/hetero/to_hetero_mag.py