 ## PPI_Dataset
 - The full dataset contains 24 graphs; the `train` split loaded below holds 20 of them (graph ids 1 to 20).
 - The average number of nodes per graph is 2372. 
 - Each node has 50 features and 121 labels.

In [39]:
import numpy as np
import networkx as nx

from sklearn.metrics import accuracy_score

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

import dgl
import dgl.function as fn
from dgl import DGLGraph
from dgl.data.ppi import PPIDataset

In [2]:
# Load the training split of the PPI dataset (selected via mode='train').
train_dataset = PPIDataset(mode='train')

Loading G...


In [3]:
# Show the dataset object's default repr (only class name and address are printed).
train_dataset

<dgl.data.ppi.PPIDataset at 0x1109b0b38>

In [4]:
# The training split exposes a single DGLGraph object; the printed summary
# below reports 44906 nodes and 1271274 edges with no node/edge data attached.
g = train_dataset.graph
print(g)

DGLGraph(num_nodes=44906, num_edges=1271274,
         ndata_schemes={}
         edata_schemes={})


In [5]:
# Node ids of the graph; displayed as a tensor running from 0 to 44905.
g.nodes()

tensor([    0,     1,     2,  ..., 44903, 44904, 44905])

In [6]:
# Returns a tuple of two tensors, one entry per edge
# (presumably source ids and destination ids - confirm against the DGL docs).
g.edges()  # tuple

(tensor([    0,     0,     0,  ..., 44905, 44905, 44905]),
 tensor([  372,  1101,   766,  ..., 44608, 44831, 44905]))

In [7]:
# Label matrix for every node of the training split.
# The output shows a NumPy array of shape (44906, 121): one row per node,
# 121 label columns (visible entries look like 0/1 indicators).
labels = train_dataset.labels
print(labels.shape)
labels

(44906, 121)


array([[1, 0, 0, ..., 1, 1, 0],
       [1, 0, 0, ..., 1, 1, 1],
       [1, 1, 1, ..., 0, 0, 0],
       ...,
       [1, 1, 0, ..., 1, 1, 0],
       [1, 0, 0, ..., 1, 1, 1],
       [1, 1, 0, ..., 1, 1, 0]])

In [8]:
# Sum each row (labels per node), then average over all nodes.
labels.sum(axis=1).mean()  # average number of labels held per node

37.199839665078166

In [9]:
# Feature matrix for every node of the training split.
# The output shows a NumPy array of shape (44906, 50): one row per node.
features = train_dataset.features
print(features.shape)
features

(44906, 50)


array([[-0.0855143 , -0.08837446, -0.11277995, ..., -0.1398976 ,
        -0.14936616, -0.14811485],
       [-0.0855143 , -0.08837446, -0.11277995, ..., -0.1398976 ,
        -0.14936616, -0.14811485],
       [-0.0855143 , -0.08837446, -0.11277995, ..., -0.1398976 ,
        -0.14936616, -0.14811485],
       ...,
       [-0.0855143 , -0.08837446, -0.11277995, ..., -0.1398976 ,
        -0.14936616, -0.14811485],
       [-0.0855143 , -0.08837446, -0.11277995, ..., -0.1398976 ,
        -0.14936616, -0.14811485],
       [-0.0855143 , -0.08837446, -0.11277995, ..., -0.1398976 ,
        -0.14936616, -0.14811485]])

In [10]:
# Per-node graph membership: graph_id[i] is the id of the graph that node i
# belongs to (displayed values run from 1 to 20).
graph_id = train_dataset.graph_id
# NOTE(review): print() and the bare expression below show the same array twice.
print(graph_id)
graph_id

[ 1  1  1 ... 20 20 20]


array([ 1,  1,  1, ..., 20, 20, 20])

## Split into 20 graphs (one per graph_id)

In [11]:
# Re-display the per-node graph ids that drive the split below.
graph_id

array([ 1,  1,  1, ..., 20, 20, 20])

In [12]:
print(np.where(graph_id==1))  # get only the indices that satisfy the condition; note the result is a 1-element tuple

(array([   0,    1,    2, ..., 1764, 1765, 1766]),)


In [13]:
# Graph with graph_id == 1
# ([0] unwraps the 1-element tuple returned by np.where into the index array)
g.subgraph(np.where(graph_id==1)[0])

DGLGraph(num_nodes=1767, num_edges=34085,
         ndata_schemes={}
         edata_schemes={})

In [14]:
# Features of the graph with graph_id == 1
# Slice by row (keep all feature columns)
features[np.where(graph_id==1)[0], :].shape

(1767, 50)

In [15]:
# Labels of the graph with graph_id == 1
# Slice by row (keep all label columns)
labels[np.where(graph_id==1)[0], :].shape

(1767, 121)

In [16]:
# Build one DGLGraph per distinct graph id (20 in the training split) and
# store them in a list. Each subgraph carries its own node features and
# labels under ndata['feature'] and ndata['label'].
graph_list = []
for gid in np.unique(graph_id):  # `gid`, not `id`: avoid shadowing the builtin id()
    # Row indices of the nodes belonging to this graph. Computed once and
    # reused for the subgraph and both row slices so the three stay aligned.
    node_idx = np.where(graph_id == gid)[0]
    g_tmp = g.subgraph(node_idx)
    g_tmp.ndata['feature'] = features[node_idx, :]
    g_tmp.ndata['label'] = labels[node_idx, :]
    graph_list.append(g_tmp)

In [17]:
# Inspect the second subgraph; its summary lists the attached
# 'feature' (50 per node) and 'label' (121 per node) node data.
graph_list[1]

DGLGraph(num_nodes=1377, num_edges=31081,
         ndata_schemes={'feature': Scheme(shape=(50,), dtype=dtype('float64')), 'label': Scheme(shape=(121,), dtype=dtype('int64'))}
         edata_schemes={})

In [18]:
# Node features attached to the second subgraph.
graph_list[1].ndata['feature']

array([[-0.0855143 , -0.08837446, -0.11277995, ..., -0.1398976 ,
        -0.14936616, -0.14811485],
       [-0.0855143 , -0.08837446, -0.11277995, ..., -0.1398976 ,
        -0.14936616, -0.14811485],
       [-0.0855143 , -0.08837446, -0.11277995, ..., -0.1398976 ,
        -0.14936616, -0.14811485],
       ...,
       [-0.0855143 , -0.08837446, -0.11277995, ..., -0.1398976 ,
        -0.14936616, -0.14811485],
       [-0.0855143 , -0.08837446, -0.11277995, ..., -0.1398976 ,
        -0.14936616, -0.14811485],
       [-0.0855143 , -0.08837446, -0.11277995, ..., -0.1398976 ,
        -0.14936616, -0.14811485]])

In [19]:
# Node labels attached to the second subgraph.
graph_list[1].ndata['label']

array([[1, 0, 0, ..., 1, 1, 0],
       [1, 0, 0, ..., 1, 1, 1],
       [1, 0, 0, ..., 1, 0, 0],
       ...,
       [1, 0, 0, ..., 1, 1, 0],
       [1, 0, 0, ..., 1, 1, 1],
       [1, 1, 0, ..., 1, 1, 0]])