# GPU 확인

In [1]:
!nvidia-smi

Thu Oct 29 09:54:59 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 456.71       Driver Version: 456.71       CUDA Version: 11.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce GTX 105... WDDM  | 00000000:01:00.0  On |                  N/A |
| 45%   24C    P8    N/A /  75W |    390MiB /  4096MiB |      3%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                                  |
|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
|       

In [2]:
import torch
print(torch.__version__)

1.6.0


In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print ('Available devices ', torch.cuda.device_count())
print ('Current cuda device ', torch.cuda.current_device())
print(torch.cuda.get_device_name(device))

Available devices  1
Current cuda device  0
GeForce GTX 1050 Ti


In [4]:
torch.cuda.is_available()

True

In [5]:
print(torch.version.cuda)

10.2


In [6]:
!pip install torch-scatter==latest+cu102 -f https://pytorch-geometric.com/whl/torch-1.6.0.html
!pip install torch-sparse==latest+cu102 -f https://pytorch-geometric.com/whl/torch-1.6.0.html
!pip install torch-cluster==latest+cu102 -f https://pytorch-geometric.com/whl/torch-1.6.0.html
!pip install torch-spline-conv==latest+cu102 -f https://pytorch-geometric.com/whl/torch-1.6.0.html
!pip install torch-geometric

Looking in links: https://pytorch-geometric.com/whl/torch-1.6.0.html
Collecting torch-scatter==latest+cu102
  Using cached https://pytorch-geometric.com/whl/torch-1.6.0/torch_scatter-latest%2Bcu102-cp38-cp38-win_amd64.whl (2.4 MB)
Installing collected packages: torch-scatter
  Attempting uninstall: torch-scatter
    Found existing installation: torch-scatter 2.0.5
    Uninstalling torch-scatter-2.0.5:
      Successfully uninstalled torch-scatter-2.0.5
Successfully installed torch-scatter-2.0.5
Looking in links: https://pytorch-geometric.com/whl/torch-1.6.0.html
Collecting torch-sparse==latest+cu102
  Using cached https://pytorch-geometric.com/whl/torch-1.6.0/torch_sparse-latest%2Bcu102-cp38-cp38-win_amd64.whl (950 kB)
Installing collected packages: torch-sparse
  Attempting uninstall: torch-sparse
    Found existing installation: torch-sparse 0.6.7
    Uninstalling torch-sparse-0.6.7:
      Successfully uninstalled torch-sparse-0.6.7
Successfully installed torch-sparse-0.6.7
Looking in

# Set nodes, edges

In [7]:
import torch
from torch_geometric.data import Data

In [23]:
# 두 개 씩 짝이되고, 양방향
edge_index = torch.tensor([[0, 1, 1, 2],
                           [1, 0, 2, 1]], dtype=torch.long)

# 각 tensor에 할당되는 값
x = torch.tensor([[-1],[0],[1]], dtype=torch.float)


data = Data(x=x, edge_index=edge_index)
data

# Data에서 edge_attr도 설정할 수 있다.

Data(edge_index=[2, 4], x=[3, 1])

![image.png](attachment:image.png)

In [24]:
edge_index = torch.tensor([[0, 1],
                          [1, 0],
                          [1, 2],
                          [2, 1]], dtype=torch.long)

x = torch.tensor([[-1], [0], [1]], dtype=torch.float)

data = Data(x = x, 
           edge_index = edge_index.t().contiguous()) # 데이터 형태를 맞추기 위해 t()로 전치시킨다.
data

Data(edge_index=[2, 4], x=[3, 1])


##### contiguous


새로운 tensor를 생성하지 않고 기존의 tensor에서 메타데이터만 수정하는(메모리상에서 같은 공간 공유) narrow, view, expand, transpose 의 함수가 있다. 하지만 연산 과정에서 tensor가 메모리에 올려진 순서가 중요하다면 에러가 발생한다. 따라서 우리가 기대하는 순서로 유지하기 위해 contiguous를 사용하여 에러가 발생하는 것을 방지할 수 있다.

##### edge_index

그래프의 연결성

(2,4) 크기의 행렬 -> 4개의 edges

In [10]:
print(data.keys)
print(data['x'])

['x', 'edge_index']
tensor([[-1.],
        [ 0.],
        [ 1.]])


In [26]:
for key, item in data :
    print("{} found in data".format(key))
    print(item)

edge_index found in data
tensor([[0, 1, 1, 2],
        [1, 0, 2, 1]])
x found in data
tensor([[-1.],
        [ 0.],
        [ 1.]])


In [12]:
'edge_attr' in data

False

In [27]:
print("node count : ", data.num_nodes)
print("edge count : ", data.num_edges)

print("node features count : ", data.num_node_features)
print("node isolated : ", data.contains_isolated_nodes())
print("itself loop : ", data.contains_self_loops())
print("directed : ", data.is_directed())


# transfer data object to GPU
device = torch.device('cuda')
data = data.to(device)

node count :  3
edge count :  4
node features count :  1
node isolated :  False
itself loop :  False
directed :  False


# common benchmark datasets

torch_geometric.datasets 참고

## ENZYMES dataset

 dataset 안에 600개의 그래프가 있음.

In [28]:
from torch_geometric.datasets import TUDataset
# 그래프 데이터 세트의 non-isomorphic graph 이해 논문에서 동기를 부여한 정리 된 데이터셋
# 일부 데이터셋은 노드 라벨이 없다. use_node_attr인수를 사용하여 추가 연속 노드 속성을 로드 /
# torch_geometric.transforms.Constant 또는 torch_geometric.transforms.OneHotDegree 같은 변환 사용하여 합성노드기능 제공 가능

dataset = TUDataset(root ='/tmp/ENZYMES', name='ENZYMES')
dataset

ENZYMES(600)

In [11]:
print("dataset len : ",len(dataset)) # dataset 안에 600개의 그래프가 있음.
print("dataset num_classes : ", dataset.num_classes)
print("dataset num_node_features : ", dataset.num_node_features)

dataset len :  600
dataset num_classes :  6
dataset num_node_features :  3


In [64]:
# class 종류 : 6개
# 어떤것이 있는지 살펴보자

tmp1 = []
for i in range(600) : 
    tmp1.append(dataset[i].y.item())
    
print(tmp1)

[5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 

In [78]:
data = dataset[0]
print(data) # edge_index=[2, 168] 인 경우에 총 84개의 edge, 37개의 node, 3개의 node features, 1개의 graph level target

print("directed : ", data.is_directed()) # 방향성 x


# train, test set indexing으로 분리
train_dataset = dataset[:540] # 540개 # 9 : 1 의 비율
test_dataset = dataset[540:] # 60개

# shuffle 진행
#dataset = dataset.shuffle()


Data(edge_index=[2, 168], x=[37, 3], y=[1])
directed :  False


## Cora dataset

dataset 전체가 하나의 그래프

총 1443개의 노드특성, 클래스 수는 7개(그래프가 하나이므로 target이 노드임을 알 수 있음)


주로 (semi-supervised) graph node classification 데이터셋으로 사용


하나의 논문은 다른 논문들을 인용할 수 있는데, 이를 연결구조로 표현한 것이 Citation Network

    - 각 논문이 노드, 인용 관계가 엣지
    - 논문의 특정 단어 1433개로 단어 사전 생성 후 -> 논문마다 단어의 등장여부를 feature vector로 생성 -> 노드의 특징
    - 논문 내 등장한 단어들과 인용 관계만으로 어떤 종류의 논문인지 맞히는 task

In [79]:
from torch_geometric.datasets import Planetoid

dataset = Planetoid(root = '/tmp/Cora', name='Cora')
print("dataset len : ",len(dataset)) # dataset 안에 1개의 그래프가 있음.
print("dataset num_classes : ", dataset.num_classes)
print("dataset num_node_features : ", dataset.num_node_features) # node feature가 1433개

dataset len :  1
dataset num_classes :  7
dataset num_node_features :  1433


In [113]:
import pandas as pd

In [115]:
tmp_train = []
tmp_test = []
tmp_val = []

cnt = 0
for i in data.train_mask :
    tmp_train.append(i.sum().item())
    
for i in data.test_mask :
    tmp_test.append(i.sum().item())
    
for i in data.test_mask :
    tmp_val.append(i.sum().item())

In [124]:
tmp = pd.concat([pd.DataFrame(tmp_train), pd.DataFrame(tmp_test), pd.DataFrame(tmp_val)], axis=1)
tmp.columns=["train", 'test', 'val']

In [142]:
data = dataset[0] # 노드 하나
print(data)

print("directed : ", data.is_directed()) # 방향성 x

print(data.train_mask.sum().item()) # 학습하기 위해 사용하는 노드들
print(data.val_mask.sum().item()) # 검증하기 위해 사용하는 노드들

print(data.test_mask.sum().item()) # 테스트하기 위해 사용하는 노드들 #  검증(500) + 테스트(500) = 테스트(1000)

Data(edge_index=[2, 10556], test_mask=[2708], train_mask=[2708], val_mask=[2708], x=[2708, 1433], y=[2708])
directed :  False
140
500
1000


# Mini-batches

pytorch geometric은 sparse block diagonal adjacency matrices를 통해 미니배치 형태로 만들고, 병렬화하여 수행한다.

기존 torch에서는 torch.utils.data.DataLoader를 통해 배치 단위로 데이터를 처리하였다.

torch_geometric에서는 torch_geometric.data.DataLoader 를 통해 그래프 단위 데이터를 처리하게 된다.

In [46]:
from torch_geometric.datasets import TUDataset
from torch_geometric.data import DataLoader

dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES', use_node_attr = True)
loader = DataLoader(dataset, batch_size=32, shuffle=True) #  y값을 32개 가지는 배치 단위로 데이터를 로더한다.

for batch in loader :
    print(batch)
    print(batch.num_graphs)

Batch(batch=[1029], edge_index=[2, 3996], x=[1029, 21], y=[32])
32
Batch(batch=[990], edge_index=[2, 3780], x=[990, 21], y=[32])
32
Batch(batch=[1078], edge_index=[2, 4168], x=[1078, 21], y=[32])
32
Batch(batch=[944], edge_index=[2, 3654], x=[944, 21], y=[32])
32
Batch(batch=[1094], edge_index=[2, 4068], x=[1094, 21], y=[32])
32
Batch(batch=[979], edge_index=[2, 3820], x=[979, 21], y=[32])
32
Batch(batch=[1047], edge_index=[2, 4172], x=[1047, 21], y=[32])
32
Batch(batch=[1062], edge_index=[2, 4230], x=[1062, 21], y=[32])
32
Batch(batch=[1094], edge_index=[2, 3880], x=[1094, 21], y=[32])
32
Batch(batch=[1016], edge_index=[2, 3902], x=[1016, 21], y=[32])
32
Batch(batch=[1084], edge_index=[2, 4154], x=[1084, 21], y=[32])
32
Batch(batch=[1067], edge_index=[2, 3654], x=[1067, 21], y=[32])
32
Batch(batch=[1025], edge_index=[2, 3938], x=[1025, 21], y=[32])
32
Batch(batch=[1069], edge_index=[2, 4100], x=[1069, 21], y=[32])
32
Batch(batch=[838], edge_index=[2, 3258], x=[838, 21], y=[32])
32
Bat

In [47]:
from torch_scatter import scatter_mean
from torch_geometric.datasets import TUDataset
from torch_geometric.data import DataLoader

dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES', use_node_attr=True)
loader = DataLoader(dataset, batch_size=32, shuffle=True)

for data in loader:
    print(data)
    print(data.num_graphs)

    x = scatter_mean(data.x, data.batch, dim=0)
    print(x.size())

Batch(batch=[1054], edge_index=[2, 3922], x=[1054, 21], y=[32])
32
torch.Size([32, 21])
Batch(batch=[1018], edge_index=[2, 3992], x=[1018, 21], y=[32])
32
torch.Size([32, 21])
Batch(batch=[1212], edge_index=[2, 4470], x=[1212, 21], y=[32])
32
torch.Size([32, 21])
Batch(batch=[1226], edge_index=[2, 4126], x=[1226, 21], y=[32])
32
torch.Size([32, 21])
Batch(batch=[973], edge_index=[2, 3822], x=[973, 21], y=[32])
32
torch.Size([32, 21])
Batch(batch=[992], edge_index=[2, 3888], x=[992, 21], y=[32])
32
torch.Size([32, 21])
Batch(batch=[962], edge_index=[2, 3778], x=[962, 21], y=[32])
32
torch.Size([32, 21])
Batch(batch=[1140], edge_index=[2, 4200], x=[1140, 21], y=[32])
32
torch.Size([32, 21])
Batch(batch=[1013], edge_index=[2, 3804], x=[1013, 21], y=[32])
32
torch.Size([32, 21])
Batch(batch=[938], edge_index=[2, 3662], x=[938, 21], y=[32])
32
torch.Size([32, 21])
Batch(batch=[947], edge_index=[2, 3682], x=[947, 21], y=[32])
32
torch.Size([32, 21])
Batch(batch=[1084], edge_index=[2, 4186], 

# Data Transforms

ShapeNet dataset 활용

In [22]:
from torch_geometric.datasets import ShapeNet

dataset = ShapeNet(root='/tmp/ShapeNet', categories=['Airplane'])
dataset[0]

# url이 현재 오픈되지 않아서 진행 불가 
# https://pytorch-geometric.readthedocs.io/en/latest/notes/introduction.html#learning-methods-on-graphs 참고

Downloading https://shapenet.cs.stanford.edu/media/shapenetcore_partanno_segmentation_benchmark_v0_normal.zip


URLError: <urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1123)>

# Learning Methods on Graphs

In [145]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.datasets import Planetoid

dataset = Planetoid(root='/tmp/Cora', name='Cora')

class Net(torch.nn.Module) :
    def __init__(self) :
        super(Net, self).__init__()
        self.conv1 = GCNConv(dataset.num_node_features, 16)
        self.conv2 = GCNConv(16, dataset.num_classes)
        
    def forward(self, data) :
        
        x, edge_index = data.x, data.edge_index
        
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        
        x = self.conv2(x, edge_index)
        
        return F.log_softmax(x, dim=1)

In [157]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = Net().to(device)
data = dataset[0].to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

model.train()
for epoch in range(1000+1) :
    optimizer.zero_grad() # 초기화
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask]) # train index의 prediction, 실제값의 loss 계산
    loss.backward()
    optimizer.step()
    if epoch % 100 == 0 :
        print('epoch : ', epoch,'loss : ', loss.item())
        # batch는 좀 불안정하게 loss가 감소한다.

epoch :  0 loss :  1.9462432861328125
epoch :  100 loss :  0.04740346223115921
epoch :  200 loss :  0.03155552223324776
epoch :  300 loss :  0.024845609441399574
epoch :  400 loss :  0.016182707622647285
epoch :  500 loss :  0.01877479813992977
epoch :  600 loss :  0.025309834629297256
epoch :  700 loss :  0.016346612945199013
epoch :  800 loss :  0.017613442614674568
epoch :  900 loss :  0.014527320861816406
epoch :  1000 loss :  0.01724926009774208


In [158]:
model.eval()

_, pred = model(data).max(dim=1)
correct = float(pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
acc = correct / data.test_mask.sum().item()

print('Accuracy : {:.4f} %'.format(acc*100))

Accuracy : 80.4000 %


어떻게 활용해볼 수 있을까 ?

* node : 확진자
* edge : 접촉 관계
* node_features : 사람의 특징(성별, 연령 등)
* edge_features : 접촉 시 상황(접촉 장소 특징, 접촉 시 주변 사람 수, 몇차 감염 등)

# Creating Message Passing Networks

In [8]:
import torch
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree

* (j , i) : source_to_target

* (i, j) : target_to_source

tensors -> propagate

---

neighboring node features are first transformed by a weight matrix Θ
 
normalized by their degree & finally summed up
 
Add self-loops to the adjacency matrix.

Linearly transform node feature matrix.

Compute normalization coefficients.

Normalize node features in ϕ.

Sum up neighboring node features ("add" aggregation).

## GCNConv

GCN : Graphic Convolutional Network

graph convolution을 이용하여 그래프에 포함된 node, 그래프 자체를 벡터 형태의 데이터로 변환한다.

기본적인 GCN에서는 node의 feature만을 고려한다(edge feature 고려 x)

G = (A, X) = (A의 인접행렬, node feature matrix) = ( N X N , N x d )

* N : node 수
* d : node feature vector의 차원

H = σ(AXW)
* W : weight matrix

학습 과정이 진행되면서 weight matrix를 조정하게 된다. σ는 sigmoid, ReLU와 같이 비선형적 출력을 생성하기 위한 non-linear activation function이다.

한계점 :

1 ) A에는 neighbor node와의 연결만 표현되어 있기 때문에 graph convolution 과정에서 해당 node 자체에 대한 정보는 latent feature vector 생성 시 고려되지 않는다.

2 ) 일반적으로 A는 정규화되어있지 않으므로 feature vector와 A를 곱할 경우 feature vector의 크기가 불안정하게 변할 수 있다.

=> 이를 해결하기 위해 "self-loop"추가하여 정규화 진행

In [36]:
class GCNConv(MessagePassing) :
    def __init__(self, in_channels, out_channels) :
        super(GCNConv, self).__init__(aggr='add') # Step5 :"add" aggregation 
        self.lin = torch.nn.Linear(in_channels, out_channels)
        
    def forward(self, x, edge_index) :
        # x has shape [N, in_channels]
        # edge_index shape : [ 2, E ]
        
        print('x shape : ', x.shape) 
        print('edge_index shape : ', edge_index.shape)
        
        # Step1 : Add self-loops to the adjacency matrix
        edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0))
        # edge_index : [0, 1], [2, 1] 양방향
        # x : -1, 0, 1  # x.size : 3
        
        
        # Step2 : Linearly transform node feature matrix
        x = self.lin(x)
        
        
        # Step3 : Compute normalization
        row, col = edge_index # [ 2, E ]
        print('row :{}, col : {}'.format(row, col))
        
        deg = degree(col, x.size(0), dtype=x.dtype)
        print(deg)
        
        deg_inv_sqrt = deg.pow(-0.5) # sqrt
        
        norm = deg_inv_sqrt[row] * deg_inv_sqrt[col] # 행렬계산

        # Step4 : start propagating messages
        return self.propagate(edge_index, x=x, norm=norm)
    
    def message(self, x_j, norm) : # normalize the neighboring node features
        # x_j shape : [ E, out_channels ]
        
        # Step4 : Normalize node features
        return norm.view(-1, 1) * x_j

In [41]:
edge_index = torch.tensor([[0, 1, 1, 2],
                           [1, 0, 2, 1]], dtype=torch.long)


x = torch.tensor([[-1, 4],
                  [0, 5],
                  [1, 6]], dtype=torch.float)

# in_channel size = x.shape[-1] 과 맞춰주어야함.
conv = GCNConv(x.shape[-1],10)
x = conv(x, edge_index)

print(x)

x shape :  torch.Size([3, 2])
edge_index shape :  torch.Size([2, 4])
row :tensor([0, 1, 1, 2, 0, 1, 2]), col : tensor([1, 0, 2, 1, 0, 1, 2])
tensor([2., 3., 2.])
tensor([[-2.2648,  0.3196, -2.2564,  1.0869,  2.5566, -1.5219,  3.1983, -1.7067,
         -3.3021, -1.4961],
        [-2.8370, -0.0917, -3.4935,  1.2399,  3.7804, -1.6944,  4.1488, -2.6119,
         -4.4382, -2.4921],
        [-2.2171, -0.4646, -3.2627,  0.8719,  3.4157, -1.1549,  3.3559, -2.4197,
         -3.7093, -2.4409]], grad_fn=<ScatterAddBackward>)


## EdgeConv

In [42]:
import torch
from torch.nn import Sequential as Seq, Linear, ReLU
from torch_geometric.nn import MessagePassing

In [43]:
# transform both the target node features x_i, relative source node features x_i - x_j for each edge
class EdgeConv(MessagePassing) : 
    def __init__(self, in_channels, out_channels) :
        super(EdgeConv, self).__init__(aggr="max") # max aggregation
        
        # MLP
        # in_channels에 2 곱하는 이유 ? node 연결 정보가 반복해서 들어오기 때문?
        self.mlp = Seq(Linear(2 * in_channels, out_channels),
                      ReLU(),
                      Linear(out_channels, out_channels))
        
    def forward(self, x, edge_index) :
        # x shape : [N, in_channels]
        # edge_index shape : [2, E]
        
        return self.propagate(edge_index, x=x)
    
    def message(self, x_i, x_j) :
        # x_i shape : [E, in_channels]
        # x_j shape : [E, in_channels]
        print(x_i)
        print(x_j)
        
        # 두 텐서 연결하기, dim=몇번째 차원을 늘릴것인가 ? 
        # 2 x 2 -> dim=0 일 때 4 x 2, dim=1 일 때 2 x 4
        tmp = torch.cat([x_i, x_j - x_i], dim=1) # tmp shape : [E, 2*in_channels]
        return self.mlp(tmp)

In [44]:
from torch_geometric.nn import knn_graph

class DynamicEdgeConv(EdgeConv) :
    def __init__(self, in_channels, out_channels, k=6) :
        super(DynamicEdgeConv, self).__init__(in_channels, out_channels)
        self.k = k
        
    def forward(self, x, batch=None) :
        edge_index = knn_graph(x, self.k, batch, loop=False, flow=self.flow)
        return super(DynamicEdgeConv, self).forward(x, edge_index)

In [45]:
conv = DynamicEdgeConv(3, 128, k=6)
x = conv(x, batch)

NameError: name 'batch' is not defined

# Creating Own Datasets

## Creating "In Memory Datasets"

In [37]:
import torch
from torch_geometric.data import InMemoryDataset

In [39]:
class MyOwnDataset(InMemoryDataset) :
    def __init__(self, root, transform=None, pre_transform=None) :
        super(MyOwnDataset, self).__init__(root, transform, pre_transform)
        self.data, self.slices = torch.load(self.processed_paths[0])
        

    @property
    def raw_file_names(self) :
        return ['some_file_1', 'some_file2', ... ]
    
    @property
    def processed_file_names(self) :
        return ['data.pt']
    
    # def download(self) :
        # download to 'self.raw_dir'
        
    def process(self) :
        # read data into huge 'data' list
        data_list = [...]
        
        if self.pre_filter is not None :
            data_list = [data for data in data_list if self.pre_filter(data)]
        
        if self.pre_transform is not None :
            data_list = [self.pre_transform(data) for data in data_list]
            
        data, slices = self.collate(data_list)
        torch.save((data, slices), self.processed_path[0])

## Creating "Larger" Datasets

In [14]:
import os.path as osp

import torch
from torch_geometric.data import Dataset

In [None]:
class MyOwnDataset(Dataset):
    def __init__(self, root, transform=None, pre_transform=None):
        super(MyOwnDataset, self).__init__(root, transform, pre_transform)

    @property
    def raw_file_names(self):
        return ['some_file_1', 'some_file_2', ...]

    @property
    def processed_file_names(self):
        return ['data_1.pt', 'data_2.pt', ...]

    def download(self):
        # Download to `self.raw_dir`.

    def process(self):
        i = 0
        for raw_path in self.raw_paths:
            # Read data from `raw_path`.
            data = Data(...)

            if self.pre_filter is not None and not self.pre_filter(data):
                continue

            if self.pre_transform is not None:
                data = self.pre_transform(data)

            torch.save(data, osp.join(self.processed_dir, 'data_{}.pt'.format(i)))
            i += 1

    def len(self):
        return len(self.processed_file_names)

    def get(self, idx):
        data = torch.load(osp.join(self.processed_dir, 'data_{}.pt'.format(idx)))
        return data

# Advanced Mini-Batching

## Pairs of Graphs

In [19]:
class PairData(Data) :
    def __init__(self, edge_index_s, x_s, edge_index_t, x_t) :
        self.edge_indexx_s = edge_index_s
        self.x_s = x_s
        self.edge_index_t = edge_index_t
        self.x_t = x_t

In [20]:
def __inc__(self, key, value) :
    if key == 'edge_index_s' :
        return self.x_s.size(0)
    if key == 'edge_index_t' :
        return self.x_t.size(0)
    else :
        return super(PairData, self).__inc__(key, value)

In [21]:
edge_index_s = torch.tensor([
    [0, 0, 0, 0],
    [1, 2, 3, 4],])

x_s = torch.randn(5, 16) # 5 nodes


edge_index_t = torch.tensor([
    [0, 0, 0],
    [1, 2, 3],
])
x_t = torch.randn(4, 16) # 4 nodes

data = PairData(edge_index_s, x_s, edge_index_t, x_t)
data_list = [data, data]
loader = DataLoader(data_list, batch_size = 2)
batch = next(iter(loader))

print(batch)

print(batch.edge_index_s)

print(batch.edge_index_t)


AttributeError: 'PairData' object has no attribute 'face'

## Bipartite Graphs

In [None]:
class BipartiteData(Data) :
    def __init__(self, edge_index, x_s, x_t) :
        super(BipartiteData, self).__init__()
        self.edge_index = edge_index
        self.x_s = x_s
        self.x_t = x_T
        

def __inc__(self, key, value) :
    if key == 'edge_index' :
        return torch.tensor([[self.x_s.size(0)], [self.x_t.size(0)]])
    else :
        return super(BipartiteData, self).__inc__(key, value)


In [None]:
edge_index = torch.tensor([
    [0, 0, 1, 1],
    [0, 1, 1, 2],
])

x_s = torch.randn(2, 16) # 2 nodes
x_t = torch.randn(3, 16) # 3 nodes

data = BipartiteData(edge_index, x_s, x_t)
data_list = [data, data]
loader = DataLoader(data_list, batch_size=2)
batch = next(iter(loader))

print(batch)
print(batch.edge_index)

# Memory-Efficient Aggregations

In [None]:
from torch_geometric.nn import MessagePassing

class MyConv(MessagePassing) :
    def __init__(self) :
        super(MyConv, self).__init__(aggr="add")
        
    def forward(self, x, edge_index) :
        return self.propagate(edge_index, x=x)
    
    def message(self, x_i, x_j) :
        return MLP(x_j - x_i)

In [75]:
from torch_geometric.datasets import KarateClub

dataset = KarateClub()
print(f'Numberof graphs : {len(dataset)}')
print(f'Numberof features : {dataset.num_features}')
print(f'Numberof classes : {dataset.num_classes}')

Numberof graphs : 1
Numberof features : 34
Numberof classes : 2


In [59]:
dataset[0] # graph 수 = 1

dataset[0]

Data(edge_index=[2, 156], x=[34, 34], y=[34])

In [77]:
data.y

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [60]:
data = dataset[0]  # Get the first graph object.

print(data)
print('==============================================================')

# Gather some statistics about the graph.
print(f'Number of nodes: {data.num_nodes}') # 34
print(f'Number of edges: {data.num_edges}') # 156
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}') # 156/34

print(f'Number of training nodes: {data.train_mask.sum()}')
print(f'Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.2f}')
print(f'Contains isolated nodes: {data.contains_isolated_nodes()}')
print(f'Contains self-loops: {data.contains_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')

Data(edge_index=[2, 156], x=[34, 34], y=[34])
Number of nodes: 34
Number of edges: 156
Average node degree: 4.59


AttributeError: 'Data' object has no attribute 'train_mask'