In [None]:
import numpy as np
np.random.seed(0)
import torch
torch.manual_seed(0)
import matplotlib.pyplot as plt
import torch_geometric.transforms as T # PyG의 그래프 전처리를 담당
from torch_geometric.datasets import Planetoid

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
transform = T.Compose([ # 여러 전처리 함수들을 묶어서 한번에 적용
    T.NormalizeFeatures(),
    T.ToDevice(device),
    T.RandomLinkSplit(num_val=0.05,num_test=0.1,is_undirected=True,split_labels=True,add_negative_train_samples=False)
])
# 링크 예측할 때는 전처리가 표준적임

In [6]:
dataset = Planetoid(root='.',name='Cora',transform=transform)

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [7]:
dataset[0]

(Data(x=[2708, 1433], edge_index=[2, 8976], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], pos_edge_label=[4488], pos_edge_label_index=[2, 4488]),
 Data(x=[2708, 1433], edge_index=[2, 8976], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], pos_edge_label=[263], pos_edge_label_index=[2, 263], neg_edge_label=[263], neg_edge_label_index=[2, 263]),
 Data(x=[2708, 1433], edge_index=[2, 9502], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], pos_edge_label=[527], pos_edge_label_index=[2, 527], neg_edge_label=[527], neg_edge_label_index=[2, 527]))

In [11]:
train_data, val_data, test_data = dataset[0]

In [19]:
from torch_geometric.nn import GCNConv, VGAE

class Encoder(torch.nn.Module):
    def __init__(self,dim_in,dim_out):
        super().__init__()
        self.conv1 = GCNConv(dim_in,2*dim_out)
        self.conv_mu = GCNConv(2*dim_out,dim_out)
        self.conv_logstd = GCNConv(2*dim_out,dim_out)

    def forward(self,x,edge_index):
        x = self.conv1(x,edge_index).relu()
        return self.conv_mu(x,edge_index),self.conv_logstd(x,edge_index)

In [20]:
model = VGAE(Encoder(dataset.num_features,16)).to(device)
optimizer = torch.optim.Adam(model.parameters(),lr=0.01)

In [21]:
print(model)

VGAE(
  (encoder): Encoder(
    (conv1): GCNConv(1433, 32)
    (conv_mu): GCNConv(32, 16)
    (conv_logstd): GCNConv(32, 16)
  )
  (decoder): InnerProductDecoder()
)


In [22]:
def train():
    model.train()
    optimizer.zero_grad()
    z = model.encode(train_data.x,train_data.edge_index)
    loss = model.recon_loss(z,train_data.pos_edge_label_index) + (1 / train_data.num_nodes) * model.kl_loss()
    loss.backward()
    optimizer.step()
    return float(loss)

In [23]:
@torch.no_grad()
def test(data):
    model.eval()
    z = model.encode(data.x,data.edge_index)
    return model.test(z,data.pos_edge_label_index,data.neg_edge_label_index)

In [24]:
for epoch in range(301):
    loss = train()
    val_auc, val_ap = test(val_data)
    if epoch%50==0:
        print(f'Epoch : {epoch:>2} | Loss : {loss:.4f} | Val_AUC : {val_auc:.4f} | Val_AP : {val_ap:.4f}')

Epoch :  0 | Loss : 3.4739 | Val_AUC : 0.6651 | Val_AP : 0.6776
Epoch : 50 | Loss : 1.3303 | Val_AUC : 0.6397 | Val_AP : 0.6627
Epoch : 100 | Loss : 1.1553 | Val_AUC : 0.7237 | Val_AP : 0.7203
Epoch : 150 | Loss : 1.1107 | Val_AUC : 0.7352 | Val_AP : 0.7329
Epoch : 200 | Loss : 0.9996 | Val_AUC : 0.8336 | Val_AP : 0.8208
Epoch : 250 | Loss : 0.9542 | Val_AUC : 0.8632 | Val_AP : 0.8590
Epoch : 300 | Loss : 0.9525 | Val_AUC : 0.8743 | Val_AP : 0.8721


In [25]:
test_auc, test_ap = test(test_data)
print(f'Test_AUC : {val_auc:.4f} | Test_AP : {val_ap:.4f}')

Test_AUC : 0.8743 | Test_AP : 0.8721
