In [47]:
import numpy as np 
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim 
import dgl
from dgl.nn import GraphConv
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import RandomLinkSplit

device = 'cpu'

# 데이터 불러오기 
dataset = Planetoid(root='/tmp/Cora', name='Cora')
data = dataset[0]

# Train / Validation / Test split 
transform = RandomLinkSplit(is_undirected=True, num_val = 0.05, num_test = 0.1)
train_data, val_data, test_data = transform(data)

num_nodes = data.num_nodes

In [48]:
# 인접행렬 만들기 
train_edge_index = train_data.edge_index
train_adjacency_matrix = torch.zeros((num_nodes, num_nodes))
train_adjacency_matrix[train_edge_index[0], train_edge_index[1]] = 1
train_adjacency_matrix = (train_adjacency_matrix + np.identity(2708)).to(torch.float32)

test_edge_index = test_data.edge_index
test_adjacency_matrix = torch.zeros((num_nodes, num_nodes))
test_adjacency_matrix[test_edge_index[0], test_edge_index[1]] = 1
test_adjacency_matrix = (test_adjacency_matrix + np.identity(2708)).to(torch.float32)

node_features = data.x


# 그래프 데이터 만들기 
train_edge_index = train_edge_index.tolist()
train_a = torch.tensor(train_edge_index[0])
train_b = torch.tensor(train_edge_index[1])
train_graph = dgl.graph((train_a, train_b))
train_graph = dgl.add_self_loop(train_graph)

test_edge_index = test_edge_index.tolist()
test_a = torch.tensor(test_edge_index[0])
test_b = torch.tensor(test_edge_index[1])
test_graph = dgl.graph((test_a, test_b))
test_graph = dgl.add_self_loop(test_graph)

train_graph = train_graph.to(device)
test_graph = test_graph.to(device)

In [49]:
class VGAE(nn.Module):
    def __init__(self, in_dim = 1433, hidden1_dim = 32, z = 16):
        super().__init__()
        self.graph_conv1 = GraphConv(in_dim, hidden1_dim, activation = F.relu, allow_zero_in_degree = True)
        self.graph_conv_average = GraphConv(hidden1_dim, z, activation = lambda x: x, allow_zero_in_degree = True)
        self.graph_conv_log_variance = GraphConv(hidden1_dim, z, activation = lambda x: x, allow_zero_in_degree = True)
    
    def encoder(self, adj, features):
        z = self.graph_conv1(adj, features)
        average = self.graph_conv_average(adj, z)
        log_variance = self.graph_conv_log_variance(adj, z)
        return average, log_variance
    
    def reparameterization(self, average, log_variance):
        std = torch.exp(0.5 * log_variance)
        eps = torch.randn_like(std)
        return average + std * eps 
    
    def decoder(self, z):
        new_adj = torch.sigmoid(torch.matmul(z, z.t()))
        return new_adj

    def forward(self, adj, features):
        average, log_variance = self.encoder(adj, features)
        z = self.reparameterization(average, log_variance)
        new_adj = self.decoder(z)
        return new_adj, average, log_variance


def loss_function(new_adj, adj, average, log_variance):
    # Minimize 상태로 만들기 
    binary_cross_entropy = F.binary_cross_entropy(new_adj, adj, reduction='sum')
    KL_divergence = - 0.5 * torch.sum(1 + log_variance - average ** 2 - log_variance.exp())
    return binary_cross_entropy + KL_divergence

In [50]:
train_loss_list = []
test_loss_list = []
model = VGAE().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr = 0.03)

def train():
    with train_graph.local_scope():
        model.train()
        optimizer.zero_grad()
        new_adj, average, log_variance = model(train_graph, node_features)
        loss = loss_function(new_adj, train_adjacency_matrix, average, log_variance)
        loss.backward()
        optimizer.step()
        train_loss_list.append(loss.item())
        print("Train Loss: {}".format(loss.item()))


def test():
    model.eval()
    with test_graph.local_scope():
        with torch.no_grad():
            new_adj, average, log_variance = model(test_graph, node_features)
            loss = loss_function(new_adj, test_adjacency_matrix, average, log_variance)
            train_loss_list.append(loss.item())
            print("test Loss: {}".format(loss.item()))
    return new_adj

In [51]:
for epoch in range(1,201):
    train()
    test()

Train Loss: 12965365.0
test Loss: 13206124.0
Train Loss: 13366417.0
test Loss: 11380502.0
Train Loss: 11582376.0
test Loss: 9845955.0
Train Loss: 9729491.0
test Loss: 8910170.0
Train Loss: 8776131.0
test Loss: 8127449.0
Train Loss: 8214009.0
test Loss: 7274783.5
Train Loss: 7268223.5
test Loss: 6683510.5
Train Loss: 6730246.5
test Loss: 6241459.0
Train Loss: 6236850.5
test Loss: 5836674.0
Train Loss: 5845098.0
test Loss: 5700903.0
Train Loss: 5687142.5
test Loss: 5830991.0
Train Loss: 5875857.5
test Loss: 5827972.5
Train Loss: 5850628.0
test Loss: 5533248.0
Train Loss: 5558041.0
test Loss: 5390039.0
Train Loss: 5401951.5
test Loss: 5453655.0
Train Loss: 5450743.5
test Loss: 5579611.0
Train Loss: 5550442.5
test Loss: 5533295.0
Train Loss: 5524754.0
test Loss: 5389536.0
Train Loss: 5392219.0
test Loss: 5333457.5
Train Loss: 5324040.0
test Loss: 5372286.0
Train Loss: 5370757.0
test Loss: 5378829.0
Train Loss: 5387594.0
test Loss: 5335154.0
Train Loss: 5340020.0
test Loss: 5295309.5
Train 

In [45]:
new_adj = np.array(test())
binary_adj = (new_adj >= 0.5).astype(int)

test_adjacency_matrix = np.array(test_adjacency_matrix)

from sklearn.metrics import roc_auc_score, precision_score
auc_score = roc_auc_score(binary_adj.flatten(), test_adjacency_matrix.flatten())
precision = precision_score(binary_adj.flatten(), test_adjacency_matrix.flatten(), average='binary')
print("AUC Score:", auc_score)
print("Precision:", precision)

test Loss: 5157663.5
AUC Score: 0.5004482530894087
Precision: 0.6376740376740376
