In [None]:
%pip install torch torchvision

In [None]:
%pip install torch_geometric

In [1]:
%pip install node2vec

Collecting node2vec
  Downloading node2vec-0.5.0-py3-none-any.whl.metadata (849 bytes)
Downloading node2vec-0.5.0-py3-none-any.whl (7.2 kB)
Installing collected packages: node2vec
Successfully installed node2vec-0.5.0
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.3.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:




# 2_GNN.ipynb 

import pickle
import networkx as nx
import numpy as np
import pandas as pd
import torch
import torch.nn as nn


In [15]:

# Probe for PyTorch Geometric; USE_PYG records whether the imports succeeded
try:
    from torch_geometric.data import Data
    from torch_geometric.nn import GCNConv, SAGEConv, GATConv
except Exception as import_err:
    USE_PYG = False
    print('torch_geometric not available:', import_err)
else:
    USE_PYG = True


In [16]:

# The rest of the notebook needs PyG, so stop here if the import probe failed
if not USE_PYG:
    raise RuntimeError('torch_geometric 필요: pip install torch-geometric')


def _read_graph(path):
    """Return the graph object stored at ``path``.

    ``nx.read_gpickle`` is tried first; if that raises AttributeError the file
    is unpickled directly with ``pickle.load`` instead.
    """
    try:
        return nx.read_gpickle(path)
    except AttributeError:
        # NOTE: pickle.load can execute arbitrary code - only open trusted files
        with open(path, 'rb') as fh:
            return pickle.load(fh)


# 1) Load G
save_path = 'G_graph.gpickle'
try:
    G = _read_graph(save_path)
    print(f'Loaded G: nodes={G.number_of_nodes()}, edges={G.number_of_edges()}')
except Exception as e:
    # Any failure (missing file, bad pickle, ...) is reported with the path
    raise RuntimeError(f'Failed to load {save_path}: {e}')


Loaded G: nodes=7958, edges=14128


In [17]:

# 2) Build node features and the edge tensors
nodes = list(G.nodes())
idx_map = {node: pos for pos, node in enumerate(nodes)}

# Node2Vec embeddings (parameters follow the reference document:
# dimensions=64, walk_length=30, num_walks=200, p=1, q=1, workers=4)
try:
    from node2vec import Node2Vec
except Exception as e:
    raise RuntimeError(f'node2vec 패키지가 필요합니다: pip install node2vec ({e})')

n_dim = 64
n2v = Node2Vec(G, dimensions=n_dim, walk_length=30, num_walks=200, p=1, q=1, workers=4)
n2v_model = n2v.fit(window=10, min_count=1, batch_words=4)


def _embedding_for(node):
    """Return the trained vector for ``node``, or an all-zero vector if it has none."""
    # node2vec's fit uses gensim Word2Vec internally, so vectors are keyed by str(node)
    try:
        return n2v_model.wv[str(node)]
    except KeyError:
        return np.zeros(n_dim, dtype=float)


# One row per node, in the same order as `nodes` / `idx_map`
embs = [_embedding_for(node) for node in nodes]
X = torch.tensor(np.array(embs), dtype=torch.float)

# NOTE(review): each edge is added once, in the direction G.edges() yields it.
# If G is undirected, confirm whether the reverse direction is needed as well.
edge_src, edge_dst, edge_values = [], [], []
for u, v, edata in G.edges(data=True):
    if u not in idx_map or v not in idx_map:
        continue
    edge_src.append(idx_map[u])
    edge_dst.append(idx_map[v])
    # Edges without a 'netValue' attribute get 0.0
    edge_values.append(float(edata.get('netValue', 0.0)))

if not edge_src:
    raise RuntimeError('그래프에 엣지가 없습니다.')

edge_index = torch.tensor([edge_src, edge_dst], dtype=torch.long)
edge_attr = torch.tensor(edge_values, dtype=torch.float).unsqueeze(1)

data = Data(x=X, edge_index=edge_index, edge_attr=edge_attr)


In [18]:

# 3) Train/Val split (node-level, 8:2)
num_nodes = X.shape[0]
train_n = int(num_nodes * 0.8)

# Fixed seed so the shuffled node order (and therefore the split) is repeatable
np.random.seed(42)
indices = np.arange(num_nodes)
np.random.shuffle(indices)
train_idx, val_idx = indices[:train_n], indices[train_n:]

train_mask = torch.zeros(num_nodes, dtype=torch.bool)
train_mask[train_idx] = True
# train_idx and val_idx partition all nodes, so the val mask is the complement
val_mask = ~train_mask

data.train_mask, data.val_mask = train_mask, val_mask

# 4) Model definition (autoencoder: reconstruct node features)
in_dim = X.shape[1]
hidden_dim = 128
latent_dim = 64

def train_model_autoencoder(model, data, train_mask, val_mask, epochs=50, lr=1e-3, weight_decay=5e-4, device='cpu'):
    """Train ``model`` to reconstruct node features and report train/val MSE.

    Training is full-batch: every epoch runs one forward pass over all nodes,
    and the MSE loss is taken only on the rows selected by ``train_mask``.

    Args:
        model: module whose ``forward(x, edge_index)`` returns ``(z, recon)``.
        data: object exposing ``x`` and ``edge_index`` tensors.
        train_mask: indexer selecting the training rows of ``x`` / ``recon``.
        val_mask: indexer selecting the validation rows.
        epochs: number of optimisation steps.
        lr: Adam learning rate.
        weight_decay: Adam weight decay.
        device: device the model and tensors are moved to.

    Returns:
        ``(model, train_mse, val_mse)``. The two MSE values are Python floats
        computed in eval mode after training; the model is left in eval mode.
    """
    model.to(device)
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    loss_fn = nn.MSELoss()
    x = data.x.to(device)
    edge_index = data.edge_index.to(device)

    # Keep tensor masks on the same device as the tensors they index.
    # Non-tensor indexers (lists, arrays) are passed through untouched.
    if torch.is_tensor(train_mask):
        train_mask = train_mask.to(device)
    if torch.is_tensor(val_mask):
        val_mask = val_mask.to(device)

    # Log roughly five times per run, and at least every epoch for short runs
    log_every = max(1, epochs // 5)

    for epoch in range(1, epochs + 1):
        optimizer.zero_grad()
        z, recon = model(x, edge_index)
        loss = loss_fn(recon[train_mask], x[train_mask])
        loss.backward()
        optimizer.step()
        if epoch % log_every == 0:
            print(f'Epoch {epoch}/{epochs} train_loss={loss.item():.6f}')

    # Final metrics are computed in eval mode and without gradients
    model.eval()
    with torch.no_grad():
        z, recon = model(x, edge_index)
        train_mse = loss_fn(recon[train_mask], x[train_mask]).item()
        val_mse = loss_fn(recon[val_mask], x[val_mask]).item()
    return model, train_mse, val_mse

# Model classes
class GCN_AE(nn.Module):
    """Autoencoder with a two-layer GCNConv encoder and a linear decoder."""

    def __init__(self, in_ch, hid_ch, lat_ch):
        """Build the layers: in_ch -> hid_ch -> lat_ch, decoded back to in_ch."""
        super().__init__()
        self.conv1 = GCNConv(in_ch, hid_ch)
        self.conv2 = GCNConv(hid_ch, lat_ch)
        self.decoder = nn.Linear(lat_ch, in_ch)

    def forward(self, x, edge_index):
        """Return ``(z, recon)``: the latent codes and the reconstructed features."""
        hidden = torch.relu(self.conv1(x, edge_index))
        z = self.conv2(hidden, edge_index)
        return z, self.decoder(z)

class SAGE_AE(nn.Module):
    """Autoencoder with a two-layer SAGEConv encoder and a linear decoder."""

    def __init__(self, in_ch, hid_ch, lat_ch):
        """Build the layers: in_ch -> hid_ch -> lat_ch, decoded back to in_ch."""
        super().__init__()
        self.conv1 = SAGEConv(in_ch, hid_ch)
        self.conv2 = SAGEConv(hid_ch, lat_ch)
        self.decoder = nn.Linear(lat_ch, in_ch)

    def forward(self, x, edge_index):
        """Return ``(z, recon)``: the latent codes and the reconstructed features."""
        hidden = torch.relu(self.conv1(x, edge_index))
        z = self.conv2(hidden, edge_index)
        return z, self.decoder(z)


class PaperGAT_AE(nn.Module):
    """GAT autoencoder.

    Layout: linear input projection, two GATConv encoder layers, one GATConv
    decoder layer, and a linear projection back to the input width.
    """

    def __init__(self, in_ch, embed_dim=64, dropout=0.3):
        """Build the layers; ``dropout`` is used by the GAT layers and ``self.drop``."""
        super().__init__()
        self.input_proj = nn.Linear(in_ch, embed_dim)
        # Encoder: 8 heads of 32 channels (256 total), then one 32-channel head
        self.gat_e1 = GATConv(embed_dim, 256 // 8, heads=8, dropout=dropout)
        self.gat_e2 = GATConv(256, 32, heads=1, dropout=dropout)
        # Decoder: one 64-channel head, then a linear map back to in_ch
        self.gat_d1 = GATConv(32, 64, heads=1, dropout=dropout)
        self.output_proj = nn.Linear(64, in_ch)
        self.act = nn.ELU()
        self.drop = nn.Dropout(dropout)

    def forward(self, x, edge_index):
        """Return ``(z, recon)``; ``z`` is the raw output of the second encoder layer."""
        h = self.drop(self.act(self.input_proj(x)))
        h = self.drop(self.act(self.gat_e1(h, edge_index)))
        z = self.gat_e2(h, edge_index)
        # The decoder consumes the activated, dropped-out latent; z itself is returned as-is
        h = self.drop(self.act(z))
        h = self.act(self.gat_d1(h, edge_index))
        return z, self.output_proj(h)


In [19]:

# 5) Train and evaluate (demo settings; adjust the epoch counts as needed)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('device ->', device)

# Seed torch's RNG before the models are built, so weight initialisation and
# dropout start from the same state on every run of this cell.
torch.manual_seed(42)

results = []

# Epoch budget per model; anything not listed uses DEFAULT_EPOCHS
DEFAULT_EPOCHS = 30
EPOCHS_BY_MODEL = {'PaperGAT_AE': 1000}

# Per-model configuration
models = {
    'GCN_AE': GCN_AE(in_dim, 128, latent_dim),
    'SAGE_AE': SAGE_AE(in_dim, 128, latent_dim),
    'PaperGAT_AE': PaperGAT_AE(in_dim, embed_dim=64, dropout=0.3),
}

for name, model in models.items():
    print(f'\nTraining {name}...')
    ep = EPOCHS_BY_MODEL.get(name, DEFAULT_EPOCHS)
    model_trained, train_mse, val_mse = train_model_autoencoder(
        model, data, data.train_mask, data.val_mask,
        epochs=ep, lr=1e-3, weight_decay=5e-4, device=device,
    )
    results.append({'model': name, 'train_mse': train_mse, 'val_mse': val_mse, 'epochs': ep})

# 6) Print the results table
df_res = pd.DataFrame(results)
print('\nValidation results:')
print(df_res)

# Persist the table next to the notebook
df_res.to_csv('gnn_validation_results.csv', index=False)
print('\nSaved results -> gnn_validation_results.csv')


device -> cpu

Training GCN_AE...
Epoch 6/30 train_loss=5392064.500000
Epoch 12/30 train_loss=3788205.750000
Epoch 18/30 train_loss=2984600.000000
Epoch 24/30 train_loss=2909980.500000
Epoch 30/30 train_loss=3009859.000000

Training SAGE_AE...
Epoch 6/30 train_loss=1438623.625000
Epoch 12/30 train_loss=684744.750000
Epoch 18/30 train_loss=473067.062500
Epoch 24/30 train_loss=72396.062500
Epoch 30/30 train_loss=86276.835938

Training GAT_AE...
Epoch 6/30 train_loss=3432553.500000
Epoch 12/30 train_loss=4622112.000000
Epoch 18/30 train_loss=3062227.750000
Epoch 24/30 train_loss=3727653.500000
Epoch 30/30 train_loss=3639956.500000

Training PaperGAT_AE...
Epoch 10/50 train_loss=5908890.000000
Epoch 20/50 train_loss=3022080.000000
Epoch 30/50 train_loss=3016367.750000
Epoch 40/50 train_loss=4431187.500000
Epoch 50/50 train_loss=2900788.250000

Validation results:
         model     train_mse       val_mse  epochs
0       GCN_AE  3.006879e+06  1.778398e+06      30
1      SAGE_AE  6.122143e+

In [20]:
# Create the output folder, then save every trained model
import os

MODEL_DIR = 'models'
os.makedirs(MODEL_DIR, exist_ok=True)

# Pass 1: weights only (state_dict) for each model
for name, model in models.items():
    save_path = os.path.join(MODEL_DIR, f"{name}.pth")
    try:
        torch.save(model.state_dict(), save_path)
    except Exception as e:
        # Best effort: report the failure and continue with the next model
        print(f"Failed to save {name}: {e}")
    else:
        print(f"Saved {name} -> {save_path}")

# Pass 2 (optional): the whole model object alongside the weights
for name, model in models.items():
    save_path_obj = os.path.join(MODEL_DIR, f"{name}_obj.pth")
    try:
        torch.save(model, save_path_obj)
    except Exception as e:
        print(f"Failed to save object {name}: {e}")
    else:
        print(f"Saved object {name} -> {save_path_obj}")



Saved GCN_AE -> models\GCN_AE.pth
Saved SAGE_AE -> models\SAGE_AE.pth
Saved GAT_AE -> models\GAT_AE.pth
Saved PaperGAT_AE -> models\PaperGAT_AE.pth
Saved object GCN_AE -> models\GCN_AE_obj.pth
Saved object SAGE_AE -> models\SAGE_AE_obj.pth
Saved object GAT_AE -> models\GAT_AE_obj.pth
Saved object PaperGAT_AE -> models\PaperGAT_AE_obj.pth
