In [4]:
import sys
sys.path.append("/Users/seinkim/bigdas/code/gnn_with_rwr_centrality")  # 또는 프로젝트 루트 경로로 직접 지정

In [5]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.metrics import roc_auc_score, average_precision_score
from torch_geometric.nn import GCNConv
from torch_geometric.utils import from_networkx
from torch_geometric.transforms import RandomLinkSplit
from data.load_dataset import load_graph

In [7]:
class GCNEncoder(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super(GCNEncoder, self).__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, hidden_dim)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x

# ✅ 디코더 (dot product)
def decode(z, edge_index):
    return (z[edge_index[0]] * z[edge_index[1]]).sum(dim=1)

In [8]:
results = []

# ✅ 그래프 및 라벨 로딩
G, id2idx = load_graph()
data = from_networkx(G)
data.train_mask = data.val_mask = data.test_mask = None

# ✅ 링크 예측용 데이터 분할
transform = RandomLinkSplit(is_undirected=True, add_negative_train_samples=True)
train_data, val_data, test_data = transform(data)

In [11]:
attr_dir = "/Users/seinkim/bigdas/code/gnn_with_rwr_centrality/attributes/generated_50_cora" 

In [12]:

files = sorted([f for f in os.listdir(attr_dir) if f.endswith(".npy")])

print(f"총 {len(files)}개의 속성 조합에 대해 Link Prediction 실험을 시작합니다...")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ✅ 모든 속성 조합에 대해 실험 반복
for fname in tqdm(files):
    attr = np.load(os.path.join(attr_dir, fname))
    x = torch.tensor(attr, dtype=torch.float)

    # 속성 적용
    train_data.x = x.to(device)
    val_data.x = x.to(device)
    test_data.x = x.to(device)

    # ✅ 모델 선언
    model = GCNEncoder(input_dim=x.size(1), hidden_dim=64).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    # 학습
    model.train()
    for epoch in range(201):
        optimizer.zero_grad()
        z = model(train_data.x, train_data.edge_index)
        score = decode(z, train_data.edge_label_index)
        labels = train_data.edge_label.to(device).float()
        # print(train_data.x.shape, train_data.edge_index.shape, train_data.edge_label, labels.shape)
        loss = F.binary_cross_entropy_with_logits(score, labels)
        loss.backward()
        optimizer.step()


    # 평가
    model.eval()
    with torch.no_grad():
        z = model(test_data.x, test_data.edge_index)
        # test_score = decode(z, test_data.edge_label_index).sigmoid().cpu().numpy()
        test_score = decode(z, test_data.edge_label_index).cpu().numpy()

        test_labels = test_data.edge_label.cpu().numpy()

        auc = roc_auc_score(test_labels, test_score)
        ap = average_precision_score(test_labels, test_score)


    # ✅ 결과 저장
    results.append({
        "Attribute File": fname,
        "ROC AUC": round(auc, 4),
        "Average Precision": round(ap, 4)
    })

총 50개의 속성 조합에 대해 Link Prediction 실험을 시작합니다...


100%|██████████| 50/50 [00:40<00:00,  1.24it/s]


NameError: name 'edge_index' is not defined

In [13]:

# ✅ CSV 저장
os.makedirs("./results", exist_ok=True)
df = pd.DataFrame(results)
df.to_csv("./results/link_prediction_generated_sigmoid_test.csv", index=False)
print("모든 실험 완료! 결과는 results/link_prediction_generated_sigmoid_test.csv 에 저장됨.")

모든 실험 완료! 결과는 results/link_prediction_generated_sigmoid_test.csv 에 저장됨.


In [14]:
print(df)

                               Attribute File  ROC AUC  Average Precision
0                        attr_adasim_top1.npy   0.8493             0.8521
1                       attr_adasim_top10.npy   0.8114             0.8178
2                        attr_adasim_top2.npy   0.8470             0.8443
3                        attr_adasim_top3.npy   0.8283             0.8311
4                        attr_adasim_top4.npy   0.8292             0.8292
5                        attr_adasim_top5.npy   0.8130             0.8219
6                        attr_adasim_top6.npy   0.8174             0.8176
7                        attr_adasim_top7.npy   0.8103             0.8160
8                        attr_adasim_top8.npy   0.8049             0.8108
9                        attr_adasim_top9.npy   0.8058             0.8107
10                      attr_jaccard_top1.npy   0.8521             0.8571
11                     attr_jaccard_top10.npy   0.8406             0.8497
12                      attr_jaccard_t

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.metrics import roc_auc_score, average_precision_score
from torch_geometric.nn import GCNConv
from torch_geometric.utils import from_networkx
from torch_geometric.transforms import RandomLinkSplit
from data.load_dataset import load_graph

# ✅ GCN 인코더 정의
class GCNEncoder(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super(GCNEncoder, self).__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, hidden_dim)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x

# MLP 기반 디코더 정의 (element-wise 곱 → MLP)
class LinkPredictor(nn.Module):
    def __init__(self, in_channels, hidden_channels=64, num_layers=2, dropout=0.3):
        super(LinkPredictor, self).__init__()
        self.lins = nn.ModuleList()
        self.lins.append(nn.Linear(in_channels, hidden_channels))
        for _ in range(num_layers - 2):
            self.lins.append(nn.Linear(hidden_channels, hidden_channels))
        self.lins.append(nn.Linear(hidden_channels, 1))
        self.dropout = dropout

    def forward(self, x_i, x_j):
        x = x_i * x_j  # element-wise product
        for lin in self.lins[:-1]:
            x = lin(x)
            x = F.relu(x)
            x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.lins[-1](x)
        return torch.sigmoid(x).squeeze()

# ✅ 데이터 로딩
G, id2idx = load_graph()
data = from_networkx(G)
data.train_mask = data.val_mask = data.test_mask = None
transform = RandomLinkSplit(is_undirected=True, add_negative_train_samples=True)
train_data, val_data, test_data = transform(data)

# ✅ 실험 준비
attr_dir = "/Users/seinkim/bigdas/code/gnn_with_rwr_centrality/attributes/generated_50"
files = sorted([f for f in os.listdir(attr_dir) if f.endswith(".npy")])
print(f"총 {len(files)}개의 속성 조합에 대해 Link Prediction 실험을 시작합니다...")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
results = []

# ✅ 모든 속성 조합에 대해 반복
for fname in tqdm(files):
    attr = np.load(os.path.join(attr_dir, fname))
    x = torch.tensor(attr, dtype=torch.float)

    train_data.x = val_data.x = test_data.x = x.to(device)

    model = GCNEncoder(input_dim=x.size(1), hidden_dim=64).to(device)
    predictor = LinkPredictor(in_channels=64, hidden_channels=64).to(device)
    optimizer = optim.Adam(list(model.parameters()) + list(predictor.parameters()), lr=0.01)

    학습
    model.train()
    predictor.train()
    for epoch in range(201):
        optimizer.zero_grad()
        z = model(train_data.x, train_data.edge_index)
        src, dst = train_data.edge_label_index
        score = predictor(z[src], z[dst])
        labels = train_data.edge_label.to(device).float()
        loss = F.binary_cross_entropy(score, labels)
        loss.backward()
        optimizer.step()


    # # 학습
    # model.train()
    # predictor.train()
    # for epoch in range(201):
    #     optimizer.zero_grad()
    #     z = model(train_data.x, train_data.edge_index)

    #     src, dst = train_data.edge_label_index
    #     score = predictor(z[src], z[dst])  # 예측된 확률
    #     labels = train_data.edge_label.to(device).float()  # 실제 라벨 (0 또는 1) 

    #     # # ✅ 추가: score 및 labels 확인 (처음 한 번만)
    #     # if epoch == 0:
    #     #     print(f"score shape: {score.shape}, 예시 값: {score[:5].detach().cpu().numpy()}")
    #     #     print(f"labels shape: {labels.shape}, 예시 값: {labels[:5].detach().cpu().numpy()}")

    #     loss = F.binary_cross_entropy(score, labels)
    #     loss.backward()
    #     optimizer.step()


    # 평가
    model.eval()
    predictor.eval()
    with torch.no_grad():
        z = model(test_data.x, test_data.edge_index)
        src, dst = test_data.edge_label_index
        test_score = predictor(z[src], z[dst]).cpu().numpy()
        test_labels = test_data.edge_label.cpu().numpy()
        auc = roc_auc_score(test_labels, test_score)
        ap = average_precision_score(test_labels, test_score)

    results.append({
        "Attribute File": fname,
        "ROC AUC": round(auc, 4),
        "Average Precision": round(ap, 4)
    })

# ✅ 결과 저장
os.makedirs("./results", exist_ok=True)
df = pd.DataFrame(results)
df.to_csv("./results/link_prediction_mlp_decoder.csv", index=False)
print("모든 실험 완료! 결과는 results/link_prediction_mlp_decoder.csv 에 저장됨.")


총 50개의 속성 조합에 대해 Link Prediction 실험을 시작합니다...


100%|██████████| 50/50 [02:22<00:00,  2.85s/it]

모든 실험 완료! 결과는 results/link_prediction_mlp_decoder_citeseer.csv 에 저장됨.





In [36]:
print(score)

tensor([0.8645, 0.9625, 0.2859,  ..., 0.3719, 0.2832, 0.3155],
       grad_fn=<SqueezeBackward0>)


In [37]:
print(labels)

tensor([1., 1., 1.,  ..., 0., 0., 0.])


In [43]:
print(score[:100])
print(labels[-10:])

tensor([0.8645, 0.9625, 0.2859, 0.2865, 0.3269, 0.3617, 0.5772, 0.2792, 0.2663,
        0.2670, 0.3145, 0.2799, 0.3848, 0.6394, 0.3652, 0.3074, 0.4501, 0.8342,
        0.3686, 0.6848, 0.9841, 0.2777, 0.3368, 0.8823, 0.9572, 0.5807, 0.6041,
        0.8396, 0.3408, 0.7583, 0.4486, 0.9990, 0.2593, 0.2284, 0.7171, 0.3310,
        0.2618, 0.4220, 0.9925, 0.9963, 0.3372, 0.9464, 0.9984, 0.8470, 0.6580,
        0.2902, 0.9747, 0.2579, 0.5816, 1.0000, 0.3210, 0.2554, 0.5661, 0.2834,
        0.3429, 0.3368, 0.9780, 0.7293, 0.9737, 0.7788, 0.9989, 0.5661, 0.2999,
        0.2652, 0.2796, 0.9483, 0.9847, 1.0000, 0.2614, 0.3104, 0.2494, 0.2846,
        0.3002, 0.2738, 0.9407, 0.5793, 0.9376, 0.2860, 0.2856, 0.2329, 0.6896,
        1.0000, 0.4751, 0.9130, 0.6947, 0.9888, 0.9997, 0.4509, 0.9992, 0.9359,
        0.2575, 0.6556, 0.9879, 0.8238, 0.3549, 0.9997, 0.5021, 0.7063, 0.9256,
        0.2581], grad_fn=<SliceBackward0>)
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])


In [44]:
print(z[src][:100])

tensor([[-0.1533,  0.2771, -0.3394,  ..., -0.1005,  0.0219,  0.0314],
        [-0.3577,  0.6625, -0.8395,  ..., -0.3492,  0.1824,  0.1626],
        [ 0.0325, -0.0257,  0.0020,  ...,  0.0337, -0.0312, -0.0210],
        ...,
        [-0.1475,  0.2774, -0.3938,  ..., -0.1299,  0.0553,  0.0768],
        [-0.3010,  0.6704, -0.8064,  ..., -0.3476,  0.1017,  0.1497],
        [-0.0240,  0.0711, -0.1247,  ..., -0.0225, -0.0248,  0.0077]],
       grad_fn=<SliceBackward0>)


In [46]:
print(z.shape) # shape: [num_edges, 64]

torch.Size([2708, 64])


총 50개의 Citeseer 속성 조합에 대해 Link Prediction 실험을 시작합니다...


100%|██████████| 50/50 [01:40<00:00,  2.01s/it]

모든 실험 완료! 결과는 results/link_prediction_mlp_decoder_citeseer.csv 에 저장됨.





In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.metrics import roc_auc_score, average_precision_score
from torch_geometric.nn import GCNConv
from torch_geometric.utils import from_networkx
from torch_geometric.transforms import RandomLinkSplit
from data.load_dataset import load_graph  # Citeseer용 load_graph로 되어 있어야 함!

# ✅ GCN 인코더 정의
class GCNEncoder(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super(GCNEncoder, self).__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, hidden_dim)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x

# ✅ MLP 디코더 정의
class LinkPredictor(nn.Module):
    def __init__(self, in_channels, hidden_channels=64, num_layers=2, dropout=0.3):
        super(LinkPredictor, self).__init__()
        self.lins = nn.ModuleList()
        self.lins.append(nn.Linear(in_channels, hidden_channels))
        for _ in range(num_layers - 2):
            self.lins.append(nn.Linear(hidden_channels, hidden_channels))
        self.lins.append(nn.Linear(hidden_channels, 1))
        self.dropout = dropout

    def forward(self, x_i, x_j):
        x = x_i * x_j
        for lin in self.lins[:-1]:
            x = lin(x)
            x = F.relu(x)
            x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.lins[-1](x)
        return torch.sigmoid(x).squeeze()

# ✅ Citeseer 데이터 로딩
G, id2idx = load_graph()  # Citeseer 그래프 불러오기
data = from_networkx(G)
data.train_mask = data.val_mask = data.test_mask = None

transform = RandomLinkSplit(is_undirected=True, add_negative_train_samples=True)
train_data, val_data, test_data = transform(data)

# ✅ 속성 경로 (Citeseer용)
attr_dir = "/Users/seinkim/bigdas/code/gnn_with_rwr_centrality/attributes/generated_50_citeseer"
files = sorted([f for f in os.listdir(attr_dir) if f.endswith(".npy")])
print(f"총 {len(files)}개의 Citeseer 속성 조합에 대해 Link Prediction 실험을 시작합니다...")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
results = []

# ✅ 실험 실행
for fname in tqdm(files):
    attr = np.load(os.path.join(attr_dir, fname))
    x = torch.tensor(attr, dtype=torch.float)

    train_data.x = val_data.x = test_data.x = x.to(device)

    model = GCNEncoder(input_dim=x.size(1), hidden_dim=64).to(device)
    predictor = LinkPredictor(in_channels=64).to(device)
    optimizer = optim.Adam(list(model.parameters()) + list(predictor.parameters()), lr=0.01)

    # ✅ 학습
    model.train()
    predictor.train()
    for epoch in range(201):
        optimizer.zero_grad()
        z = model(train_data.x, train_data.edge_index)
        src, dst = train_data.edge_label_index
        score = predictor(z[src], z[dst])
        labels = train_data.edge_label.to(device).float()
        loss = F.binary_cross_entropy(score, labels)
        loss.backward()
        optimizer.step()

    # ✅ 평가
    model.eval()
    predictor.eval()
    with torch.no_grad():
        z = model(test_data.x, test_data.edge_index)
        src, dst = test_data.edge_label_index
        test_score = predictor(z[src], z[dst]).cpu().numpy()
        test_labels = test_data.edge_label.cpu().numpy()
        auc = roc_auc_score(test_labels, test_score)
        ap = average_precision_score(test_labels, test_score)

    results.append({
        "Attribute File": fname,
        "ROC AUC": round(auc, 4),
        "Average Precision": round(ap, 4)
    })

# ✅ 결과 저장
os.makedirs("./results", exist_ok=True)
df = pd.DataFrame(results)
df.to_csv("./results/link_prediction_mlp_decoder_citeseer.csv", index=False)
print("모든 실험 완료! 결과는 results/link_prediction_mlp_decoder_citeseer.csv 에 저장됨.")


In [53]:
### Cora

NameError: name '__file__' is not defined

In [56]:
# GCN-based Link Prediction on Cora Dataset (Custom Edge Features)
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import networkx as nx
import pandas as pd
from sklearn.metrics import roc_auc_score, average_precision_score
from torch.utils.data import DataLoader
from torch_geometric.datasets import Planetoid
from torch_geometric.utils import to_networkx, negative_sampling
from torch_geometric.nn import GCNConv

# Set device and seed
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.manual_seed(42)
np.random.seed(42)

# Load Cora dataset
dataset = Planetoid(root='./data', name='Cora')
data = dataset[0].to(device)
G = to_networkx(data, to_undirected=True)

# Compute edge features (centrality + similarity)
def compute_edge_features(G, edge_list):
    centrality = nx.betweenness_centrality(G)
    features = []
    for u, v in edge_list:
        try:
            sim = next(nx.jaccard_coefficient(G, [(u, v)]))[2]
        except:
            sim = 0.0
        features.append([(centrality[u] + centrality[v]) / 2, sim])
    return np.array(features, dtype=np.float32)

# Generate edge feature matrix
edge_list = data.edge_index.cpu().numpy().T
edge_features = compute_edge_features(G, edge_list)
edge_attr = torch.tensor(edge_features, dtype=torch.float).to(device)

# Normalize edge attributes
edge_attr = (edge_attr - edge_attr.min(dim=0)[0]) / (edge_attr.max(dim=0)[0] - edge_attr.min(dim=0)[0] + 1e-15)

# GCN Encoder
class GCNEncoder(nn.Module):
    def __init__(self, in_channels, hidden_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)

    def forward(self, x, edge_index):
        x = F.relu(self.conv1(x, edge_index))
        x = self.conv2(x, edge_index)
        return x

# MLP Link Predictor
class LinkPredictor(nn.Module):
    def __init__(self, in_channels, hidden_channels=64):
        super().__init__()
        self.lin1 = nn.Linear(in_channels, hidden_channels)
        self.lin2 = nn.Linear(hidden_channels, 1)

    def forward(self, x_i, x_j):
        x = x_i * x_j
        x = F.relu(self.lin1(x))
        x = self.lin2(x)
        return torch.sigmoid(x)

# Generate positive and negative edges
pos_edge_index = data.edge_index
neg_edge_index = negative_sampling(pos_edge_index, num_nodes=data.num_nodes, num_neg_samples=pos_edge_index.size(1))

# Model setup
model = GCNEncoder(dataset.num_features, 64).to(device)
predictor = LinkPredictor(64).to(device)
optimizer = torch.optim.Adam(list(model.parameters()) + list(predictor.parameters()), lr=0.01)

# Training loop
def train():
    model.train()
    predictor.train()
    optimizer.zero_grad()
    z = model(data.x, data.edge_index)

    pos_pred = predictor(z[pos_edge_index[0]], z[pos_edge_index[1]])
    neg_pred = predictor(z[neg_edge_index[0]], z[neg_edge_index[1]])

    # pos_pred = predictor(z[src], z[dst]).squeeze()

    pos_label = torch.ones(pos_pred.size(0), device=device)
    neg_label = torch.zeros(neg_pred.size(0), device=device)

    # loss = F.binary_cross_entropy(torch.cat([pos_pred, neg_pred]), torch.cat([pos_label, neg_label]))
    loss = F.binary_cross_entropy(
        torch.cat([pos_pred, neg_pred]).squeeze(),
        torch.cat([pos_label, neg_label])
)


    loss.backward()
    optimizer.step()
    return loss.item()

# Evaluation
@torch.no_grad()
def evaluate():
    model.eval()
    predictor.eval()
    z = model(data.x, data.edge_index)
    pos_pred = predictor(z[pos_edge_index[0]], z[pos_edge_index[1]])
    neg_pred = predictor(z[neg_edge_index[0]], z[neg_edge_index[1]])
    preds = torch.cat([pos_pred, neg_pred]).cpu()
    labels = torch.cat([torch.ones(pos_pred.size(0)), torch.zeros(neg_pred.size(0))])
    auc = roc_auc_score(labels, preds)
    ap = average_precision_score(labels, preds)
    return auc, ap

# Run training
for epoch in range(1, 201):
    loss = train()
    auc, ap = evaluate()
    if epoch % 10 == 0:
        print(f"Epoch {epoch:03d}, Loss: {loss:.4f}, AUC: {auc:.4f}, AP: {ap:.4f}")

Epoch 010, Loss: 0.6418, AUC: 0.8562, AP: 0.8561
Epoch 020, Loss: 0.3567, AUC: 0.9419, AP: 0.9365
Epoch 030, Loss: 0.2286, AUC: 0.9755, AP: 0.9705
Epoch 040, Loss: 0.1368, AUC: 0.9901, AP: 0.9883
Epoch 050, Loss: 0.0688, AUC: 0.9972, AP: 0.9968
Epoch 060, Loss: 0.0310, AUC: 0.9994, AP: 0.9992
Epoch 070, Loss: 0.0274, AUC: 0.9992, AP: 0.9990
Epoch 080, Loss: 0.0250, AUC: 0.9997, AP: 0.9996
Epoch 090, Loss: 0.0111, AUC: 0.9999, AP: 0.9999
Epoch 100, Loss: 0.0049, AUC: 1.0000, AP: 1.0000
Epoch 110, Loss: 0.0023, AUC: 1.0000, AP: 1.0000
Epoch 120, Loss: 0.0012, AUC: 1.0000, AP: 1.0000
Epoch 130, Loss: 0.0007, AUC: 1.0000, AP: 1.0000
Epoch 140, Loss: 0.0005, AUC: 1.0000, AP: 1.0000
Epoch 150, Loss: 0.0004, AUC: 1.0000, AP: 1.0000
Epoch 160, Loss: 0.0003, AUC: 1.0000, AP: 1.0000
Epoch 170, Loss: 0.0002, AUC: 1.0000, AP: 1.0000
Epoch 180, Loss: 0.0002, AUC: 1.0000, AP: 1.0000
Epoch 190, Loss: 0.0002, AUC: 1.0000, AP: 1.0000
Epoch 200, Loss: 0.0001, AUC: 1.0000, AP: 1.0000


### 실험
#### Cora, Citeseer 그래프에서 Link Prediction task를 수행하는 실험
- Citeseer 데이터셋에 대해 다양한 노드 속성(attribute) 벡터를 불러와서,
- GCN 인코더 + MLP 디코더 구조로 학습한 후,
- 링크 예측 성능(AUC, AP)을 측정하고 .csv로 저장

In [58]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.metrics import roc_auc_score, average_precision_score
from torch_geometric.nn import GCNConv
from torch_geometric.utils import from_networkx
from torch_geometric.transforms import RandomLinkSplit
from data.load_dataset import load_graph

# ✅ GCN 인코더 정의
class GCNEncoder(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super(GCNEncoder, self).__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, hidden_dim)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x

# MLP 기반 디코더 정의 (element-wise 곱 → MLP)
class LinkPredictor(nn.Module):
    '''
    z: 모든 노드에 대한 임베딩 벡터들을 담고 있는 행렬
    만약 그래프에 N개의 노드가 있고, hidden_dim=64이면
    → z의 shape은 [N, 64]가 돼 ex) cora dataset의 z shape은 [2708, 64] 

    z[i] * z[j] 형태의 element-wise 곱을 받아서 MLP로 edge 존재 여부(0~1 확률)를 예측하는 구조
    '''
    def __init__(self, in_channels, hidden_channels=64, num_layers=2, dropout=0.3):
        super(LinkPredictor, self).__init__()
        self.lins = nn.ModuleList()
        self.lins.append(nn.Linear(in_channels, hidden_channels))
        for _ in range(num_layers - 2):
            self.lins.append(nn.Linear(hidden_channels, hidden_channels))
        self.lins.append(nn.Linear(hidden_channels, 1))
        self.dropout = dropout

    def forward(self, x_i, x_j):
        x = x_i * x_j  # element-wise product
        for lin in self.lins[:-1]:
            x = lin(x)
            x = F.relu(x)
            x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.lins[-1](x)
        return torch.sigmoid(x).squeeze()

# ✅ 데이터 로딩
G, id2idx = load_graph()
data = from_networkx(G)
data.train_mask = data.val_mask = data.test_mask = None
transform = RandomLinkSplit(is_undirected=True, add_negative_train_samples=True)
train_data, val_data, test_data = transform(data) # train/val/test 분리

# ✅ 실험 준비
attr_dir = "/Users/seinkim/bigdas/code/gnn_with_rwr_centrality/attributes/generated_50_cora"  # 속성 조합이 저장된 디렉토리
files = sorted([f for f in os.listdir(attr_dir) if f.endswith(".npy")])
print(f"총 {len(files)}개의 속성 조합에 대해 Link Prediction 실험을 시작합니다...")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
results = []

# ✅ 모든 속성 조합에 대해 반복
for fname in tqdm(files):
    attr = np.load(os.path.join(attr_dir, fname))
    x = torch.tensor(attr, dtype=torch.float)

    train_data.x = val_data.x = test_data.x = x.to(device)

    model = GCNEncoder(input_dim=x.size(1), hidden_dim=64).to(device)
    predictor = LinkPredictor(in_channels=64, hidden_channels=64).to(device)
    optimizer = optim.Adam(list(model.parameters()) + list(predictor.parameters()), lr=0.01)

    model.train()
    predictor.train()
    for epoch in range(201):
        optimizer.zero_grad()
        z = model(train_data.x, train_data.edge_index)
        src, dst = train_data.edge_label_index
        score = predictor(z[src], z[dst])
        labels = train_data.edge_label.to(device).float()
        loss = F.binary_cross_entropy(score, labels)
        loss.backward()
        optimizer.step()

    # 평가
    model.eval()
    predictor.eval()
    with torch.no_grad():
        z = model(test_data.x, test_data.edge_index)
        src, dst = test_data.edge_label_index
        test_score = predictor(z[src], z[dst]).cpu().numpy()
        test_labels = test_data.edge_label.cpu().numpy()
        auc = roc_auc_score(test_labels, test_score)
        ap = average_precision_score(test_labels, test_score)

    results.append({
        "Attribute File": fname,
        "ROC AUC": round(auc, 4),
        "Average Precision": round(ap, 4)
    })

# ✅ 결과 저장
os.makedirs("./results", exist_ok=True)
df = pd.DataFrame(results)
df.to_csv("./results/link_prediction_mlp_decoder_generated_50_cora.csv", index=False)
print("모든 실험 완료! 결과는 results/link_prediction_mlp_decoder_generated_50_cora.csv 에 저장됨.")


총 50개의 속성 조합에 대해 Link Prediction 실험을 시작합니다...


100%|██████████| 50/50 [01:23<00:00,  1.67s/it]

모든 실험 완료! 결과는 results/link_prediction_mlp_decoder_generated_50_cora.csv 에 저장됨.





In [66]:
?


IPython -- An enhanced Interactive Python

IPython offers a fully compatible replacement for the standard Python
interpreter, with convenient shell features, special commands, command
history mechanism and output results caching.

At your system command line, type 'ipython -h' to see the command line
options available. This document only describes interactive features.

GETTING HELP
------------

Within IPython you have various way to access help:

  ?         -> Introduction and overview of IPython's features (this screen).
  object?   -> Details about 'object'.
  object??  -> More detailed, verbose information about 'object'.
  %quickref -> Quick reference of all IPython specific syntax and magics.
  help      -> Access Python's own help system.

If you are in terminal IPython you can quit this screen by pressing `q`.


MAIN FEATURES
-------------

* Access to the standard Python help with object docstrings and the Python
  manuals. Simply type 'help' (no quotes) to invoke it.

* Ma