In [5]:
import os.path as osp

import torch
from sklearn.metrics import roc_auc_score

import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv
from torch_geometric.utils import negative_sampling

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
transform = T.Compose([
    T.NormalizeFeatures(),
    T.ToDevice(device),
    T.RandomLinkSplit(num_val=0.05, num_test=0.1, is_undirected=True,
                      add_negative_train_samples=False),
])

In [3]:
path = osp.join(osp.dirname(osp.realpath(".")), '..', 'data', 'Planetoid')
dataset = Planetoid(path, name='Cora', transform=transform)
# After applying the `RandomLinkSplit` transform, the data is transformed from
# a data object to a list of tuples (train_data, val_data, test_data), with
# each element representing the corresponding split.
train_data, val_data, test_data = dataset[0]

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [45]:
train_data

Data(x=[2708, 1433], edge_index=[2, 8976], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], edge_label=[4488], edge_label_index=[2, 4488])

In [6]:
def add_edge_labels(graph):
    transform = T.RandomLinkSplit(num_val=0.05, num_test=0.1, is_undirected=True,
                      add_negative_train_samples=False)
    return transform(graph)

In [7]:
labeled_graphs = [add_edge_labels(graph) for graph in data_list]

In [62]:
labeled_graphs[0]

(Data(x=[101, 2], edge_index=[2, 1308], y=[101, 101], pos=[101, 2], edge_label=[654], edge_label_index=[2, 654]),
 Data(x=[101, 2], edge_index=[2, 1308], y=[101, 101], pos=[101, 2], edge_label=[76], edge_label_index=[2, 76]),
 Data(x=[101, 2], edge_index=[2, 1384], y=[101, 101], pos=[101, 2], edge_label=[152], edge_label_index=[2, 152]))

In [65]:
for batch in train_loader:
    print(batch)

DataBatch(x=[2020, 2], edge_index=[2, 30300], y=[2020, 101], pos=[2020, 2], batch=[2020], ptr=[21])
DataBatch(x=[2020, 2], edge_index=[2, 30300], y=[2020, 101], pos=[2020, 2], batch=[2020], ptr=[21])
DataBatch(x=[2020, 2], edge_index=[2, 30300], y=[2020, 101], pos=[2020, 2], batch=[2020], ptr=[21])
DataBatch(x=[2020, 2], edge_index=[2, 30300], y=[2020, 101], pos=[2020, 2], batch=[2020], ptr=[21])
DataBatch(x=[2020, 2], edge_index=[2, 30300], y=[2020, 101], pos=[2020, 2], batch=[2020], ptr=[21])
DataBatch(x=[2020, 2], edge_index=[2, 30300], y=[2020, 101], pos=[2020, 2], batch=[2020], ptr=[21])
DataBatch(x=[2020, 2], edge_index=[2, 30300], y=[2020, 101], pos=[2020, 2], batch=[2020], ptr=[21])
DataBatch(x=[2020, 2], edge_index=[2, 30300], y=[2020, 101], pos=[2020, 2], batch=[2020], ptr=[21])
DataBatch(x=[2020, 2], edge_index=[2, 30300], y=[2020, 101], pos=[2020, 2], batch=[2020], ptr=[21])
DataBatch(x=[2020, 2], edge_index=[2, 30300], y=[2020, 101], pos=[2020, 2], batch=[2020], ptr=[21])


In [10]:
class Net(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def encode(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        return self.conv2(x, edge_index)

    def decode(self, z, edge_label_index):
        return (z[edge_label_index[0]] * z[edge_label_index[1]]).sum(dim=-1)

    def decode_all(self, z):
        prob_adj = z @ z.t()
        return (prob_adj > 0).nonzero(as_tuple=False).t()

In [55]:
model = Net(data_list[0].num_features, 128, 64).to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)
criterion = torch.nn.BCEWithLogitsLoss()

In [12]:
import numpy as np

In [56]:
def train(loader):
    model.train()
    total_loss = 0

    for batch in loader:
        optimizer.zero_grad()
        z = model.encode(batch.x, batch.edge_index)

        # We perform a new round of negative sampling for every training epoch:
        neg_edge_index = negative_sampling(
            edge_index=batch.edge_index, num_nodes=batch.num_nodes,
            num_neg_samples=batch.edge_label_index.size(1), method='sparse')

        # Concat positive and negative edge indices.
        edge_label_index = torch.cat(
            [batch.edge_label_index, neg_edge_index],
            dim=-1,
        )
        # Label for positive edges: 1, for negative edges: 0.
        edge_label = torch.cat([
            batch.edge_label,
            batch.edge_label.new_zeros(neg_edge_index.size(1))
        ], dim=0)

        # Note: The model is trained in a supervised manner using the given
        # `edge_label_index` and `edge_label` targets.
        out = model.decode(z, edge_label_index).view(-1)
        loss = criterion(out, edge_label)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    return total_loss / len(loader)


@torch.no_grad()
def test(loader):
    model.eval()
    all_out = []
    all_labels = []

    for batch in loader:
        z = model.encode(batch.x, batch.edge_index)
        out = model.decode(z, batch.edge_label_index).view(-1).sigmoid()
        all_out.append(out.cpu().numpy())
        all_labels.append(batch.edge_label.cpu().numpy())

    all_out = np.concatenate(all_out)
    all_labels = np.concatenate(all_labels)
    return roc_auc_score(all_labels, all_out)

In [57]:
# Train/Test Loop
best_val_auc = final_test_auc = 0
for epoch in range(1, 101):
    loss = train(train_loader)
    val_auc = test(val_loader)
    test_auc = test(test_loader)
    if val_auc > best_val_auc:
        best_val_auc = val_auc
        final_test_auc = test_auc
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Val: {val_auc:.4f}, '
          f'Test: {test_auc:.4f}')

print(f'Final Test: {final_test_auc:.4f}')

Epoch: 001, Loss: 70.0116, Val: 0.8504, Test: 0.8517
Epoch: 002, Loss: 1.0224, Val: 0.8688, Test: 0.8703
Epoch: 003, Loss: 0.6042, Val: 0.8746, Test: 0.8761
Epoch: 004, Loss: 0.5614, Val: 0.8758, Test: 0.8773
Epoch: 005, Loss: 0.5477, Val: 0.8868, Test: 0.8883
Epoch: 006, Loss: 0.5367, Val: 0.9040, Test: 0.9057
Epoch: 007, Loss: 0.5257, Val: 0.9195, Test: 0.9211
Epoch: 008, Loss: 0.5165, Val: 0.9262, Test: 0.9275
Epoch: 009, Loss: 0.5119, Val: 0.9292, Test: 0.9305
Epoch: 010, Loss: 0.5083, Val: 0.9288, Test: 0.9299
Epoch: 011, Loss: 0.5086, Val: 0.9314, Test: 0.9325
Epoch: 012, Loss: 0.5069, Val: 0.9318, Test: 0.9329
Epoch: 013, Loss: 0.5060, Val: 0.9330, Test: 0.9341
Epoch: 014, Loss: 0.5044, Val: 0.9331, Test: 0.9341
Epoch: 015, Loss: 0.5042, Val: 0.9339, Test: 0.9349
Epoch: 016, Loss: 0.5051, Val: 0.9327, Test: 0.9337
Epoch: 017, Loss: 0.5028, Val: 0.9329, Test: 0.9339
Epoch: 018, Loss: 0.5063, Val: 0.9320, Test: 0.9330
Epoch: 019, Loss: 0.5035, Val: 0.9343, Test: 0.9353
Epoch: 020,

In [95]:


z = model.encode(train_size[0].x, train_size[0].edge_index)
final_edge_index = model.decode_all(z)

In [96]:
z

tensor([[ 0.0975,  0.0965,  0.1179,  ..., -0.2019, -0.0823,  0.2832],
        [ 0.1081, -0.1024,  0.2862,  ...,  0.0125, -0.0514, -0.0633],
        [ 0.0154, -0.0206,  0.2033,  ..., -0.2354, -0.1704,  0.2083],
        ...,
        [ 0.2875, -0.0498, -0.3933,  ..., -0.0443,  0.0819, -0.1261],
        [ 0.0784, -0.0241,  0.2030,  ...,  0.1055,  0.0498, -0.2083],
        [ 0.3449, -0.1079, -0.2666,  ..., -0.3180,  0.0281, -0.0787]],
       grad_fn=<AddBackward0>)

In [112]:
fei = final_edge_index.tolist()
edges_pred = {k:[] for k in range(101)}
edges_pred_inv = {k:[] for k in range(101)}
for i in range(len(fei[0])):
    edges_pred[fei[0][i]].append(fei[1][i])
    edges_pred_inv[fei[1][i]].append(fei[0][i])

In [117]:
ts0 = train_size[0].edge_index.tolist()
edges = {k:[] for k in range(101)}
edges_inv = {k:[] for k in range(101)}
for i in range(len(ts0[0])):
    edges[ts0[0][i]].append(ts0[1][i])
    edges_inv[ts0[1][i]].append(ts0[0][i])

In [118]:
print(edges[0])
print(edges_inv[0])

[72, 68, 22, 88]
[72, 68, 22, 88]


In [119]:
print(len(edges_pred[0]))
print(edges_pred_inv[0])

38
[0, 1, 2, 4, 5, 7, 12, 16, 21, 22, 24, 26, 30, 33, 35, 36, 38, 39, 42, 45, 47, 48, 50, 54, 57, 58, 59, 67, 68, 72, 76, 79, 81, 84, 87, 88, 89, 94]


In [120]:
z_raw = model.encode(data_list[0].x, data_list[0].edge_index)
final_edge_index = model.decode_all(z_raw)

In [121]:
z_raw

tensor([[ 0.0962,  0.0431,  0.1419,  ..., -0.2443, -0.1488,  0.2129],
        [ 0.1395, -0.0889,  0.2966,  ..., -0.0376, -0.0449, -0.0761],
        [-0.0483, -0.1029,  0.2274,  ..., -0.1108, -0.2028,  0.1227],
        ...,
        [ 0.3060, -0.0592, -0.4395,  ..., -0.1580,  0.1210, -0.1487],
        [ 0.0710,  0.0184,  0.2128,  ...,  0.0994,  0.0470, -0.2367],
        [ 0.3488, -0.0906, -0.3668,  ..., -0.2929,  0.0824, -0.1348]],
       grad_fn=<AddBackward0>)

In [122]:
final_edge_index

tensor([[  0,   0,   0,  ..., 100, 100, 100],
        [  0,   1,   2,  ...,  97,  98, 100]])

In [123]:
data_list[0].edge_index

tensor([[ 22,  68,  88,  ...,  97,  92,  53],
        [  0,   0,   0,  ..., 100, 100, 100]])

In [124]:
fei = final_edge_index.tolist()
edges_pred = {k:[] for k in range(101)}
edges_pred_inv = {k:[] for k in range(101)}
for i in range(len(fei[0])):
    edges_pred[fei[0][i]].append(fei[1][i])
    edges_pred_inv[fei[1][i]].append(fei[0][i])

In [125]:
ts0 = data_list[0].edge_index.tolist()
edges = {k:[] for k in range(101)}
edges_inv = {k:[] for k in range(101)}
for i in range(len(ts0[0])):
    edges[ts0[0][i]].append(ts0[1][i])
    edges_inv[ts0[1][i]].append(ts0[0][i])

In [126]:
print(edges[0])
print(edges_inv[0])

[22, 54, 68, 72, 88]
[22, 68, 88, 54, 72, 12, 45, 24, 30, 57, 5, 26, 7, 47, 2]


In [127]:
print(edges_pred[0])
print(edges_pred_inv[0])

[0, 1, 2, 3, 4, 5, 7, 8, 9, 12, 14, 15, 16, 19, 21, 22, 24, 26, 28, 30, 31, 33, 35, 36, 37, 38, 39, 45, 47, 48, 54, 55, 56, 57, 58, 62, 63, 64, 67, 68, 70, 72, 76, 78, 79, 81, 84, 87, 88, 89, 90, 92, 94, 95, 97, 100]
[0, 1, 2, 3, 4, 5, 7, 8, 9, 12, 14, 15, 16, 19, 21, 22, 24, 26, 28, 30, 31, 33, 35, 36, 37, 38, 39, 45, 47, 48, 54, 55, 56, 57, 58, 62, 63, 64, 67, 68, 70, 72, 76, 78, 79, 81, 84, 87, 88, 89, 90, 92, 94, 95, 97, 100]


In [134]:
a = {1,3,5}
b = {1,2,3,4,5}
a.intersection(b)

{1, 3, 5}

In [135]:
graph_dict = {}
cnt = 0
for graph in data_list:
    z_raw = model.encode(graph.x, graph.edge_index)
    final_edge_index = model.decode_all(z_raw)
    fei = final_edge_index.tolist()
    edges_pred = {k:[] for k in range(101)}
    edges_pred_inv = {k:[] for k in range(101)}
    for i in range(len(fei[0])):
        edges_pred[fei[0][i]].append(fei[1][i])
        edges_pred_inv[fei[1][i]].append(fei[0][i])
    ts0 = graph.edge_index.tolist()
    edges = {k:[] for k in range(101)}
    edges_inv = {k:[] for k in range(101)}
    for i in range(len(ts0[0])):
        edges[ts0[0][i]].append(ts0[1][i])
        edges_inv[ts0[1][i]].append(ts0[0][i])
    originals = {}
    predictions = {}
    for i in range(101):
        originals[i] = set(edges[i] + edges_inv[i])
        predictions[i] = set(edges_pred[i] + edges_pred_inv[i])
    graph_dict[cnt] = {"real": originals, "preds": predictions}
    cnt += 1

In [148]:
confusion_dict = {}
true_positives = []
false_positives = []
true_negatives = []
false_negatives = []
for key in graph_dict:
    real = graph_dict[key]["real"]
    pred = graph_dict[key]["preds"]
    node_matrix = {}
    for i in range(101):
        tp = len(pred[i].intersection(real[i]))
        fp = len(pred[i] - real[i])
        real_neg = set([j for j in range(101)]) - {i} - real[i]
        pred_neg = set([j for j in range(101)]) - {i} - pred[i]
        tn = len(pred_neg.intersection(real_neg))
        fn = len(pred_neg - real_neg)
        total = tp + fp + tn + fn
        node_matrix[i] = {"tp": tp, "fp": fp, "tn": tn, "fn": fn, "total": total}
        true_positives.append(tp/total)
        false_positives.append(fp/total)
        true_negatives.append(tn/total)
        false_negatives.append(fn/total)
    confusion_dict[key] = node_matrix

In [151]:
from statistics import mean, stdev
print("true positives mean={0:.2f}, stdev={1:.2f}".format(mean(true_positives), stdev(true_positives)))
print("false positives mean={0:.2f}, stdev={1:.2f}".format(mean(false_positives), stdev(false_positives)))
print("true negatives mean={0:.2f}, stdev={1:.2f}".format(mean(true_negatives), stdev(true_negatives)))
print("false negatives mean={0:.2f}, stdev={1:.2f}".format(mean(false_negatives), stdev(false_negatives)))

true positives mean=0.17, stdev=0.03
false positives mean=0.27, stdev=0.11
true negatives mean=0.56, stdev=0.12
false negatives mean=0.00, stdev=0.00


In [145]:
a = graph_dict[0]["real"][0]
b = graph_dict[0]["preds"][0]
len(a.intersection(b))

15

In [58]:
from random import sample
data_raw_dict = {
    "positives": [],
    "negatives": []
}
cnt = 0
pos_cnt = 0
neg_cnt = 0
for graph in data_list:
    neg_cnt = 0
    ng_matrix = graph.y.tolist()
    encoding_matrix = model.encode(graph.x, graph.edge_index).tolist()
    for i in range(101):
        for j in sample(range(101), 30):
            if i == j:
                # if pos_cnt < 80000:
                #     data_raw_dict["positives"].append(encoding_matrix[i]+encoding_matrix[i])
                #     pos_cnt += 1
                continue
            else:
                if ng_matrix[i][j] > 0.5:
                    if pos_cnt < 80000:
                        data_raw_dict["positives"].append(encoding_matrix[i]+encoding_matrix[j]+[1])
                        pos_cnt += 1
                else:
                    if pos_cnt < 80000 and neg_cnt < 1010:
                        data_raw_dict["negatives"].append(encoding_matrix[i]+encoding_matrix[j]+[0])
                        neg_cnt += 1
    

In [69]:
pre_tensor = data_raw_dict["positives"][:5000] + data_raw_dict["negatives"][:5000]
main_tensor = torch.tensor(pre_tensor, dtype=torch.float32)


In [70]:
main_tensor

tensor([[-0.1596, -0.1800, -0.2464,  ...,  0.1065,  0.0275,  1.0000],
        [ 0.1581,  0.1355,  0.1712,  ..., -0.0182, -0.0102,  1.0000],
        [-0.0444, -0.1654, -0.1269,  ..., -0.1918, -0.1444,  1.0000],
        ...,
        [-0.0811, -0.2015, -0.0910,  ..., -0.2468, -0.1751,  0.0000],
        [-0.0811, -0.2015, -0.0910,  ...,  0.1364,  0.2510,  0.0000],
        [-0.0811, -0.2015, -0.0910,  ..., -0.2437, -0.1129,  0.0000]])

In [71]:
main_tensor = main_tensor[torch.randperm(main_tensor.size()[0])]

In [72]:
labels = main_tensor[:,-1:]

In [73]:
embeddings = main_tensor[:,:-1]

In [39]:

import torch.nn as nn

In [64]:
class Wide(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(128, 64)
        self.relu = nn.ReLU()
        self.output = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()
 
    def forward(self, x):
        x = self.relu(self.hidden(x))
        x = self.sigmoid(self.output(x))
        return x

In [65]:

class Deep(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(128, 128)
        self.act1 = nn.ReLU()
        self.layer2 = nn.Linear(128, 128)
        self.act2 = nn.ReLU()
        self.layer3 = nn.Linear(128, 128)
        self.act3 = nn.ReLU()
        self.output = nn.Linear(128, 1)
        self.sigmoid = nn.Sigmoid()
 
    def forward(self, x):
        x = self.act1(self.layer1(x))
        x = self.act2(self.layer2(x))
        x = self.act3(self.layer3(x))
        x = self.sigmoid(self.output(x))
        return x

In [66]:
import copy
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import tqdm
 
def model_train(model, X_train, y_train, X_val, y_val):
    # loss function and optimizer
    loss_fn = nn.BCELoss()  # binary cross entropy
    optimizer = optim.Adam(model.parameters(), lr=0.0001)
 
    n_epochs = 20   # number of epochs to run
    batch_size = 10  # size of each batch
    batch_start = torch.arange(0, len(X_train), batch_size)
 
    # Hold the best model
    best_acc = - np.inf   # init to negative infinity
    best_weights = None
 
    for epoch in range(n_epochs):
        model.train()
        with tqdm.tqdm(batch_start, unit="batch", mininterval=0, disable=True) as bar:
            bar.set_description(f"Epoch {epoch}")
            for start in bar:
                # take a batch
                X_batch = X_train[start:start+batch_size]
                y_batch = y_train[start:start+batch_size]
                # forward pass
                y_pred = model(X_batch)
                loss = loss_fn(y_pred, y_batch)
                # backward pass
                optimizer.zero_grad()
                loss.backward()
                # update weights
                optimizer.step()
                # print progress
                acc = (y_pred.round() == y_batch).float().mean()
                bar.set_postfix(
                    loss=float(loss),
                    acc=float(acc)
                )
        # evaluate accuracy at end of each epoch
        model.eval()
        y_pred = model(X_val)
        acc = (y_pred.round() == y_val).float().mean()
        acc = float(acc)
        if acc > best_acc:
            best_acc = acc
            best_weights = copy.deepcopy(model.state_dict())
    # restore model and return best accuracy
    model.load_state_dict(best_weights)
    return best_acc

In [74]:

from sklearn.model_selection import StratifiedKFold, train_test_split

# train-test split: Hold out the test set for final model evaluation
X_train, X_test, y_train, y_test = train_test_split(embeddings, labels, train_size=0.7, shuffle=True)
 
# define 5-fold cross validation test harness
kfold = StratifiedKFold(n_splits=5, shuffle=True)
cv_scores_wide = []
for train, test in kfold.split(X_train, y_train):
    # create model, train, and get accuracy
    model = Wide()
    acc = model_train(model, X_train[train], y_train[train], X_train[test], y_train[test])
    print("Accuracy (wide): %.2f" % acc)
    cv_scores_wide.append(acc)
cv_scores_deep = []
for train, test in kfold.split(X_train, y_train):
    # create model, train, and get accuracy
    model = Deep()
    acc = model_train(model, X_train[train], y_train[train], X_train[test], y_train[test])
    print("Accuracy (deep): %.2f" % acc)
    cv_scores_deep.append(acc)
 
# evaluate the model
wide_acc = np.mean(cv_scores_wide)
wide_std = np.std(cv_scores_wide)
deep_acc = np.mean(cv_scores_deep)
deep_std = np.std(cv_scores_deep)
print("Wide: %.2f%% (+/- %.2f%%)" % (wide_acc*100, wide_std*100))
print("Deep: %.2f%% (+/- %.2f%%)" % (deep_acc*100, deep_std*100))

Accuracy (wide): 0.94
Accuracy (wide): 0.95
Accuracy (wide): 0.94
Accuracy (wide): 0.94
Accuracy (wide): 0.94
Accuracy (deep): 0.95
Accuracy (deep): 0.95
Accuracy (deep): 0.95
Accuracy (deep): 0.96
Accuracy (deep): 0.95
Wide: 94.21% (+/- 0.52%)
Deep: 95.26% (+/- 0.37%)


In [75]:

# rebuild model with full set of training data
if wide_acc > deep_acc:
    print("Retrain a wide model")
    model = Wide()
else:
    print("Retrain a deep model")
    model = Deep()
acc = model_train(model, X_train, y_train, X_test, y_test)
print(f"Final model accuracy: {acc*100:.2f}%")

Retrain a deep model
Final model accuracy: 95.03%


In [76]:

model.eval()
with torch.no_grad():
    # Test out inference with 5 samples
    for i in range(5):
        y_pred = model(X_test[i:i+1])
        print(f"{X_test[i].numpy()} -> {y_pred[0].numpy()} (expected {y_test[i].numpy()})")

[ 0.14186503  0.06852816  0.13882583  0.18554422  0.01259524  0.17407712
 -0.13004386 -0.05792312  0.2794271  -0.02302442  0.23547888  0.2619889
  0.03080954  0.01906595  0.12538412 -0.05007584 -0.1969449   0.26221353
 -0.14023745 -0.17518337 -0.25760165  0.32017156  0.19400236 -0.21908599
  0.24090998 -0.01987933  0.14625137  0.11163279  0.13615549  0.0862607
  0.08827582 -0.05215658 -0.00360495  0.01262195 -0.0599436   0.44187227
  0.1084906  -0.03968846 -0.19452572  0.01262102  0.29458293  0.05522395
  0.00714007  0.3330657  -0.02873032  0.05573579 -0.09651121 -0.15342638
 -0.05340988 -0.1161788   0.12370549  0.0268348  -0.10383974 -0.15232193
 -0.0115384  -0.11657755  0.30440044 -0.19080487  0.27328292  0.0676147
 -0.03920259  0.00676232  0.03846721  0.02173189  0.03840839 -0.03786739
 -0.5602042   0.10600065 -0.01190336 -0.42612877  0.01708128  0.05423341
  0.38217825  0.10992724 -0.20942265 -0.21930662  0.02979036 -0.3321424
  0.03305952 -0.00558868  0.2711308  -0.12160604  0.174

now a chatgpt example

In [15]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GNNEncoder(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(GNNEncoder, self).__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, output_dim)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x

In [16]:
class DotProductDecoder(nn.Module):
    def __init__(self):
        super(DotProductDecoder, self).__init__()

    def forward(self, z):
        adj_pred = torch.sigmoid(torch.matmul(z, z.t()))
        return adj_pred

In [17]:
class GraphAutoencoder(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(GraphAutoencoder, self).__init__()
        self.encoder = GNNEncoder(input_dim, hidden_dim, output_dim)
        self.decoder = DotProductDecoder()

    def forward(self, x, edge_index):
        z = self.encoder(x, edge_index)
        adj_pred = self.decoder(z)
        return adj_pred, z

In [1]:
from math import sqrt
import torch
from torch_geometric.data import Data
from random import randint
from sys import float_info

instances = {}
for k in range(0, 1000):
    nodes = {}
    for i in range(0, 50):
        lat_i = randint(0, 100)
        lon_i = randint(0, 100)
        node_i = (lat_i, lon_i)
        lat_j = randint(0, 100)
        lon_j = randint(0, 100)
        node_j = (lat_j, lon_j)
        nodes[i + 1] = node_i
        nodes[i + 51] = node_j

    dist = {}
    pairs = {}
    for i in range(1, 101):
        for j in range(1, 101):
            if i != j:
                dist[i,j] = sqrt( (nodes[i][0] - nodes[j][0])**2 + (nodes[i][1] - nodes[j][1])**2 )
            else:
                dist[i,j] = float_info.max
    for i in range(1, 101):
        for j in range(1, 101):
            if i not in pairs:
                pairs[i] = j
            if i != j:
                if dist[i,j] < dist[i,pairs[i]]:
                    pairs[i] = j

    nodes[0] = (0,0)
    for i in range(1,101):
        dist[0,i] = sqrt( (nodes[0][0] - nodes[i][0])**2 + (nodes[0][1] - nodes[i][1])**2 )
        dist[i,0] = dist[0,i]
    y = [[0 for _ in range(101)] for _ in range(101)]
    for i in range(101):
        if i > 0:
            y[i][pairs[i]] = 1
                
    instances[k] = {"nodes": nodes, "dist": dist, "y": y}

from torch_geometric.nn import knn_graph
data_list = []
for instance_name in instances:
    y = torch.tensor(instances[instance_name]["y"], dtype=torch.float)
    x = torch.tensor([instances[instance_name]["nodes"][i] for i in range(0, 101)], dtype=torch.float)
    pos = []
    for i in range(101):
        pos.append(instances[instance_name]["nodes"][i])
    pos = torch.tensor(pos, dtype=torch.double)
    # ## filtering by TW, strict
    # complete_graph_list = []
    # for i in range(101):
    #     for j in range(101):
    #         if i!=j:
    #             try:
    #                 if instance_dict[instance_name][i][5] + instance_dict[instance_name][i][6] + loc_dict[i][j] < instance_dict[instance_name][i][5]:
    #                     complete_graph_list.append([i,j])
    #             except:
    #                 continue
    # edge_index = torch.tensor(complete_graph_list, dtype=torch.double).t().contiguous()
    ## end filtering
    data_list.append(Data(x=x, y=y, edge_index = knn_graph(x, 15), pos=pos))

In [2]:
from torch_geometric.loader import DataLoader

In [89]:
labeled_graphs[0]

(Data(x=[101, 2], edge_index=[2, 1308], y=[101, 101], pos=[101, 2], edge_label=[654], edge_label_index=[2, 654]),
 Data(x=[101, 2], edge_index=[2, 1308], y=[101, 101], pos=[101, 2], edge_label=[76], edge_label_index=[2, 76]),
 Data(x=[101, 2], edge_index=[2, 1384], y=[101, 101], pos=[101, 2], edge_label=[152], edge_label_index=[2, 152]))

In [90]:
from torch_geometric.data import Batch

def custom_collate(data_list):
    batch = Batch.from_data_list(data_list)

    # Manually handle edge_label and edge_label_index if they exist in the data_list
    if hasattr(data_list[0], 'edge_label_index'):
        edge_label_index_list = [data.edge_label_index for data in data_list]
        batch.edge_label_index = torch.cat(edge_label_index_list, dim=1)
        
    if hasattr(data_list[0], 'edge_label'):
        edge_label_list = [data.edge_label for data in data_list]
        batch.edge_label = torch.cat(edge_label_list, dim=0)
        
    return batch

In [8]:
train_size = [g[0] for g in labeled_graphs]
val_size = [g[1] for g in labeled_graphs]
test_size = [g[2] for g in labeled_graphs]

In [9]:

train_loader = DataLoader(train_size, batch_size=20, shuffle=True)
val_loader = DataLoader(val_size, batch_size=20, shuffle=False)
test_loader = DataLoader(test_size, batch_size=20, shuffle=False)

In [93]:
for batch in train_loader:
    print(batch)

DataBatch(x=[2020, 2], edge_index=[2, 25794], y=[2020, 101], pos=[2020, 2], edge_label=[12897], edge_label_index=[2, 12897], batch=[2020], ptr=[21])
DataBatch(x=[2020, 2], edge_index=[2, 25712], y=[2020, 101], pos=[2020, 2], edge_label=[12856], edge_label_index=[2, 12856], batch=[2020], ptr=[21])
DataBatch(x=[2020, 2], edge_index=[2, 25696], y=[2020, 101], pos=[2020, 2], edge_label=[12848], edge_label_index=[2, 12848], batch=[2020], ptr=[21])
DataBatch(x=[2020, 2], edge_index=[2, 25478], y=[2020, 101], pos=[2020, 2], edge_label=[12739], edge_label_index=[2, 12739], batch=[2020], ptr=[21])
DataBatch(x=[2020, 2], edge_index=[2, 25672], y=[2020, 101], pos=[2020, 2], edge_label=[12836], edge_label_index=[2, 12836], batch=[2020], ptr=[21])
DataBatch(x=[2020, 2], edge_index=[2, 25806], y=[2020, 101], pos=[2020, 2], edge_label=[12903], edge_label_index=[2, 12903], batch=[2020], ptr=[21])
DataBatch(x=[2020, 2], edge_index=[2, 25796], y=[2020, 101], pos=[2020, 2], edge_label=[12898], edge_label

In [29]:
import torch.optim as optim
from torch_geometric.utils import to_dense_adj

# Assume `data_list` contains multiple graphs with `x`, `edge_index` attributes.
# data_list = [...] 

input_dim = 2
hidden_dim = 64
output_dim = 32
model = GraphAutoencoder(input_dim, hidden_dim, output_dim)
optimizer = optim.Adam(model.parameters(), lr=0.01)
criterion = nn.BCELoss()

for epoch in range(50):
    model.train()
    total_loss = 0
    for data in loader:
        # optimizer.zero_grad()
        adj_dense = to_dense_adj(data.edge_index, max_num_nodes=data.num_nodes)[0]
        adj_dense = adj_dense.view(-1)
        adj_pred, _ = model(data.x, data.edge_index)
        loss = criterion(adj_pred.view(-1), adj_dense)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f'Epoch {epoch}, Loss: {total_loss / len(data_list)}')


Epoch 0, Loss: 1.9940592956542968
Epoch 1, Loss: 1.9940592956542968
Epoch 2, Loss: 1.9940592956542968


KeyboardInterrupt: 