<a href="https://colab.research.google.com/github/sourav-gupta0/Preprocessing-Automations/blob/main/GNN_CF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install PyTorch Geometric together with its compiled companion packages
# (torch-scatter and torch-sparse).
# The `-f` option points pip at the PyG wheel index for torch 1.9.0 + CUDA 10.2.
# NOTE(review): confirm that the runtime's installed torch/CUDA build matches
# that index; the captured output below shows a "Preparing metadata (setup.py)"
# step, which suggests at least one package was built from source instead of
# being taken from a prebuilt wheel - TODO verify.
# NOTE(review): none of the three packages is version-pinned, so a rerun may
# resolve to different releases.
!pip install -q torch-scatter -f https://pytorch-geometric.com/whl/torch-1.9.0+cu102.html
!pip install -q torch-sparse -f https://pytorch-geometric.com/whl/torch-1.9.0+cu102.html
!pip install -q torch-geometric

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/108.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m108.0/108.0 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone


In [None]:
# Hyperparameters read by the model definition and the training cell.

# Optimisation: step size handed to the optimizer and number of training epochs.
LR, NUM_EPOCH = 5e-4, 500

# Model widths: size of the hidden layers and of the final node embedding.
HIDDEN_LAYER, EMBEDDING_DIM = 512, 128

# Dataset selector; not referenced by any of the cells visible in this section.
DATASET = 3

In [None]:
import numpy as np
import torch
from torch_geometric.data import Data
from torch_geometric.utils import from_networkx
import networkx as nx
import pickle

In [None]:
# Print floating point numbers in fixed-point notation instead of scientific.
np.set_printoptions(suppress=True)

# SECURITY: pickle.load can execute arbitrary code embedded in the file, so
# only load pickles that you created yourself or otherwise fully trust.
# The context managers make sure both file handles are closed as soon as the
# objects are loaded.
with open('/content/drive/MyDrive/MLG/final/data/graph.pickle', 'rb') as graph_file:
  G = pickle.load(graph_file)
with open('/content/drive/MyDrive/MLG/final/data/feature.pickle', 'rb') as feature_file:
  features = pickle.load(feature_file)

In [None]:
# Edges whose raw weight is strictly greater than this value are labelled 1.0,
# every other edge is labelled 0.0.
WEIGHT_THRESHOLD = 30

# Convert the networkx graph into a PyG Data object and attach node features.
data = from_networkx(G)
data.x = torch.tensor(features, dtype=torch.float)

# Binarise the edge weights in one vectorised comparison instead of a Python
# loop over individual elements. reshape(-1) keeps one flat label per edge.
data.weight = (torch.as_tensor(data.weight) > WEIGHT_THRESHOLD).to(torch.float).reshape(-1)
print(data)

Data(edge_index=[2, 801], gender=[186], weight=[801], x=[186, 13])


In [None]:
from torch.nn import Linear, CosineSimilarity, Softmax
from torch_geometric.nn import GCNConv
from torch_geometric.utils import dropout_adj
import torch.nn.functional as F

class Net(torch.nn.Module):
  """GCN encoder followed by a cosine-similarity edge decoder.

  The encoder projects node features with a linear layer and then applies two
  graph convolutions (conv1 and conv3), each followed by tanh. The decoder
  scores each candidate edge by the cosine similarity of its two endpoint
  embeddings.

  Args:
    hidden_layer: width of the linear projection and of the hidden GCN layers.
    embedding_dim: width of the final node embedding produced by conv3.
    in_channels: number of input node features. When None (the default) it is
      read from the notebook-level `data.num_features`, as before.
  """

  def __init__(self, hidden_layer=HIDDEN_LAYER, embedding_dim=EMBEDDING_DIM, in_channels=None):
    super().__init__()
    # Fixed seed so that the layer initialisation below is reproducible.
    torch.manual_seed(12345)
    if in_channels is None:
      # Backward-compatible default: take the feature width from the global `data`.
      in_channels = data.num_features
    self.classifier = Linear(in_channels, hidden_layer)
    self.conv1 = GCNConv(hidden_layer, hidden_layer)
    # NOTE(review): conv2 is constructed but never called in forward(). It is
    # kept so the parameter set and the seeded initialisation order of the
    # other layers stay unchanged.
    self.conv2 = GCNConv(hidden_layer, hidden_layer)
    # Honour the embedding_dim argument (previously the global EMBEDDING_DIM
    # was used here, so the argument had no effect).
    self.conv3 = GCNConv(hidden_layer, embedding_dim)
    self.similarity = torch.nn.CosineSimilarity(dim=1, eps=1e-6)

  def forward(self, x, train_edges, pred_edges):
    """Score the edges in `pred_edges` using message passing over `train_edges`.

    Args:
      x: node feature tensor (presumably [num_nodes, in_channels] - confirm
        against the caller).
      train_edges: edge index used for message passing in the GCN layers.
      pred_edges: edge index whose row 0 holds source node ids and row 1 holds
        target node ids of the edges to score.

    Returns:
      One score per column of `pred_edges`: tanh(|cosine similarity|) of the
      two endpoint embeddings, divided by the p=100 norm of the score vector.
    """
    # Run on whatever device the model parameters live on instead of assuming CUDA.
    device = self.classifier.weight.device
    x = x.to(device)
    train_edges = train_edges.to(device)

    # Encoder
    h = self.classifier(x)
    h = self.conv1(h, train_edges)
    h = torch.tanh(h)
    # dropout_adj returns the thinned edge index; the result must be used or
    # the call has no effect. Edges are only dropped while in training mode.
    train_edges, _ = dropout_adj(train_edges, p=0.4, training=self.training)
    h = self.conv3(h, train_edges)
    # No squeeze here: the embeddings stay 2-D so that the row lookup and the
    # dim=1 cosine similarity below still work when a dimension has size 1.
    nodes = torch.tanh(h)

    # Decoder
    ids_from = pred_edges[0].detach().to(device)
    ids_to = pred_edges[1].detach().to(device)
    preds = self.similarity(torch.index_select(nodes, 0, ids_from), torch.index_select(nodes, 0, ids_to))
    preds = torch.tanh(torch.abs(preds))
    # Rescale by a high-order p-norm of the whole score vector.
    preds = F.normalize(preds, dim=0, p=100)

    return preds

In [None]:
from IPython.display import Javascript

# Ask Colab to cap this cell's output frame (maxHeight of 300) so that the
# long per-epoch log printed below stays inside a scrollable area.
_LIMIT_OUTPUT_HEIGHT_JS = '''google.colab.output.setIframeHeight(0, true, {maxHeight: 300})'''
display(Javascript(_LIMIT_OUTPUT_HEIGHT_JS))

from sklearn.metrics import roc_auc_score, average_precision_score
from sklearn.model_selection import train_test_split

# Use the GPU when one is available and fall back to the CPU otherwise.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = Net().to(device)
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Rprop(model.parameters(), lr=LR)
torch.set_printoptions(precision=8, sci_mode=False)

# Outer split by edge position (no shuffling): the first 90% of the edges are
# used for training, the last 10% are held out for evaluation.
# NOTE(review): a positional split may not be representative if the edge order
# is not random - confirm this is intended.
train_size = int(data.edge_index.shape[1]*9/10)
train_data, test_data = data.edge_index[:,:train_size], data.edge_index[:,train_size:]
train_label, test_label = data.weight[:train_size], data.weight[train_size:]

# Inner split of the training edges: the first 90% are used for message
# passing, the last 10% are the edges whose labels supervise the loss.
# The split is identical in every epoch, so it is computed once here.
# `train_size` is reassigned (as the per-epoch code did) so the name ends up
# holding the same value as before.
train_size = int(train_data.shape[1]*9/10)
train_edges, test_edges = train_data[:,:train_size], train_data[:,train_size:]
train_weight, test_weight = train_label[:train_size], train_label[train_size:]

# Loop-invariant device copies and evaluation targets.
label = test_weight.to(device)
train_edges_dev = train_edges.to(device)
train_data_dev = train_data.to(device)
eval_target = test_label.numpy()

for epoch in range(NUM_EPOCH):
  # Training step.
  model.train()
  optimizer.zero_grad()
  out = model(data.x, train_edges_dev, test_edges)
  loss = criterion(out, label)

  print(f'Epoch: {epoch:03d}, Loss: {loss.item():.6f}')
  loss.backward()
  optimizer.step()

  # Evaluation on the held-out edges: eval mode and no autograd graph, so the
  # metrics do not depend on training-only behaviour and no memory is spent
  # on gradients.
  model.eval()
  with torch.no_grad():
    pred = model(data.x, train_data_dev, test_data)
  pred_np = pred.cpu().numpy()
  print(f'roc_auc_score: {roc_auc_score(eval_target, pred_np)}, average_precision_score: {average_precision_score(eval_target, pred_np)}')


<IPython.core.display.Javascript object>

Epoch: 000, Loss: 1.742951
roc_auc_score: 0.3638392857142857, average_precision_score: 0.32929095674152764
Epoch: 001, Loss: 1.685726
roc_auc_score: 0.3549107142857143, average_precision_score: 0.3290500289273328
Epoch: 002, Loss: 1.684870
roc_auc_score: 0.37340561224489793, average_precision_score: 0.33018638020206015
Epoch: 003, Loss: 1.685765
roc_auc_score: 0.37340561224489793, average_precision_score: 0.33513023406694475
Epoch: 004, Loss: 1.662602
roc_auc_score: 0.3549107142857143, average_precision_score: 0.32337345001103956
Epoch: 005, Loss: 1.598821
roc_auc_score: 0.3580994897959184, average_precision_score: 0.3314557693669413
Epoch: 006, Loss: 1.531949
roc_auc_score: 0.3632015306122449, average_precision_score: 0.3267222975128982
Epoch: 007, Loss: 1.463514
roc_auc_score: 0.3727678571428572, average_precision_score: 0.3335795891808132
Epoch: 008, Loss: 1.342725
roc_auc_score: 0.3651147959183674, average_precision_score: 0.3255369517434962
Epoch: 009, Loss: 1.244357
roc_auc_score