<a href="https://colab.research.google.com/github/pushkar243/AWS_Config/blob/master/GNN_assignment2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install torch torch-geometric ogb scikit-learn


Collecting torch-geometric
  Downloading torch_geometric-2.5.3-py3-none-any.whl.metadata (64 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.2/64.2 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ogb
  Downloading ogb-1.3.6-py3-none-any.whl.metadata (6.2 kB)
Collecting outdated>=0.2.0 (from ogb)
  Downloading outdated-0.2.2-py2.py3-none-any.whl.metadata (4.7 kB)
Collecting littleutils (from outdated>=0.2.0->ogb)
  Downloading littleutils-0.2.4-py3-none-any.whl.metadata (679 bytes)
Downloading torch_geometric-2.5.3-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ogb-1.3.6-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.8/78.8 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading outdated-0.2.2-py2.py3-none-any.whl (7.5 kB)
Downloading littleutils-0.2.4-py3-none-any.whl (8.1 kB)
Installin

In [2]:
#Import required libraries
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.utils import negative_sampling
from sklearn.metrics import roc_auc_score
import numpy as np
from ogb.linkproppred import LinkPropPredDataset, Evaluator


In [3]:
##Define GCN Model
class GCN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x


In [4]:
#Step 4: Load the Dataset
dataset = LinkPropPredDataset(name="ogbl-collab")
data = dataset[0]


Downloading http://snap.stanford.edu/ogb/data/linkproppred/collab.zip


Downloaded 0.11 GB: 100%|██████████| 117/117 [00:06<00:00, 17.67it/s]


Extracting dataset/collab.zip
Loading necessary files...
This might take a while.
Processing graphs...


100%|██████████| 1/1 [00:00<00:00, 29.37it/s]

Saving...





In [8]:
#Step 5: Move Data to the Device
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Convert numpy arrays to torch tensors and move them to the device
edge_index = torch.tensor(data['edge_index'], dtype=torch.long).to(device)
node_feat = torch.tensor(data['node_feat'], dtype=torch.float).to(device)  # Node features



In [9]:
#Step6: Initialize the model and optimizer
model = GCN(in_channels=node_feat.size(1), hidden_channels=64, out_channels=64).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)


In [12]:
#Generate Negative Samples
from torch_geometric.utils import negative_sampling

# Sample negative edges
num_nodes = node_feat.size(0)
neg_edge_index = negative_sampling(edge_index, num_nodes=num_nodes)


In [13]:
#Create Labels
import torch

# Create labels for positive and negative samples
num_pos_edges = edge_index.size(1)
num_neg_edges = neg_edge_index.size(1)

# True edges labeled as 1
pos_labels = torch.ones(num_pos_edges)

# False edges labeled as 0
neg_labels = torch.zeros(num_neg_edges)

# Concatenate both positive and negative labels
labels = torch.cat([pos_labels, neg_labels], dim=0).to(device)


In [14]:
#compute score and loss
# Forward pass
out = model(node_feat, edge_index)

# Get node embeddings for positive and negative edges
pos_out = out[edge_index[0]]  # Source node embeddings for positive edges
pos_out = torch.sum(pos_out * out[edge_index[1]], dim=1)  # Dot product for edge scores

neg_out = out[neg_edge_index[0]]  # Source node embeddings for negative edges
neg_out = torch.sum(neg_out * out[neg_edge_index[1]], dim=1)  # Dot product for edge scores

# Concatenate positive and negative edge scores
edge_scores = torch.cat([pos_out, neg_out], dim=0)

# Compute loss
criterion = torch.nn.BCEWithLogitsLoss()
loss = criterion(edge_scores, labels)


In [16]:
#Complete Training Loop
import torch
from torch_geometric.nn import GCNConv
from torch_geometric.utils import negative_sampling
from sklearn.metrics import roc_auc_score

class GCN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = torch.relu(x)
        x = self.conv2(x, edge_index)
        return x

# Prepare data
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
edge_index = torch.tensor(data['edge_index'], dtype=torch.long).to(device)
node_feat = torch.tensor(data['node_feat'], dtype=torch.float).to(device)

# Generate negative samples
num_nodes = node_feat.size(0)
neg_edge_index = negative_sampling(edge_index, num_nodes=num_nodes)

# Create labels
num_pos_edges = edge_index.size(1)
num_neg_edges = neg_edge_index.size(1)
pos_labels = torch.ones(num_pos_edges).to(device)
neg_labels = torch.zeros(num_neg_edges).to(device)
labels = torch.cat([pos_labels, neg_labels], dim=0).to(device)

# Initialize model, optimizer, and loss function
model = GCN(in_channels=node_feat.size(1), hidden_channels=64, out_channels=64).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.BCEWithLogitsLoss()

# Training loop
num_epochs = 100
for epoch in range(num_epochs):
    model.train()

    # Forward pass
    out = model(node_feat, edge_index)

    # Get node embeddings for positive and negative edges
    pos_out = out[edge_index[0]]
    pos_out = torch.sum(pos_out * out[edge_index[1]], dim=1)

    neg_out = out[neg_edge_index[0]]
    neg_out = torch.sum(neg_out * out[neg_edge_index[1]], dim=1)

    # Concatenate positive and negative edge scores
    edge_scores = torch.cat([pos_out, neg_out], dim=0)

    # Compute loss
    loss = criterion(edge_scores, labels)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print loss every few epochs
    if epoch % 2 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')

# Evaluation (optional)
model.eval()
with torch.no_grad():
    out = model(node_feat, edge_index)

    # Get embeddings for positive and negative edges
    pos_out = out[edge_index[0]]
    pos_out = torch.sum(pos_out * out[edge_index[1]], dim=1)

    neg_out = out[neg_edge_index[0]]
    neg_out = torch.sum(neg_out * out[neg_edge_index[1]], dim=1)

    # Compute metrics
    edge_scores = torch.cat([pos_out, neg_out], dim=0)
    predictions = torch.sigmoid(edge_scores).cpu().numpy()
    auc = roc_auc_score(labels.cpu().numpy(), predictions)
    print(f'AUC Score: {auc}')


Epoch 0, Loss: 0.7771801948547363
Epoch 2, Loss: 0.7187182903289795
Epoch 4, Loss: 0.6417094469070435
Epoch 6, Loss: 0.6535205245018005
Epoch 8, Loss: 0.636506199836731
Epoch 10, Loss: 0.6362978219985962
Epoch 12, Loss: 0.6177603602409363
Epoch 14, Loss: 0.601878821849823
Epoch 16, Loss: 0.5889580249786377
Epoch 18, Loss: 0.5724357962608337
Epoch 20, Loss: 0.5567289590835571
Epoch 22, Loss: 0.5433588027954102
Epoch 24, Loss: 0.5348495244979858
Epoch 26, Loss: 0.5260825157165527
Epoch 28, Loss: 0.5170064568519592
Epoch 30, Loss: 0.5099192261695862
Epoch 32, Loss: 0.5044182538986206
Epoch 34, Loss: 0.5002217888832092
Epoch 36, Loss: 0.49593910574913025
Epoch 38, Loss: 0.4918031096458435
Epoch 40, Loss: 0.4874316453933716
Epoch 42, Loss: 0.483011931180954
Epoch 44, Loss: 0.47899842262268066
Epoch 46, Loss: 0.4751671254634857
Epoch 48, Loss: 0.4719536304473877
Epoch 50, Loss: 0.46967098116874695
Epoch 52, Loss: 0.46855732798576355
Epoch 54, Loss: 0.4682421386241913
Epoch 56, Loss: 0.468051

In [18]:
# Assuming split_edge is already defined
val_edges = torch.tensor(split_edge['valid']['edge'], dtype=torch.long).to(device)
val_neg_edges = torch.tensor(split_edge['valid']['edge_neg'], dtype=torch.long).to(device)

# Forward pass with validation edges
model.eval()
with torch.no_grad():
    out = model(node_feat, edge_index)

    # Get embeddings for positive and negative validation edges
    pos_val_out = out[val_edges[:, 0]] * out[val_edges[:, 1]]
    pos_val_out = torch.sum(pos_val_out, dim=1)

    neg_val_out = out[val_neg_edges[:, 0]] * out[val_neg_edges[:, 1]]
    neg_val_out = torch.sum(neg_val_out, dim=1)

    # Combine and calculate the AUC
    val_scores = torch.cat([pos_val_out, neg_val_out], dim=0)
    val_labels = torch.cat([torch.ones(pos_val_out.size(0)), torch.zeros(neg_val_out.size(0))], dim=0).to(device)

    val_predictions = torch.sigmoid(val_scores).cpu().numpy()
    auc = roc_auc_score(val_labels.cpu().numpy(), val_predictions)
    print(f'Validation AUC: {auc}')


Validation AUC: 0.9236754544471073


In [19]:
# Assuming split_edge is already defined
val_edges = torch.tensor(split_edge['valid']['edge'], dtype=torch.long).to(device)
val_neg_edges = torch.tensor(split_edge['valid']['edge_neg'], dtype=torch.long).to(device)

# Forward pass with validation edges
model.eval()
# Use the validation or test set for evaluation
split_edge = dataset.get_edge_split()
val_edges = split_edge['valid']['edge'].to(device)
val_neg_edges = split_edge['valid']['edge_neg'].to(device)

with torch.no_grad():
    # Predict the scores for positive and negative edges
    pos_scores = torch.sigmoid(model(node_feat, val_edges.t()))
    neg_scores = torch.sigmoid(model(node_feat, val_neg_edges.t()))

    # Compute the Hits@K metric using the Evaluator class
    evaluator = Evaluator(name='ogbl-collab')
    results = evaluator.eval({
        'y_pred_pos': pos_scores,
        'y_pred_neg': neg_scores,
    })

    print(f"Hits@50: {results['hits@50']:.4f}")



  train = torch.load(osp.join(path, 'train.pt'))
  valid = torch.load(osp.join(path, 'valid.pt'))
  test = torch.load(osp.join(path, 'test.pt'))


AttributeError: 'numpy.ndarray' object has no attribute 'to'