<a href="https://colab.research.google.com/github/sidhu2690/Geometric-DL/blob/main/01_Shallow_Encoding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [29]:
import random

def generate_training_pairs(edges, num_nodes, num_negatives=2):
    pos_pairs = edges
    edge_set = set(edges)

    all_pairs = [(u, v) for u in range(num_nodes) for v in range(num_nodes) if u != v]
    possible_negatives = [pair for pair in all_pairs if pair not in edge_set]
    neg_pairs = random.choices(possible_negatives, k= num_negatives)

    return pos_pairs, neg_pairs


edges = [(0, 1), (1, 2), (2, 3)]
pos, neg = generate_training_pairs(edges, num_nodes=5, num_negatives=2)

print(f"Positive pairs:{pos}| Negative pairs: {neg}")

Positive pairs:[(0, 1), (1, 2), (2, 3)]| Negative pairs: [(4, 0), (3, 4)]


**Only a few negatives are sampled (k=num_negatives) to reduce computation and enable efficient stochastic training instead of using all possible negative pairs.**


In [67]:
import numpy as np


# Shallow embedding model
class NodeEmbeddingModel:
    def __init__(self, num_nodes, embedding_dim=4, lr=0.1):
        self.num_nodes = num_nodes
        self.embedding_dim = embedding_dim
        self.lr = lr
        self.embeddings = np.random.randn(num_nodes, embedding_dim)             # Initialize embeddings randomly

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def forward(self, u, v):
        return np.dot(self.embeddings[u], self.embeddings[v])

    def loss_and_grad(self, pairs, label):
        losses = []
        grads = np.zeros_like(self.embeddings)

        for u, v in pairs:
            score = self.forward(u, v)
            pred = self.sigmoid(score)

            # BCE loss: -[y log p + (1-y) log (1-p)]
            loss = -(label * np.log(pred + 1e-10) + (1 - label) * np.log(1 - pred + 1e-10))
            losses.append(loss)

            error = (pred - label)
            grads[u] += error * self.embeddings[v]
            grads[v] += error * self.embeddings[u]

        return np.mean(losses), grads / len(pairs)

    def train_step(self, pos_pairs, neg_pairs):
        pos_loss, pos_grads = self.loss_and_grad(pos_pairs, 1)
        neg_loss, neg_grads = self.loss_and_grad(neg_pairs, 0)

        total_loss = pos_loss + neg_loss
        grads = pos_grads + neg_grads

        # SGD
        self.embeddings -= self.lr * grads
        return total_loss


In [85]:
edges = [(0, 1), (1, 2), (2, 3)]
num_nodes = 5
embedding_dim = 3
model = NodeEmbeddingModel(num_nodes, embedding_dim, lr=0.2)

for epoch in range(500):
    pos_pairs, neg_pairs = generate_training_pairs(edges, num_nodes, num_negatives=4)
    loss = model.train_step(pos_pairs, neg_pairs)
    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss = {loss:.4f}")

Epoch 0, Loss = 1.2746
Epoch 100, Loss = 0.9397
Epoch 200, Loss = 0.8365
Epoch 300, Loss = 0.2181
Epoch 400, Loss = 0.1983


In [86]:
print(f"Final embeddings: {model.embeddings}")

Final embeddings: [[-1.73491092 -0.97579543  2.42168662]
 [-1.15953029 -1.51811553 -0.87107437]
 [ 1.05168399 -1.06459263 -1.30776315]
 [ 3.0695279  -0.11112977  0.56342709]
 [-0.99380832  3.57506184 -0.89103276]]
