In [None]:
!pip install torch torchvision torch-geometric pandas

In [None]:
import pandas as pd
import torch
import torch.nn.functional as F
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import GCNConv


In [None]:
# Download the dataset
!wget "http://files.grouplens.org/datasets/movielens/ml-100k/u.data"

# Load data with pandas
df = pd.read_csv("u.data", sep="\t", header=None, names=["user", "item", "rating", "timestamp"])

--2023-10-14 08:30:39--  http://files.grouplens.org/datasets/movielens/ml-100k/u.data
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1979173 (1.9M)
Saving to: ‘u.data’


2023-10-14 08:30:40 (11.1 MB/s) - ‘u.data’ saved [1979173/1979173]



In [None]:
# Preview the first rows. Ending the cell with the bare expression lets the
# notebook render the frame with its rich table display instead of plain text.
df.head()

   user  item  rating  timestamp
0   196   242       3  881250949
1   186   302       3  891717742
2    22   377       1  878887116
3   244    51       2  880606923
4   166   346       1  886397596


In [None]:
# Process data
# Raw IDs in the file start at 1 (see the preview above); shift them so they
# can be used directly as 0-based node indices.
users = torch.tensor(df["user"].values, dtype=torch.long) - 1  # 0-indexed
items = torch.tensor(df["item"].values, dtype=torch.long) - 1  # 0-indexed

num_users = df["user"].nunique()
num_items = df["item"].nunique()

# The node layout (all users first, then all items) uses the number of distinct
# IDs as the offset. That is only collision-free when the IDs are contiguous,
# so fail loudly instead of silently building a corrupted graph.
assert int(users.max()) + 1 == num_users, "user IDs are not contiguous"
assert int(items.max()) + 1 == num_items, "item IDs are not contiguous"

# Item i becomes node (num_users + i), so user and item nodes never share an index.
user_to_item = torch.stack([users, items + num_users], dim=0)

# GCNConv propagates messages from edge_index[0] (source) to edge_index[1]
# (target). With user -> item edges only, user nodes would never receive any
# information from the items they rated, so add the reversed edges as well to
# make the bipartite graph undirected.
edge_index = torch.cat([user_to_item, user_to_item.flip(0)], dim=1)

# Create PyG Data object
# num_nodes is given explicitly so it does not have to be inferred from edge_index.
data = Data(edge_index=edge_index, num_nodes=num_users + num_items)


In [None]:
# GNN Model
class RecommenderGNN(torch.nn.Module):
    """Two stacked GCNConv layers with a ReLU between them.

    Args:
        num_nodes: Accepted for interface compatibility; not used by the
            layers created here.
        input_dim: Feature size passed to the first GCNConv layer.
        hidden_dim: Output size of the first layer and input size of the second.
        output_dim: Output size of the second GCNConv layer.
    """

    def __init__(self, num_nodes, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, output_dim)

    def forward(self, data):
        """Run both layers on ``data.x`` / ``data.edge_index`` and return the result.

        No activation is applied after the second layer.
        """
        features = data.x
        graph_edges = data.edge_index
        hidden = F.relu(self.conv1(features, graph_edges))
        return self.conv2(hidden, graph_edges)

# Model and data
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Seed before anything random is created (layer weights, node features) so a
# Restart & Run All reproduces the same numbers.
torch.manual_seed(42)

# One node per distinct user plus one per distinct item.
num_nodes = df["user"].nunique() + df["item"].nunique()
feature_dim = 32  # width of the random input features; must equal the model's input_dim

model = RecommenderGNN(num_nodes=num_nodes,
                       input_dim=feature_dim, hidden_dim=64, output_dim=32).to(device)

# Random initial features, one row per node.
data.x = torch.randn((num_nodes, feature_dim))

# Move the whole graph to the device, not just x: the model reads both data.x
# and data.edge_index, and leaving edge_index on the CPU fails on a CUDA device.
data = data.to(device)


In [None]:
# Train the GNN
# Create the optimizer in this cell, from the current model's parameters. None
# of the cells shown defines `optimizer`, so a fresh kernel would stop here with
# a NameError; and an optimizer left over from an earlier `model` instance would
# step parameters this model does not own, leaving the loss unchanged every epoch.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

model.train()
for epoch in range(30):
    optimizer.zero_grad()
    out = model(data)
    # Reconstruction objective: the model output is compared against its own
    # input features. This only type-checks because output_dim equals the
    # feature width of data.x.
    loss = F.mse_loss(out, data.x)  # Autoencoder approach
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch + 1}, Loss: {loss.item()}")

Epoch 1, Loss: 7.773842811584473
Epoch 2, Loss: 7.773842811584473
Epoch 3, Loss: 7.773842811584473
Epoch 4, Loss: 7.773842811584473
Epoch 5, Loss: 7.773842811584473
Epoch 6, Loss: 7.773842811584473
Epoch 7, Loss: 7.773842811584473
Epoch 8, Loss: 7.773842811584473
Epoch 9, Loss: 7.773842811584473
Epoch 10, Loss: 7.773842811584473
Epoch 11, Loss: 7.773842811584473
Epoch 12, Loss: 7.773842811584473
Epoch 13, Loss: 7.773842811584473
Epoch 14, Loss: 7.773842811584473
Epoch 15, Loss: 7.773842811584473
Epoch 16, Loss: 7.773842811584473
Epoch 17, Loss: 7.773842811584473
Epoch 18, Loss: 7.773842811584473
Epoch 19, Loss: 7.773842811584473
Epoch 20, Loss: 7.773842811584473
Epoch 21, Loss: 7.773842811584473
Epoch 22, Loss: 7.773842811584473
Epoch 23, Loss: 7.773842811584473
Epoch 24, Loss: 7.773842811584473
Epoch 25, Loss: 7.773842811584473
Epoch 26, Loss: 7.773842811584473
Epoch 27, Loss: 7.773842811584473
Epoch 28, Loss: 7.773842811584473
Epoch 29, Loss: 7.773842811584473
Epoch 30, Loss: 7.77384

In [None]:
# Make recommendations
model.eval()

num_users = df["user"].nunique()
top_k = 5

with torch.no_grad():
    embeddings = model(data)

    # Node layout: the first num_users rows are users, the remaining rows are items.
    user_embeddings = embeddings[:num_users]
    item_embeddings = embeddings[num_users:]

    # Dot-product score for every (user, item) pair.
    scores = torch.mm(user_embeddings, item_embeddings.t())

    # Exclude items each user has already rated; otherwise the top-k list mostly
    # hands back movies the user has seen. IDs in df are 1-based, hence the -1.
    seen_users = torch.as_tensor(df["user"].values - 1, dtype=torch.long, device=scores.device)
    seen_items = torch.as_tensor(df["item"].values - 1, dtype=torch.long, device=scores.device)
    scores[seen_users, seen_items] = float("-inf")

    # Guard against catalogues smaller than top_k. A user who has rated all but
    # fewer than top_k items would still get -inf (already seen) entries.
    k = min(top_k, scores.size(1))
    _, recommendations = scores.topk(k, dim=1)
    print(recommendations)  # These are item IDs (0-indexed) recommended for each user


tensor([[ 49, 180,  99, 257,   0],
        [ 49, 257,  99, 180, 293],
        [ 49, 180,  99, 257,   0],
        ...,
        [ 49,  99, 180, 287, 293],
        [ 49, 287, 293, 257,  99],
        [ 49, 180,  99,   0, 257]])
