__Imports__

In [1]:
import torch
import torch_geometric

from tqdm import tqdm
from torch.utils.data import random_split
from torch import Generator
from torch import optim
from model import LightGCN, train_model
# from model2 import LightGCN
from representations import convert_to_adj_matrix, convert_to_dense_adj_matrix, extract_interaction_matrix
from preprocessing import dataset, init_interaction_edges

* There are 610 users and 9724 movies in this dataset. 
* On average, users give a movie a 3.5/5 rating.

In [2]:
dataset.describe()

Unnamed: 0,userId,movieId,rating,timestamp
count,100836.0,100836.0,100836.0,100836.0
mean,325.127564,3101.735561,3.501557,1205946000.0
std,182.618491,2627.050983,1.042529,216261000.0
min,0.0,0.0,0.5,828124600.0
25%,176.0,900.0,3.0,1019124000.0
50%,324.0,2252.0,3.5,1186087000.0
75%,476.0,5095.25,4.0,1435994000.0
max,609.0,9723.0,5.0,1537799000.0


In [3]:
threshold = 3.5
dataset = dataset.where(dataset["rating"] >= threshold)
num_users = len(dataset["userId"].unique())
num_movies = len(dataset["movieId"].unique())

print(f"users: {num_users}, movies: {num_movies}")

users: 610, movies: 7364


__Create the graph__

In [4]:
locations, values = init_interaction_edges(dataset, "userId", "movieId", "rating", threshold)

print(locations, locations.size())
print(values, values.size())
print(values.min())

tensor([[   0,    0,    0,  ...,  609,  609,  609],
        [   0,    2,    5,  ..., 9443, 9444, 9445]]) torch.Size([2, 61716])
tensor([4., 4., 4.,  ..., 5., 5., 5.], dtype=torch.float64) torch.Size([61716])
tensor(3.5000, dtype=torch.float64)


In [5]:
num_interactions = values.shape[0]

print(f"edges: {num_interactions}, nodes: {num_movies+num_users}")

edges: 61716, nodes: 7974


__Split into test and train sets__

In [6]:
indices = list(range(num_interactions))

generator = Generator().manual_seed(42)
train_set_split, test_set_split = random_split(indices, [0.8, 0.2], generator=generator)

print(f"train: {len(train_set_split)} interactions")
print(f"test: {len(test_set_split)} interactions")
print((num_interactions) == (len(train_set_split) + len(test_set_split)))

train: 49373 interactions
test: 12343 interactions
True


In [7]:
train_indices = locations[:, train_set_split]
train_values = values[train_set_split]

test_indices = locations[:, test_set_split]
test_values = values[test_set_split]

print(test_indices, test_indices.size())
print(test_values, test_values.size())

tensor([[  94,   17,  248,  ...,  609,   73,  314],
        [1497, 7243,  461,  ..., 7233, 2789,  974]]) torch.Size([2, 12343])
tensor([4.0000, 4.5000, 4.5000,  ..., 4.5000, 4.5000, 4.0000],
       dtype=torch.float64) torch.Size([12343])


In [8]:
# 610, 9724 are the original counts

train_set = convert_to_adj_matrix(train_indices, 610, 9724, train_values)
test_set = convert_to_adj_matrix(test_indices, 610, 9724, test_values)

print(train_set)
print(test_set)

(tensor([[    0,     0,     0,  ..., 10330, 10332, 10333],
        [  610,   612,   615,  ...,   183,   183,   330]]), tensor([3.5000, 3.5000, 3.5000,  ..., 4.5000, 4.5000, 4.0000]))
(tensor([[    0,     0,     0,  ..., 10323, 10326, 10331],
        [  699,   935,   994,  ...,   183,   183,   183]]), tensor([4., 4., 4.,  ..., 4., 4., 4.]))


__Train the model__

In [9]:
ITERATIONS = 10000
EPOCHS = 10
BATCH_SIZE = 1024
LR = 1e-3
ITERS_PER_EVAL = 200
ITERS_PER_LR_DECAY = 200
K = 10
LAMBDA = 1e-6

In [10]:
model = LightGCN(num_users, num_movies)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using {device}.")

model = model.to(device)
model.train()

optimizer = optim.Adam(model.parameters(), lr=LR, weight_decay=0.01)
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)

locations = locations.to(device)
train_set_ind = train_set[0]
train_set_ind = train_set_ind.to(device)
train_set_sparse = torch.sparse_coo_tensor(indices=train_set_ind, values=train_set[1], size=(10334, 10334))
type(train_set_sparse)
# train_sparse = torch.sparse_coo_tensor(TRAIN_IND, TRAIN_VAL, size=(num_users+num_movies, num_users+num_movies))
# print(train_sparse)

Using cpu.


tensor(indices=tensor([[    0,     0,     0,  ..., 10330, 10332, 10333],
                       [  610,   612,   615,  ...,   183,   183,   330]]),
       values=tensor([3.5000, 3.5000, 3.5000,  ..., 4.5000, 4.5000, 4.0000]),
       size=(10334, 10334), nnz=98746, layout=torch.sparse_coo)

In [11]:
# train_edge_im, train_val_im = extract_interaction_matrix(TRAIN_IND, TRAIN_VAL, num_users, num_movies)
# validation_edge_im, validation_val_im = extract_interaction_matrix(VALIDATION_IND, VALIDATION_VAL, num_users, num_movies)
# test_edge_im, test_val_im = extract_interaction_matrix(TEST_IND, TEST_VAL, num_users, num_movies)

train_model(model, device, optimizer, scheduler, train_set_ind, train_set_sparse)

  0%|          | 0/10000 [00:00<?, ?it/s]

  0%|          | 0/10000 [00:00<?, ?it/s]


ValueError: `MessagePassing.propagate` only supports integer tensors of shape `[2, num_messages]`, `torch_sparse.SparseTensor` or `torch.sparse.Tensor` for argument `edge_index`.

In [None]:
test = torch.sparse_coo_tensor([[1,2], [3,4]])
type(test)