__Imports__

In [10]:
import torch
import torch_geometric

from torch.utils.data import random_split
from torch import Generator
from torch_geometric.nn.conv.gcn_conv import gcn_norm
from model import LightGCN
from representations import build_interaction_matrix_from_edges, convert_to_adj_matrix, convert_to_dense_adj_matrix, extract_interaction_matrix
from preprocessing import dataset, init_interaction_edges

* There are 610 users and 9724 movies in this dataset. 
* On average, users give a movie a 3.5/5 rating.

In [2]:
dataset.describe()

Unnamed: 0,userId,movieId,rating,timestamp
count,100836.0,100836.0,100836.0,100836.0
mean,325.127564,3101.735561,3.501557,1205946000.0
std,182.618491,2627.050983,1.042529,216261000.0
min,0.0,0.0,0.5,828124600.0
25%,176.0,900.0,3.0,1019124000.0
50%,324.0,2252.0,3.5,1186087000.0
75%,476.0,5095.25,4.0,1435994000.0
max,609.0,9723.0,5.0,1537799000.0


__Create the graph__

In [3]:
locations, values = init_interaction_edges(dataset, "userId", "movieId", "rating", 1)

print(locations, locations.size())
print(values, values.size())

tensor([[   0,    0,    0,  ...,  609,  609,  609],
        [   0,    2,    5,  ..., 9444, 9445, 9485]]) torch.Size([2, 99466])
tensor([4., 4., 4.,  ..., 5., 5., 3.], dtype=torch.float64) torch.Size([99466])


In [4]:
num_interactions = values.shape[0]
num_users = 610
num_movies = 9724

print(f"edges: {num_interactions}, nodes: {num_movies+num_users}")

edges: 99466, nodes: 10334


__Split into test, train, validation sets__

In [5]:
indices = list(range(num_interactions))

generator = Generator().manual_seed(42)
train_set, test_set, valid_set = random_split(indices, [0.8, 0.1, 0.1], generator=generator)

print(f"train: {len(train_set)} interactions")
print(f"test: {len(test_set)} interactions")
print(f"validation: {len(valid_set)} interactions")

train: 79573 interactions
test: 9947 interactions
validation: 9946 interactions


In [6]:
train_indices = locations[:, train_set]
train_values = values[train_set]

test_indices = locations[:, test_set]
test_values = values[test_set]

valid_indices = locations[:, valid_set]
valid_values = values[valid_set]

print(train_indices, train_indices.size())
print(train_values, train_values.size())

tensor([[ 473,  186,  595,  ...,  306,  274,  218],
        [3377, 6019, 7022,  ..., 3283, 2145, 1145]]) torch.Size([2, 79573])
tensor([2.5000, 4.0000, 3.5000,  ..., 4.0000, 4.0000, 2.5000],
       dtype=torch.float64) torch.Size([79573])


In [7]:
train_ind, train_val = convert_to_adj_matrix(train_indices, num_users, num_movies, train_values)
test_ind, test_val = convert_to_adj_matrix(test_indices, num_users, num_movies, test_values)
validation_ind, validation_val = convert_to_adj_matrix(valid_indices, num_users, num_movies, valid_values)

print(train_ind.size())
print(test_ind.size())
print(validation_ind.size())

torch.Size([2, 159146])
torch.Size([2, 19894])
torch.Size([2, 19892])


__Train the model__

In [13]:
# users = max(test_ind[0].unique())+1
# movies = max(test_ind[1].unique())+1

# U = build_interaction_matrix_from_edges(test_ind, users, movies, test_val, use_value=False)
# I = build_interaction_matrix_from_edges(test_ind, movies, users, test_val, use_value=False)
# D_u = torch.diag(torch.sum(U, dim=1))
# D_i = torch.diag(torch.sum(I, dim=1))

# Di_inv_sqrt = torch.pow(D_i, -0.5)
# Di_inv_sqrt = torch.where(Di_inv_sqrt == float('inf'), 0, Di_inv_sqrt)

# Du_inv_sqrt = torch.pow(D_u, -0.5)
# Du_inv_sqrt = torch.where(Du_inv_sqrt == float('inf'), 0, Du_inv_sqrt)

# A_tilde = torch.mm(Di_inv_sqrt, I)
# A_tilde = torch.mm(A_tilde, Du_inv_sqrt)

print(gcn_norm(locations, add_self_loops=False))

(tensor([[   0,    0,    0,  ...,  609,  609,  609],
        [   0,    2,    5,  ..., 9444, 9445, 9485]]), tensor([0.0047, 0.0096, 0.0068,  ..., 0.0592, 0.0459, 0.1325]))
