In [1]:
from torchrecsys.models.scoring import ALS
from torchrecsys.external_datasets import Movielens_1M
from torchrecsys.datasets import InteractionsDataset
from pytorch_lightning import Trainer
import pandas as pd
import torch
import numpy as np
from torch.utils.data import DataLoader

In [2]:
# Load MovieLens 1M through the torchrecsys helper. `load()` is unpacked into
# the three objects (ratings, users, movies) that the following cells consume.
data = Movielens_1M()
ratings, users, movies = data.load()

In [3]:
# Encode the categorical side features as integer codes stored with pandas'
# `category` dtype. Each column is first replaced by its `pd.factorize` codes
# and then cast, for the user frame first and the movie frame second.

# User features
for user_column in ("gender", "occupation", "zip"):
    codes, uniques = pd.factorize(users[user_column])
    users[user_column] = codes
    users[user_column] = users[user_column].astype("category")

# Movie features
for movie_column in ("title", "genres"):
    codes, uniques = pd.factorize(movies[movie_column])
    movies[movie_column] = codes
    movies[movie_column] = movies[movie_column].astype("category")

In [4]:
# Combine the ratings with the user and movie side features into a single
# interactions dataset. `item_id` / `interaction_id` name the columns to use
# (presumably the item key and the training target — confirm against
# torchrecsys' InteractionsDataset).
dataset = InteractionsDataset(ratings, users, movies, item_id="movie_id", interaction_id="rating")

# Shuffle every epoch so mini-batches are not drawn in the stored order of the
# ratings table, which would feed SGD long runs of correlated interactions.
# NOTE(review): `shuffle=True` requires a map-style dataset (__len__ and
# __getitem__); this assumes InteractionsDataset is one — confirm.
train = DataLoader(dataset, batch_size=512, shuffle=True)

In [5]:
# Build the ALS scoring model from the schema exposed by the interactions dataset.
model = ALS(dataset.data_schema)

In [6]:
# Fit the model for a single epoch. Only the training loader is passed, so
# Lightning skips the validation loop (see the warning in the output below).
trainer = Trainer(max_epochs=1)
trainer.fit(model, train)

  return torch._C._cuda_getDeviceCount() > 0
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  rank_zero_warn("You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.")

  | Name           | Type       | Params
----------------------------------------------
0 | user_features  | ModuleList | 27.7 K
1 | item_features  | ModuleList | 33.5 K
2 | user_embedding | Embedding  | 386 K 
3 | item_embedding | Embedding  | 288 K 
4 | user_biases    | Embedding  | 6.0 K 
5 | item_biases    | Embedding  | 4.0 K 
6 | criterion      | MSELoss    | 0     
----------------------------------------------
746 K     Trainable params
0         Non-trainable params
746 K     Total params
2.986     Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

## Let's rank the movies for every user.

We are now going to score every movie for every user with the trained model and keep the 100 highest-scoring movies for each user as their recommendations.

In [9]:
from tqdm.auto import tqdm  # notebook widget when available, plain-text bar otherwise

TOP_K = 100  # number of recommendations kept per user

all_user_ids = np.array(list(dataset.user_features.keys()))
all_item_ids = np.array(list(dataset.item_features.keys()))
n_items = len(all_item_ids)

results = {}  # user_id -> array of the TOP_K best-scoring item ids, best first

# Inputs that are identical for every user are built once, outside the loop.
context = torch.tensor([[] for item_id in all_item_ids])  # no context features: one zero-width row per item
item_features = torch.tensor(np.array([dataset.item_features[item_id] for item_id in all_item_ids]))
item_id_column = torch.tensor(all_item_ids)

# Inference only: put the module in eval mode and skip autograd bookkeeping so
# scoring every (user, item) pair does not build a graph on each forward pass.
model.eval()
with torch.no_grad():
    for user_id in tqdm(all_user_ids):
        # (user_id, item_id) pairs for this user against every candidate item.
        user_column = torch.full_like(item_id_column, int(user_id))
        pairs = torch.stack((user_column, item_id_column), dim=1)

        # This user's feature row repeated once per candidate item.
        user_row = np.asarray(dataset.user_features[user_id])
        user_features = torch.tensor(np.repeat(user_row[np.newaxis, ...], n_items, axis=0))

        # Flatten so the ranking is over items whether the model returns
        # shape (n_items,) or (n_items, 1).
        scores = model(pairs, context, user_features, item_features).cpu().numpy().reshape(-1)
        ordered_index = np.argsort(scores)[::-1]  # descending score
        results[user_id] = all_item_ids[ordered_index[:TOP_K]]

  0%|          | 0/6040 [00:00<?, ?it/s]