In [1]:
from recsys.external_datasets import Movielens_1M
from recsys.datasets import InteractionsDataset
from pytorch_lightning import Trainer
from recsys.models.scoring import NCF
import pandas as pd
import torch
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


# Data processing

In [2]:
# Load the MovieLens-1M data through the project's dataset wrapper.
# `load()` yields three tables, unpacked in this order: ratings, users, movies
# (the following cells treat them as pandas DataFrames).
data = Movielens_1M()
ratings, users, movies = data.load()

In [3]:
# Minimum star rating that counts as a positive interaction (label 1).
POSITIVE_RATING_THRESHOLD = 3

# Preprocess users: replace each categorical feature with integer codes.
# pd.factorize returns (codes, uniques); only the codes are stored in the frame.
for column in ("gender", "occupation", "zip"):
    users[column], uniques = pd.factorize(users[column])
# Mark the id and the encoded features as pandas `category` dtype.
# Bracket access is used throughout so a column name can never be confused
# with a DataFrame attribute.
for column in ("user_id", "gender", "occupation", "zip"):
    users[column] = users[column].astype("category")

# Preprocess movies the same way: categories to integer codes ...
for column in ("title", "genres"):
    users_or_movies_codes, uniques = pd.factorize(movies[column])
    movies[column] = users_or_movies_codes
# ... then set the category dtype on the id and the encoded features.
for column in ("movie_id", "title", "genres"):
    movies[column] = movies[column].astype("category")

# Binarize the explicit star ratings into an interaction label:
# rating >= threshold -> 1, anything lower -> 0.
# Vectorized comparison instead of a per-row Python lambda (same int64 result).
# NOTE: this overwrites `ratings["rating"]` in place, so the cell is not
# idempotent -- re-running it on already-binarized labels maps everything to 0.
# Re-run the data-loading cell first if this cell has to be executed again.
ratings["rating"] = (ratings["rating"] >= POSITIVE_RATING_THRESHOLD).astype("int64")

In [4]:
# Fix the global RNG seeds before any stochastic step so that
# "Restart Kernel -> Run All" yields repeatable training results.
# NOTE(review): assumes the recsys package draws its randomness (negative
# sampling, weight initialisation, batching) from the NumPy / PyTorch global
# generators -- confirm; if it uses Python's `random`, seed that as well.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Combine the interactions with the user and item feature tables.
# `item_id` / `interaction_id` name the item-key and label columns;
# `sample_negatives=3` is passed through unchanged (presumably the number of
# negative samples per positive interaction -- verify against InteractionsDataset).
dataset = InteractionsDataset(
    ratings,
    users,
    movies,
    item_id="movie_id",
    interaction_id="rating",
    sample_negatives=3,
)

# Scoring step

### Train

In [5]:
# Instantiate the NCF scoring model (presumably Neural Collaborative Filtering),
# configured from the schema exposed by the dataset.
scorer = NCF(dataset.data_schema)

In [6]:
# Train the scorer on the interactions dataset for a single epoch.
scorer.fit(dataset=dataset, num_epochs=1)

Epoch: 0/1, Loss: 0.45: 100%|█████████████████████| 1/1 [00:24<00:00, 24.78s/it]


In [7]:
# Take the first five rows (DataFrame.head() default) of each feature table,
# convert them to tensors and run them through the scorer's encoders.
# After this cell `test_users` / `test_items` hold the encoded representations,
# not the raw feature rows.
test_users = scorer.encode_user(torch.tensor(users.head().values))
test_items = scorer.encode_item(torch.tensor(movies.head().values))

In [8]:
# Score users and items pairwise: the i-th user vector is scored against the
# i-th item vector, giving one score per pair (5 pairs -> 5 scores here).
scorer.score(test_users, test_items)

tensor([ 1.4168, -0.4882, -1.3726, -1.5232, -1.0343],
       grad_fn=<SqueezeBackward0>)

In [9]:
# Score every item vector against every user vector, producing a 5 x 5 score
# matrix here; its diagonal matches the pairwise scores from `scorer.score`.
# NOTE(review): presumably rows index users and columns index items -- confirm.
scorer.batch_score(test_users, test_items)

tensor([[ 1.4168, -0.4253, -0.9619, -1.5268, -0.7329],
        [ 1.0060, -0.4882, -1.1246, -1.3082, -1.0096],
        [ 0.8785, -0.5913, -1.3726, -1.5357, -1.0929],
        [ 0.9753, -0.5774, -1.3251, -1.5232, -1.0801],
        [ 0.8261, -0.6391, -1.2776, -1.4907, -1.0343]],
       grad_fn=<StackBackward0>)

### Metrics