In [1]:
from recsys.external_datasets import Movielens_1M
from recsys.datasets import InteractionsDataset
from pytorch_lightning import Trainer
from recsys.models.scoring import NCF
import pandas as pd
import torch
import numpy as np
from recsys.layers import retrieve_nearest_neighbors

# Data processing

In [2]:
# Instantiate the MovieLens 1M loader and unpack the three objects returned by load().
# Presumably these are pandas DataFrames of ratings, user features and movie features
# (later cells index them by column name) -- TODO confirm against Movielens_1M.
data = Movielens_1M()
ratings, users, movies = data.load()

In [3]:
# Preprocess users and movies: integer-encode the categorical feature columns and
# give them the pandas `category` dtype, then binarise the ratings.
#
# NOTE(review): this cell rewrites `users`, `movies` and `ratings` in place, so it is
# not idempotent. After one run the ratings are already 0/1, and running the cell a
# second time would map every rating to 0 (neither value is >= 3). Re-run the loading
# cell first if this cell has to be executed again.

# Categorical feature columns, per frame.
USER_CATEGORICAL_COLUMNS = ("gender", "occupation", "zip")
MOVIE_CATEGORICAL_COLUMNS = ("title", "genres")

for frame, columns in (
    (users, USER_CATEGORICAL_COLUMNS),
    (movies, MOVIE_CATEGORICAL_COLUMNS),
):
    for column in columns:
        # pd.factorize returns (integer codes, unique labels); the codes replace the
        # raw values. `uniques` ends up holding the labels of the last encoded column.
        codes, uniques = pd.factorize(frame[column])
        frame[column] = codes
        # Set category dtype. Bracket access is used instead of attribute access so a
        # column name can never be confused with a DataFrame attribute or method.
        frame[column] = frame[column].astype("category")

# Make all ratings an implicit interaction: 1 when the rating is >= 3, otherwise 0.
# A vectorised comparison replaces the row-by-row Python lambda.
RATING_THRESHOLD = 3
ratings["rating"] = (ratings["rating"] >= RATING_THRESHOLD).astype("int64")

In [4]:
# Combine the ratings, users and movies frames into a single dataset object.
# "movie_id" is passed as item_id and "rating" (binarised to 0/1 in the preprocessing cell)
# as interaction_id.
# NOTE(review): sample_negatives=3 presumably requests 3 sampled negatives per positive
# interaction -- confirm against InteractionsDataset.
dataset = InteractionsDataset(ratings, users, movies, item_id="movie_id", interaction_id="rating", sample_negatives=3)

# Scoring step

### Train

In [9]:
# Build the scoring model from the schema exposed by the dataset.
# NOTE(review): NCF presumably stands for Neural Collaborative Filtering -- confirm in
# recsys.models.scoring.
scorer = NCF(dataset.data_schema)

In [10]:
# Fit the scorer on the dataset for a single epoch (num_epochs=1).
# NOTE(review): no random seed is set in the visible cells, so the reported loss may not
# be reproducible across runs -- consider seeding torch/numpy before training.
scorer.fit(dataset=dataset, num_epochs=1)

Epoch: 0/1, Loss: 0.41: 100%|█████████████████████| 1/1 [00:22<00:00, 22.02s/it]


In [11]:
# Score the first ten (user, item) candidates with the trained scorer.
#
# NOTE(review): `user_alias` and `retrieved_items` are not defined in any visible cell;
# they presumably come from a retrieval step that is missing here -- confirm, otherwise
# this cell fails on a fresh kernel.
# NOTE(review): `users` is rebound here to a ten-element slice, shadowing the users
# frame loaded at the top of the notebook.
users, items = user_alias[:10], retrieved_items[:10]

# Feature lookups for the selected ids (passed as plain Python lists).
users_features, items_features = (
    dataset.get_user_features(users.tolist()),
    dataset.get_item_features(items.tolist()),
)

# Stack the two id sequences and transpose; assuming both are 1-D id tensors this
# yields one (user, item) row per candidate -- TODO confirm shapes.
pairs = torch.stack((users, items)).T

scorer.score(pairs, users_features, items_features)

tensor([-0.4980,  0.5163, -0.4679,  0.0602, -4.1169, -0.9351,  0.3336, -0.0893,
        -0.1404,  0.6188], grad_fn=<SqueezeBackward0>)

### Evaluate

In [16]:
# Score the same users/items through batch_score, reusing the id slices and feature
# lookups created in the scoring cell above.
# NOTE(review): `users` here is the ten-element slice from that cell, not the users frame
# loaded at the top of the notebook; `r` is never displayed or used in the visible cells.
r = scorer.batch_score(users, items, users_features, items_features)

### Metrics