In [1]:
from recsys.models.retrieval import DeepRetriever
from recsys.external_datasets import Movielens_1M
from recsys.datasets import InteractionsDataset
from pytorch_lightning import Trainer
from recsys.models.scoring import NCF
import pandas as pd
import torch
import numpy as np
from recsys.layers import retrieve_nearest_neighbors

# Data processing

In [2]:
data = Movielens_1M()
ratings, users, movies = data.load()

In [3]:
#Preprocess users
users['gender'], uniques = pd.factorize(users['gender'])
users['occupation'], uniques = pd.factorize(users['occupation'])
users['zip'], uniques = pd.factorize(users['zip'])
##Set category dtype
users['gender'] = users.gender.astype('category')
users['occupation'] = users.occupation.astype('category')
users['zip'] = users.zip.astype('category')

#Preprocess movies, 
##categories to index
movies['title'], uniques = pd.factorize(movies['title'])
movies['genres'], uniques = pd.factorize(movies['genres'])
##Set category dtype
movies['title'] = movies.title.astype('category')
movies['genres'] = movies.genres.astype('category')

#Make all ratings an implicit interaction
ratings["rating"] = ratings["rating"].apply(lambda x: 1 if x>=3 else 0)

In [4]:
dataset = InteractionsDataset(ratings, users, movies, item_id="movie_id", interaction_id="rating", sample_negatives=3)

# Retrieval Step

In [5]:
retriever = DeepRetriever(dataset.data_schema)

In [6]:
retriever.fit(dataset=dataset, num_epochs=1)

Epoch: 0/1, Loss: 0.49: 100%|█████████████████████| 1/1 [00:26<00:00, 26.30s/it]


In [7]:
item_alias, item_representations = retriever.generate_item_representations(dataset)
user_alias, user_representations = retriever.generate_user_representations(dataset)

  items_features = torch.tensor(items_features)


In [8]:
nearest_neighbors_idx = retrieve_nearest_neighbors(candidates=item_representations, query=user_representations[0]).tolist()
retrieved_items = item_alias[nearest_neighbors_idx]
retrieved_items

tensor([ 583, 3435, 2289,  914, 2248, 3364,  922,  904, 3403, 2405,  903, 3670,
        1393,  587])

# Scoring step

### Train

In [9]:
scorer = NCF(dataset.data_schema)

In [10]:
scorer.fit(dataset=dataset, num_epochs=1)

Epoch: 0/1, Loss: 0.44: 100%|█████████████████████| 1/1 [00:21<00:00, 21.08s/it]


In [14]:
users = user_alias[:10]
items = retrieved_items[:10]

users_features = dataset.get_user_features(users.tolist())
items_features = dataset.get_item_features(items.tolist())

pairs = torch.stack([users, items]).T

scorer.score(pairs, users_features, items_features)

tensor([[-2.5361],
        [-0.6979],
        [-0.1836],
        [ 0.2111],
        [ 0.0242],
        [-0.9390],
        [-0.7528],
        [-0.2695],
        [-3.0460],
        [-0.2613]], grad_fn=<AddmmBackward0>)

### Evaluate

In [17]:
scorer.batch_score(users, items, users_features, items_features)

tensor([[[-2.5361],
         [-0.7800],
         [-0.1710],
         [-0.4011],
         [-0.4500],
         [-1.6668],
         [-0.7591],
         [-0.6417],
         [-3.3509],
         [-1.1106]],

        [[-1.8759],
         [-0.6979],
         [ 0.4413],
         [ 0.2015],
         [ 0.5549],
         [-1.1568],
         [-0.5454],
         [ 0.0826],
         [-3.3898],
         [-0.4027]],

        [[-2.2840],
         [-1.0458],
         [-0.1836],
         [-0.2404],
         [ 0.0740],
         [-1.5316],
         [-0.7132],
         [-0.8216],
         [-3.5295],
         [-0.6286]],

        [[-1.9430],
         [-0.3315],
         [ 0.2041],
         [ 0.2111],
         [ 0.7172],
         [-1.0178],
         [-0.3352],
         [-0.2772],
         [-3.0965],
         [-0.2929]],

        [[-1.8561],
         [-1.0895],
         [-0.6017],
         [-0.1620],
         [ 0.0242],
         [-1.2198],
         [-0.7627],
         [-0.6032],
         [-3.1316],
         [-0