In [1]:
from recsys.models.retrieval import DeepRetriever
from recsys.external_datasets import Movielens_1M
from recsys.datasets import InteractionsDataset
from pytorch_lightning import Trainer
from recsys.models.scoring import NCF
import pandas as pd
import torch
import numpy as np
from recsys.layers import retrieve_nearest_neighbors

In [2]:
# Load the MovieLens 1M data through the project's Movielens_1M helper.
# load() returns three objects, unpacked here as ratings, users and movies
# (presumably pandas DataFrames — TODO confirm against Movielens_1M.load).
data = Movielens_1M()
ratings, users, movies = data.load()

# Data processing

In [3]:
def encode_as_category(frame, columns):
    """Replace each listed column of ``frame`` with integer category codes, in place.

    Every column is first mapped to integer codes with ``pd.factorize`` (codes
    are assigned in order of first appearance) and then cast to the pandas
    ``category`` dtype.

    Args:
        frame: DataFrame that is modified in place.
        columns: Names of the columns to encode.
    """
    for column in columns:
        # factorize returns (codes, uniques); only the codes are kept.
        frame[column] = pd.factorize(frame[column])[0]
        frame[column] = frame[column].astype("category")


# Preprocess users: categorical values -> integer codes with category dtype.
encode_as_category(users, ["gender", "occupation", "zip"])

# Preprocess movies: categorical values -> integer codes with category dtype.
encode_as_category(movies, ["title", "genres"])

# Turn every explicit rating into a binary interaction label:
# 1 when the rating is >= POSITIVE_RATING_THRESHOLD, otherwise 0.
POSITIVE_RATING_THRESHOLD = 3

# Guard against re-running this cell: once the column only holds 0/1 labels,
# thresholding it again would silently turn every label into 0.
if not ratings["rating"].isin([0, 1]).all():
    # Vectorized comparison; produces the same 0/1 integers as a row-wise apply.
    ratings["rating"] = (ratings["rating"] >= POSITIVE_RATING_THRESHOLD).astype("int64")

In [4]:
# Bundle the three tables into the project's InteractionsDataset.
# item_id / interaction_id name the columns "movie_id" and "rating".
# NOTE(review): sample_negatives=3 presumably sets how many negatives are
# sampled per interaction — confirm against InteractionsDataset.
dataset = InteractionsDataset(
    ratings,
    users,
    movies,
    item_id="movie_id",
    interaction_id="rating",
    sample_negatives=3,
)

# Retrieval step

In [5]:
# Instantiate the retrieval model from the schema exposed by the dataset.
retriever = DeepRetriever(dataset.data_schema)

In [6]:
# Train the retriever on the interactions dataset for a single epoch.
retriever.fit(dataset=dataset, num_epochs=1)

Epoch: 0/1, Loss: 0.58: 100%|█████████████████████| 1/1 [00:15<00:00, 15.51s/it]


In [7]:
# Ask the trained retriever for the item and user representations.
# Each call returns a pair that is unpacked as (alias, representations).
item_alias, item_representations = retriever.generate_item_representations(dataset)
user_alias, user_representations = retriever.generate_user_representations(dataset)

# Backward-compatible binding for the earlier misspelled name `user_alis`;
# safe to delete once no cell refers to it.
user_alis = user_alias

  items_features = torch.tensor(items_features)


In [8]:
# Look up the nearest item representations for a single user: the query is
# the entry at index 1 of user_representations.
# NOTE(review): the result presumably holds positions into
# item_representations rather than raw movie ids — confirm, and map through
# item_alias if so.
retrieve_nearest_neighbors(candidates=item_representations, query=user_representations[1])

tensor([1779, 2068,  249, 3349,   67, 2265, 2091,  323, 1588, 2170, 2782, 2187,
         898, 1720])

# Scoring step

In [9]:
# Instantiate the scoring model (NCF from recsys.models.scoring) from the
# same dataset schema used for the retriever.
scorer = NCF(dataset.data_schema)

In [10]:
# Train the scorer on the interactions dataset for a single epoch.
scorer.fit(dataset=dataset, num_epochs=1)

Epoch: 0/1, Loss: 0.43: 100%|█████████████████████| 1/1 [00:13<00:00, 13.92s/it]
