In [None]:
%pip install wandb -Uqq

In [None]:
%pip install nmslib -Uqq

[K     |████████████████████████████████| 13.5 MB 5.1 MB/s 
[K     |████████████████████████████████| 188 kB 47.6 MB/s 
[?25h

In [None]:
%pip install LibRecommender -Uqq

[K     |████████████████████████████████| 2.0 MB 5.2 MB/s 
[K     |████████████████████████████████| 24.1 MB 1.6 MB/s 
[?25h

In [None]:
%pip install faiss==1.5.3 -Uqq

[K     |████████████████████████████████| 4.7 MB 4.9 MB/s 
[?25h

In [None]:
%pip install ujson redis -Uqq

[K     |████████████████████████████████| 45 kB 2.2 MB/s 
[K     |████████████████████████████████| 246 kB 8.7 MB/s 
[?25h

In [None]:
# Log this session in to Weights & Biases.
# NOTE(review): no wandb.init()/wandb.log() call is visible in this part of the
# notebook — confirm the login is still needed (the saved output ends with a
# wandb sweep URL, so tracking code may live elsewhere).
import wandb
wandb.login()

In [None]:
import os
from pathlib import Path

import numpy as np
import pandas as pd

from libreco.data import random_split, split_by_ratio_chrono, DatasetPure
from libreco.algorithms import Item2Vec   # pure data, algorithm Item2Vec
from libreco.evaluation import evaluate

import warnings
# Suppress every warning for the rest of the session to keep cell output short.
# NOTE(review): the two calls below appear redundant (each installs a blanket
# 'ignore' filter), and they also hide deprecation notices — confirm intended.
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

In [None]:
from libserving.serialization import save_embed

In [None]:
# Directory under which every trained Item2Vec run is stored
# (presumably a mounted Google Drive folder in Colab — verify it is mounted).
MODEL_PATH = "/content/drive/MyDrive/Colab Notebooks/00-MTC/models/Item2Vec"
# Create only the leaf directory when it is missing; parent folders must
# already exist, otherwise this raises FileNotFoundError.
Path(MODEL_PATH).mkdir(exist_ok=True)
# Prefix used when naming the per-run output directories.
MODEL_NAME = 'Item2Vec'

In [None]:
# Download the ratings table and give its four columns the names used below.
data = pd.read_csv("https://s3-us-west-2.amazonaws.com/recommender-tutorial/ratings.csv")
data.columns = ["user", "item", "label", "time"]
# Overwrite every rating with 1, so each recorded interaction counts as a positive.
data["label"] = 1

# Split into train / eval / test frames with ratios 0.8 / 0.1 / 0.1.
train_data, eval_data, test_data = split_by_ratio_chrono(
    data,
    multi_ratios=[0.8, 0.1, 0.1],
)

# Build the libreco datasets; build_trainset also returns data_info, which the
# negative sampling below and the model cells rely on.
train_data, data_info = DatasetPure.build_trainset(train_data)
eval_data = DatasetPure.build_evalset(eval_data)
test_data = DatasetPure.build_testset(test_data)

# Sample negative items for each record, in train -> eval -> test order.
for subset in (train_data, eval_data, test_data):
    subset.build_negative_samples(data_info)

# The recorded run printed: n_users: 610, n_items: 8237, data sparsity: 1.6056 %
print(data_info)

random neg item sampling elapsed: 0.095s
random neg item sampling elapsed: 0.011s
random neg item sampling elapsed: 0.010s
n_users: 610, n_items: 8237, data sparsity: 1.6056 %


In [None]:
# Length of the longest per-user history in data_info.user_consumed, plus 5.
# The recorded training log reports "window size: 2163", which equals the
# value displayed by this cell.
longest_history = max(map(len, data_info.user_consumed.values()))
result = longest_history + 5
result

2163

In [None]:
from types import SimpleNamespace

# Hyper-parameters of this run, exposed as attributes (cfg.embed_size, ...).
cfg = SimpleNamespace(
    embed_size=16,
    n_epochs=3,
    norm_embed=False,
    window_size=None,  # with None, the recorded log reports "window size: 2163"
    k=10,              # cutoff for the top-k metrics (precision@k, recall@k, ...)
)

metrics = ["loss", "roc_auc", "precision", "recall", "map", "ndcg"]

# Train Item2Vec on the ranking task, reporting the metrics on eval_data.
model = Item2Vec(
    task="ranking",
    data_info=data_info,
    embed_size=cfg.embed_size,
    n_epochs=cfg.n_epochs,
    norm_embed=cfg.norm_embed,
    window_size=cfg.window_size,
)
# Pass cfg.k explicitly so that changing it in the config actually takes effect
# (it was previously defined but never used).
model.fit(train_data, verbose=2, eval_data=eval_data, metrics=metrics, k=cfg.k)

# Keep and display the held-out test metrics. The call's return value used to
# be discarded mid-cell, so the test results never appeared in the output.
test_metrics = evaluate(model=model, data=test_data, metrics=metrics, k=cfg.k)
print(test_metrics)

# Per-run output directory, named after the model and its hyper-parameters.
name_str = (
    f'{MODEL_NAME}'
    f'_n_epoch_{cfg.n_epochs}_'
    f'emb_size_{cfg.embed_size}_norm_embed_{cfg.norm_embed}_'
    f'window_size_{cfg.window_size}'
)
current_dir = os.path.join(MODEL_PATH, name_str)
# exist_ok=True makes re-running the cell safe without a separate isdir check.
os.makedirs(current_dir, exist_ok=True)

# Persist the embeddings and the model itself into the run directory.
save_embed(current_dir, model)
model.save(path=current_dir, model_name=MODEL_NAME, manual=True, inference_only=True)

[31mwindow size: 2163, using too large window size may slow down training.[0m
Training start time: [35m2022-11-06 10:54:16[0m


Item2vec iter0: 100%|██████████| 610/610 [00:00<00:00, 19501.40it/s]
Item2vec iter1: 100%|██████████| 610/610 [00:04<00:00, 132.13it/s]
Item2vec iter2: 100%|██████████| 610/610 [00:04<00:00, 134.63it/s]
Item2vec iter3: 100%|██████████| 610/610 [00:04<00:00, 127.38it/s]


gensim word2vec training elapsed: 79.657s


eval_pred: 100%|██████████| 3/3 [00:00<00:00, 315.96it/s]
eval_rec: 100%|██████████| 610/610 [00:00<00:00, 1741.43it/s]


	 eval log_loss: 1.1418
	 eval roc_auc: 0.4442
	 eval precision@10: 0.0005
	 eval recall@10: 0.0001
	 eval map@10: 0.0035
	 eval ndcg@10: 0.0038


eval_pred: 100%|██████████| 3/3 [00:00<00:00, 380.37it/s]
eval_rec: 100%|██████████| 608/608 [00:00<00:00, 1996.16it/s]


https://wandb.ai/iloncka-ds/recsys-app/sweeps/tw52paxu?workspace=user-iloncka-ds