In [None]:
%pip install wandb -Uqq

In [None]:
%pip install nmslib -Uqq

[K     |████████████████████████████████| 13.5 MB 5.1 MB/s 
[K     |████████████████████████████████| 188 kB 47.6 MB/s 
[?25h

In [None]:
%pip install LibRecommender -Uqq

[K     |████████████████████████████████| 2.0 MB 5.2 MB/s 
[K     |████████████████████████████████| 24.1 MB 1.6 MB/s 
[?25h

In [None]:
%pip install faiss==1.5.3 -Uqq

[K     |████████████████████████████████| 4.7 MB 4.9 MB/s 
[?25h

In [None]:
%pip install ujson redis -Uqq

[K     |████████████████████████████████| 45 kB 2.2 MB/s 
[K     |████████████████████████████████| 246 kB 8.7 MB/s 
[?25h

In [None]:
# Import the Weights & Biases client and log in once, before any cell that would use it.
# NOTE(review): presumably this prompts for (or reads a stored) API key — confirm.
# None of the cells visible here call wandb after this login; check whether it is
# still needed or whether the tracking/sweep code lives elsewhere.
import wandb
wandb.login()

In [None]:
import os
from pathlib import Path

import numpy as np
import pandas as pd

from libreco.data import random_split, split_by_ratio_chrono, DatasetPure
from libreco.algorithms import Caser   # pure data
from libreco.evaluation import evaluate

import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

In [None]:
from libserving.serialization import save_embed

In [None]:
# Root directory under which trained models are written, and the model's short name
# (used later to build the per-run sub-directory name).
# NOTE(review): the path is under /content/drive — presumably Google Drive must be
# mounted before this cell runs; no mount call is visible here, confirm.
MODEL_PATH = "/content/drive/MyDrive/Colab Notebooks/00-MTC/models/Caser"
MODEL_NAME = 'Caser'

# makedirs (rather than mkdir) also creates missing parent folders such as "models",
# and exist_ok=True makes the cell safe to re-run.
os.makedirs(MODEL_PATH, exist_ok=True)

In [None]:
# Load the ratings CSV, rename its columns to user / item / label / time, and
# overwrite every label with 1.
data = (
    pd.read_csv("https://s3-us-west-2.amazonaws.com/recommender-tutorial/ratings.csv")
    .set_axis(["user", "item", "label", "time"], axis=1)
    .assign(label=1)
)

# split_by_ratio_chrono with ratios 0.8 / 0.1 / 0.1 yields the train / eval / test frames.
train_df, eval_df, test_df = split_by_ratio_chrono(data, multi_ratios=[0.8, 0.1, 0.1])

# The train set is built first because it also produces `data_info`,
# which the negative-sampling calls below take as input.
train_data, data_info = DatasetPure.build_trainset(train_df)
eval_data = DatasetPure.build_evalset(eval_df)
test_data = DatasetPure.build_testset(test_df)

# Sample negative items for each record, in train -> eval -> test order.
for _split_set in (train_data, eval_data, test_data):
    _split_set.build_negative_samples(data_info)

# The recorded run printed: n_users: 610, n_items: 8237, data sparsity: 1.6056 %
print(data_info)

random neg item sampling elapsed: 0.090s
random neg item sampling elapsed: 0.011s
random neg item sampling elapsed: 0.011s
n_users: 610, n_items: 8237, data sparsity: 1.6056 %


In [None]:
from types import SimpleNamespace

# Hyperparameters for this run, exposed as attributes (cfg.lr, cfg.embed_size, ...).
# NOTE(review): only embed_size, n_epochs, lr and batch_size are passed to Caser below;
# the remaining entries are not read anywhere in this cell. Either wire them into the
# constructor (after checking the installed Caser signature) or drop them — confirm.
cfg = SimpleNamespace(
    embed_size=16,
    n_epochs=3,
    lr=0.01,
    batch_size=256,
    lr_decay=True,
    loss_type="cross_entropy",
    dropout_rate=None,
    k=10,
    use_bn=False,
    nh_filters=2,
    nv_filters=4,
    num_neg=1,
    recent_num=10,
    random_num=None,
)

# Metrics reported on the eval set after each epoch and on the test set at the end.
metrics = ["loss", "roc_auc", "precision", "recall", "map", "ndcg"]

# Train a Caser ranking model on the prepared train set, evaluating on eval_data.
model = Caser(
    task="ranking",
    data_info=data_info,
    embed_size=cfg.embed_size,
    n_epochs=cfg.n_epochs,
    lr=cfg.lr,
    batch_size=cfg.batch_size,
)
model.fit(train_data, verbose=2, eval_data=eval_data, metrics=metrics)

# Keep and show the held-out test metrics; previously the return value of
# evaluate() was discarded, so the test scores never appeared in the output.
test_metrics = evaluate(model=model, data=test_data, metrics=metrics)
print(test_metrics)

# Per-run output directory, named after the model and the hyperparameters actually used,
# e.g. Caser_n_epoch_3_emb_size_16_lr_0.01_batch_size_256.
name_str = (
    f"{MODEL_NAME}_n_epoch_{cfg.n_epochs}_emb_size_{cfg.embed_size}"
    f"_lr_{cfg.lr}_batch_size_{cfg.batch_size}"
)
current_dir = os.path.join(MODEL_PATH, name_str)
# makedirs also creates MODEL_PATH itself if it is missing and is safe on re-runs.
os.makedirs(current_dir, exist_ok=True)

# Save the embeddings and the model into the run directory.
save_embed(current_dir, model)
model.save(path=current_dir, model_name=MODEL_NAME, manual=True, inference_only=True)

total params: [33m416,573[0m | embedding params: [33m413,429[0m | network params: [33m3,144[0m
Training start time: [35m2022-11-06 11:22:44[0m


train: 100%|██████████| 631/631 [00:17<00:00, 36.45it/s]


Epoch 1 elapsed: 17.364s
	 [32mtrain_loss: 0.4894[0m


eval_pred: 100%|██████████| 3/3 [00:00<00:00, 313.78it/s]
eval_rec: 100%|██████████| 610/610 [00:00<00:00, 2174.83it/s]


	 eval log_loss: 0.4933
	 eval roc_auc: 0.8446
	 eval precision@10: 0.0305
	 eval recall@10: 0.0287
	 eval map@10: 0.0702
	 eval ndcg@10: 0.1094


train: 100%|██████████| 631/631 [00:16<00:00, 38.66it/s]


Epoch 2 elapsed: 16.381s
	 [32mtrain_loss: 0.2894[0m


eval_pred: 100%|██████████| 3/3 [00:00<00:00, 270.35it/s]
eval_rec: 100%|██████████| 610/610 [00:00<00:00, 2301.12it/s]


	 eval log_loss: 0.5772
	 eval roc_auc: 0.8256
	 eval precision@10: 0.0284
	 eval recall@10: 0.0285
	 eval map@10: 0.0726
	 eval ndcg@10: 0.1096


train: 100%|██████████| 631/631 [00:16<00:00, 38.21it/s]


Epoch 3 elapsed: 16.560s
	 [32mtrain_loss: 0.1458[0m


eval_pred: 100%|██████████| 3/3 [00:00<00:00, 232.50it/s]
eval_rec: 100%|██████████| 610/610 [00:00<00:00, 2300.95it/s]


	 eval log_loss: 0.7182
	 eval roc_auc: 0.8112
	 eval precision@10: 0.0257
	 eval recall@10: 0.0268
	 eval map@10: 0.0671
	 eval ndcg@10: 0.1021


eval_pred: 100%|██████████| 3/3 [00:00<00:00, 292.13it/s]
eval_rec: 100%|██████████| 608/608 [00:00<00:00, 2250.89it/s]


https://wandb.ai/iloncka-ds/recsys-app/sweeps/tw52paxu?workspace=user-iloncka-ds