In [19]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn

import scipy.sparse
import time
import pickle
import glob
import warnings

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from scipy.sparse import SparseEfficiencyWarning

from tqdm.notebook import tqdm

from metrics import Evaluator
from utils_VAE import BaseMultiVAE, TrainableMultVAE, loss_function, naive_sparse2tensor, sparse2torch_sparse
from my_models import ParallSynSLIM

In [9]:
warnings.simplefilter('ignore',SparseEfficiencyWarning)

mypath = "/home/mmarzec12/data/"
savepath = "/home/mmarzec12/models/vae/model_tuning/"

explicit = pd.read_csv(mypath+"explicit_train.csv")
validation = pd.read_csv(mypath+"leave_one_out_validation.csv")
test = pd.read_csv(mypath+"leave_one_out_test.csv")

# list with (user,item) tuples from validation set
test_list = [(u,i) for u,i in zip(test.user_name, test.game_id)]
# dict with user:game key-value pairs from validation set
test_dict = {u:i for u,i in zip(test.user_name, test.game_id)}

# unique games and users
unique_users = explicit.user_name.unique()
unique_games = explicit.game_id.unique()

n_users, n_items = len(unique_users), len(unique_games)

# dictonaries to map users to unique ids and vice vers
us_to_ids = {u:i for i,u in enumerate(unique_users)}
ids_to_us = {i:u for i,u in enumerate(unique_users)}

# dictonaries to map games to unique ids and vice vers
gs_to_ids = {g:i for i,g in enumerate(unique_games)}
ids_to_gs = {i:g for i,g in enumerate(unique_games)}


implicit = pd.read_csv(mypath+"implicit_train.csv")
implicit["score"] = 1

# filtering explicit ratings: filter ratings <6 and >=1
print(f"There is {np.sum(explicit.score <= 6)} rows with score <= 6.")
#explicit = explicit[explicit.score > 6]

# we join implictit and explicit rating data
joined = pd.concat([explicit, implicit])
joined = joined[["user_name", "game_id", "score"]]
# converting all interaction data to "1" 
joined["score"] = 1

# creating sparse matrix with data
row = [us_to_ids[us] for us in joined.user_name]
col = [gs_to_ids[g] for g in joined.game_id]
data = joined.score

train_data = scipy.sparse.coo_matrix((data, (row, col)), shape=(len(unique_users), len(unique_games))).tocsr()
#item_matrix = user_matrix.T.copy()
#dok_matrix = user_matrix.todok()

user_loc = row


item_loc = col
ratings = data.values

There is 1362961 rows with score <= 6.


## Explicit

### VAE

In [10]:
tmp_savepath = "/home/mmarzec12/models/vae/dataset_impact/"
best_params = pd.read_pickle("/home/mmarzec12/models/vae/model_tuning/"+"vae_best_params")
base_params = best_params["base_params"]
train_params = best_params["train_params"]
criterion = loss_function

# preparing test data
test_data = [(us_to_ids[u], gs_to_ids[i]) for u,i in test_list]

# Explicit dataset
row = [us_to_ids[us] for us in explicit.user_name]
col = [gs_to_ids[g] for g in explicit.game_id]
data = explicit.score

train_data = scipy.sparse.coo_matrix((data, (row, col)), shape=(len(unique_users), len(unique_games))).tocsr()

In [13]:
model = TrainableMultVAE(base_params["encoder_dims"], base_params["decoder_dims"], base_params["dropout"])
optimizer = optim.Adam(model.parameters(), **train_params["optimizer_kwargs"])

model.fit(train_data, optimizer, criterion, val_data=None, n_epochs=train_params["n_epochs"],
          k=train_params["k"], beta=train_params["beta"])

ndcg, err, hr = model.predict_metrics(train_data, test_data)

res = {}
res["NDCG10"] = ndcg
res["ERR10"] = err
res["HR10"] = hr
res["dataset"] = "explicit"

with open(tmp_savepath+"vae_explicit", "wb") as handle:
    pickle.dump(res, handle, protocol=pickle.HIGHEST_PROTOCOL)

Training phase...
| epoch   1 |  100/ 427 batches | ms/batch 32.90 | loss 2823.57
| epoch   1 |  200/ 427 batches | ms/batch 32.54 | loss 2642.88
| epoch   1 |  300/ 427 batches | ms/batch 32.40 | loss 2638.54
| epoch   1 |  400/ 427 batches | ms/batch 32.40 | loss 2563.20
Training took 13.84 seconds.
Training phase...
| epoch   2 |  100/ 427 batches | ms/batch 32.30 | loss 2555.63
| epoch   2 |  200/ 427 batches | ms/batch 32.12 | loss 2551.39
| epoch   2 |  300/ 427 batches | ms/batch 32.19 | loss 2538.95
| epoch   2 |  400/ 427 batches | ms/batch 32.20 | loss 2512.93
Training took 13.7 seconds.
Training phase...
| epoch   3 |  100/ 427 batches | ms/batch 32.71 | loss 2531.43
| epoch   3 |  200/ 427 batches | ms/batch 32.19 | loss 2469.92
| epoch   3 |  300/ 427 batches | ms/batch 32.21 | loss 2480.61
| epoch   3 |  400/ 427 batches | ms/batch 32.40 | loss 2494.99
Training took 13.77 seconds.
Training phase...
| epoch   4 |  100/ 427 batches | ms/batch 32.46 | loss 2485.54
| epoch   

| epoch  28 |  100/ 427 batches | ms/batch 32.72 | loss 2365.72
| epoch  28 |  200/ 427 batches | ms/batch 32.18 | loss 2369.63
| epoch  28 |  300/ 427 batches | ms/batch 32.27 | loss 2369.14
| epoch  28 |  400/ 427 batches | ms/batch 32.02 | loss 2387.41
Training took 13.74 seconds.
Training phase...
| epoch  29 |  100/ 427 batches | ms/batch 32.59 | loss 2385.68
| epoch  29 |  200/ 427 batches | ms/batch 32.62 | loss 2393.44
| epoch  29 |  300/ 427 batches | ms/batch 32.19 | loss 2340.95
| epoch  29 |  400/ 427 batches | ms/batch 32.23 | loss 2381.32
Training took 13.79 seconds.
Training phase...
| epoch  30 |  100/ 427 batches | ms/batch 32.60 | loss 2386.78
| epoch  30 |  200/ 427 batches | ms/batch 32.21 | loss 2329.53
| epoch  30 |  300/ 427 batches | ms/batch 32.21 | loss 2389.92
| epoch  30 |  400/ 427 batches | ms/batch 32.32 | loss 2363.82
Training took 13.76 seconds.
Training phase...
| epoch  31 |  100/ 427 batches | ms/batch 32.16 | loss 2408.11
| epoch  31 |  200/ 427 bat

| epoch  55 |  200/ 427 batches | ms/batch 32.65 | loss 2374.60
| epoch  55 |  300/ 427 batches | ms/batch 32.26 | loss 2317.88
| epoch  55 |  400/ 427 batches | ms/batch 32.03 | loss 2367.90
Training took 13.78 seconds.
Training phase...
| epoch  56 |  100/ 427 batches | ms/batch 32.57 | loss 2356.31
| epoch  56 |  200/ 427 batches | ms/batch 32.07 | loss 2338.63
| epoch  56 |  300/ 427 batches | ms/batch 32.55 | loss 2401.13
| epoch  56 |  400/ 427 batches | ms/batch 32.31 | loss 2330.42
Training took 13.79 seconds.
Training phase...
| epoch  57 |  100/ 427 batches | ms/batch 32.51 | loss 2384.65
| epoch  57 |  200/ 427 batches | ms/batch 32.16 | loss 2380.41
| epoch  57 |  300/ 427 batches | ms/batch 32.48 | loss 2337.67
| epoch  57 |  400/ 427 batches | ms/batch 32.10 | loss 2350.59
Training took 13.74 seconds.
Training phase...
| epoch  58 |  100/ 427 batches | ms/batch 32.59 | loss 2364.71
| epoch  58 |  200/ 427 batches | ms/batch 32.45 | loss 2367.11
| epoch  58 |  300/ 427 bat

| epoch  82 |  300/ 427 batches | ms/batch 32.12 | loss 2349.16
| epoch  82 |  400/ 427 batches | ms/batch 32.22 | loss 2343.39
Training took 13.71 seconds.
Training phase...
| epoch  83 |  100/ 427 batches | ms/batch 32.59 | loss 2370.23
| epoch  83 |  200/ 427 batches | ms/batch 32.47 | loss 2341.35
| epoch  83 |  300/ 427 batches | ms/batch 32.33 | loss 2358.50
| epoch  83 |  400/ 427 batches | ms/batch 32.38 | loss 2344.86
Training took 13.81 seconds.
Training phase...
| epoch  84 |  100/ 427 batches | ms/batch 32.52 | loss 2370.24
| epoch  84 |  200/ 427 batches | ms/batch 32.41 | loss 2329.12
| epoch  84 |  300/ 427 batches | ms/batch 32.14 | loss 2370.52
| epoch  84 |  400/ 427 batches | ms/batch 32.23 | loss 2338.68
Training took 13.75 seconds.
Training phase...
| epoch  85 |  100/ 427 batches | ms/batch 32.87 | loss 2371.15
| epoch  85 |  200/ 427 batches | ms/batch 32.12 | loss 2326.60
| epoch  85 |  300/ 427 batches | ms/batch 31.97 | loss 2364.56
| epoch  85 |  400/ 427 bat

AttributeError: 'DataFrame' object has no attribute 'toarray'

### SLIM

In [23]:
best_params_slim = pd.read_pickle("/home/mmarzec12/models/slim/"+"slim_best_params")
tmp_savepath_slim = "/home/mmarzec12/models/slim/dataset_impact"

l1_reg = best_params_slim["l1_reg"]
l2_reg = best_params_slim["l2_reg"]
k = 10

In [24]:
# set the parameters
slim = ParallSynSLIM(l1_reg, l2_reg)

# train the model
slim.fit(train_data)

# how many nonzero entires in W matirx
proc = 100*slim.W.nnz/slim.W.shape[0]**2

print("Computing top-k list for each user...")
# produce top k list for all users
start = time.time()
top_k_list = slim.calculate_top_k(train_data, ids_to_gs, ids_to_us, k=k)
pred_time = time.time() - start

print("...evaluation...")
ev = Evaluator(k=k, true=test_list, predicted=top_k_list)
ev.calculate_metrics()
ngcg10, err10, hr10 = ev.ndcg, ev.err, ev.hr

# save the obtained results

res = {}
res["ndcg10"] = ngcg10
res["err10"] = err10
res["hr10"] = hr10
res["W_zeros_percentage"] = proc
res["prediction_calc_time_seconds"] = pred_time
res["datset"] = "explicit"


print(ngcg10, err10, hr10)
# save the obtained results
with open(tmp_savepath_slim+"SLIM_explicit", "wb") as handle:
    pickle.dump(res, handle, protocol=pickle.HIGHEST_PROTOCOL)

Learning all 2265 vectors took 5.41 minutes.
In W matrix we have 88378 nonzero elements (1.723%).
Computing top-k list for each user...


  0%|          | 0/109084 [00:00<?, ?it/s]

...evaluation...
0.18944588067206375 0.15648312091511826 0.29664295405375674


## Binarized explicit

In [26]:
# Explicit dataset
explicit_binarized = explicit.copy()
explicit_binarized.score = 1
row = [us_to_ids[us] for us in explicit_binarized.user_name]
col = [gs_to_ids[g] for g in explicit_binarized.game_id]
data = explicit_binarized.score

train_data = scipy.sparse.coo_matrix((data, (row, col)), shape=(len(unique_users), len(unique_games))).tocsr()

### VAE

In [27]:
model = TrainableMultVAE(base_params["encoder_dims"], base_params["decoder_dims"], base_params["dropout"])
optimizer = optim.Adam(model.parameters(), **train_params["optimizer_kwargs"])

model.fit(train_data, optimizer, criterion, val_data=None, n_epochs=train_params["n_epochs"],
          k=train_params["k"], beta=train_params["beta"])

ndcg, err, hr = model.predict_metrics(train_data, validation_data)

res = {}

res["NDCG10"] = ndcg
res["ERR10"] = err
res["HR10"] = hr
res["dataset"] = "explicit_binarized"

with open(tmp_savepath+"vae_explicit_binarized", "wb") as handle:
    pickle.dump(res, handle, protocol=pickle.HIGHEST_PROTOCOL)

Training phase...
| epoch   1 |  100/ 427 batches | ms/batch 32.98 | loss 397.87
| epoch   1 |  200/ 427 batches | ms/batch 32.22 | loss 368.92
| epoch   1 |  300/ 427 batches | ms/batch 32.28 | loss 371.12
| epoch   1 |  400/ 427 batches | ms/batch 32.55 | loss 362.93
Training took 13.82 seconds.
Training phase...
| epoch   2 |  100/ 427 batches | ms/batch 32.55 | loss 368.02
| epoch   2 |  200/ 427 batches | ms/batch 32.13 | loss 362.16
| epoch   2 |  300/ 427 batches | ms/batch 32.35 | loss 356.80
| epoch   2 |  400/ 427 batches | ms/batch 32.30 | loss 359.20
Training took 13.76 seconds.
Training phase...
| epoch   3 |  100/ 427 batches | ms/batch 32.28 | loss 365.68
| epoch   3 |  200/ 427 batches | ms/batch 32.20 | loss 352.89
| epoch   3 |  300/ 427 batches | ms/batch 32.22 | loss 357.77
| epoch   3 |  400/ 427 batches | ms/batch 32.24 | loss 353.65
Training took 13.72 seconds.
Training phase...
| epoch   4 |  100/ 427 batches | ms/batch 32.53 | loss 356.63
| epoch   4 |  200/ 42

| epoch  28 |  300/ 427 batches | ms/batch 32.10 | loss 342.78
| epoch  28 |  400/ 427 batches | ms/batch 32.31 | loss 342.64
Training took 13.72 seconds.
Training phase...
| epoch  29 |  100/ 427 batches | ms/batch 32.65 | loss 345.61
| epoch  29 |  200/ 427 batches | ms/batch 32.52 | loss 345.23
| epoch  29 |  300/ 427 batches | ms/batch 32.29 | loss 342.02
| epoch  29 |  400/ 427 batches | ms/batch 32.24 | loss 343.16
Training took 13.79 seconds.
Training phase...
| epoch  30 |  100/ 427 batches | ms/batch 32.44 | loss 346.85
| epoch  30 |  200/ 427 batches | ms/batch 32.15 | loss 343.18
| epoch  30 |  300/ 427 batches | ms/batch 32.22 | loss 338.45
| epoch  30 |  400/ 427 batches | ms/batch 32.26 | loss 344.49
Training took 13.73 seconds.
Training phase...
| epoch  31 |  100/ 427 batches | ms/batch 32.52 | loss 347.44
| epoch  31 |  200/ 427 batches | ms/batch 32.28 | loss 340.73
| epoch  31 |  300/ 427 batches | ms/batch 32.24 | loss 339.32
| epoch  31 |  400/ 427 batches | ms/bat

Training took 13.84 seconds.
Training phase...
| epoch  56 |  100/ 427 batches | ms/batch 32.52 | loss 341.93
| epoch  56 |  200/ 427 batches | ms/batch 32.30 | loss 339.42
| epoch  56 |  300/ 427 batches | ms/batch 32.13 | loss 344.25
| epoch  56 |  400/ 427 batches | ms/batch 32.23 | loss 340.69
Training took 13.74 seconds.
Training phase...
| epoch  57 |  100/ 427 batches | ms/batch 32.85 | loss 342.76
| epoch  57 |  200/ 427 batches | ms/batch 32.46 | loss 342.76
| epoch  57 |  300/ 427 batches | ms/batch 32.08 | loss 340.90
| epoch  57 |  400/ 427 batches | ms/batch 32.24 | loss 340.29
Training took 13.79 seconds.
Training phase...
| epoch  58 |  100/ 427 batches | ms/batch 32.70 | loss 344.31
| epoch  58 |  200/ 427 batches | ms/batch 32.39 | loss 339.19
| epoch  58 |  300/ 427 batches | ms/batch 32.17 | loss 347.38
| epoch  58 |  400/ 427 batches | ms/batch 32.44 | loss 336.99
Training took 13.8 seconds.
Training phase...
| epoch  59 |  100/ 427 batches | ms/batch 32.53 | loss 3

| epoch  83 |  300/ 427 batches | ms/batch 32.14 | loss 341.56
| epoch  83 |  400/ 427 batches | ms/batch 31.92 | loss 339.72
Training took 13.69 seconds.
Training phase...
| epoch  84 |  100/ 427 batches | ms/batch 32.32 | loss 340.01
| epoch  84 |  200/ 427 batches | ms/batch 32.09 | loss 342.35
| epoch  84 |  300/ 427 batches | ms/batch 32.03 | loss 343.13
| epoch  84 |  400/ 427 batches | ms/batch 32.06 | loss 339.65
Training took 13.67 seconds.
Training phase...
| epoch  85 |  100/ 427 batches | ms/batch 32.38 | loss 339.76
| epoch  85 |  200/ 427 batches | ms/batch 32.16 | loss 343.57
| epoch  85 |  300/ 427 batches | ms/batch 31.99 | loss 338.69
| epoch  85 |  400/ 427 batches | ms/batch 32.18 | loss 338.27
Training took 13.68 seconds.
Training phase...
| epoch  86 |  100/ 427 batches | ms/batch 32.84 | loss 341.19
| epoch  86 |  200/ 427 batches | ms/batch 32.27 | loss 343.61
| epoch  86 |  300/ 427 batches | ms/batch 32.00 | loss 341.55
| epoch  86 |  400/ 427 batches | ms/bat

### SLIM

In [28]:
# set the parameters
slim = ParallSynSLIM(l1_reg, l2_reg)

# train the model
slim.fit(train_data)

# how many nonzero entires in W matirx
proc = 100*slim.W.nnz/slim.W.shape[0]**2

print("Computing top-k list for each user...")
# produce top k list for all users
start = time.time()
top_k_list = slim.calculate_top_k(train_data, ids_to_gs, ids_to_us, k=k)
pred_time = time.time() - start

print("...evaluation...")
ev = Evaluator(k=k, true=validation_list, predicted=top_k_list)
ev.calculate_metrics()
ngcg10, err10, hr10 = ev.ndcg, ev.err, ev.hr

# save the obtained results
res = {}
res["ndcg10"] = ngcg10
res["err10"] = err10
res["hr10"] = hr10
res["W_zeros_percentage"] = proc
res["prediction_calc_time_seconds"] = pred_time
res["datset"] = "explicit_binarized"


print(ngcg10, err10, hr10)
# save the obtained results
with open(tmp_savepath_slim+"SLIM_explicit_binarized", "wb") as handle:
    pickle.dump(res, handle, protocol=pickle.HIGHEST_PROTOCOL)

Learning all 2265 vectors took 1.44 minutes.
In W matrix we have 65679 nonzero elements (1.28%).
Computing top-k list for each user...


  0%|          | 0/109084 [00:00<?, ?it/s]

...evaluation...
0.18763862360168257 0.153417942660178 0.29900810384657694


## Binarized filtered explicit

In [29]:
# Explicit dataset
explicit_filtered = explicit[explicit.score > 6]
explicit_filtered.score = 1
row = [us_to_ids[us] for us in explicit_filtered.user_name]
col = [gs_to_ids[g] for g in explicit_filtered.game_id]
data = explicit_filtered.score

train_data = scipy.sparse.coo_matrix((data, (row, col)), shape=(len(unique_users), len(unique_games))).tocsr()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


### VAE

In [30]:
model = TrainableMultVAE(base_params["encoder_dims"], base_params["decoder_dims"], base_params["dropout"])
optimizer = optim.Adam(model.parameters(), **train_params["optimizer_kwargs"])

model.fit(train_data, optimizer, criterion, val_data=None, n_epochs=train_params["n_epochs"],
          k=train_params["k"], beta=train_params["beta"])

ndcg, err, hr = model.predict_metrics(train_data, validation_data)

res = {}

res["NDCG10"] = ndcg
res["ERR10"] = err
res["HR10"] = hr
res["dataset"] = "explicit_binarized_filtered"

with open(tmp_savepath+"vae_explicit_binarized_filtered", "wb") as handle:
    pickle.dump(res, handle, protocol=pickle.HIGHEST_PROTOCOL)

Training phase...
| epoch   1 |  100/ 427 batches | ms/batch 32.40 | loss 298.55
| epoch   1 |  200/ 427 batches | ms/batch 31.78 | loss 283.94
| epoch   1 |  300/ 427 batches | ms/batch 32.05 | loss 281.77
| epoch   1 |  400/ 427 batches | ms/batch 31.74 | loss 277.16
Training took 13.63 seconds.
Training phase...
| epoch   2 |  100/ 427 batches | ms/batch 32.71 | loss 279.44
| epoch   2 |  200/ 427 batches | ms/batch 31.85 | loss 272.13
| epoch   2 |  300/ 427 batches | ms/batch 31.82 | loss 273.43
| epoch   2 |  400/ 427 batches | ms/batch 31.78 | loss 271.45
Training took 13.62 seconds.
Training phase...
| epoch   3 |  100/ 427 batches | ms/batch 32.04 | loss 273.47
| epoch   3 |  200/ 427 batches | ms/batch 31.83 | loss 270.82
| epoch   3 |  300/ 427 batches | ms/batch 31.84 | loss 272.64
| epoch   3 |  400/ 427 batches | ms/batch 31.84 | loss 265.53
Training took 13.57 seconds.
Training phase...
| epoch   4 |  100/ 427 batches | ms/batch 32.30 | loss 271.99
| epoch   4 |  200/ 42

| epoch  28 |  300/ 427 batches | ms/batch 31.87 | loss 256.67
| epoch  28 |  400/ 427 batches | ms/batch 31.85 | loss 260.09
Training took 13.6 seconds.
Training phase...
| epoch  29 |  100/ 427 batches | ms/batch 32.12 | loss 260.74
| epoch  29 |  200/ 427 batches | ms/batch 31.97 | loss 255.63
| epoch  29 |  300/ 427 batches | ms/batch 31.95 | loss 257.05
| epoch  29 |  400/ 427 batches | ms/batch 31.84 | loss 258.67
Training took 13.61 seconds.
Training phase...
| epoch  30 |  100/ 427 batches | ms/batch 32.32 | loss 260.10
| epoch  30 |  200/ 427 batches | ms/batch 31.93 | loss 256.67
| epoch  30 |  300/ 427 batches | ms/batch 31.75 | loss 259.13
| epoch  30 |  400/ 427 batches | ms/batch 32.16 | loss 257.13
Training took 13.63 seconds.
Training phase...
| epoch  31 |  100/ 427 batches | ms/batch 32.36 | loss 262.49
| epoch  31 |  200/ 427 batches | ms/batch 32.09 | loss 257.60
| epoch  31 |  300/ 427 batches | ms/batch 31.86 | loss 251.22
| epoch  31 |  400/ 427 batches | ms/batc

Training took 13.63 seconds.
Training phase...
| epoch  56 |  100/ 427 batches | ms/batch 32.17 | loss 260.17
| epoch  56 |  200/ 427 batches | ms/batch 31.94 | loss 258.48
| epoch  56 |  300/ 427 batches | ms/batch 31.82 | loss 256.50
| epoch  56 |  400/ 427 batches | ms/batch 31.87 | loss 254.89
Training took 13.6 seconds.
Training phase...
| epoch  57 |  100/ 427 batches | ms/batch 32.20 | loss 255.64
| epoch  57 |  200/ 427 batches | ms/batch 31.93 | loss 257.43
| epoch  57 |  300/ 427 batches | ms/batch 32.14 | loss 256.83
| epoch  57 |  400/ 427 batches | ms/batch 31.78 | loss 257.00
Training took 13.62 seconds.
Training phase...
| epoch  58 |  100/ 427 batches | ms/batch 32.26 | loss 257.55
| epoch  58 |  200/ 427 batches | ms/batch 31.80 | loss 257.42
| epoch  58 |  300/ 427 batches | ms/batch 31.82 | loss 257.83
| epoch  58 |  400/ 427 batches | ms/batch 32.01 | loss 256.98
Training took 13.61 seconds.
Training phase...
| epoch  59 |  100/ 427 batches | ms/batch 32.23 | loss 2

| epoch  83 |  300/ 427 batches | ms/batch 32.05 | loss 251.57
| epoch  83 |  400/ 427 batches | ms/batch 32.05 | loss 255.72
Training took 13.66 seconds.
Training phase...
| epoch  84 |  100/ 427 batches | ms/batch 32.54 | loss 256.35
| epoch  84 |  200/ 427 batches | ms/batch 31.88 | loss 257.73
| epoch  84 |  300/ 427 batches | ms/batch 31.88 | loss 255.62
| epoch  84 |  400/ 427 batches | ms/batch 32.00 | loss 254.66
Training took 13.64 seconds.
Training phase...
| epoch  85 |  100/ 427 batches | ms/batch 32.33 | loss 258.68
| epoch  85 |  200/ 427 batches | ms/batch 32.04 | loss 255.88
| epoch  85 |  300/ 427 batches | ms/batch 32.22 | loss 257.05
| epoch  85 |  400/ 427 batches | ms/batch 32.02 | loss 254.84
Training took 13.68 seconds.
Training phase...
| epoch  86 |  100/ 427 batches | ms/batch 32.43 | loss 260.30
| epoch  86 |  200/ 427 batches | ms/batch 31.93 | loss 254.44
| epoch  86 |  300/ 427 batches | ms/batch 32.02 | loss 254.94
| epoch  86 |  400/ 427 batches | ms/bat

### SLIM

In [31]:
# set the parameters
slim = ParallSynSLIM(l1_reg, l2_reg)

# train the model
slim.fit(train_data)

# how many nonzero entires in W matirx
proc = 100*slim.W.nnz/slim.W.shape[0]**2

print("Computing top-k list for each user...")
# produce top k list for all users
start = time.time()
top_k_list = slim.calculate_top_k(train_data, ids_to_gs, ids_to_us, k=k)
pred_time = time.time() - start

print("...evaluation...")
ev = Evaluator(k=k, true=validation_list, predicted=top_k_list)
ev.calculate_metrics()
ngcg10, err10, hr10 = ev.ndcg, ev.err, ev.hr

# save the obtained results
res = {}
res["ndcg10"] = ngcg10
res["err10"] = err10
res["hr10"] = hr10
res["W_zeros_percentage"] = proc
res["prediction_calc_time_seconds"] = pred_time
res["datset"] = "explicit_binarized_filtered"


print(ngcg10, err10, hr10)
# save the obtained results
with open(tmp_savepath_slim+"SLIM_explicit_binarized_filtered", "wb") as handle:
    pickle.dump(res, handle, protocol=pickle.HIGHEST_PROTOCOL)

Learning all 2265 vectors took 1.42 minutes.
In W matrix we have 53258 nonzero elements (1.038%).
Computing top-k list for each user...


  0%|          | 0/109084 [00:00<?, ?it/s]

...evaluation...
0.18657369989503855 0.1522059816433877 0.2983388947966705
