In [5]:
# Suppress all warnings
import warnings
warnings.filterwarnings("ignore")

import os
import sys
import numpy as np
import pandas as pd
import torch
#import fastai
from tempfile import TemporaryDirectory

from fastai.collab import collab_learner, CollabDataLoaders, load_learner

from recommenders.utils.constants import (
    DEFAULT_USER_COL as USER, 
    DEFAULT_ITEM_COL as ITEM, 
    DEFAULT_RATING_COL as RATING, 
    DEFAULT_TIMESTAMP_COL as TIMESTAMP, 
    DEFAULT_PREDICTION_COL as PREDICTION
) 
from recommenders.utils.timer import Timer
from recommenders.datasets import movielens
from recommenders.datasets.python_splitters import python_stratified_split
from recommenders.models.fastai.fastai_utils import cartesian_product, score
from recommenders.evaluation.python_evaluation import map, ndcg_at_k, precision_at_k, recall_at_k
from recommenders.evaluation.python_evaluation import rmse, mae, rsquared, exp_var
from recommenders.utils.notebook_utils import store_metadata

print("System version: {}".format(sys.version))
print("Pandas version: {}".format(pd.__version__))
#print("Fast AI version: {}".format(fastai.__version__))
print("Torch version: {}".format(torch.__version__))
print("CUDA Available: {}".format(torch.cuda.is_available()))
print("CuDNN Enabled: {}".format(torch.backends.cudnn.enabled))

System version: 3.10.10 (main, Mar 21 2023, 18:45:11) [GCC 11.2.0]
Pandas version: 2.2.3
Torch version: 2.6.0+cu124
CUDA Available: False
CuDNN Enabled: True


In [6]:
# top k items to recommend
TOP_K = 10

# Select MovieLens data size: 100k, 1m, 10m, or 20m
MOVIELENS_DATA_SIZE = '100k'

# Model parameters
N_FACTORS = 40
EPOCHS = 5

In [7]:
ratings_df = movielens.load_pandas_df(
    size=MOVIELENS_DATA_SIZE,
    header=[USER,ITEM,RATING,TIMESTAMP]
)

# make sure the IDs are loaded as strings to better prevent confusion with embedding ids
ratings_df[USER] = ratings_df[USER].astype('str')
ratings_df[ITEM] = ratings_df[ITEM].astype('str')

ratings_df.head()

100%|██████████| 4.81k/4.81k [00:00<00:00, 9.85kKB/s]


Unnamed: 0,userID,itemID,rating,timestamp
0,196,242,3.0,881250949
1,186,302,3.0,891717742
2,22,377,1.0,878887116
3,244,51,2.0,880606923
4,166,346,1.0,886397596


In [8]:
# Split the dataset
train_valid_df, test_df = python_stratified_split(
    ratings_df, 
    ratio=0.75, 
    min_rating=1, 
    filter_by="item", 
    col_user=USER, 
    col_item=ITEM
)

In [9]:
# Remove "cold" users from test set  
test_df = test_df[test_df.userID.isin(train_valid_df.userID)]

## Training

In [10]:
# fix random seeds to make sure our runs are reproducible
np.random.seed(101)
torch.manual_seed(101)
torch.cuda.manual_seed_all(101)

In [11]:
with Timer() as preprocess_time:
    data = CollabDataLoaders.from_df(train_valid_df, 
                                     user_name=USER, 
                                     item_name=ITEM, 
                                     rating_name=RATING, 
                                     valid_pct=0)

In [12]:
############# DRAFT ##############

In [13]:
######################################

In [14]:
data.show_batch()

Unnamed: 0,userID,itemID,rating
0,447,183,5.0
1,445,886,3.0
2,121,427,4.0
3,478,145,1.0
4,398,476,3.0
5,457,717,3.0
6,435,366,2.0
7,752,322,1.0
8,588,95,4.0
9,554,216,3.0


In [15]:
#from fastai.tabular.model import get_emb_sz, tabular_config
#from fastai.losses import MSELossFlat
from torch.nn import MSELoss

In [16]:
def trunc_normal_(x, mean=0., std=1.):
    "Truncated normal initialization (approximation)"
    # From https://discuss.pytorch.org/t/implementing-truncated-normal-initializer/4778/12
    return x.normal_().fmod_(2).mul_(std).add_(mean)

In [17]:
import torch
import torch.nn as nn
#from fastai.layers import Embedding
from torch.nn import Embedding
#from fastai.torch_core import Module, to_detach
from torch.nn import Module
#from fastai.torch_core import tensor
import torch.nn.init as init

class EmbeddingDotBias(Module):
    "Base dot model for collaborative filtering."
    def __init__(self, n_factors, n_users, n_items, y_range=None):
        
        super().__init__()
        self.y_range = y_range
        (self.u_weight, self.i_weight, self.u_bias, self.i_bias) = [Embedding(*o) for o in [
            (n_users, n_factors), (n_items, n_factors), (n_users,1), (n_items,1)
        ]]

        # Initialize with truncated normal
        for emb in [self.u_weight, self.i_weight, self.u_bias, self.i_bias]:
            init.trunc_normal_(emb.weight, std=0.01)

    def forward(self, x):
        users,items = x[:,0],x[:,1]
        dot = self.u_weight(users)* self.i_weight(items)
        res = dot.sum(1) + self.u_bias(users).squeeze() + self.i_bias(items).squeeze()
        if self.y_range is None: return res
        return torch.sigmoid(res) * (self.y_range[1]-self.y_range[0]) + self.y_range[0]

    @classmethod
    def from_classes(cls, n_factors, classes, user=None, item=None, y_range=None):
        "Build a model with `n_factors` by inferring `n_users` and  `n_items` from `classes`"
        if user is None: user = list(classes.keys())[0]
        if item is None: item = list(classes.keys())[1]
        res = cls(n_factors, len(classes[user]), len(classes[item]), y_range=y_range)
        res.classes,res.user,res.item = classes,user,item
        return res

    def _get_idx(self, arr, is_item=True):
        "Fetch item or user (based on `is_item`) for all in `arr`"
        assert hasattr(self, 'classes'), "Build your model with `EmbeddingDotBias.from_classes` to use this functionality."
        classes = self.classes[self.item] if is_item else self.classes[self.user]
        c2i = {v:k for k,v in enumerate(classes)}
        try: return torch.tensor([c2i[o] for o in arr])
        except KeyError as e:
            message = f"You're trying to access {'an item' if is_item else 'a user'} that isn't in the training data. If it was in your original data, it may have been split such that it's only in the validation set now."
            raise modify_exception(e, message, replace=True)

    def bias(self, arr, is_item=True):
        "Bias for item or user (based on `is_item`) for all in `arr`"
        idx = self._get_idx(arr, is_item)
        layer = (self.i_bias if is_item else self.u_bias).eval().cpu()
        #return to_detach(layer(idx).squeeze(),gather=False)
        return layer(idx).squeeze().detach()

    def weight(self, arr, is_item=True):
        "Weight for item or user (based on `is_item`) for all in `arr`"
        idx = self._get_idx(arr, is_item)
        layer = (self.i_weight if is_item else self.u_weight).eval().cpu()
        #return to_detach(layer(idx),gather=False)
        return layer(idx).detach()

In [18]:
y_range=[0,5.5]

In [19]:
import torch
import torch.nn as nn
import torch.optim as optim
#from fastai.losses import MSELossFlat
from torch.nn import MSELoss 

In [21]:
data.classes

{'userID': ['#na#', '1', '10', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '11', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '12', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '13', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '14', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '15', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '16', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '17', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '18', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '19', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '2', '20', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '21', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '22', '220', '221', '222', '223', '224', '225', '226', '22

In [22]:
data.classes

{'userID': ['#na#', '1', '10', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '11', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '12', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '13', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '14', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '15', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '16', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '17', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '18', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '19', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '2', '20', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '21', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '22', '220', '221', '222', '223', '224', '225', '226', '22

In [23]:
# 3. Create model
n_factors = 40
model = EmbeddingDotBias.from_classes(
    n_factors=n_factors,
    classes=data.classes,
    user='userID',
    item='itemID',
    y_range=y_range
)

# 4. Setup training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
optimizer = optim.AdamW(model.parameters(), 
    lr=1e-3,
    betas=(0.9, 0.99),
    eps=1e-5,
    weight_decay=0.01
)

#loss_fn = MSELossFlat()
loss_fn = MSELoss()
#loss_fn = MSELossFlat2()

In [24]:
device

device(type='cpu')

In [25]:

# 5. Training loop
def train_epoch(model, train_dl, optimizer, loss_fn, device):
    model.train()
    total_loss = 0
    for batch in train_dl:
        # Get batch data
        users_items, ratings = batch
        users_items = users_items.to(device)
        ratings = ratings.to(device)
        
        # Forward pass
        predictions = model(users_items)

        #Flatten both predictions and targets directly in the training loop
        loss = loss_fn(predictions.view(-1), ratings.view(-1))
        
        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    
    return total_loss / len(train_dl)

def validate(model, valid_dl, loss_fn, device):
    model.eval()
    total_loss = 0
    try:
        with torch.no_grad():
            for batch in valid_dl:
                users_items, ratings = batch
                users_items = users_items.to(device)
                ratings = ratings.to(device)
                
                predictions = model(users_items)
                loss = loss_fn(predictions.view(-1), ratings.view(-1))
                total_loss += loss.item()
        
        return total_loss / len(valid_dl)
    except ZeroDivisionError:
        return None

# 6. Train the model
n_epochs = 7
for epoch in range(n_epochs):
    train_loss = train_epoch(model, data.train, optimizer, loss_fn, device)
    valid_loss = validate(model, data.valid, loss_fn, device)
    print(f'Epoch {epoch+1}/{n_epochs}:')
    print(f'Train Loss: {train_loss}')
    print(f'Valid Loss: {valid_loss}')

# 7. Make prediction

Epoch 1/7:
Train Loss: 1.3331980733232693
Valid Loss: None
Epoch 2/7:
Train Loss: 0.871400328422976
Valid Loss: None
Epoch 3/7:
Train Loss: 0.7851374810720466
Valid Loss: None
Epoch 4/7:
Train Loss: 0.7128831043127454
Valid Loss: None
Epoch 5/7:
Train Loss: 0.6439980921993483
Valid Loss: None
Epoch 6/7:
Train Loss: 0.5804225572801287
Valid Loss: None
Epoch 7/7:
Train Loss: 0.5201062617804411
Valid Loss: None


In [26]:
def predict_rating(model, user_id, item_id):
    model.eval()
    with torch.no_grad():
        try:
            print(f"Trying to predict for user {user_id} and item {item_id}")
            print(f"Available users: {model.classes[USER][:5]}...")  # Print first 5 users
            print(f"Available items: {model.classes[ITEM][:5]}...")  # Print first 5 items
            
            # Convert user and item IDs to indices
            user_idx = model._get_idx([user_id], is_item=False)
            item_idx = model._get_idx([item_id], is_item=True)
            
            # Create input tensor
            x = torch.stack([user_idx, item_idx], dim=1).to(device)
            
            # Get prediction
            pred = model(x)
            return pred.item()
        except Exception as e:
            print(f"Error in prediction: {str(e)}")
            return None

In [27]:
model.classes[USER][:5]

(#5) ['#na#','1','10','100','101']

In [28]:
model.classes[ITEM][:5]

(#5) ['#na#','1','10','100','1000']

In [29]:
total_items = model.classes[ITEM][1:]
total_users = model.classes[USER][1:]

In [30]:
total_users

(#943) ['1','10','100','101','102','103','104','105','106','107','108','109','11','110','111','112','113','114','115','116'...]

In [31]:
total_items

(#1682) ['1','10','100','1000','1001','1002','1003','1004','1005','1006','1007','1008','1009','101','1010','1011','1012','1013','1014','1015'...]

In [32]:
# Get all users from the test set and remove any users that were know in the training set 
test_users = test_df[USER].unique()

In [33]:
test_users

array(['57', '141', '184', '15', '486', '738', '843', '18', '434', '761',
       '692', '777', '913', '38', '472', '295', '311', '174', '938',
       '458', '246', '560', '487', '545', '389', '715', '537', '521',
       '930', '177', '865', '365', '468', '882', '263', '79', '83', '664',
       '663', '505', '540', '411', '933', '674', '6', '350', '327', '895',
       '742', '465', '710', '293', '460', '387', '390', '454', '339',
       '84', '463', '577', '941', '42', '265', '77', '868', '234', '344',
       '471', '20', '81', '137', '550', '503', '621', '330', '676', '280',
       '374', '854', '419', '916', '648', '745', '210', '727', '716',
       '200', '554', '549', '896', '934', '885', '13', '262', '459',
       '150', '689', '160', '438', '66', '735', '654', '253', '893',
       '649', '562', '838', '887', '5', '41', '817', '45', '618', '268',
       '279', '181', '790', '416', '506', '514', '565', '423', '906',
       '378', '500', '312', '862', '518', '49', '733', '104', '373'

In [34]:
test_users = np.intersect1d(test_users, total_users)

In [35]:
test_users

array(['1', '10', '100', '101', '102', '103', '104', '105', '106', '107',
       '108', '109', '11', '110', '111', '112', '113', '114', '115',
       '116', '117', '118', '119', '12', '120', '121', '122', '123',
       '124', '125', '126', '127', '128', '129', '13', '130', '131',
       '132', '133', '134', '135', '136', '137', '138', '139', '14',
       '140', '141', '142', '143', '144', '145', '146', '147', '148',
       '149', '15', '150', '151', '152', '153', '154', '155', '156',
       '157', '158', '159', '16', '160', '161', '162', '163', '164',
       '165', '166', '167', '168', '169', '17', '170', '171', '172',
       '173', '174', '175', '176', '177', '178', '179', '18', '180',
       '181', '182', '183', '184', '185', '186', '187', '188', '189',
       '19', '190', '191', '192', '193', '194', '195', '196', '197',
       '198', '199', '2', '20', '200', '201', '202', '203', '204', '205',
       '206', '207', '208', '209', '21', '210', '211', '212', '213',
       '214', '215', '

In [36]:

user_embeddings = model.weight(['1', '10'], is_item=False)

In [37]:
user_embeddings

tensor([[-0.6163, -0.2073,  0.0704, -0.2973, -0.1889, -0.6495,  0.1631,  0.1153,
         -0.2267,  0.2196,  0.0315, -0.0445, -0.3049, -0.1868, -0.0617, -0.2091,
          0.1798, -0.1370,  0.5447, -0.0399,  0.0594,  0.2918,  0.0073, -0.3090,
          0.4370,  0.1606, -0.1076,  0.2519,  0.1735,  0.3986,  0.1232,  0.2012,
          0.0860,  0.1141,  0.3534, -0.3463,  0.1353, -0.3631, -0.2767,  0.3221],
        [-0.1470,  0.0059,  0.1707, -0.0748, -0.0681,  0.4866,  0.0055,  0.0971,
         -0.1661,  0.1822,  0.1614, -0.1482,  0.3345, -0.0607,  0.2048, -0.2113,
          0.0795, -0.1303,  0.0275, -0.1420, -0.0952,  0.1729, -0.0267, -0.1320,
          0.1556,  0.1251, -0.1875,  0.0826, -0.1657,  0.1375, -0.1253,  0.1484,
          0.2020,  0.1607,  0.1565,  0.0266, -0.1110,  0.0028, -0.0822,  0.1555]])

In [38]:

# Example prediction
user_id = "1"
item_id = "10"
predicted_rating = predict_rating(model, user_id, item_id)
print(f'\nPredicted rating for user {user_id} and item {item_id}: {predicted_rating}')

Trying to predict for user 1 and item 10
Available users: ['#na#', '1', '10', '100', '101']...
Available items: ['#na#', '1', '10', '100', '1000']...

Predicted rating for user 1 and item 10: 4.050480842590332


In [39]:
np.array(test_users)

array(['1', '10', '100', '101', '102', '103', '104', '105', '106', '107',
       '108', '109', '11', '110', '111', '112', '113', '114', '115',
       '116', '117', '118', '119', '12', '120', '121', '122', '123',
       '124', '125', '126', '127', '128', '129', '13', '130', '131',
       '132', '133', '134', '135', '136', '137', '138', '139', '14',
       '140', '141', '142', '143', '144', '145', '146', '147', '148',
       '149', '15', '150', '151', '152', '153', '154', '155', '156',
       '157', '158', '159', '16', '160', '161', '162', '163', '164',
       '165', '166', '167', '168', '169', '17', '170', '171', '172',
       '173', '174', '175', '176', '177', '178', '179', '18', '180',
       '181', '182', '183', '184', '185', '186', '187', '188', '189',
       '19', '190', '191', '192', '193', '194', '195', '196', '197',
       '198', '199', '2', '20', '200', '201', '202', '203', '204', '205',
       '206', '207', '208', '209', '21', '210', '211', '212', '213',
       '214', '215', '

In [40]:
np.array(total_items)

array(['1', '10', '100', ..., '997', '998', '999'],
      shape=(1682,), dtype='<U4')

In [41]:
users_items = cartesian_product(np.array(test_users),np.array(total_items))

In [42]:
users_items

array([['1', '1'],
       ['1', '10'],
       ['1', '100'],
       ...,
       ['99', '997'],
       ['99', '998'],
       ['99', '999']], shape=(1586126, 2), dtype=object)

In [43]:
users_items = pd.DataFrame(users_items, columns=[USER,ITEM])

In [44]:
users_items

Unnamed: 0,userID,itemID
0,1,1
1,1,10
2,1,100
3,1,1000
4,1,1001
...,...,...
1586121,99,995
1586122,99,996
1586123,99,997
1586124,99,998


In [45]:
training_removed = pd.merge(users_items, train_valid_df.astype(str), on=[USER, ITEM], how='left')
training_removed = training_removed[training_removed[RATING].isna()][[USER, ITEM]]

In [46]:
training_removed

Unnamed: 0,userID,itemID
3,1,1000
4,1,1001
5,1,1002
6,1,1003
7,1,1004
...,...,...
1586121,99,995
1586122,99,996
1586123,99,997
1586124,99,998


In [47]:
from recommenders.utils import constants as cc

In [48]:
total_users, total_items = data.classes.values()

In [49]:
total_users

['#na#', '1', '10', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '11', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '12', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '13', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '14', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '15', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '16', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '17', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '18', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '19', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '2', '20', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '21', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '22', '220', '221', '222', '223', '224', '225', '226', '227', '228', 

In [50]:
def score(
    learner,
    data,
    test_df,
    user_col=cc.DEFAULT_USER_COL,
    item_col=cc.DEFAULT_ITEM_COL,
    prediction_col=cc.DEFAULT_PREDICTION_COL,
    top_k=None,
):
    """Score all users+items provided and reduce to top_k items per user if top_k>0

    Args:
        learner (object): Model.
        test_df (pandas.DataFrame): Test dataframe.
        user_col (str): User column name.
        item_col (str): Item column name.
        prediction_col (str): Prediction column name.
        top_k (int): Number of top items to recommend.

    Returns:
        pandas.DataFrame: Result of recommendation
    """
    # replace values not known to the model with NaN
    total_users, total_items = data.classes.values()
    test_df.loc[~test_df[user_col].isin(total_users), user_col] = np.nan
    test_df.loc[~test_df[item_col].isin(total_items), item_col] = np.nan

    # map ids to embedding ids
    u = learner._get_idx(test_df[user_col], is_item=False)
    m = learner._get_idx(test_df[item_col], is_item=True)

    # score the pytorch model
    x = torch.column_stack((u, m))

    if torch.cuda.is_available():
        x = x.to("cuda")
        learner = learner.to("cuda")

    pred = learner.forward(x).detach().cpu().numpy()
    scores = pd.DataFrame(
        {user_col: test_df[user_col], item_col: test_df[item_col], prediction_col: pred}
    )
    scores = scores.sort_values([user_col, prediction_col], ascending=[True, False])

    if top_k is not None:
        top_scores = scores.groupby(user_col).head(top_k).reset_index(drop=True)
    else:
        top_scores = scores

    return top_scores

In [51]:
with Timer() as test_time:
    top_k_scores = score(model, 
                         data, # Agregado pero podria quitarlo 
                         test_df=training_removed,
                         user_col=USER, 
                         item_col=ITEM, 
                         prediction_col=PREDICTION)

In [52]:
top_k_scores

Unnamed: 0,userID,itemID,prediction
760,1,169,5.121271
1141,1,511,5.001065
500,1,1449,4.976738
1026,1,408,4.941924
1109,1,483,4.935520
...,...,...,...
1585643,99,564,1.758346
1585458,99,398,1.733279
1585506,99,440,1.719984
1584543,99,1088,1.688444


In [53]:
eval_map = map(test_df, top_k_scores, col_user=USER, col_item=ITEM, 
               col_rating=RATING, col_prediction=PREDICTION, 
               relevancy_method="top_k", k=TOP_K)

In [54]:
eval_ndcg = ndcg_at_k(test_df, top_k_scores, col_user=USER, col_item=ITEM, 
                      col_rating=RATING, col_prediction=PREDICTION, 
                      relevancy_method="top_k", k=TOP_K)

In [55]:
eval_precision = precision_at_k(test_df, top_k_scores, col_user=USER, col_item=ITEM, 
                                col_rating=RATING, col_prediction=PREDICTION, 
                                relevancy_method="top_k", k=TOP_K)

In [56]:
eval_recall = recall_at_k(test_df, top_k_scores, col_user=USER, col_item=ITEM, 
                          col_rating=RATING, col_prediction=PREDICTION, 
                          relevancy_method="top_k", k=TOP_K)

In [57]:
print("Model:\t\t" + model.__class__.__name__,
      "Top K:\t\t%d" % TOP_K,
      "MAP:\t\t%f" % eval_map,
      "NDCG:\t\t%f" % eval_ndcg,
      "Precision@K:\t%f" % eval_precision,
      "Recall@K:\t%f" % eval_recall, sep='\n')

Model:		EmbeddingDotBias
Top K:		10
MAP:		0.021510
NDCG:		0.134169
Precision@K:	0.121951
Recall@K:	0.049598


In [58]:
scores = score(model,
                data,
               test_df=test_df.copy(), 
               user_col=USER, 
               item_col=ITEM, 
               prediction_col=PREDICTION)

In [59]:
eval_r2 = rsquared(test_df, scores, col_user=USER, col_item=ITEM, col_rating=RATING, col_prediction=PREDICTION)
eval_rmse = rmse(test_df, scores, col_user=USER, col_item=ITEM, col_rating=RATING, col_prediction=PREDICTION)
eval_mae = mae(test_df, scores, col_user=USER, col_item=ITEM, col_rating=RATING, col_prediction=PREDICTION)
eval_exp_var = exp_var(test_df, scores, col_user=USER, col_item=ITEM, col_rating=RATING, col_prediction=PREDICTION)

print("Model:\t\t\t" + model.__class__.__name__,
      "RMSE:\t\t\t%f" % eval_rmse,
      "MAE:\t\t\t%f" % eval_mae,
      "Explained variance:\t%f" % eval_exp_var,
      "R squared:\t\t%f" % eval_r2, sep='\n')

Model:			EmbeddingDotBias
RMSE:			0.928071
MAE:			0.728880
Explained variance:	0.321262
R squared:		0.321262
