## Bayesian Personalized Ranking
(by Tevfik Aytekin)

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from scipy.sparse import csr_matrix
import copy

In [3]:
prefs = pd.read_csv("../../datasets/ml-latest-small/ratings.csv", sep=",")

prefs.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [4]:
n_users = prefs.iloc[:,0].unique().size
n_items = prefs.iloc[:,1].unique().size
n_prefs = prefs.iloc[:,1].size
n_factors = 5
users = prefs.iloc[:,0].unique()
items = prefs.iloc[:,1].unique()

print("Number of users:",n_users)
print("Number of items:",n_items)
print("Number of preferences:",n_prefs)
print("Sparsity:",n_prefs/(n_users*n_items))

Number of users: 610
Number of items: 9724
Number of preferences: 100836
Sparsity: 0.016999683055613623


In [5]:
def calc_rank_error(X, u, i_p, i_n):
 
    pos_pred = np.dot(user_factors[u].T, item_factors[i_p])
    neg_pred = np.dot(user_factors[u].T, item_factors[i_n])
        
    return pos_pred - neg_pred

In [15]:
def calc_error(X, u_factors, i_factors):
    error = 0
    n_iters = 100
    all_items = set(X.iloc[:,1].unique())
    for t in range(n_iters):
        # sample a user
        r = np.random.randint(X.shape[0])
        u = X.iloc[r,0]
  
        #sample a positive item
        I_u = X[X.userId==u].iloc[:,1].array
        r = np.random.randint(len(I_u))
        i_p = I_u[r]

        #sample a negative item
        diff = all_items.difference(set(I_u))
        r = np.random.randint(len(diff))
        i_n = items[r]
        
        error += sigmoid(calc_rank_error(X, u, i_p, i_n))
    return error/X.shape[0]

In [16]:
def sigmoid(x):
    s = 1 / (1 + np.exp(-x))    
    return s

In [17]:
item_factors = {}
user_factors = {}
for r in range(n_prefs):
    user_factors[prefs.iloc[r,0]] = np.random.rand(n_factors,1) - 0.5
    item_factors[prefs.iloc[r,1]] = np.random.rand(n_factors,1) - 0.5
    
X_train, X_test = train_test_split(prefs, test_size=0.1)
print("Initial error: ", calc_error(prefs, user_factors, item_factors))

alpha = 0.03
my_lambda = 0.01
n_iters = 100000

all_items = set(X_train.iloc[:,1].unique())
for t in range(n_iters):
    # sample a user
    r = np.random.randint(X_train.shape[0])
    u = X_train.iloc[r,0]

    #sample a positive item
    I_u = X_train[X_train.userId==u].iloc[:,1].array
    r = np.random.randint(len(I_u))
    i_p = I_u[r]

    #sample a negative item
    diff = all_items.difference(set(I_u))
    r = np.random.randint(len(diff))
    i_n = items[r]


    error = sigmoid(calc_rank_error(X_train, u, i_p, i_n))
    user_factors[u] = user_factors[u] + alpha*(error*(1-error)*(item_factors[i_p]-item_factors[i_n]) - my_lambda*user_factors[u])
    item_factors[i_p] = item_factors[i_p] + alpha*(error*(1-error)*user_factors[u] - my_lambda*item_factors[i_p])  
    item_factors[i_n] = item_factors[i_n] + alpha*(-1*error*(1-error)*user_factors[u] - my_lambda*item_factors[i_n])  

       
    if (t % 1000 == 0):      
        print("Iteration ", t)
        print("Train error: ", calc_error(X_train, user_factors, item_factors))
        print("Test error: ", calc_error(X_test, user_factors, item_factors))
    

Initial error:  [[0.00050319]]
Iteration  0
Train error:  [[0.00055581]]
Test error:  [[0.00494893]]
Iteration  1000
Train error:  [[0.00054656]]
Test error:  [[0.00501381]]
Iteration  2000
Train error:  [[0.00055941]]
Test error:  [[0.00500319]]
Iteration  3000
Train error:  [[0.00054662]]
Test error:  [[0.00497768]]
Iteration  4000
Train error:  [[0.00054598]]
Test error:  [[0.0048884]]
Iteration  5000
Train error:  [[0.00055369]]
Test error:  [[0.0048928]]
Iteration  6000
Train error:  [[0.00054589]]
Test error:  [[0.00499707]]
Iteration  7000
Train error:  [[0.00054443]]
Test error:  [[0.00482033]]
Iteration  8000
Train error:  [[0.00054049]]
Test error:  [[0.005025]]
Iteration  9000
Train error:  [[0.00054638]]
Test error:  [[0.00501691]]
Iteration  10000
Train error:  [[0.00054696]]
Test error:  [[0.00497774]]
Iteration  11000
Train error:  [[0.00055039]]
Test error:  [[0.0047871]]
Iteration  12000
Train error:  [[0.00054737]]
Test error:  [[0.00493955]]
Iteration  13000
Train er

Traceback (most recent call last):
  File "/Users/tevfikaytekin/miniforge3/envs/pytorch/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/var/folders/8v/tw2k9h3n5k5d1stxdsh3q6d00000gn/T/ipykernel_59386/20383031.py", line 18, in <module>
    u = X_train.iloc[r,0]
        ~~~~~~~~~~~~^^^^^
  File "/Users/tevfikaytekin/miniforge3/envs/pytorch/lib/python3.11/site-packages/pandas/core/indexing.py", line 1145, in __getitem__
    if self._is_scalar_access(key):
       ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/tevfikaytekin/miniforge3/envs/pytorch/lib/python3.11/site-packages/pandas/core/indexing.py", line 1624, in _is_scalar_access
    if len(key) != self.ndim:
                   ^^^^^^^^^
  File "indexing.pyx", line 23, in pandas._libs.indexing.NDFrameIndexerBase.ndim.__get__
  File "/Users/tevfikaytekin/miniforge3/envs/pytorch/lib/python3.11/site-packages/pandas/core/generic.py", line 659, i