## Bayesian Personalized Ranking
(by Tevfik Aytekin)

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from scipy.sparse import csr_matrix
import copy

In [3]:
# Load the ratings table. As the preview below shows, the columns are
# userId, movieId, rating, timestamp; the rest of the notebook addresses the
# first two by position (column 0 = user, column 1 = item).
prefs = pd.read_csv(
    "../../datasets/ml-latest-small/ratings.csv",
    sep=",",
)

prefs.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [4]:
# Basic dataset statistics and model size.
# Column 0 holds user ids and column 1 holds item ids.
users = prefs.iloc[:, 0].unique()
items = prefs.iloc[:, 1].unique()

n_users = users.size
n_items = items.size
n_prefs = prefs.iloc[:, 1].size  # one row per observed preference
n_factors = 5                    # dimensionality of the latent factor vectors

print("Number of users:", n_users)
print("Number of items:", n_items)
print("Number of preferences:", n_prefs)
# NOTE: the value printed here is the fraction of user-item cells that are
# filled (observed / possible), i.e. the density of the interaction matrix.
print("Sparsity:", n_prefs / (n_users * n_items))

Number of users: 610
Number of items: 9724
Number of preferences: 100836
Sparsity: 0.016999683055613623


In [5]:
def calc_rank_error(X, u, i_p, i_n):
    """Return the score margin between a positive and a negative item.

    The margin is ``<w_u, h_{i_p}> - <w_u, h_{i_n}>``, where the vectors are
    looked up in the module-level ``user_factors`` and ``item_factors``
    dictionaries (not passed in as arguments).

    Parameters
    ----------
    X : unused; kept so existing call sites keep working.
    u : key of the user in ``user_factors``.
    i_p : key of the positive item in ``item_factors``.
    i_n : key of the negative item in ``item_factors``.

    Returns
    -------
    The difference of the two ``np.dot`` products. When the factors are
    ``(n_factors, 1)`` column vectors this is a ``(1, 1)`` array.
    """
    user_row = user_factors[u].T

    score_positive = np.dot(user_row, item_factors[i_p])
    score_negative = np.dot(user_row, item_factors[i_n])

    return score_positive - score_negative

In [15]:
def calc_error(X, u_factors, i_factors):
    """Estimate ranking quality on ``X`` by sampling (user, pos, neg) triples.

    For ``n_iters`` random triples the function computes
    ``sigmoid(<w_u, h_pos> - <w_u, h_neg>)`` and returns the mean. Despite the
    name, a *higher* value is better: 0.5 corresponds to random factors and
    values close to 1 mean positives are consistently scored above negatives.

    Parameters
    ----------
    X : pandas.DataFrame
        Preference table; column 0 holds user ids and column 1 item ids.
    u_factors : dict
        Maps user id -> latent factor vector.
    i_factors : dict
        Maps item id -> latent factor vector.

    Returns
    -------
    Mean of the sampled sigmoid values (same array shape as a single
    ``np.dot`` of the factor vectors), or ``nan`` if no triple could be
    sampled because every sampled user had interacted with every item in X.
    """
    n_iters = 100
    user_col = X.iloc[:, 0]
    item_col = X.iloc[:, 1]
    all_items = set(item_col.unique())

    total = 0.0
    n_scored = 0
    for _ in range(n_iters):
        # Sample a user by drawing a random row, so users are picked in
        # proportion to their number of preferences.
        u = user_col.iloc[np.random.randint(X.shape[0])]

        # Sample a positive item: one the user has interacted with in X.
        I_u = item_col[user_col == u].array
        i_p = I_u[np.random.randint(len(I_u))]

        # Sample a negative item from the items the user has NOT interacted
        # with. The index must be applied to this candidate list; indexing
        # the global item array instead could return a positive item.
        negatives = list(all_items.difference(I_u))
        if not negatives:
            continue
        i_n = negatives[np.random.randint(len(negatives))]

        # Score with the factors that were passed in, not with globals.
        margin = np.dot(u_factors[u].T, i_factors[i_p] - i_factors[i_n])
        total += sigmoid(margin)
        n_scored += 1

    if n_scored == 0:
        return float("nan")
    # Average over the triples actually scored, not over the rows of X.
    return total / n_scored

In [16]:
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``.

    Works on scalars and, elementwise, on NumPy arrays.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

In [None]:
# --- Model initialisation ---------------------------------------------------
# One (n_factors, 1) latent column vector per user and per item, drawn
# uniformly from [-0.5, 0.5). Iterating over the unique ids draws each vector
# exactly once instead of re-drawing it for every rating row.
user_factors = {user_id: np.random.rand(n_factors, 1) - 0.5 for user_id in users}
item_factors = {item_id: np.random.rand(n_factors, 1) - 0.5 for item_id in items}

X_train, X_test = train_test_split(prefs, test_size=0.1)
print("Initial error: ", calc_error(prefs, user_factors, item_factors))

alpha = 0.03       # SGD learning rate
my_lambda = 0.01   # L2 regularisation strength
n_iters = 100000   # number of sampled (user, positive, negative) triples

# --- Sampling index ---------------------------------------------------------
# Build the per-user positive items once, so each SGD step avoids a full
# DataFrame mask and a full set difference.
train_users = X_train.iloc[:, 0].to_numpy()
train_item_ids = X_train.iloc[:, 1].to_numpy()

positives_by_user = {}
for user_id, item_id in zip(train_users, train_item_ids):
    positives_by_user.setdefault(user_id, []).append(item_id)
positive_sets = {user_id: set(rated) for user_id, rated in positives_by_user.items()}

candidate_items = X_train.iloc[:, 1].unique()  # items seen in the training split
all_items = set(candidate_items)
n_candidates = candidate_items.size

# --- Stochastic gradient ascent on the BPR criterion ------------------------
for t in range(n_iters):
    # Sample a user by drawing a random training row (users are therefore
    # picked in proportion to their number of training preferences).
    u = train_users[np.random.randint(train_users.size)]

    # Sample a positive item for that user.
    I_u = positives_by_user[u]
    i_p = I_u[np.random.randint(len(I_u))]

    # Sample a negative item: a training item the user has NOT interacted
    # with. Rejection sampling guarantees this; the guard avoids an endless
    # loop for a user who has interacted with every training item.
    rated = positive_sets[u]
    if len(rated) < n_candidates:
        i_n = candidate_items[np.random.randint(n_candidates)]
        while i_n in rated:
            i_n = candidate_items[np.random.randint(n_candidates)]

        # BPR maximises ln(sigmoid(x_uij)); its derivative w.r.t. x_uij is
        # (1 - sigmoid(x_uij)), which scales every parameter gradient below.
        x_uij = calc_rank_error(X_train, u, i_p, i_n)
        grad_scale = 1 - sigmoid(x_uij)

        # Snapshot the current vectors so all three updates are computed from
        # the same (pre-update) parameters.
        w_u = user_factors[u]
        h_p = item_factors[i_p]
        h_n = item_factors[i_n]

        user_factors[u] = w_u + alpha * (grad_scale * (h_p - h_n) - my_lambda * w_u)
        item_factors[i_p] = h_p + alpha * (grad_scale * w_u - my_lambda * h_p)
        item_factors[i_n] = h_n + alpha * (-grad_scale * w_u - my_lambda * h_n)

    # Periodic progress report on both splits.
    if t % 1000 == 0:
        print("Iteration ", t)
        print("Train error: ", calc_error(X_train, user_factors, item_factors))
        print("Test error: ", calc_error(X_test, user_factors, item_factors))
    