In [1]:
import pandas as pd
import numpy as np

In [2]:
# Column labels for the ratings file. Because `names` is supplied and the file
# is read with header=None, the first line of the file is treated as data.
# (Path suggests the MovieLens 100k dataset -- NOTE(review): confirm.)
names = [
    'user_id',
    'item_id',
    'rating',
    'timestamp',
]
df = pd.read_csv('./ml-100k/u.data', sep='\t', header=None, names=names)

In [3]:
# Number of distinct user ids and item ids that occur in the ratings table.
# NOTE(review): the ratings matrix built from these counts is indexed by
# (id - 1), which only lines up if ids run 1..n without gaps -- confirm.
n_users = len(df['user_id'].unique())
n_items = len(df['item_id'].unique())

In [4]:
# Create r_{ui}, our ratings matrix: one row per user, one column per item.
# Cells that never receive a rating keep the initial value 0.
ratings = np.zeros((n_users, n_items))
# Ids are shifted down by one to be used as 0-based row/column indices.
# A single indexed assignment fills every rated cell at once instead of
# looping over the DataFrame row by row in Python.
# NOTE(review): assumes each (user, item) pair appears at most once; NumPy
# does not guarantee which value wins when an index is repeated.
ratings[df['user_id'].values - 1, df['item_id'].values - 1] = df['rating'].values

In [5]:
# Split into training and test sets. 
# Remove 10 ratings for each user 
# and assign them to the test set
def train_test_split(ratings, n_test=10):
    """Split a ratings matrix into disjoint train and test matrices.

    For every user (row), ``n_test`` of that user's nonzero ratings are chosen
    at random without replacement, zeroed out in the training copy and
    written into the test matrix.

    Parameters
    ----------
    ratings : 2-D numpy array
        One row per user, one column per item; 0 means "no rating".
    n_test : int, optional
        Number of ratings to hold out per user (default 10).

    Returns
    -------
    train, test : numpy arrays with the same shape as ``ratings``
        ``train + test`` equals ``ratings`` and no cell is nonzero in both.

    Notes
    -----
    Users with fewer than ``n_test`` ratings are left entirely in the
    training set; sampling from them would otherwise raise ``ValueError``.
    Sampling uses the global ``np.random`` state, so seed it beforehand for a
    reproducible split.
    """
    test = np.zeros(ratings.shape)
    train = ratings.copy()
    for user in range(ratings.shape[0]):
        rated_items = ratings[user, :].nonzero()[0]
        if rated_items.size < n_test:
            # Not enough ratings to sample from: keep this user train-only.
            continue
        held_out = np.random.choice(rated_items,
                                    size=n_test,
                                    replace=False)
        train[user, held_out] = 0.
        test[user, held_out] = ratings[user, held_out]

    # Test and training are truly disjoint
    assert np.all((train * test) == 0)
    return train, test

# Build the train/test matrices from the full ratings matrix. The split draws
# from np.random and no seed is set in this cell, so it can differ per run.
train, test = train_test_split(ratings)

In [6]:
from sklearn.metrics import mean_squared_error

def get_mse(pred, actual):
    """Mean squared error between ``pred`` and ``actual`` on rated cells only.

    Only positions where ``actual`` is nonzero take part in the score; cells
    where ``actual`` is 0 are ignored, and ``pred`` is read at those same
    positions.

    Parameters
    ----------
    pred : numpy array of predicted ratings.
    actual : numpy array of true ratings, indexable at the same positions.

    Returns
    -------
    The value returned by ``sklearn.metrics.mean_squared_error``.
    """
    # Positions with a recorded (nonzero) rating; zero entries are ignored.
    observed = actual.nonzero()
    y_true = actual[observed].flatten()
    y_pred = pred[observed].flatten()
    # sklearn's signature is (y_true, y_pred). MSE is symmetric, so the value
    # is the same either way; this just matches the documented order.
    return mean_squared_error(y_true, y_pred)

In [7]:
from numpy.linalg import solve

def alt_step(latent_vector, 
             fixed_vector,
             ratings,
             _lambda,
             _type = 'user'):
    """One regularized least-squares half-step of alternating least squares.

    Every row of ``latent_vector`` is replaced, in place, by the solution of

        (F^T F + _lambda * I) x = r . F

    where ``F`` is ``fixed_vector`` and ``r`` is the matching row of
    ``ratings`` (``_type != 'item'``) or the matching column of ``ratings``
    (``_type == 'item'``).

    Parameters
    ----------
    latent_vector : 2-D numpy array, overwritten with the new factors.
    fixed_vector : 2-D numpy array of the factors held constant this step.
    ratings : 2-D numpy array of ratings.
    _lambda : float, ridge regularization strength.
    _type : str, ``'item'`` to solve for item factors; any other value
        solves for user factors.

    Returns
    -------
    The same ``latent_vector`` object that was passed in.
    """
    # Orient the ratings so that row k belongs to latent row k.
    targets = ratings.T if _type == 'item' else ratings
    ATA = fixed_vector.T.dot(fixed_vector)
    ATAlambdaI = ATA + np.eye(ATA.shape[0])*_lambda
    n_rows = latent_vector.shape[0]
    # The system matrix is identical for every row, so stack all right-hand
    # sides as columns and solve them in one call instead of once per row.
    rhs = targets[:n_rows].dot(fixed_vector)
    latent_vector[:, :] = solve(ATAlambdaI, rhs.T).T
    return latent_vector

In [43]:
def sgd(user_vector, item_vector, user_bias, item_bias, 
        global_bias, learning_rate, regularizer_factor, ratings, non_zero_row, non_zero_col):
    """Run one pass of stochastic gradient descent over the listed ratings.

    The (user, item) pairs given by ``non_zero_row[i]``, ``non_zero_col[i]``
    are visited once each in a freshly shuffled order. For every pair the
    prediction is

        global_bias + user_bias[user] + item_bias[item]
            + user_vector[user] . item_vector[item]

    and the biases and factor rows are nudged along the regularized
    squared-error gradient.

    ``user_vector``, ``item_vector``, ``user_bias`` and ``item_bias`` are
    modified in place; nothing is returned. ``global_bias`` is only read.
    Shuffling uses the global ``np.random`` state.
    """
    order = np.arange(len(non_zero_row))
    np.random.shuffle(order)
    for i in order:
        user = non_zero_row[i]
        item = non_zero_col[i]
        prediction = (global_bias + user_bias[user] + item_bias[item]
                      + user_vector[user, :].dot(item_vector[item, :].T))
        error = (ratings[user, item] - prediction) # error

        user_bias[user] += learning_rate * (error - regularizer_factor * user_bias[user])
        item_bias[item] += learning_rate * (error - regularizer_factor * item_bias[item])

        # Snapshot the user factors first: the item update must use the same
        # user factors that produced `error`, not the row updated just above.
        user_factors = user_vector[user, :].copy()
        user_vector[user, :] += learning_rate * (error * item_vector[item, :] - regularizer_factor * user_factors)
        item_vector[item, :] += learning_rate * (error * user_factors - regularizer_factor * item_vector[item, :])
    

In [50]:
def inference(user_vector, item_vector, global_bias, user_bias, item_bias):
    """Predict a rating for every (user, item) pair.

    Entry ``[u, i]`` of the result is

        global_bias + user_bias[u] + item_bias[i]
            + user_vector[u] . item_vector[i]

    Parameters
    ----------
    user_vector : 2-D numpy array, one row of factors per user.
    item_vector : 2-D numpy array, one row of factors per item.
    global_bias : scalar offset added to every prediction.
    user_bias : 1-D sequence with one entry per user.
    item_bias : 1-D sequence with one entry per item.

    Returns
    -------
    float64 numpy array of shape
    ``(user_vector.shape[0], item_vector.shape[0])``.
    """
    predictions = np.zeros((user_vector.shape[0], item_vector.shape[0]))
    # Broadcast the per-user and per-item offsets to the full grid, then add
    # all pairwise dot products from one matrix product rather than filling
    # the grid cell by cell in Python.
    baseline = global_bias + np.asarray(user_bias)[:, None] + np.asarray(item_bias)[None, :]
    predictions[:, :] = baseline + user_vector.dot(item_vector.T)
    return predictions

In [53]:
# Problem size, taken from the training matrix (rows = users, cols = items).
n_user, n_item = train.shape[:2]
n_factor = 40
steps = 400

# Factor matrices start as zero-mean Gaussian noise with std 1 / n_factor.
# The user draw comes before the item draw, so the sequence of random numbers
# consumed is unchanged.
user_vector = np.random.normal(size=(n_user, n_factor), scale=1.0 / n_factor)
item_vector = np.random.normal(size=(n_item, n_factor), scale=1.0 / n_factor)

# Biases start at zero; the global offset is the mean of the nonzero
# training entries.
user_bias = np.zeros(n_user)
item_bias = np.zeros(n_item)
global_bias = train[train != 0].mean()

# Coordinates of every nonzero training entry, which SGD iterates over.
non_zero_row, non_zero_col = np.nonzero(train)

learning_rate = 0.001
regularizer_factor = 0.001

In [54]:
# Run `steps` SGD passes. On every 10th pass (starting with pass 0) the
# current model is scored on both splits *before* that pass's update runs.
for i in range(steps):
    if i % 10 == 0:
        print("Step %d" % i)
        predictions = inference(user_vector=user_vector,
                                item_vector=item_vector,
                                global_bias=global_bias,
                                user_bias=user_bias,
                                item_bias=item_bias)
        print("test mse ", get_mse(pred=predictions, actual=test))
        print("train mse ", get_mse(pred=predictions, actual=train))
    sgd(user_vector=user_vector,
        item_vector=item_vector,
        user_bias=user_bias,
        item_bias=item_bias,
        global_bias=global_bias,
        learning_rate=learning_rate,
        regularizer_factor=regularizer_factor,
        ratings=train,
        non_zero_row=non_zero_row,
        non_zero_col=non_zero_col)
    # Disabled ALS alternative; usr_lambda / item_lambda are not defined in
    # the cells shown here.
#     user_vector = alt_step(user_vector, item_vector, train, usr_lambda)
#     item_vector = alt_step(item_vector, user_vector, train, item_lambda, 'item')


Step 0
test mse  1.2755496757878833
train mse  1.2663168859406702
Step 10
test mse  1.0183266436405871
train mse  0.9175904020787433
Step 20
test mse  0.979125252487933
train mse  0.8764130774587809
Step 30
test mse  0.9620323228727423
train mse  0.8581931659885981
Step 40
test mse  0.9526522189518604
train mse  0.8469674591719006
Step 50
test mse  0.9469877753796764
train mse  0.8381361994846179
Step 60
test mse  0.9429873321406207
train mse  0.8291620851839531
Step 70
test mse  0.9395559886101154
train mse  0.8177177139119635
Step 80
test mse  0.9357901098833429
train mse  0.8012193919745568
Step 90
test mse  0.9309036777865511
train mse  0.7777222752478801
Step 100
test mse  0.9248312104326007
train mse  0.7478985044123462
Step 110
test mse  0.9185644169774134
train mse  0.7146020005379419
Step 120
test mse  0.9127538070000007
train mse  0.6797245319407509
Step 130
test mse  0.9078782893136185
train mse  0.6437598649808202
Step 140
test mse  0.904259613134203
train mse  0.6071107855