In [1]:
import warnings
warnings.filterwarnings("ignore")
from collections import defaultdict
import time
import numpy as np
from math import sqrt
import tensorflow as tf

In [2]:
np.random.seed(42)
train_set = open("ml-1m/ratings.dat").readlines()
train_set = np.random.permutation(train_set)
train_threshold = int(0.75 * len(train_set))
# train_indices = []
# test_indices = []
train_user_indices = []
train_item_indices = []
test_user_indices = []
test_item_indices = []
train_ratings = []
test_ratings = []

data = defaultdict(dict)
user2id = {}
item2id = {}
index_user = 0
index_item = 0

for i, line in enumerate(train_set):
    user = line.split("::")[0]
    item = line.split("::")[1]
    rating = line.split("::")[2]
    
    try:
        user_id = user2id[user]
    except KeyError:
        user_id = index_user
        user2id[user] = index_user
        index_user += 1
    try:
        item_id = item2id[item]
    except KeyError:
        item_id = index_item
        item2id[item] = index_item
        index_item += 1
        
    if i < train_threshold:
    #    train_indices.append((user_id, item_id))
        train_user_indices.append(user_id)
        train_item_indices.append(item_id)
        train_ratings.append(int(rating))
        data[user_id].update(dict(zip([item_id], [int(rating)])))
    else:
    #    test_indices.append((user_id, item_id))
        test_user_indices.append(user_id)
        test_item_indices.append(item_id)
        test_ratings.append(int(rating))

In [3]:
len(train_ratings), len(test_ratings)

(750156, 250053)

In [4]:
print("before: ", len(train_ratings), len(test_ratings))

for u, i, r in zip(test_user_indices, test_item_indices, test_ratings):
    if u not in data:
        test_user_indices.remove(u)
        test_item_indices.remove(i)
        test_ratings.remove(r)

for u, i, r in zip(test_user_indices, test_item_indices, test_ratings):
    if u not in data:
        test_user_indices.remove(u)
        test_item_indices.remove(i)
        test_ratings.remove(r)

for u, i, r in zip(test_user_indices, test_item_indices, test_ratings):
    if u not in data:
        test_user_indices.remove(u)
        test_item_indices.remove(i)
        test_ratings.remove(r)

for u in test_user_indices:
    if u not in data:
        print("left", u)

print("after: ", len(train_ratings), len(test_ratings))

before:  7500 2500
after:  7500 1975


### SVD

In [9]:
train_indices = np.array([train_user_indices, train_item_indices]).T
test_indices = np.array([test_user_indices, test_item_indices]).T

In [5]:
%%time
np.random.seed(42)
tf.reset_default_graph()
n_factors = 100
n_users = len(user2id)
n_items = len(item2id)
n_epochs = 500
lr = 0.01
reg = 1e-4

global_mean = tf.placeholder(tf.float32, shape=[])
bu = tf.Variable(tf.zeros([n_users]))
bi = tf.Variable(tf.zeros([n_items]))
pu = tf.Variable(tf.random_normal([n_users, n_factors], 0.0, 0.01))
qi = tf.Variable(tf.random_normal([n_items, n_factors], 0.0, 0.01))
ratings = tf.placeholder(tf.int32, shape=[None])
indices = tf.placeholder(tf.int32, shape=[None, 2])

dot = tf.matmul(pu, qi, transpose_b=True)
bias_user_index = tf.gather(indices, 0, axis=1)
bias_item_index = tf.gather(indices, 1, axis=1)
bias_user = tf.nn.embedding_lookup(bu, bias_user_index)
bias_item = tf.nn.embedding_lookup(bi, bias_item_index)
pred = global_mean + bias_user + bias_item + tf.gather_nd(dot, indices)
loss = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(tf.cast(ratings, tf.float32), pred))))


# loss = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(tf.cast(ratings, tf.float32), 
#                                                   tf.gather_nd(dot, indices)))))


reg_pu = tf.contrib.layers.l2_regularizer(reg)(pu)
reg_qi = tf.contrib.layers.l2_regularizer(reg)(qi)
reg_bu = tf.contrib.layers.l2_regularizer(reg)(bu)
reg_bi = tf.contrib.layers.l2_regularizer(reg)(bi)
total_loss = tf.add_n([loss, reg_pu, reg_qi, reg_bu, reg_bi])
# optimizer = tf.train.GradientDescentOptimizer(lr)
optimizer = tf.train.AdamOptimizer(lr)
training_op = optimizer.minimize(total_loss)
init = tf.global_variables_initializer()

sess = tf.InteractiveSession()
sess.run(init)

step = 5
for epoch in range(n_epochs):   # batch
    sess.run(training_op, feed_dict={ratings: train_ratings, 
                                     indices: train_indices, 
                                     global_mean: np.mean(train_ratings)})
    if epoch % step == 0:
        train_loss = loss.eval(feed_dict={ratings: train_ratings, 
                                          indices: train_indices,
                                          global_mean: np.mean(train_ratings)})
        test_loss = loss.eval(feed_dict={ratings: test_ratings, 
                                         indices: test_indices, 
                                         global_mean: np.mean(train_ratings)})
        print("Epoch: ", epoch + 1, "\ttrain loss: ", train_loss, "\ttest loss: ", test_loss)

Epoch:  1 	train loss:  1.1099772 	test loss:  1.1103163
Epoch:  6 	train loss:  1.0758973 	test loss:  1.0770359
Epoch:  11 	train loss:  1.0183473 	test loss:  1.0220661
Epoch:  16 	train loss:  0.95823574 	test loss:  0.9663194
Epoch:  21 	train loss:  0.94376427 	test loss:  0.95290095
Epoch:  26 	train loss:  0.9421018 	test loss:  0.95110947
Epoch:  31 	train loss:  0.9393327 	test loss:  0.94824326
Epoch:  36 	train loss:  0.93477565 	test loss:  0.9437178
Epoch:  41 	train loss:  0.9305752 	test loss:  0.9395445
Epoch:  46 	train loss:  0.92739445 	test loss:  0.9363242
Epoch:  51 	train loss:  0.9249203 	test loss:  0.93377566
Epoch:  56 	train loss:  0.92283916 	test loss:  0.9316009
Epoch:  61 	train loss:  0.92104363 	test loss:  0.9297234
Epoch:  66 	train loss:  0.9194193 	test loss:  0.92803425
Epoch:  71 	train loss:  0.91791236 	test loss:  0.9264875
Epoch:  76 	train loss:  0.9165547 	test loss:  0.92510307
Epoch:  81 	train loss:  0.91538274 	test loss:  0.9239051
Ep

### SVD++ - map_fn

In [4]:
train_indices = np.array([train_user_indices, train_item_indices]).T
test_indices = np.array([test_user_indices, test_item_indices]).T

In [5]:
n_users = len(user2id)
n_items = len(item2id)

train_data = np.zeros((n_users, n_items), dtype=np.int32)
for u in data:
    u_items = list(data[u].keys())
    train_data[u] = np.array(u_items + [0] * (n_items - len(u_items)))

def nu_func(user):
    u_items = tf.nn.embedding_lookup(pseudo_data, user)
    zero = tf.constant(0, dtype=tf.int32)
    mask = tf.not_equal(u_items, zero)
    u_items_mask = tf.boolean_mask(u_items, mask)
    return tf.reduce_sum(tf.gather(yj, u_items_mask), axis=0) / tf.sqrt(tf.cast(tf.size(u_items_mask), tf.float32))

In [None]:
start_time = time.time()
np.random.seed(42)
tf.reset_default_graph()
n_factors = 100
n_users = len(user2id)
n_items = len(item2id)
user_list = np.array(list(data.keys()), dtype=np.int32)

n_epochs = 500
lr = 0.01
reg = 0.01
display_step = 1


ratings = tf.placeholder(tf.int32, shape=[None])
indices = tf.placeholder(tf.int32, shape=[None, 2])
pseudo_data = tf.placeholder(tf.int32, shape=[n_users, n_items])  ###### max_length
global_mean = tf.placeholder(tf.float32, shape=[])

bu = tf.Variable(tf.zeros([n_users]))
bi = tf.Variable(tf.zeros([n_items]))
pu = tf.Variable(tf.random_normal([n_users, n_factors], 0.0, 0.01))
qi = tf.Variable(tf.random_normal([n_items, n_factors], 0.0, 0.01))
yj = tf.Variable(tf.random_normal([n_items, n_factors], 0.0, 0.01))
nu = tf.map_fn(nu_func, user_list, dtype=tf.float32)

pn = pu + nu
dot = tf.matmul(pn, qi, transpose_b=True)   ### + nu
bias_user_index = tf.gather(indices, 0, axis=1)
bias_item_index = tf.gather(indices, 1, axis=1)
bias_user = tf.nn.embedding_lookup(bu, bias_user_index)
bias_item = tf.nn.embedding_lookup(bi, bias_item_index)
pred = global_mean + bias_user + bias_item + tf.gather_nd(dot, indices)
loss = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(tf.cast(ratings, tf.float32), pred))))

reg_pu = tf.contrib.layers.l2_regularizer(reg)(pu)
reg_qi = tf.contrib.layers.l2_regularizer(reg)(qi)
reg_bu = tf.contrib.layers.l2_regularizer(reg)(bu)
reg_bi = tf.contrib.layers.l2_regularizer(reg)(bi)
reg_yj = tf.contrib.layers.l2_regularizer(reg)(yj)
total_loss = tf.add_n([loss, reg_pu, reg_qi, reg_bu, reg_bi, reg_yj])
# optimizer = tf.train.GradientDescentOptimizer(lr)
optimizer = tf.train.AdamOptimizer(lr)
training_op = optimizer.minimize(total_loss)
init = tf.global_variables_initializer()

sess = tf.InteractiveSession()
sess.run(init)
print("before time: ", time.time() - start_time)


for epoch in range(n_epochs):   # batch
    t0 = time.time()
    sess.run(training_op, feed_dict={ratings: train_ratings, 
                                     indices: train_indices, 
                                     global_mean: np.mean(train_ratings), 
                                     pseudo_data: train_data})
    print("Epoch: ", epoch + 1, "training time: ", time.time() - t0)
    
    if epoch % display_step == 0:
        t1 = time.time()
        train_loss = loss.eval(feed_dict={ratings: train_ratings, 
                                          indices: train_indices,
                                          global_mean: np.mean(train_ratings), 
                                          pseudo_data: train_data})
        test_loss = loss.eval(feed_dict={ratings: test_ratings, 
                                         indices: test_indices, 
                                         global_mean: np.mean(train_ratings), 
                                         pseudo_data: train_data})
        print("evaluate time: ", time.time() - t1)
        print("Epoch: ", epoch + 1, "\ttrain loss: ", train_loss, "\ttest loss: ", test_loss)
        print()

before time:  6.2889463901519775
Epoch:  1 training time:  139.17226481437683
evaluate time:  0.8897066116333008
Epoch:  1 	train loss:  1.1100589 	test loss:  1.110304

Epoch:  2 training time:  150.10314011573792
evaluate time:  0.8937220573425293
Epoch:  2 	train loss:  1.1059463 	test loss:  1.1059988

Epoch:  3 training time:  147.465482711792
evaluate time:  0.991757869720459
Epoch:  3 	train loss:  1.1031151 	test loss:  1.1031557

Epoch:  4 training time:  150.2542712688446
evaluate time:  1.0497841835021973
Epoch:  4 	train loss:  1.1011149 	test loss:  1.1011428

Epoch:  5 training time:  148.20391535758972
evaluate time:  0.9735527038574219
Epoch:  5 	train loss:  1.0995452 	test loss:  1.0995774

Epoch:  6 training time:  150.10250663757324
evaluate time:  0.9864838123321533
Epoch:  6 	train loss:  1.0982659 	test loss:  1.0983186

Epoch:  7 training time:  149.47698211669922
evaluate time:  0.880845308303833
Epoch:  7 	train loss:  1.0972111 	test loss:  1.0973097

Epoch: 

### batch

In [None]:
start_time = time.time()
np.random.seed(42)
tf.reset_default_graph()
n_factors = 100
n_users = len(user2id)
n_items = len(item2id)
user_list = np.array(list(data.keys()), dtype=np.int32)
# user_list = np.arange(n_users, dtype=np.int32)

n_epochs = 500
lr = 0.001
reg = 0.01
display_step = 1


ratings = tf.placeholder(tf.int32, shape=[None])
indices = tf.placeholder(tf.int32, shape=[None, 2])
pseudo_data = tf.placeholder(tf.int32, shape=[n_users, n_items])  ###### max_length
global_mean = tf.placeholder(tf.float32, shape=[])

bu = tf.Variable(tf.zeros([n_users]))
bi = tf.Variable(tf.zeros([n_items]))
pu = tf.Variable(tf.random_normal([n_users, n_factors], 0.0, 0.01))
qi = tf.Variable(tf.random_normal([n_items, n_factors], 0.0, 0.01))
yj = tf.Variable(tf.random_normal([n_items, n_factors], 0.0, 0.01))
nu = tf.map_fn(nu_func, user_list, dtype=tf.float32)

pn = pu + nu
dot = tf.matmul(pn, qi, transpose_b=True)   ### + nu
bias_user_index = tf.gather(indices, 0, axis=1)
bias_item_index = tf.gather(indices, 1, axis=1)
bias_user = tf.nn.embedding_lookup(bu, bias_user_index)
bias_item = tf.nn.embedding_lookup(bi, bias_item_index)
pred = global_mean + bias_user + bias_item + tf.gather_nd(dot, indices)
loss = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(tf.cast(ratings, tf.float32), pred))))

reg_pu = tf.contrib.layers.l2_regularizer(reg)(pu)
reg_qi = tf.contrib.layers.l2_regularizer(reg)(qi)
reg_bu = tf.contrib.layers.l2_regularizer(reg)(bu)
reg_bi = tf.contrib.layers.l2_regularizer(reg)(bi)
reg_yj = tf.contrib.layers.l2_regularizer(reg)(yj)
total_loss = tf.add_n([loss, reg_pu, reg_qi, reg_bu, reg_bi, reg_yj])
# optimizer = tf.train.GradientDescentOptimizer(lr)
optimizer = tf.train.AdamOptimizer(lr)
training_op = optimizer.minimize(total_loss)
init = tf.global_variables_initializer()

sess = tf.InteractiveSession()
sess.run(init)
print("before time: ", time.time() - start_time)

batch_size = 10000
for epoch in range(n_epochs):   # batch
    
    t0 = time.time()
    n_batches = len(train_ratings) // batch_size
    for i in range(n_batches):
        end = min((i+1) * batch_size, len(train_ratings))
        train_batch_ratings = train_ratings[i * batch_size : end]
        train_batch_indices = train_indices[i * batch_size : end]
        sess.run(training_op, feed_dict={ratings: train_batch_ratings, 
                                         indices: train_batch_indices, 
                                         global_mean: np.mean(train_ratings), 
                                         pseudo_data: train_data})
        
        print("nu: ", pu.eval()[:3, :3])

    print("Epoch: ", epoch + 1, "training time: ", time.time() - t0)
    
    if epoch % display_step == 0:
        t1 = time.time()
        train_loss = loss.eval(feed_dict={ratings: train_ratings, 
                                          indices: train_indices,
                                          global_mean: np.mean(train_ratings), 
                                          pseudo_data: train_data})
        test_loss = loss.eval(feed_dict={ratings: test_ratings, 
                                         indices: test_indices, 
                                         global_mean: np.mean(train_ratings), 
                                         pseudo_data: train_data})
        print("evaluate time: ", time.time() - t1)
        print("Epoch: ", epoch + 1, "\ttrain loss: ", train_loss, "\ttest loss: ", test_loss)
        print()

before time:  5.525773286819458
nu:  [[-0.0057214  -0.00065996 -0.00517013]
 [ 0.00721052  0.00029257  0.00556136]
 [ 0.007543    0.00088455 -0.0076777 ]]
nu:  [[-4.7351066e-03  2.7604483e-04 -4.1846763e-03]
 [ 6.2189465e-03  5.3497328e-04  4.5721321e-03]
 [ 6.5523842e-03  1.7850427e-05 -6.6848118e-03]]
nu:  [[-0.00376259  0.00097399 -0.00321155]
 [ 0.00523736  0.00028469  0.00359729]
 [ 0.00557088 -0.0006008  -0.00570177]]
nu:  [[-2.8149788e-03  1.1397188e-03 -2.2780269e-03]
 [ 4.2720060e-03 -9.1242313e-05  2.6469096e-03]
 [ 4.6002800e-03 -1.0864055e-03 -4.7363606e-03]]
nu:  [[-0.00191121  0.00088591 -0.00138573]
 [ 0.00332929 -0.00032933  0.00173388]
 [ 0.00365379 -0.00121142 -0.00379023]]
nu:  [[-0.00105588  0.00048513 -0.00055325]
 [ 0.00241964 -0.00034645  0.00087357]
 [ 0.00273687 -0.00107057 -0.00287164]]
nu:  [[-2.5976484e-04  6.5703556e-05  2.0307174e-04]
 [ 1.5534930e-03 -1.9178115e-04  8.3973166e-05]
 [ 1.8597220e-03 -7.6582358e-04 -1.9909653e-03]]
nu:  [[ 4.5194512e-04 -3.2

### SVD++ - list()

In [9]:
train_indices = np.array([train_user_indices, train_item_indices]).T
test_indices = np.array([test_user_indices, test_item_indices]).T

In [6]:
start_time = time.time()
np.random.seed(42)
tf.reset_default_graph()
n_factors = 100
n_users = len(user2id)
n_items = len(item2id)
n_epochs = 500
lr = 0.01
reg = 1e-4
display_step = 5

global_mean = tf.placeholder(tf.float32, shape=[])
bu = tf.Variable(tf.zeros([n_users]))
bi = tf.Variable(tf.zeros([n_items]))
pu = tf.Variable(tf.random_normal([n_users, n_factors], 0.0, 0.01))
qi = tf.Variable(tf.random_normal([n_items, n_factors], 0.0, 0.01))
yj = tf.Variable(tf.random_normal([n_items, n_factors], 0.0, 0.01))  ####### yj
# nu_sqrt = np.sqrt(len(data[u]))
# Iu = [tf.reduce_sum(tf.gather(yj, list(data[u].keys())), axis=0) / np.sqrt(len(data[u])) for u in data.keys()]
# nu = tf.convert_to_tensor(Iu, dtype=tf.float32)
nu = [tf.reduce_sum(tf.gather(yj, list(data[u].keys())), axis=0) / np.sqrt(len(data[u])) for u in data.keys()]

ratings = tf.placeholder(tf.int32, shape=[None])
indices = tf.placeholder(tf.int32, shape=[None, 2])

pn = pu + nu
dot = tf.matmul(pn, qi, transpose_b=True)   ### + nu
bias_user_index = tf.gather(indices, 0, axis=1)
bias_item_index = tf.gather(indices, 1, axis=1)
bias_user = tf.nn.embedding_lookup(bu, bias_user_index)
bias_item = tf.nn.embedding_lookup(bi, bias_item_index)
pred = global_mean + bias_user + bias_item + tf.gather_nd(dot, indices)
loss = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(tf.cast(ratings, tf.float32), pred))))


# loss = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(tf.cast(ratings, tf.float32), 
#                                                   tf.gather_nd(dot, indices)))))


reg_pu = tf.contrib.layers.l2_regularizer(reg)(pu)
reg_qi = tf.contrib.layers.l2_regularizer(reg)(qi)
reg_bu = tf.contrib.layers.l2_regularizer(reg)(bu)
reg_bi = tf.contrib.layers.l2_regularizer(reg)(bi)
reg_yj = tf.contrib.layers.l2_regularizer(reg)(yj)
total_loss = tf.add_n([loss, reg_pu, reg_qi, reg_bu, reg_bi, reg_yj])
# optimizer = tf.train.GradientDescentOptimizer(lr)
optimizer = tf.train.AdamOptimizer(lr)
training_op = optimizer.minimize(total_loss)
init = tf.global_variables_initializer()

sess = tf.InteractiveSession()
sess.run(init)
print("before time: ", time.time() - start_time)

for epoch in range(n_epochs):   # batch
    sess.run(training_op, feed_dict={ratings: train_ratings, 
                                     indices: train_indices, 
                                     global_mean: np.mean(train_ratings)})
    
    if epoch % display_step == 0:
        train_loss = loss.eval(feed_dict={ratings: train_ratings, 
                                          indices: train_indices,
                                          global_mean: np.mean(train_ratings)})
        test_loss = loss.eval(feed_dict={ratings: test_ratings, 
                                         indices: test_indices, 
                                         global_mean: np.mean(train_ratings)})
        print("Epoch: ", epoch + 1, "\ttrain loss: ", train_loss, "\ttest loss: ", test_loss)

before time:  208.99212789535522
Epoch:  1 	train loss:  1.1095084 	test loss:  1.1097265
Epoch:  6 	train loss:  0.9734519 	test loss:  0.98090374
Epoch:  11 	train loss:  0.9440581 	test loss:  0.95206994
Epoch:  16 	train loss:  0.92387855 	test loss:  0.93359923
Epoch:  21 	train loss:  0.90651083 	test loss:  0.91885835
Epoch:  26 	train loss:  0.8933545 	test loss:  0.907738
Epoch:  31 	train loss:  0.88307565 	test loss:  0.8992146
Epoch:  36 	train loss:  0.8759572 	test loss:  0.8935039
Epoch:  41 	train loss:  0.8710839 	test loss:  0.88962024
Epoch:  46 	train loss:  0.8676232 	test loss:  0.88676316
Epoch:  51 	train loss:  0.86540294 	test loss:  0.8850033
Epoch:  56 	train loss:  0.8639244 	test loss:  0.88378817
Epoch:  61 	train loss:  0.8625998 	test loss:  0.88279444
Epoch:  66 	train loss:  0.86126405 	test loss:  0.8819406
Epoch:  71 	train loss:  0.85990965 	test loss:  0.88110894
Epoch:  76 	train loss:  0.858642 	test loss:  0.8803308
Epoch:  81 	train loss:  0.8

In [10]:
start_time = time.time()
np.random.seed(42)
tf.reset_default_graph()
n_factors = 100
n_users = len(user2id)
n_items = len(item2id)
n_epochs = 500
lr = 0.01
reg = 1e-4
display_step = 5

global_mean = tf.placeholder(tf.float32, shape=[])
bu = tf.Variable(tf.zeros([n_users]))
bi = tf.Variable(tf.zeros([n_items]))
pu = tf.Variable(tf.random_normal([n_users, n_factors], 0.0, 0.01))
qi = tf.Variable(tf.random_normal([n_items, n_factors], 0.0, 0.01))
yj = tf.Variable(tf.random_normal([n_items, n_factors], 0.0, 0.01))
# nu = [tf.reduce_sum(tf.gather(yj, list(data[u].keys())), axis=0) / np.sqrt(len(data[u])) for u in data.keys()]

N = [[] for u in range(n_users)]
for u, i, in zip(train_user_indices, train_item_indices):
    N[u].append(i)

sparse = {'indices': [], 'values': []}
for i, user in enumerate(N):
    for j, item in enumerate(user):
        sparse['indices'].append((i, j))
        sparse['values'].append(item)
        
sparse['dense_shape'] = (n_users, n_items)
implicit_feedback = tf.SparseTensor(**sparse)

yjs = tf.nn.embedding_lookup_sparse(yj, implicit_feedback, sp_weights=None, combiner='sqrtn')
nu = tf.gather(yjs, np.arange(n_users))

'''
ci = tf.nn.embedding_lookup(c, train_item_indices)
cs = tf.nn.embedding_lookup_sparse(ci, implicit_feedback, sp_weights=None, combine='sqrtn')
nu = tf.gather(cs, train_item_indices)
'''

ratings = tf.placeholder(tf.int32, shape=[None])
indices = tf.placeholder(tf.int32, shape=[None, 2])

pn = pu + nu
dot = tf.matmul(pn, qi, transpose_b=True)   ### + nu
bias_user_index = tf.gather(indices, 0, axis=1)
bias_item_index = tf.gather(indices, 1, axis=1)
bias_user = tf.nn.embedding_lookup(bu, bias_user_index)
bias_item = tf.nn.embedding_lookup(bi, bias_item_index)
pred = global_mean + bias_user + bias_item + tf.gather_nd(dot, indices)
loss = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(tf.cast(ratings, tf.float32), pred))))

reg_pu = tf.contrib.layers.l2_regularizer(reg)(pu)
reg_qi = tf.contrib.layers.l2_regularizer(reg)(qi)
reg_bu = tf.contrib.layers.l2_regularizer(reg)(bu)
reg_bi = tf.contrib.layers.l2_regularizer(reg)(bi)
reg_yj = tf.contrib.layers.l2_regularizer(reg)(yj)
total_loss = tf.add_n([loss, reg_pu, reg_qi, reg_bu, reg_bi, reg_yj])
# optimizer = tf.train.GradientDescentOptimizer(lr)
optimizer = tf.train.AdamOptimizer(lr)
training_op = optimizer.minimize(total_loss)
init = tf.global_variables_initializer()

sess = tf.InteractiveSession()
sess.run(init)
print("before time: ", time.time() - start_time)

for epoch in range(n_epochs):   # batch
    sess.run(training_op, feed_dict={ratings: train_ratings, 
                                     indices: train_indices, 
                                     global_mean: np.mean(train_ratings)})
    
    if epoch % display_step == 0:
        train_loss = loss.eval(feed_dict={ratings: train_ratings, 
                                          indices: train_indices,
                                          global_mean: np.mean(train_ratings)})
        test_loss = loss.eval(feed_dict={ratings: test_ratings, 
                                         indices: test_indices, 
                                         global_mean: np.mean(train_ratings)})
        print("Epoch: ", epoch + 1, "\ttrain loss: ", train_loss, "\ttest loss: ", test_loss)

before time:  5.503317832946777
Epoch:  1 	train loss:  1.1084875 	test loss:  1.1087164
Epoch:  6 	train loss:  0.9728028 	test loss:  0.9801653
Epoch:  11 	train loss:  0.9436891 	test loss:  0.95172304
Epoch:  16 	train loss:  0.92393833 	test loss:  0.9336424
Epoch:  21 	train loss:  0.906229 	test loss:  0.9189427
Epoch:  26 	train loss:  0.89356774 	test loss:  0.9078903
Epoch:  31 	train loss:  0.8826899 	test loss:  0.89891696
Epoch:  36 	train loss:  0.8748378 	test loss:  0.8927291
Epoch:  41 	train loss:  0.86970496 	test loss:  0.8887451
Epoch:  46 	train loss:  0.8666014 	test loss:  0.8860921
Epoch:  51 	train loss:  0.86459994 	test loss:  0.88449574
Epoch:  56 	train loss:  0.8633052 	test loss:  0.8833991
Epoch:  61 	train loss:  0.8620211 	test loss:  0.8825423
Epoch:  66 	train loss:  0.86076546 	test loss:  0.88172114
Epoch:  71 	train loss:  0.8595821 	test loss:  0.8809589
Epoch:  76 	train loss:  0.858482 	test loss:  0.88021207
Epoch:  81 	train loss:  0.8575063

### batch list [ ]

In [4]:
train_indices = np.array([train_user_indices, train_item_indices]).T
test_indices = np.array([test_user_indices, test_item_indices]).T

In [5]:
start_time = time.time()
np.random.seed(42)
tf.reset_default_graph()
n_factors = 100
n_users = len(user2id)
n_items = len(item2id)
n_epochs = 500
lr = 0.01
reg = 1e-4
display_step = 5

global_mean = tf.placeholder(tf.float32, shape=[])
bu = tf.Variable(tf.zeros([n_users]))
bi = tf.Variable(tf.zeros([n_items]))
pu = tf.Variable(tf.random_normal([n_users, n_factors], 0.0, 0.01))
qi = tf.Variable(tf.random_normal([n_items, n_factors], 0.0, 0.01))
yj = tf.Variable(tf.random_normal([n_items, n_factors], 0.0, 0.01))  ####### yj
# nu_sqrt = np.sqrt(len(data[u]))
# Iu = [tf.reduce_sum(tf.gather(yj, list(data[u].keys())), axis=0) / np.sqrt(len(data[u])) for u in data.keys()]
# nu = tf.convert_to_tensor(Iu, dtype=tf.float32)
nu = [tf.reduce_sum(tf.gather(yj, list(data[u].keys())), axis=0) / np.sqrt(len(data[u])) for u in data.keys()]

ratings = tf.placeholder(tf.int32, shape=[None])
indices = tf.placeholder(tf.int32, shape=[None, 2])

pn = pu + nu
dot = tf.matmul(pn, qi, transpose_b=True)   ### + nu
bias_user_index = tf.gather(indices, 0, axis=1)
bias_item_index = tf.gather(indices, 1, axis=1)
bias_user = tf.nn.embedding_lookup(bu, bias_user_index)
bias_item = tf.nn.embedding_lookup(bi, bias_item_index)
pred = global_mean + bias_user + bias_item + tf.gather_nd(dot, indices)
loss = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(tf.cast(ratings, tf.float32), pred))))


# loss = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(tf.cast(ratings, tf.float32), 
#                                                   tf.gather_nd(dot, indices)))))


reg_pu = tf.contrib.layers.l2_regularizer(reg)(pu)
reg_qi = tf.contrib.layers.l2_regularizer(reg)(qi)
reg_bu = tf.contrib.layers.l2_regularizer(reg)(bu)
reg_bi = tf.contrib.layers.l2_regularizer(reg)(bi)
reg_yj = tf.contrib.layers.l2_regularizer(reg)(yj)
total_loss = tf.add_n([loss, reg_pu, reg_qi, reg_bu, reg_bi, reg_yj])
# optimizer = tf.train.GradientDescentOptimizer(lr)
optimizer = tf.train.AdamOptimizer(lr)
training_op = optimizer.minimize(total_loss)
init = tf.global_variables_initializer()

sess = tf.InteractiveSession()
sess.run(init)
print("before time: ", time.time() - start_time)

batch_size = 10000
for epoch in range(n_epochs):
    n_batches = len(train_ratings) // batch_size
    t0 = time.time()
    for i in range(n_batches):
        end = min((i+1) * batch_size, len(train_ratings))
        train_batch_ratings = train_ratings[i * batch_size: end]
        train_batch_indices = train_indices[i * batch_size: end]
        sess.run(training_op, feed_dict={ratings: train_batch_ratings, 
                                         indices: train_batch_indices, 
                                         global_mean: np.mean(train_ratings)})
        
#        print("nu: ", pu.eval()[:3, :3])

    print("Epoch: ", epoch + 1, "training time: ", time.time() - t0)
    
    if epoch % display_step == 0:
        t1 = time.time()
        train_loss = loss.eval(feed_dict={ratings: train_ratings, 
                                          indices: train_indices,
                                          global_mean: np.mean(train_ratings)})
        test_loss = loss.eval(feed_dict={ratings: test_ratings, 
                                         indices: test_indices, 
                                         global_mean: np.mean(train_ratings)})
        print("evaluate time: ", time.time() - t1)
        print("Epoch: ", epoch + 1, "\ttrain loss: ", train_loss, "\ttest loss: ", test_loss)
        print()

before time:  208.64603281021118
Epoch:  1 training time:  78.99554300308228
evaluate time:  7.826488018035889
Epoch:  1 	train loss:  0.8932651 	test loss:  0.9069787

Epoch:  2 training time:  48.82956886291504
Epoch:  3 training time:  48.218790769577026
Epoch:  4 training time:  48.840534687042236
Epoch:  5 training time:  48.52321267127991
Epoch:  6 training time:  50.79559254646301
evaluate time:  0.4228475093841553
Epoch:  6 	train loss:  0.8731428 	test loss:  0.89099956

Epoch:  7 training time:  47.72296142578125
Epoch:  8 training time:  48.81165027618408
Epoch:  9 training time:  48.78493094444275
Epoch:  10 training time:  50.08138036727905
Epoch:  11 training time:  50.19994759559631
evaluate time:  0.4386272430419922
Epoch:  11 	train loss:  0.8755225 	test loss:  0.89458275

Epoch:  12 training time:  49.42346549034119


KeyboardInterrupt: 

-----

# <center> Self Implementation </center>

In [4]:
def ratings(dataset):
    for user, r in data.items():
        for item, rating in r.items():
            yield user, item, rating

In [5]:
def compute_rmse_slow(data="train"):
    if data == "train":
        user_indices = train_user_indices
        item_indices = train_item_indices
        ratings = train_ratings
    elif data == "test":
        user_indices = test_user_indices
        item_indices = test_item_indices
        ratings = test_ratings
        
    score = 0
    for u, i, rating in zip(user_indices, item_indices, ratings):
        pred = np.dot(pu[u], qi[i])
        score += np.power((rating - pred), 2)
    return np.sqrt(score)

In [5]:
def compute_rmse(mode="train"):
    if mode == "train":
        user_indices = train_user_indices
        item_indices = train_item_indices
        ratings = train_ratings
    elif mode == "test":
        user_indices = test_user_indices
        item_indices = test_item_indices
        ratings = test_ratings
        
    pred = np.dot(pu, qi.T)[user_indices, item_indices]
    score = np.sqrt(np.mean(np.power(pred - ratings, 2)))
    return score

In [6]:
def compute_rmse_bias(mode="train"):
    if mode == "train":
        user_indices = train_user_indices
        item_indices = train_item_indices
        ratings = train_ratings
    elif mode == "test":
        user_indices = test_user_indices
        item_indices = test_item_indices
        ratings = test_ratings
        
    global_mean = np.mean(train_ratings)    
    pred = global_mean + bu[user_indices] + bi[item_indices] + \
           np.dot(pu, qi.T)[user_indices, item_indices]
    score = np.sqrt(np.mean(np.power(pred - ratings, 2)))
    return score

### SVD

In [9]:
%%time
np.random.seed(42)
n_factors = 100
n_users = len(user2id)
n_items = len(item2id)
global_mean = np.mean(train_ratings)

n_epochs = 200
lr = 0.002
reg = 0.1   ############################ reg

bu = np.zeros((n_users))
bi = np.zeros((n_items))
pu = np.random.normal(size=(n_users, n_factors))
qi = np.random.normal(size=(n_items, n_factors))
for epoch in range(n_epochs):
    for u, i, r in ratings(data):
        dot = np.dot(qi[i], pu[u])
        err = r - (global_mean + bu[u] + bi[i] + dot)
        bu[u] += lr * (err - reg * bu[u])
        bi[i] += lr * (err - reg * bi[i])
        qi[i] += lr * (err * pu[u] - reg * qi[i])
        pu[u] += lr * (err * qi[i] - reg * pu[u])
    
    if epoch % 5 == 0:
        print("Epoch: ", epoch + 1, "\ttraining RMSE: ", compute_rmse_bias("train"), 
              "\ttest RMSE: ", compute_rmse_bias("test"))

Epoch:  1 	training RMSE:  2.9362434999652156 	test RMSE:  4.098058099608248
Epoch:  6 	training RMSE:  0.9600320510483358 	test RMSE:  2.1875638551408567
Epoch:  11 	training RMSE:  0.803399926953229 	test RMSE:  1.7524336067426713
Epoch:  16 	training RMSE:  0.7746065010862653 	test RMSE:  1.5165011842067824
Epoch:  21 	training RMSE:  0.7692515864275132 	test RMSE:  1.3652587199037947


KeyboardInterrupt: 

### SVD++

In [6]:
def compute_rmse_implicit(mode="train"):
    if mode == "train":
        user_indices = train_user_indices
        item_indices = train_item_indices
        ratings = train_ratings
    elif mode == "test":
        user_indices = test_user_indices
        item_indices = test_item_indices
        ratings = test_ratings
        
    global_mean = np.mean(train_ratings)
#    nu_sqrt = np.sqrt(len(data[u]))
    nu = np.array([np.sum(yj[list(data[u].keys())], axis=0) / np.sqrt(len(data[u])) for u in data.keys()])
    pred = global_mean + bu[user_indices] + bi[item_indices] + \
           np.dot(pu + nu, qi.T)[user_indices, item_indices]
    score = np.sqrt(np.mean(np.power(pred - ratings, 2)))
    return score

In [6]:
%%time
np.random.seed(42)
n_factors = 100
n_users = len(user2id)
n_items = len(item2id)
global_mean = np.mean(train_ratings)

n_epochs = 200
lr = 0.002
reg = 0.1

bu = np.zeros((n_users))
bi = np.zeros((n_items))
pu = np.random.normal(size=(n_users, n_factors))
qi = np.random.normal(size=(n_items, n_factors))
yj = np.random.normal(size=(n_items, n_factors))

for epoch in range(n_epochs):
    t0 = time.time()
    for u, i, r in ratings(data):
        t1 = time.time()
        nu_sqrt = np.sqrt(len(data[u]))
        nu = np.sum(yj[list(data[u].keys())], axis=0) / nu_sqrt
        dot = np.dot(qi[i], pu[u] + nu)
        err = r - (global_mean + bu[u] + bi[i] + dot)
        bu[u] += lr * (err - reg * bu[u])
        bi[i] += lr * (err - reg * bi[i])
        qi[i] += lr * (err * (pu[u] + nu) - reg * qi[i])
        pu[u] += lr * (err * qi[i] - reg * pu[u])

        for j in data[u]:
            yj[j] += lr * (err * qi[i] / nu_sqrt - reg * yj[j])
    print("Epoch: ", epoch + 1, "\ttime: ", time.time() - t0)
    
    if epoch % 1 == 0:
        print("Epoch: ", epoch + 1, "\ttraining RMSE: ", compute_rmse_implicit("train"), 
              "\ttest RMSE: ", compute_rmse_implicit("test"))

Epoch:  1 	time:  1610.9779999256134
Epoch:  1 	training RMSE:  3.3552425198797797 	test RMSE:  4.407144461077849
Epoch:  2 	time:  1644.4771127700806
Epoch:  2 	training RMSE:  2.0716387279736987 	test RMSE:  3.329736071044476
Epoch:  3 	time:  1653.775175333023
Epoch:  3 	training RMSE:  1.5287149341135262 	test RMSE:  2.8417261609841877
Epoch:  4 	time:  1641.5009999275208
Epoch:  4 	training RMSE:  1.244712407614461 	test RMSE:  2.549848920069196


KeyboardInterrupt: 

In [25]:
%%time
np.random.seed(42)
n_factors = 100
n_users = len(user2id)
n_items = len(item2id)
global_mean = np.mean(train_ratings)

n_epochs = 200
lr = 0.002
reg = 0.1

bu = np.zeros((n_users))
bi = np.zeros((n_items))
pu = np.random.normal(size=(n_users, n_factors))
qi = np.random.normal(size=(n_items, n_factors))
yj = np.random.normal(size=(n_items, n_factors))

for epoch in range(n_epochs):
    t0 = time.time()
    for u, i, r in ratings(data):
        u_items = list(data[u].keys())
        nu_sqrt = np.sqrt(len(u_items))
        nu = np.sum(yj[u_items], axis=0) / nu_sqrt
        dot = np.dot(qi[i], pu[u] + nu)
        err = r - (global_mean + bu[u] + bi[i] + dot)
        bu[u] += lr * (err - reg * bu[u])
        bi[i] += lr * (err - reg * bi[i])
        qi[i] += lr * (err * (pu[u] + nu) - reg * qi[i])
        pu[u] += lr * (err * qi[i] - reg * pu[u])
        yj[u_items] += lr * (err * qi[u_items] / nu_sqrt - reg * yj[u_items])
    print("Epoch: ", epoch + 1, "\ttime: ", time.time() - t0)
    
    if epoch % 1 == 0:
        t1 = time.time()
        print("Epoch: ", epoch + 1, "\ttraining RMSE: ", compute_rmse_implicit("train"), 
              "\ttest RMSE: ", compute_rmse_implicit("test"))
        print("evaluate time: ", time.time() - t1)

Epoch:  1 	time:  430.5702919960022
Epoch:  1 	training RMSE:  2.986250850310729 	test RMSE:  4.119221831354795
evaluate time:  1.342958927154541
Epoch:  2 	time:  430.4068841934204
Epoch:  2 	training RMSE:  1.8967664819373093 	test RMSE:  3.2052684515603778
evaluate time:  1.67173171043396
Epoch:  3 	time:  410.6034822463989
Epoch:  3 	training RMSE:  1.4367741172250574 	test RMSE:  2.7772792417419585
evaluate time:  1.5271949768066406
Epoch:  4 	time:  401.0427813529968
Epoch:  4 	training RMSE:  1.1929674473569962 	test RMSE:  2.512029929104096
evaluate time:  1.4937841892242432
Epoch:  5 	time:  403.96313071250916
Epoch:  5 	training RMSE:  1.0497991926666308 	test RMSE:  2.3239321630364063
evaluate time:  1.539628505706787


KeyboardInterrupt: 