In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.contrib import layers
from data import dataprep

In [2]:
# Load the train / test splits from the project-local `data.dataprep` helper.
# Later cells index both dicts by 'user_id', 'movie_id' and 'rating' and call
# .max() / .shape on the values, so each value is presumably a NumPy array with
# one entry per rating -- TODO confirm against dataprep.mf_train_test.
train_dict, test_dict = dataprep.mf_train_test()

In [3]:
# Embedding-table sizes. Ids are used directly as row indices into the
# embedding tables, so each table needs (largest id + 1) rows.
# The maximum is taken over BOTH splits: an id that only occurs in the test
# split and is larger than every training id would otherwise index past the
# end of the table when the model is evaluated on the test data.
n_user = int(max(train_dict['user_id'].max(), test_dict['user_id'].max())) + 1
n_movie = int(max(train_dict['movie_id'].max(), test_dict['movie_id'].max())) + 1

# Model / optimisation hyperparameters.
n_dim = 20            # width of each user / movie embedding vector
reg_param = 0.01      # scale passed to the L2 regulariser on the embedding matrices
learning_rate = 0.01  # learning rate passed to the Adam optimiser

In [4]:
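# Unused alternative: feed_dict placeholders, superseded by the queue-based
# input pipeline defined in the next cell. Kept for reference only.
# users = tf.placeholder(shape=[None], dtype=tf.int32)
# movies = tf.placeholder(shape=[None], dtype=tf.int32)
# ratings = tf.placeholder(shape=[None], dtype=tf.float32)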

In [5]:
# Queue-backed input pipeline.
# Training: shuffled mini-batches of 10000 rows drawn from train_dict.
trainBatch = tf.train.shuffle_batch(
    train_dict,
    batch_size=10000,
    capacity=80000,
    min_after_dequeue=20000,
    num_threads=2,
    enqueue_many=True)

# Evaluation: a single batch as large as the whole test split.
n_test = test_dict['user_id'].shape[0]
testBatch = tf.train.shuffle_batch(
    test_dict,
    batch_size=n_test,
    capacity=n_test,
    min_after_dequeue=0,
    num_threads=1,
    enqueue_many=True)


def get_train_data():
    """Return the (user_id, movie_id, rating) tensors of the training batch."""
    batch = trainBatch
    return batch['user_id'], batch['movie_id'], batch['rating']


def get_test_data():
    """Return the (user_id, movie_id, rating) tensors of the test batch."""
    batch = testBatch
    return batch['user_id'], batch['movie_id'], batch['rating']


# Boolean switch fed at run time: True selects the training batch, False the test batch.
# NOTE(review): both batch tensors are created outside the branch functions; per
# the tf.cond documentation only ops created inside the branch callables are
# executed conditionally, so each sess.run likely dequeues from both queues --
# confirm whether that cost matters here.
is_train = tf.placeholder(tf.bool)
users, movies, ratings = tf.cond(is_train, get_train_data, get_test_data)

In [6]:
# Trainable parameters of the matrix-factorisation model: one embedding row and
# one scalar bias per user and per movie.
with tf.variable_scope("embedding"):
    # User factors, shape [n_user, n_dim], Xavier-initialised.
    user_weight = tf.get_variable(
        "user_w",
        shape=[n_user, n_dim],
        dtype=tf.float32,
        initializer=layers.xavier_initializer())
    # Per-user bias, shape [n_user], starts at zero.
    user_bias = tf.get_variable(
        "user_b",
        shape=[n_user],
        dtype=tf.float32,
        initializer=tf.zeros_initializer)

    # Movie factors, shape [n_movie, n_dim], Xavier-initialised.
    movie_weight = tf.get_variable(
        "movie_w",
        shape=[n_movie, n_dim],
        dtype=tf.float32,
        initializer=layers.xavier_initializer())
    # Per-movie bias, shape [n_movie], starts at zero.
    movie_bias = tf.get_variable(
        "movie_b",
        shape=[n_movie],
        dtype=tf.float32,
        initializer=tf.zeros_initializer)

In [7]:
# Forward pass: predicted rating = <user vector, movie vector> + user bias + movie bias.
with tf.name_scope("inference"):
    # Gather the parameter rows that belong to the ids in the current batch.
    user_emb = tf.nn.embedding_lookup(user_weight, users)
    u_b = tf.nn.embedding_lookup(user_bias, users)
    movie_emb = tf.nn.embedding_lookup(movie_weight, movies)
    m_b = tf.nn.embedding_lookup(movie_bias, movies)

    # Element-wise product summed over axis 1 gives one dot product per example.
    interaction = tf.reduce_sum(tf.multiply(user_emb, movie_emb), 1)
    pred = interaction + u_b + m_b

# Training objective, optimiser step and the TensorBoard summaries.
with tf.name_scope("loss"):
    # L2 penalty on both embedding matrices (the bias vectors are not regularised).
    regularizer = layers.l2_regularizer(scale=reg_param)
    reg_loss = layers.apply_regularization(
        regularizer,
        weights_list=[user_weight, movie_weight])

    # Prediction error shared by the objective and the reported metric.
    residual = pred - ratings

    # Objective: tf.nn.l2_loss of the residual plus the regularisation term.
    loss = tf.nn.l2_loss(residual) + reg_loss
    tf.summary.scalar('l2loss', loss)

    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_ops = optimizer.minimize(loss)

    # Root-mean-squared error over the batch; reported only, not optimised directly.
    rmse = tf.sqrt(tf.reduce_mean(tf.pow(residual, 2)))
    tf.summary.scalar('rmse', rmse)

    # Single op that evaluates every summary registered so far.
    merged = tf.summary.merge_all()

In [8]:
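# Unused alternative: full-batch training driven by feed_dict. It needs the
# `users` / `movies` / `ratings` placeholders that are commented out in cell In [4].
# with tf.Session() as sess: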
#     sess.run(tf.global_variables_initializer())
#     train_input_dict = {users: train_dict['user_id']
#         , movies: train_dict['movie_id']
#         , ratings: train_dict['rating']}
#     test_input_dict = {users: test_dict['user_id']
#         , movies: test_dict['movie_id']
#         , ratings: test_dict['rating']}
#     for i in range(200):
#         _, loss_val = sess.run([train_ops, loss], feed_dict=train_input_dict)
#         if i % 10 == 0:
#             rmse_train = sess.run(rmse, feed_dict=train_input_dict)
#             rmse_test = sess.run(rmse, feed_dict=test_input_dict)
#             print("train rmse: %.3f , test rmse %.3f" % (rmse_train, rmse_test))

In [10]:
# Train for 200 steps, reporting train / test RMSE every 10 steps and logging
# the merged summaries for TensorBoard.
with tf.Session() as sess:
    # One event-file writer per split, each under its own sub-directory.
    summaries_dir = '_summary/mf'
    train_writer = tf.summary.FileWriter(summaries_dir + '/train',
                                         sess.graph)
    test_writer = tf.summary.FileWriter(summaries_dir + '/test',
                                        sess.graph)
    try:
        tf.global_variables_initializer().run()

        # Start the background threads that keep the shuffle_batch queues filled.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            for i in range(200):
                # One optimisation step on a training batch.
                _, loss_val = sess.run([train_ops, loss], feed_dict={is_train: True})
                if i % 10 == 0:
                    # Each sess.run pulls from the input queues again, so the
                    # train metric is measured on a different batch than the
                    # one just used for the update.
                    rmse_train, train_summary = sess.run([rmse, merged], feed_dict={is_train: True})
                    rmse_test, test_summary = sess.run([rmse, merged], feed_dict={is_train: False})
                    print("train rmse: %.3f , test rmse %.3f" % (rmse_train, rmse_test))
                    train_writer.add_summary(train_summary, i)
                    test_writer.add_summary(test_summary, i)
        finally:
            # Always stop and join the queue threads, even when a step raised,
            # so they do not outlive the session.
            coord.request_stop()
            coord.join(threads)
    finally:
        # Closing flushes any buffered events to disk and releases the file handles.
        train_writer.close()
        test_writer.close()

train rmse: 3.679 , test rmse 3.691
train rmse: 3.463 , test rmse 3.469
train rmse: 2.976 , test rmse 2.995
train rmse: 2.091 , test rmse 2.107
train rmse: 1.131 , test rmse 1.173
train rmse: 1.081 , test rmse 1.110
train rmse: 0.949 , test rmse 0.992
train rmse: 0.934 , test rmse 0.978
train rmse: 0.908 , test rmse 0.965
train rmse: 0.902 , test rmse 0.956
train rmse: 0.894 , test rmse 0.954
train rmse: 0.883 , test rmse 0.953
train rmse: 0.883 , test rmse 0.952
train rmse: 0.879 , test rmse 0.950
train rmse: 0.898 , test rmse 0.949
train rmse: 0.876 , test rmse 0.949
train rmse: 0.870 , test rmse 0.948
train rmse: 0.867 , test rmse 0.946
train rmse: 0.856 , test rmse 0.944
train rmse: 0.846 , test rmse 0.944
