In [None]:
import tensorflow as tf
import numpy as np
from tensorflow.contrib import layers
from data import dataprep

In [None]:
# Load the train / test splits from the project-local `dataprep` helper.
# Both results are indexed later in this notebook by the keys 'user_id',
# 'movie_id' and 'rating', and are passed to tf.train.shuffle_batch with
# enqueue_many=True.
# NOTE(review): presumably dicts of equal-length NumPy arrays -- confirm in data/dataprep.
train_dict, test_dict = dataprep.mf_train_test()

In [None]:
# Model / optimisation hyper-parameters.
n_dim = 20            # width of each user / movie embedding row
reg_param = 0.01      # scale handed to the L2 regularizer
learning_rate = 0.01  # step size handed to the optimizer

# Embedding-table sizes: the largest id found in the training split, plus one.
# NOTE(review): assumes ids are non-negative integers and that the test split
# holds no id above the training maximum -- confirm in dataprep.
n_user, n_movie = (train_dict[key].max() + 1 for key in ('user_id', 'movie_id'))

In [None]:
# Input pipeline: one shuffling queue per split plus a boolean switch that
# selects which queue feeds the model.
n_test = test_dict['user_id'].shape[0]  # the test "batch" is the whole test split

trainBatch = tf.train.shuffle_batch(
    train_dict,
    batch_size=10000,
    capacity=80000,
    min_after_dequeue=20000,
    num_threads=2,
    enqueue_many=True
)
testBatch = tf.train.shuffle_batch(
    test_dict,
    batch_size=n_test,
    capacity=n_test,
    min_after_dequeue=0,
    num_threads=1,
    enqueue_many=True
)


def _as_triple(batch):
    """Return the (user_id, movie_id, rating) tensors of a batch dict."""
    return batch['user_id'], batch['movie_id'], batch['rating']


def get_train_data():
    """tf.cond branch yielding the tensors of the training batch."""
    return _as_triple(trainBatch)


def get_test_data():
    """tf.cond branch yielding the tensors of the test batch."""
    return _as_triple(testBatch)


# Feed True to read from the training queue, False to read from the test queue.
# NOTE(review): both dequeue ops are created outside the branch functions; per
# the tf.cond documentation such ops may execute regardless of the predicate --
# verify whether every step also dequeues from the queue that is not selected.
is_train = tf.placeholder(dtype=tf.bool)
users, movies, ratings = tf.cond(is_train, get_train_data, get_test_data)

In [None]:
with tf.variable_scope("embedding"):
    # Latent-factor tables: one row of n_dim floats per user id / movie id.
    user_weight = tf.get_variable("user_w",
                                  shape=[n_user, n_dim],
                                  dtype=tf.float32,
                                  initializer=layers.xavier_initializer())
    # Per-id scalar biases, started at zero. The initializer is instantiated:
    # get_variable expects an initializer object (a callable that takes a
    # shape); handing it the bare class only works on TF releases that
    # special-case classes.
    user_bias = tf.get_variable("user_b",
                                shape=[n_user],
                                dtype=tf.float32,
                                initializer=tf.zeros_initializer())
    movie_weight = tf.get_variable("movie_w",
                                   shape=[n_movie, n_dim],
                                   dtype=tf.float32,
                                   initializer=layers.xavier_initializer())
    movie_bias = tf.get_variable("movie_b",
                                 shape=[n_movie],
                                 dtype=tf.float32,
                                 initializer=tf.zeros_initializer())
    # Register one histogram summary per trainable variable in the graph,
    # named after the variable with the ":0" output suffix removed.
    for v in tf.trainable_variables():
        tf.summary.histogram(name=v.name.replace(":0", ""), values=v)

In [None]:
with tf.name_scope("inference"):
    # Gather the embedding row and the bias of every user / movie in the batch.
    user_emb = tf.nn.embedding_lookup(user_weight, users)
    movie_emb = tf.nn.embedding_lookup(movie_weight, movies)
    u_b = tf.nn.embedding_lookup(user_bias, users)
    m_b = tf.nn.embedding_lookup(movie_bias, movies)

    # Predicted rating = <user_emb, movie_emb> + user bias + movie bias.
    interaction = tf.reduce_sum(tf.multiply(user_emb, movie_emb), 1)
    pred = interaction + u_b + m_b

with tf.name_scope("loss"):
    # Prediction error for every example of the current batch.
    residual = pred - ratings

    # L2 penalty on both embedding matrices (the biases are not regularized).
    l2_penalty = layers.l2_regularizer(scale=reg_param)
    reg_loss = layers.apply_regularization(
        l2_penalty, weights_list=[user_weight, movie_weight])

    # Training objective: tf.nn.l2_loss of the residual plus the penalty.
    loss = tf.nn.l2_loss(residual) + reg_loss
    tf.summary.scalar('l2loss', loss)

    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_ops = optimizer.minimize(loss)

    # Root-mean-squared error of the batch, reported as the evaluation metric.
    rmse = tf.sqrt(tf.reduce_mean(tf.pow(residual, 2)))
    tf.summary.scalar('rmse', rmse)

    # Single op that evaluates every summary registered so far.
    merged = tf.summary.merge_all()

In [None]:
# Train for a fixed number of steps, periodically logging RMSE on a training
# batch and on the test batch to stdout and to TensorBoard event files.
n_steps = 200     # number of optimizer steps
eval_every = 10   # evaluate / write summaries every this many steps

with tf.Session() as sess:
    summaries_dir = '_summary/mf'
    train_writer = tf.summary.FileWriter(summaries_dir + '/train',
                                         sess.graph)
    test_writer = tf.summary.FileWriter(summaries_dir + '/test',
                                        sess.graph)
    sess.run(tf.global_variables_initializer())

    # Start the threads that keep the shuffle_batch queues filled.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        for i in range(n_steps):
            # Stop early if a queue-runner thread has requested a stop.
            if coord.should_stop():
                break
            sess.run(train_ops, feed_dict={is_train: True})
            if i % eval_every == 0:
                # Each run pulls a new batch from the input pipeline, so the
                # train RMSE is measured on a different batch than the one
                # used for the update above.
                rmse_train, train_summary = sess.run([rmse, merged], feed_dict={is_train: True})
                rmse_test, test_summary = sess.run([rmse, merged], feed_dict={is_train: False})
                print("train rmse: %.3f , test rmse %.3f" % (rmse_train, rmse_test))
                train_writer.add_summary(train_summary, i)
                test_writer.add_summary(test_summary, i)
    finally:
        # Always stop and join the queue threads, even if a step raised,
        # then flush and close the event files.
        try:
            coord.request_stop()
            coord.join(threads)
        finally:
            train_writer.close()
            test_writer.close()