In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split


In [2]:
# Problem size: number of entities, paired with the number of side-feature
# columns used for that entity type.
NB_USERS, FEAT_USER = 5000, 3
NB_ITEMS, FEAT_ITEM = 100, 6

# Training settings.
NB_EPOCHS = 500    # number of passes of the training loop
LAMBDA_REG = 1e-5  # weight of the L2 penalty added to the MSE (0 disables it)
# The learning rate is not configured here; it is set in the training cell.

In [3]:
# Load data
# Column names are passed explicitly to read_csv for both side-information tables.
users = pd.read_csv(
    'data/sushi/sushi3.udata', sep='\t',
    names=('uid', 'gender', 'age', 'time', 'old_prefecture', 'old_region',
           'old_eastwest', 'prefecture', 'region', 'eastwest', 'same'))
items = pd.read_csv(
    'data/sushi/sushi3.idata', sep='\t',
    names=('iid', 'name', 'style', 'major', 'minor', 'heaviness', 'frequency',
           'price', 'popularity'))

# Score table: row index is used as the user id, column index as the item id,
# and -1 marks a (user, item) pair without a rating.
R = pd.read_csv('data/sushi/sushi3b.5000.10.score', sep=' ', header=None)

# Collect the observed entries as (user, item, rating) triplets.
# np.nonzero walks the table in row-major order, i.e. the same order a nested
# "for row / for column" loop would produce, without the Python-level loop.
scores = np.array(R)
user_idx, item_idx = np.nonzero(scores != -1)
triplets = list(zip(user_idx.tolist(),
                    item_idx.tolist(),
                    scores[user_idx, item_idx].tolist()))
df_ratings = pd.DataFrame(triplets, columns=('user', 'item', 'rating'))

# Fix the shuffle seed so the train/test split (and therefore the reported
# RMSE values) can be reproduced after a kernel restart.
SPLIT_SEED = 42
train, test = train_test_split(df_ratings, test_size=0.2, shuffle=True,
                               random_state=SPLIT_SEED)

In [4]:
# TF
# Start from an empty default graph so that re-running this cell in the same
# kernel does not fail with "Variable W_V already exists" (tf.get_variable
# refuses to create a second variable with an existing name).
tf.reset_default_graph()
# Graph-level seed so the random initial values of the variables below are
# repeatable from one run to the next. Must be set before any op is created.
GRAPH_SEED = 42
tf.set_random_seed(GRAPH_SEED)

# Side features, stored in the graph as constants.
# A: one row per user with the columns (age, gender, region).
# B: one row per item with the columns (heaviness, frequency, price, popularity, style, major).
A = tf.constant(np.array(users[['age', 'gender', 'region']]).astype(np.float32))
B = tf.constant(np.array(items[['heaviness', 'frequency', 'price', 'popularity', 'style', 'major']]).astype(np.float32))

# Trainable parameters.
# W_V: per-item weights applied to the user features.
# W_U: per-user weights applied to the item features.
# M:   only referenced by the commented-out bilinear variant further down.
W_V = tf.get_variable('W_V', shape=[NB_ITEMS, FEAT_USER], dtype=np.float32, initializer=tf.truncated_normal_initializer(stddev=1))
W_U = tf.get_variable('W_U', shape=[NB_USERS, FEAT_ITEM], dtype=np.float32, initializer=tf.truncated_normal_initializer(stddev=1))
M = tf.get_variable('M', shape=[FEAT_USER, FEAT_ITEM], dtype=np.float32, initializer=tf.truncated_normal_initializer(stddev=1))
user_bias = tf.get_variable("user_bias", shape=[NB_USERS],
                            initializer=tf.truncated_normal_initializer(stddev=1))
item_bias = tf.get_variable("item_bias", shape=[NB_ITEMS],
                            initializer=tf.truncated_normal_initializer(stddev=1))

# Inputs: parallel 1-D batches of user ids, item ids and observed ratings.
user_batch = tf.placeholder(tf.int32, shape=[None])
item_batch = tf.placeholder(tf.int32, shape=[None])
rate_batch = tf.placeholder(tf.float32, shape=[None])

# Gather, for every (user, item) pair in the batch, its weights, biases and features.
weight_items = tf.nn.embedding_lookup(W_V, item_batch)
weight_users = tf.nn.embedding_lookup(W_U, user_batch)

bias_items = tf.nn.embedding_lookup(item_bias, item_batch)
bias_users = tf.nn.embedding_lookup(user_bias, user_batch)

feat_items = tf.nn.embedding_lookup(B, item_batch)
feat_users = tf.nn.embedding_lookup(A, user_batch)

# Prediction for a pair (u, i):
#   <A[u], W_V[i]> + <B[i], W_U[u]> + item_bias[i] + user_bias[u]
pred = (tf.reduce_sum(tf.multiply(feat_users, weight_items), 1)
        + tf.reduce_sum(tf.multiply(feat_items, weight_users), 1)
        + bias_items
        + bias_users)
# Alternative (disabled): bilinear form A[u] . M . B[i] plus the two biases.
# pred = (tf.reduce_sum(tf.multiply(tf.matmul(feat_users, M), feat_items), 1)
#         + bias_items
#         + bias_users)
cost_l2 = tf.losses.mean_squared_error(rate_batch, pred)

# L2 penalty, computed on the rows gathered for the batch rather than on the
# full parameter tables, so a row appearing several times in a batch is
# counted several times.
l2_user = tf.nn.l2_loss(weight_users)
l2_item = tf.nn.l2_loss(weight_items)
l2_bias_user = tf.nn.l2_loss(bias_users)
l2_bias_item = tf.nn.l2_loss(bias_items)
regularizer = tf.add(l2_user, l2_item)
regularizer = tf.add(regularizer, l2_bias_user)
regularizer = tf.add(regularizer, l2_bias_item)
# Penalty matching the disabled bilinear variant:
# regularizer = tf.nn.l2_loss(M)
penalty = tf.constant(LAMBDA_REG, dtype=tf.float32, shape=[])
# Training objective: mean squared error + LAMBDA_REG * L2 penalty.
cost = tf.add(cost_l2, tf.multiply(regularizer, penalty))

In [5]:
# Step size for Adam.
LEARNING_RATE = 0.1

# tf.train.get_global_step() returns None when the graph has no global step,
# which silently turns the `global_step=` argument into a no-op. Create the
# counter on demand so that minimize() really increments it.
global_step = tf.train.get_or_create_global_step()
train_op = tf.train.AdamOptimizer(LEARNING_RATE).minimize(cost, global_step=global_step)

init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

# The same full train/test sets are fed at every iteration, so build the feed
# dictionaries once instead of on every pass through the loop.
train_feed = {
    user_batch: train['user'].values,
    item_batch: train['item'].values,
    rate_batch: train['rating'].values,
}
test_feed = {
    user_batch: test['user'].values,
    item_batch: test['item'].values,
    rate_batch: test['rating'].values,
}

with tf.Session() as sess:
    sess.run(init_op)
    for i in range(NB_EPOCHS):
        # One full-batch optimisation step. The training metrics are fetched in
        # the same run() call as the update, so they may reflect the parameters
        # from before this step, whereas the test metrics below are computed
        # after it.
        _, train_pred, train_mse, reg, pen, train_cost = sess.run(
            [train_op, pred, cost_l2, regularizer, penalty, cost],
            feed_dict=train_feed)
        # Evaluation only: train_op is not fetched, so no parameters change here.
        test_pred, test_mse = sess.run([pred, cost_l2], feed_dict=test_feed)
        print('train rmse', train_mse ** 0.5, 'test rmse', test_mse ** 0.5)
        # print('reg', reg, 'full cost', train_cost)

train rmse 6.632528382092143 test rmse 5.743397778492466
train rmse 5.671019098106088 test rmse 4.985956879724265
train rmse 4.838722425464532 test rmse 4.368438377781428
train rmse 4.147162347423678 test rmse 3.8839308665101235
train rmse 3.5970648031922163 test rmse 3.5155957030029286
train rmse 3.1793589129302475 test rmse 3.2409100693158055
train rmse 2.875633501663359 test rmse 3.036608335128065
train rmse 2.6620647737344982 test rmse 2.8824899384454508
train rmse 2.514386553414219 test rmse 2.7628161288314064
train rmse 2.4110227538862175 test rmse 2.6660776878650654
train rmse 2.334549098946485 test rmse 2.5844804513951734
train rmse 2.2724199147900728 test rmse 2.5132934949468084
train rmse 2.21677095507286 test rmse 2.449752822396297
train rmse 2.1632177583986927 test rmse 2.391944371581309
train rmse 2.109438633841968 test rmse 2.338069083558
train rmse 2.054124994480725 test rmse 2.2861775520571515
train rmse 1.9963812275787551 test rmse 2.2343649697245365
train rmse 1.93560

train rmse 0.7834564712473342 test rmse 1.192696345017776
train rmse 0.7833662743101483 test rmse 1.192653515928921
train rmse 0.7832789967000569 test rmse 1.1926114349761263
train rmse 0.7831946393930316 test rmse 1.1925710018781142
train rmse 0.7831125944331664 test rmse 1.1925316670049866
train rmse 0.7830327864270761 test rmse 1.192494929960657
train rmse 0.7829554063782449 test rmse 1.1924605410623286
train rmse 0.7828811402217873 test rmse 1.192429050358236
train rmse 0.7828072096594486 test rmse 1.1924010079360075
train rmse 0.7827360134867196 test rmse 1.1923768139437567
train rmse 0.782667476294043 test rmse 1.1923566685715206
train rmse 0.7826001516972274 test rmse 1.1923393722731757
train rmse 0.7825352587084023 test rmse 1.1923252251142922
train rmse 0.7824713125241387 test rmse 1.192311877640887
train rmse 0.782409684633314 test rmse 1.192299029931721
train rmse 0.7823493089701253 test rmse 1.1922853322216063
train rmse 0.782291062037763 test rmse 1.1922717343391265
train 

train rmse 0.7801876474514967 test rmse 1.192403057404533
train rmse 0.7801853555131718 test rmse 1.1924086559347462
train rmse 0.7801830253689727 test rmse 1.1924094057359038
train rmse 0.7801803896237034 test rmse 1.1924081060802643
train rmse 0.7801785942558852 test rmse 1.1924138545464076
train rmse 0.7801764550888355 test rmse 1.1924127048553959
train rmse 0.7801739339201397 test rmse 1.192413804559865
train rmse 0.7801717565406187 test rmse 1.1924181533812384
train rmse 0.7801696555546183 test rmse 1.1924165538166003
train rmse 0.7801677455624354 test rmse 1.192419103121727
train rmse 0.7801659501655199 test rmse 1.19242155244897
train rmse 0.7801637345636309 test rmse 1.1924206027104323
train rmse 0.7801620919581688 test rmse 1.192424001771182
train rmse 0.7801596853439207 test rmse 1.1924247515626902
train rmse 0.780157889928456 test rmse 1.1924242517017372
train rmse 0.7801560563083976 test rmse 1.1924278506959218
train rmse 0.7801543754862249 test rmse 1.1924274508082153
trai

train rmse 0.7800818674822008 test rmse 1.1924774357322363
train rmse 0.7800819056862957 test rmse 1.1924836337168045
train rmse 0.780081829278104 test rmse 1.1924754363755037
train rmse 0.780082020298569 test rmse 1.1924872325317708
train rmse 0.7800816764616985 test rmse 1.192471987477261
train rmse 0.780081829278104 test rmse 1.1924919809515655
train rmse 0.7800814854411493 test rmse 1.1924662392913563
train rmse 0.7800816000534844 test rmse 1.1924994284350547
train rmse 0.7800811416040431 test rmse 1.1924583917232545
train rmse 0.7800814090329166 test rmse 1.1925058262360795
train rmse 0.7800807977667853 test rmse 1.1924576419528987
train rmse 0.7800813708287974 test rmse 1.1924974790982326
train rmse 0.7800805303377022 test rmse 1.1924745366638803
train rmse 0.7800808359709325 test rmse 1.1924759861989394
train rmse 0.7800807213584852 test rmse 1.1924945800786038
train rmse 0.7800803393168724 test rmse 1.1924642399158512
train rmse 0.7800807595626361 test rmse 1.1924962295216706
t