In [42]:
import os

import numpy as np
import pandas as pd
import tensorflow as tf

In [38]:
from tensorflow.keras import layers
from tensorflow.keras import regularizers
from tensorflow.keras import models
from tensorflow.keras import optimizers

In [3]:
# Directory holding the ml-100k files read below (u.data, u.item).
# Set the ML100K_DIR environment variable to point at a local copy; when it is
# unset the original hard-coded location is used, so existing setups keep working.
path = os.environ.get('ML100K_DIR', r'D:\Data\ml-100k')

In [4]:
# The ratings file is tab-separated and has no header row, so the column
# names are supplied explicitly.
rating_cols = ['user', 'item', 'rating', 'timestamp']
ratings = pd.read_csv(f'{path}/u.data', sep='\t', header=None, names=rating_cols)
ratings.head()

Unnamed: 0,user,item,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [5]:
# The item file is pipe-separated, latin-1 encoded and has no header row.
# Five named columns are followed by 19 columns labelled g0..g18.
movie_cols = ['item', 'title', 'date', 'N', 'url'] + [f'g{i}' for i in range(19)]
movies = pd.read_csv(f'{path}/u.item', sep='|', encoding='latin-1',
                     header=None, names=movie_cols)

# Attach the title to every rating. No `on=` is given, so the join uses the
# column the two frames share ('item').
rating_movie = pd.merge(ratings, movies[['item', 'title']])
rating_movie.head()

Unnamed: 0,user,item,rating,timestamp,title
0,196,242,3,881250949,Kolya (1996)
1,63,242,3,875747190,Kolya (1996)
2,226,242,5,883888671,Kolya (1996)
3,154,242,3,879138235,Kolya (1996)
4,306,242,5,876503793,Kolya (1996)


In [6]:
# Row count of the merged frame.
rating_movie.shape[0]

100000

In [43]:
# Reproducible random split: each row goes to training with probability 0.8,
# otherwise to validation.
# A dedicated seeded generator is used on purpose: *assigning* to
# `np.random.seed` (instead of calling it) seeds nothing and replaces the
# function with an int for the rest of the session. RandomState(42) produces
# the same draws as np.random.seed(42) followed by np.random.rand(...).
split_rng = np.random.RandomState(42)
msk = split_rng.rand(len(rating_movie)) < 0.8
trn = rating_movie[msk]
val = rating_movie[~msk]

In [44]:
# Number of distinct user ids and item ids present in the merged ratings.
n_users, n_movies = (rating_movie[col].nunique() for col in ('user', 'item'))
n_users, n_movies

(943, 1682)

In [45]:
# Output dimension passed to the user and movie Embedding layers.
n_factors = 50

### Simple Model

In [57]:
# The embedding tables are indexed directly by the raw ids in the ratings
# frame, so each table needs (largest id + 1) rows. Sizing by the number of
# distinct ids leaves the largest id out of range whenever ids do not start
# at 0 (NOTE(review): ml-100k ids are presumably 1-based - confirm).
n_user_rows = int(rating_movie.user.max()) + 1
n_movie_rows = int(rating_movie.item.max()) + 1

# One integer id per sample for each side, mapped to an n_factors-wide vector
# with an L2 activity penalty of 1e-4.
user_in = layers.Input(shape=(1,), dtype='int64', name='user_in')
u = layers.Embedding(n_user_rows, n_factors, input_length=1,
                     activity_regularizer=regularizers.l2(1e-4))(user_in)
movie_in = layers.Input(shape=(1,), dtype='int64', name='movie_in')
m = layers.Embedding(n_movie_rows, n_factors, input_length=1,
                     activity_regularizer=regularizers.l2(1e-4))(movie_in)

In [58]:
# Join the user and movie embeddings, then push them through a small
# regression head: flatten -> dropout -> 70-unit ReLU -> dropout -> 1 output.
x = layers.concatenate([u, m])
head_layers = [
    layers.Flatten(),
    layers.Dropout(rate=0.3),
    layers.Dense(70, activation='relu'),
    layers.Dropout(rate=0.75),
    layers.Dense(1),
]
for head_layer in head_layers:
    x = head_layer(x)

In [59]:
# Wrap the graph from (user id, movie id) to the predicted rating in a Model
# and train it with Adam on mean squared error.
nn = models.Model(inputs=[user_in, movie_in], outputs=x)
nn.compile(optimizer=optimizers.Adam(0.001), loss='mse')

In [60]:
# Train for 8 epochs on the training split and evaluate on the validation
# split after every epoch.
nn.fit(
    x=[trn.user, trn.item],
    y=trn.rating,
    batch_size=64,
    epochs=8,
    validation_data=([val.user, val.item], val.rating),
)

Train on 79999 samples, validate on 20001 samples
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<tensorflow.python.keras.callbacks.History at 0x259c1cb6c18>

In [61]:
# Write the fitted no-bias model to disk.
# NOTE(review): '.hd5' is an unusual suffix ('.h5' / '.hdf5' are the common
# HDF5 ones) and newer Keras versions choose the save format from the suffix -
# confirm this file name is intended.
nn.save('tf_model_nobias.hd5')

### Model with movie and user bias

In [103]:
def embedding_input(name, n_in, n_out, reg):
    """Build an integer id input together with its embedding output.

    Args:
        name: Name given to the Input layer.
        n_in: Number of rows in the embedding table (Embedding input dim).
        n_out: Width of each embedding vector.
        reg: L2 factor used for the embedding's activity regularizer.

    Returns:
        A tuple ``(input_tensor, embedded_tensor)``.
    """
    id_input = layers.Input(shape=(1,), dtype='int64', name=name)
    table = layers.Embedding(
        n_in,
        n_out,
        input_length=1,
        activity_regularizer=regularizers.l2(reg),
    )
    return id_input, table(id_input)

In [104]:
# The tables are indexed by the raw ids, so they are sized as (largest id + 1).
# Sizing by the distinct-id count leaves the largest id out of range whenever
# ids do not start at 0 (NOTE(review): ml-100k ids are presumably 1-based -
# confirm).
user_in, u = embedding_input('user_in', int(rating_movie.user.max()) + 1, n_factors, 1e-4)
movie_in, m = embedding_input('movie_in', int(rating_movie.item.max()) + 1, n_factors, 1e-4)

In [105]:
def create_bias(inp, n_in):
    """Return a flattened one-value-per-id embedding of ``inp``.

    Args:
        inp: Integer id input tensor to look up.
        n_in: Number of rows in the embedding table.

    Returns:
        The width-1 embedding of ``inp`` passed through a Flatten layer.
    """
    per_id_value = layers.Embedding(n_in, 1, input_length=1)(inp)
    flatten = layers.Flatten()
    return flatten(per_id_value)

In [106]:
# Per-user and per-movie bias terms. The tables are indexed by the raw ids, so
# they are sized as (largest id + 1). The distinct-id count is one row short
# whenever ids do not start at 0 (NOTE(review): ml-100k ids are presumably
# 1-based - confirm).
ub = create_bias(user_in, int(rating_movie.user.max()) + 1)
mb = create_bias(movie_in, int(rating_movie.item.max()) + 1)

In [107]:
# Predicted rating = dot(user vector, movie vector) + user bias + movie bias.
interaction = layers.dot([u, m], axes=-1)
x = layers.Flatten()(interaction)
for bias_term in (ub, mb):
    x = layers.add([x, bias_term])

# Train with Adam on mean squared error.
model = models.Model(inputs=[user_in, movie_in], outputs=x)
model.compile(optimizer=optimizers.Adam(0.001), loss='mse')

In [108]:
# One epoch at the learning rate the model was compiled with.
model.fit(
    x=[trn.user, trn.item],
    y=trn.rating,
    batch_size=64,
    epochs=1,
    validation_data=([val.user, val.item], val.rating),
)

Train on 79999 samples, validate on 20001 samples


<tensorflow.python.keras.callbacks.History at 0x25c0b5894a8>

In [109]:
# Raise the learning rate to 0.01 for the next round of training.
# Plain attribute assignment (`model.optimizer.lr = ...`) can merely rebind the
# Python attribute while the already-built training function keeps reading the
# old variable, leaving the rate unchanged. set_value writes into the
# optimizer's existing learning-rate variable instead.
tf.keras.backend.set_value(model.optimizer.lr, 0.01)

In [110]:
# Six more epochs, validating on the held-out split after each one.
model.fit(
    x=[trn.user, trn.item],
    y=trn.rating,
    batch_size=64,
    epochs=6,
    validation_data=([val.user, val.item], val.rating),
)

Train on 79999 samples, validate on 20001 samples
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<tensorflow.python.keras.callbacks.History at 0x25c0b7db358>

In [111]:
# Drop the learning rate back to 0.001 for a final epoch.
# set_value updates the optimizer's existing learning-rate variable. Plain
# attribute assignment (`model.optimizer.lr = ...`) can leave the already-built
# training function reading the old value.
tf.keras.backend.set_value(model.optimizer.lr, 0.001)

In [112]:
# One last epoch, again validated on the held-out split.
model.fit(
    x=[trn.user, trn.item],
    y=trn.rating,
    batch_size=64,
    epochs=1,
    validation_data=([val.user, val.item], val.rating),
)

Train on 79999 samples, validate on 20001 samples


<tensorflow.python.keras.callbacks.History at 0x25c0b7db7f0>

In [116]:
# Write the bias model's weights to disk.
# NOTE(review): the on-disk format may depend on the file suffix and the
# installed Keras/TensorFlow version - confirm '.weights' is accepted by the
# version in use.
model.save_weights('tf_model_withbias.weights')


Consider using a TensorFlow optimizer from `tf.train`.
