In [14]:
import numpy as np
import pandas as pd
from keras.backend import clear_session
from keras.layers import Input, Embedding, Dense, Dropout, Flatten
from keras.layers.merge import concatenate
from keras.models import Model
from keras.regularizers import l2
from vaiutils import add_nb_path

In [3]:
# Base directory comes from vaiutils' add_nb_path(); the MovieLens files live
# under <DIR_MAIN>/data/MovieLens.
# NOTE(review): assumes add_nb_path() returns a plain string path - confirm.
DIR_MAIN = add_nb_path()
DIR_DATA = '{}/data/MovieLens'.format(DIR_MAIN)

In [4]:
# Read the two CSV files from the data directory: the per-user ratings table
# and the movies table.
ratings = pd.read_csv('{}/ratings.csv'.format(DIR_DATA))
movie_names = pd.read_csv('{}/movies.csv'.format(DIR_DATA))

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [6]:
# Distinct raw ids present in the ratings table.
users = ratings['userId'].unique()
movies = ratings['movieId'].unique()

# Number of distinct users / movies (used as embedding table sizes below),
# and the width of each embedding vector.
n_users, n_movies = len(users), len(movies)
n_factors = 50

In [9]:
# Map every raw id to its position in the corresponding unique-id array,
# giving contiguous 0-based indices.
userid2idx = dict(zip(users, range(len(users))))
movieid2idx = dict(zip(movies, range(len(movies))))

In [10]:
# Swap the raw ids in `ratings` for their contiguous indices.
# NOTE: the columns are overwritten in place, so re-running this cell would
# look up already-converted indices in the raw-id maps; reload `ratings` first.
ratings['userId'] = ratings['userId'].apply(userid2idx.__getitem__)
ratings['movieId'] = ratings['movieId'].apply(movieid2idx.__getitem__)

In [13]:
# Random row-wise split: each rating goes to the training set with
# probability 0.8, otherwise to the validation set.
# A dedicated, seeded generator makes the split identical after every
# Restart & Run All without touching NumPy's global random state.
split_rng = np.random.RandomState(42)
msk = split_rng.rand(len(ratings)) < 0.8
trn = ratings[msk]
val = ratings[~msk]

# Every row must land in exactly one of the two partitions.
assert len(trn) + len(val) == len(ratings)

In [15]:
def embedding_input(name, n_in, n_out, reg):
    """Create a single-id integer input and its L2-regularised embedding.

    Args:
        name: Name given to the Keras input layer.
        n_in: Size of the embedding vocabulary (number of distinct ids).
        n_out: Length of each embedding vector.
        reg: L2 penalty factor applied to the embedding weights.

    Returns:
        A ``(input_tensor, embedded_tensor)`` pair; the second element is the
        embedding layer applied to the first.
    """
    id_input = Input(shape=(1,), dtype='int64', name=name)
    embed = Embedding(n_in, n_out, input_length=1, embeddings_regularizer=l2(reg))
    return id_input, embed(id_input)

In [17]:
# Reset the Keras session before (re)building the network.
clear_session()

# One embedded input per side, both with n_factors-wide vectors and the same
# L2 penalty.
user_in, u = embedding_input('user_in', n_in=n_users, n_out=n_factors, reg=1e-4)
movie_in, m = embedding_input('movie_in', n_in=n_movies, n_out=n_factors, reg=1e-4)

# Join the two embeddings, flatten them, and pass them through one hidden
# ReLU layer (with dropout before and after) to a single linear output.
x = concatenate([u, m], axis=1)
x = Flatten()(x)
x = Dropout(rate=0.3)(x)
x = Dense(units=70, activation='relu')(x)
x = Dropout(rate=0.75)(x)
x = Dense(units=1)(x)

# Two-input model trained with Adam on mean squared error.
model = Model(inputs=[user_in, movie_in], outputs=x)
model.compile(optimizer='adam', loss='mse')

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
user_in (InputLayer)             (None, 1)             0                                            
____________________________________________________________________________________________________
movie_in (InputLayer)            (None, 1)             0                                            
____________________________________________________________________________________________________
embedding_1 (Embedding)          (None, 1, 50)         33550       user_in[0][0]                    
____________________________________________________________________________________________________
embedding_2 (Embedding)          (None, 1, 50)         453300      movie_in[0][0]                   
___________________________________________________________________________________________

In [18]:
def fit_model(epochs=1, lr=1e-3):
    """Train the global ``model`` on ``trn`` and validate on ``val``.

    Args:
        epochs: Number of passes over the training split.
        lr: Learning rate to set on the model's optimizer before training.

    Returns:
        None. Progress is reported by Keras while fitting.
    """
    # Function-scope import keeps this cell self-contained.
    from keras.backend import set_value

    # Write the new value into the optimizer's existing learning-rate variable.
    # Rebinding `model.optimizer.lr` to a Python float replaces that variable
    # instead of updating it, so the change is ignored once the training
    # function has been built and callbacks that read the variable break.
    set_value(model.optimizer.lr, lr)

    model.fit(
        [trn.userId, trn.movieId],
        trn.rating,
        batch_size=64,
        epochs=epochs,
        validation_data=([val.userId, val.movieId], val.rating),
    )

In [19]:
# Train for eight epochs at the default learning rate.
fit_model(epochs=8)

Train on 80063 samples, validate on 19941 samples
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
