In [1]:
# Import libraries
%matplotlib inline
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn

In [3]:
ratings = pd.read_csv('../data/ml-100k/ratings.csv')

ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [4]:
ratings['userEmbId'] = ratings['userId'] - 1
ratings['movieEmbId'] = ratings['movieId'] - 1
# Set max_userid to the maximum user_id in the ratings
max_userid = ratings['userId'].drop_duplicates().max()
# Set max_movieid to the maximum movie_id in the ratings
max_movieid = ratings['movieId'].drop_duplicates().max()

ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp,userEmbId,movieEmbId
0,1,1,4.0,964982703,0,0
1,1,3,4.0,964981247,0,2
2,1,6,4.0,964982224,0,5
3,1,47,5.0,964983815,0,46
4,1,50,5.0,964982931,0,49


In [6]:
shuffled_ratings = ratings.sample(frac=1., random_state=42)

# Shuffling users
Users = shuffled_ratings['userEmbId'].values
print('Users:', Users, ', shape =', Users.shape)

# Shuffling movies
Movies = shuffled_ratings['movieEmbId'].values
print('Movies:', Movies, ', shape =', Movies.shape)

# Shuffling ratings
Ratings = shuffled_ratings['rating'].values
print('Ratings:', Ratings, ', shape =', Ratings.shape)

Users: [431 287 598 ... 479   5 102] , shape = (100836,)
Movies: [77865   473  4350 ...  6866   980  6710] , shape = (100836,)
Ratings: [4.5 3.  3.  ... 4.  3.  5. ] , shape = (100836,)


In [7]:
import keras
from keras.layers import Input, Embedding, dot, Dense, Reshape
from keras.models import Model, Sequential

Using TensorFlow backend.


In [8]:
user_input = Input(shape=(1,), name='user_input')
x = Embedding(input_dim=max_userid, output_dim=100, input_length=1)(user_input)
user_output = Reshape((100,))(x)

In [9]:
movie_input = Input(shape=(1,), name='movie_input')
y = Embedding(input_dim=max_movieid, output_dim=100, input_length=1)(movie_input)
movie_output = Reshape((100,))(y)

In [10]:
z = dot([Dense(10)(user_output), Dense(10)(movie_output)], axes=1)

In [11]:
model = Model(inputs=[user_input, movie_input], outputs=z)

In [12]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
user_input (InputLayer)         (None, 1)            0                                            
__________________________________________________________________________________________________
movie_input (InputLayer)        (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 1, 100)       61000       user_input[0][0]                 
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, 1, 100)       19360900    movie_input[0][0]                
__________________________________________________________________________________________________
reshape_1 

In [13]:
model.compile(loss='mse', optimizer='adamax')

In [14]:
from keras.callbacks import Callback, EarlyStopping, ModelCheckpoint

In [None]:
callbacks = [EarlyStopping('val_loss', patience=2),
            ModelCheckpoint('movie_recommender_100k_trained.h5', save_best_only=True)]

history=model.fit([Users, Movies], Ratings, epochs=30, validation_split=.1, verbose=2, callbacks=callbacks)

Train on 90752 samples, validate on 10084 samples
Epoch 1/30


In [None]:
# Show the best validation RMSE
min_val_loss, idx = min((val, idx) for (idx, val) in enumerate(history.history['val_loss']))
print('Minimum RMSE at epoch', '{:d}'.format(idx+1), '=', '{:.4f}'.format(math.sqrt(min_val_loss)))