### Collaborative Filtering using Keras

In [1]:
import os
# Dataset root. The active setting points at the "ml-small" directory;
# swap in the commented-out line to use the "ml-20m" directory instead.
# path = "data/ml-20m/"
path = "data/ml-small/"

# Directory for saved model weights, created on first run if missing.
model_path = path + 'models/'
if not os.path.exists(model_path):
    os.mkdir(model_path)

# Mini-batch size setting for training.
batch_size = 64

In [2]:
import pandas as pd
# Load the ratings table from the dataset directory and preview the first rows.
ratings_csv = path + 'ratings.csv'
ratings = pd.read_csv(ratings_csv)
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [3]:
# Build a {movieId: title} lookup from movies.csv.
_movies_table = pd.read_csv(path + 'movies.csv')
_titles_by_id = _movies_table.set_index('movieId')['title']
movie_names = _titles_by_id.to_dict()

In [4]:
# Distinct raw user and movie ids present in the ratings table.
users = ratings['userId'].unique()
movies = ratings['movieId'].unique()

In [5]:
# Map each raw id to a contiguous 0-based index (its position in the
# corresponding array of unique ids).
userid2idx = dict(zip(users, range(len(users))))
movieid2idx = dict(zip(movies, range(len(movies))))

In [6]:
# Replace raw ids with their contiguous indices, in place on `ratings`.
# NOTE: this overwrites the raw ids, so the cell is not safe to run twice
# without reloading `ratings`.
ratings['movieId'] = ratings['movieId'].apply(movieid2idx.__getitem__)
ratings['userId'] = ratings['userId'].apply(userid2idx.__getitem__)

In [7]:
# Number of distinct users and movies; used as the embedding table sizes.
n_users, n_movies = (ratings[col].nunique() for col in ('userId', 'movieId'))
n_users, n_movies

(671, 9066)

In [8]:
# Dimensionality of each user/movie embedding vector.
n_factors = 50

In [9]:
import numpy as np
# Seed NumPy's global RNG so the train/validation split is reproducible.
# `seed` must be *called*: assigning `np.random.seed = 42` only overwrites
# the function with an int and leaves the generator unseeded.
np.random.seed(42)

# Random split: each rating independently lands in the training set with
# probability 0.8; the remaining rows form the validation set.
msk = np.random.rand(len(ratings)) < 0.8
trn = ratings[msk]
val = ratings[~msk]

In [11]:
from keras.layers import Input, Dense, Flatten, Dropout, merge
from keras.layers.embeddings import Embedding
from keras.models import Model
from keras.regularizers import l2

Using TensorFlow backend.


In [12]:
def _id_embedding(n_ids, name):
    """Create an int64 id input named `name` and its L2-regularised embedding.

    Returns the (input tensor, embedding output) pair. The embedding table
    has `n_ids` rows of `n_factors` values each.
    """
    id_in = Input(shape=(1,), dtype='int64', name=name)
    embedded = Embedding(n_ids, n_factors, input_length=1,
                         embeddings_regularizer=l2(1e-4))(id_in)
    return id_in, embedded


# User branch first, then movie branch (same creation order as before).
user_in, u = _id_embedding(n_users, 'user_in')
movie_in, m = _id_embedding(n_movies, 'movie_in')

In [18]:
from keras.optimizers import Adam

In [28]:
# `dot` is the supported functional replacement for the deprecated
# `merge(..., mode='dot')` helper, which emits a deprecation warning.
from keras.layers import dot

# Dot product of the user and movie embeddings along their last axis
# (the same axis the legacy `merge` call used by default).
x = dot([u, m], axes=-1)
x = Flatten()(x)
x = Dropout(0.3)(x)
# NOTE(review): the dot product appears to yield a single value per
# (user, movie) pair, so this dense layer would see one input feature --
# confirm this is intended rather than concatenating the embeddings.
x = Dense(70, activation='relu')(x)
x = Dropout(0.75)(x)
# Single linear output unit: the predicted rating.
x = Dense(1)(x)

# Two-input model (user index, movie index) trained with mean squared error.
nn = Model([user_in, movie_in], x)
nn.compile(Adam(0.001), loss='mse')

  if __name__ == '__main__':
  name=name)


In [20]:
# Train for 8 epochs, evaluating on the held-out validation ratings after
# each epoch. Uses the notebook-level `batch_size` setting rather than
# repeating the literal, so the batch size is configured in one place.
nn.fit([trn.userId, trn.movieId], trn.rating, batch_size=batch_size, epochs=8,
       validation_data=([val.userId, val.movieId], val.rating))

Train on 80079 samples, validate on 19925 samples
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0xc111eb8>

In [21]:
# Persist the trained weights in the models directory.
weights_file = model_path + 'weights.h5'
nn.save_weights(weights_file)

In [22]:
# Restore the weights previously written to the models directory.
weights_file = model_path + 'weights.h5'
nn.load_weights(weights_file)

In [23]:
# Spot-check two (user, movie) pairs, given as encoded indices:
# (user 1, movie 2) and (user 3, movie 3).
sample_users = pd.Series([1, 3])
sample_movies = pd.Series([2, 3])
nn.predict([sample_users, sample_movies])

array([[ 3.52201748],
       [ 4.01135063]], dtype=float32)

In [24]:
# Predicted ratings for every (user, movie) pair in the training set.
trn_inputs = [trn.userId, trn.movieId]
nn.predict(trn_inputs)

array([[ 2.97101545],
       [ 2.81344771],
       [ 3.42902756],
       ..., 
       [ 3.56772828],
       [ 2.94926572],
       [ 3.57131338]], dtype=float32)

In [25]:
# Removed a leftover `??merge` source lookup: it was an interactive
# development aid, not part of the analysis, and is not valid plain Python.