In [63]:
import pandas as pd
import numpy as np
from keras.layers import Dense, Flatten, Embedding, Input, merge, Dropout
from keras.regularizers import l2
from keras.models import Model
from keras.optimizers import Adam

In [4]:
movies = pd.read_csv("ml-latest-small/movies.csv")

In [3]:
ml.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [7]:
ratings = pd.read_csv("ml-latest-small/ratings.csv")

In [8]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [9]:
all_users = ratings.userId.unique()
all_movies = ratings.movieId.unique()

In [11]:
all_users[:10]

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [12]:
all_movies[:10]

array([  31, 1029, 1061, 1129, 1172, 1263, 1287, 1293, 1339, 1343])

In [13]:
user_mapping = {o:i for i,o in enumerate(all_users)}

In [14]:
movie_mapping = {o:i for i,o in enumerate(all_movies)}

In [17]:
ratings['user_idx'] = ratings.apply(lambda x: user_mapping[x['userId']], axis=1)
ratings['movie_idx'] = ratings.apply(lambda x: movie_mapping[x['movieId']], axis=1)

In [25]:
trn_idx = np.random.rand(len(ratings)) < .8
trn = ratings[trn_idx]
val = ratings[~trn_idx]

In [26]:
trn.head()

Unnamed: 0,userId,movieId,rating,timestamp,user_idx,movie_idx
1,1,1029,3.0,1260759179,0,1
3,1,1129,2.0,1260759185,0,3
5,1,1263,2.0,1260759151,0,5
6,1,1287,2.0,1260759187,0,6
7,1,1293,2.0,1260759148,0,7


In [27]:
val.head()

Unnamed: 0,userId,movieId,rating,timestamp,user_idx,movie_idx
0,1,31,2.5,1260759144,0,0
2,1,1061,3.0,1260759182,0,2
4,1,1172,4.0,1260759205,0,4
8,1,1339,3.5,1260759125,0,8
11,1,1405,1.0,1260759203,0,11


In [34]:
n_users = len(all_users)
n_movies = len(all_movies)
n_factors = 50

In [49]:
np.random.seed = 42

In [58]:
user_in = Input(shape=(1,), dtype='int64', name='user_in')
u = Embedding(n_users, n_factors, input_length=1, W_regularizer=l2(1e-4))(user_in)

movie_in = Input(shape=(1,), dtype='int64', name='movie_in')
m = Embedding(n_movies, n_factors, input_length=1, W_regularizer=l2(1e-4))(movie_in)

x = merge([u, m], mode='dot')
x = Flatten()(x)

model = Model([user_in, movie_in], x)
model.compile(Adam(0.0001), loss='mse')
model.fit([trn.user_idx, trn.movie_idx], trn.rating, batch_size=64, nb_epoch=100, 
          validation_data=([val.user_idx, val.movie_idx], val.rating))

Train on 80095 samples, validate on 19909 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100

KeyboardInterrupt: 

In [59]:
model.optimizer.lr = .00001
model.fit([trn.user_idx, trn.movie_idx], trn.rating, batch_size=64, nb_epoch=100, 
          validation_data=([val.user_idx, val.movie_idx], val.rating))

Train on 80095 samples, validate on 19909 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
16896/80095 [=====>........................] - ETA: 1s - loss: 1.8661

KeyboardInterrupt: 

In [45]:
model.compile(Adam(0.0001), loss='mse')
model.fit([trn.user_idx, trn.movie_idx], trn.rating, batch_size=64, nb_epoch=6, 
          validation_data=([val.user_idx, val.movie_idx], val.rating))

Train on 80095 samples, validate on 19909 samples
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<keras.callbacks.History at 0x7f6086dc1d90>

In [46]:
model.compile(Adam(0.0001), loss='mse')
model.fit([trn.user_idx, trn.movie_idx], trn.rating, batch_size=64, nb_epoch=6, 
          validation_data=([val.user_idx, val.movie_idx], val.rating))

Train on 80095 samples, validate on 19909 samples
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<keras.callbacks.History at 0x7f60899f8dd0>

# Neural Net Model

In [73]:
user_input = Input(shape=(1,), dtype='int64', name='user_in')
u = Embedding(n_users, n_factors, input_length=1, W_regularizer=l2(1e-4))(user_input)
movie_input = Input(shape=(1,), dtype='int64', name='movie_in')
m = Embedding(n_movies, n_factors, input_length=1, W_regularizer=l2(1e-4))(movie_input)

In [74]:
x = merge([u, m], mode='concat', concat_axis=1)

In [75]:
x = Flatten()(x)
x = Dropout(0.3)(x)
x = Dense(70, activation='relu')(x)
x = Dropout(0.75)(x)
x = Dense(1)(x)
nn = Model([user_input, movie_input], x)
nn.compile(Adam(0.001), loss='mse')
nn.fit([trn.user_idx, trn.movie_idx], 
       trn.rating, 
       batch_size=64, 
       nb_epoch=3, 
       validation_data=([val.user_idx, val.movie_idx], val.rating))

Train on 80095 samples, validate on 19909 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f607d099290>

In [76]:
nn.fit([trn.user_idx, trn.movie_idx], 
       trn.rating, 
       batch_size=64, 
       nb_epoch=3, 
       validation_data=([val.user_idx, val.movie_idx], val.rating))

Train on 80095 samples, validate on 19909 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f60825f7550>

In [77]:
nn.fit([trn.user_idx, trn.movie_idx], 
       trn.rating, 
       batch_size=64, 
       nb_epoch=10, 
       validation_data=([val.user_idx, val.movie_idx], val.rating))

Train on 80095 samples, validate on 19909 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f607d6ddc10>

In [78]:
nn.compile(Adam(0.0001), loss='mse')
nn.fit([trn.user_idx, trn.movie_idx], 
       trn.rating, 
       batch_size=64, 
       nb_epoch=10, 
       validation_data=([val.user_idx, val.movie_idx], val.rating))

Train on 80095 samples, validate on 19909 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f6083682310>

In [79]:
nn.compile(Adam(0.00001), loss='mse')
nn.fit([trn.user_idx, trn.movie_idx], 
       trn.rating, 
       batch_size=64, 
       nb_epoch=10, 

       validation_data=([val.user_idx, val.movie_idx], val.rating))

Train on 80095 samples, validate on 19909 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f607fd75110>

In [80]:
nn.optimizer.lr = .00005
nn.fit([trn.user_idx, trn.movie_idx], 
       trn.rating, 
       batch_size=64, 
       nb_epoch=10, 
       validation_data=([val.user_idx, val.movie_idx], val.rating))

Train on 80095 samples, validate on 19909 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f607fd75ad0>

In [81]:
nn.optimizer.lr = .0001
nn.fit([trn.user_idx, trn.movie_idx], 
       trn.rating, 
       batch_size=64, 
       nb_epoch=10, 
       validation_data=([val.user_idx, val.movie_idx], val.rating))

Train on 80095 samples, validate on 19909 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f607fe44450>