# Movie recommender

In [10]:
#Importing libraries
import pandas as pd
import numpy as np

In [2]:
# Load the ratings table from a CSV file in the working directory.
# NOTE(review): the preview of this frame shows an 'Unnamed: 0' column, which
# suggests the CSV was written with its index -- confirm whether it is needed.
ratings = pd.read_csv(filepath_or_buffer='ratings.csv')

In [3]:
# Preview the first five rows of the freshly loaded ratings table.
ratings.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [19]:
# Arrays of the distinct user ids and movie ids found in the ratings table.
users = ratings['userId'].unique()
movies = ratings['movieId'].unique()

# Number of distinct users and movies; displayed as the cell's output.
n_users = ratings['userId'].nunique()
n_movies = ratings['movieId'].nunique()
('users =', n_users, 'movies =', n_movies)

('users =', 671, 'movies =', 9066)

In [5]:
# Lookup tables from each raw id to its position in `users` / `movies`,
# i.e. a consecutive 0-based index suitable for an embedding lookup.
userid2idx = {user_id: position for position, user_id in enumerate(users)}
movieid2idx = {movie_id: position for position, movie_id in enumerate(movies)}

Replace the `userId` and `movieId` columns with contiguous zero-based indices so that they can be used directly as lookups into the embedding layers.

In [6]:
# Re-index the ids: swap every raw userId / movieId for its consecutive index.
# The columns are overwritten in place, so this cell is meant to be run once
# per load of `ratings` (a second run would look up already re-indexed values).
ratings['userId'] = ratings['userId'].apply(lambda raw_id: userid2idx[raw_id])
ratings['movieId'] = ratings['movieId'].apply(lambda raw_id: movieid2idx[raw_id])


In [7]:
# State of the table after the preprocessing needed for the embedding layers:
# the original userId and movieId values have been replaced by consecutive integers.
# Note that 'rating' is not preprocessed and 'timestamp' is of little importance.
ratings.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
0,0,0,2.5,1260759144
1,0,1,3.0,1260759179
2,0,2,3.0,1260759182
3,0,3,2.0,1260759185
4,0,4,4.0,1260759205


In [21]:
# Number of latent factors, i.e. the length of each user / movie embedding vector.
n_factors = 50
# Seed NumPy's global random generator so the random train/validation split
# drawn with np.random.rand is reproducible. `np.random.seed` is a function and
# has to be called; assigning to it replaces the function with an int and
# leaves the generator unseeded.
np.random.seed(42)

In [23]:
# Random train/validation split: one uniform draw per row, rows whose draw is
# below 0.8 go to the training set and the remaining rows to the validation set.
msk = np.random.rand(len(ratings)) < 0.8
trn, val = ratings[msk], ratings[~msk]

In [44]:
#importing the Keras building blocks used to define and train the model
from keras.models import Model
from keras.layers import Dense, Input, Embedding, Dropout, merge, concatenate, Flatten
from keras.regularizers import l2
from keras.optimizers import Adam

In [39]:
# Input layers: each sample carries a single int64 user index and a single
# int64 movie index.
user_in = Input(name='user_in', shape=(1,), dtype='int64')
movie_in = Input(name='movies_in', shape=(1,), dtype='int64')

In [40]:
# Embedding layers: one trainable vector of length n_factors per user and per
# movie, with an L2 penalty (1e-4) on the embedding matrices.
# `embeddings_regularizer` is the Keras 2 name of the Keras 1 `W_regularizer`
# argument; using it avoids relying on the legacy-argument conversion.
u = Embedding(n_users, n_factors, input_length=1, embeddings_regularizer=l2(1e-4))(user_in)
m = Embedding(n_movies, n_factors, input_length=1, embeddings_regularizer=l2(1e-4))(movie_in)

  
  This is separate from the ipykernel package so we can avoid doing imports until


In [42]:
# Join the user and movie embeddings along the last axis into one feature
# vector per (user, movie) pair. `concatenate` is the Keras 2 replacement for
# the deprecated `merge(..., mode='concat')` function.
x = concatenate([u, m])
x = Flatten()(x)
x = Dropout(0.3)(x)
# Hidden layer of 70 ReLU units, followed by a second dropout layer.
x = Dense(70, activation='relu')(x)
x = Dropout(0.7)(x)
# Single linear output unit: the predicted rating.
x = Dense(1)(x)
# Model taking (user index, movie index) as inputs and producing that output.
nn = Model([user_in, movie_in], x)


  """Entry point for launching an IPython kernel.
  name=name)


In [45]:
# Rating prediction is a regression problem trained with mean squared error.
# Mean absolute error is reported as the monitoring metric because 'accuracy'
# is a classification metric and is not meaningful for continuous ratings.
nn.compile(optimizer=Adam(0.01), loss='mse', metrics=['mae'])


In [49]:
# First training pass: 10 epochs with mini-batches of 64 samples, evaluating
# on the validation split at the end of every epoch.
nn.fit(
    x=[trn['userId'], trn['movieId']],
    y=trn['rating'],
    batch_size=64,
    epochs=10,
    validation_data=([val['userId'], val['movieId']], val['rating']),
)

Train on 79894 samples, validate on 20110 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x207fe3d4710>

In [51]:
# Re-compile the same model with a fresh Adam optimizer at a 10x smaller
# learning rate (0.001 instead of 0.01) before the second round of training.
# Mean absolute error is reported as the monitoring metric because 'accuracy'
# is a classification metric and is not meaningful for continuous ratings.
nn.compile(optimizer=Adam(0.001), loss='mse', metrics=['mae'])


In [52]:
# Second training pass with the re-compiled model: another 10 epochs with
# mini-batches of 64 samples, evaluating on the validation split every epoch.
nn.fit(
    x=[trn['userId'], trn['movieId']],
    y=trn['rating'],
    batch_size=64,
    epochs=10,
    validation_data=([val['userId'], val['movieId']], val['rating']),
)

Train on 79894 samples, validate on 20110 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x207ba651550>

In [54]:
# Save the trained weights (weights only) to an HDF5 file in the working directory.
nn.save_weights(filepath='mov_rec.h5')

In [55]:
# Predict the rating for user #5 and movie #10. The model was trained on the
# re-indexed columns, so 5 and 10 are consecutive indices, not raw userId /
# movieId values from the original file.
nn.predict(x=[np.array([5]), np.array([10])])

array([[ 2.99065065]], dtype=float32)