In [2]:
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split

pd.options.display.max_columns= None

In [3]:
DATA_FOLDER = Path('data/ml-latest-small')
MOVIES_FILEPATH = DATA_FOLDER / 'movies.csv'
RATINGS_FILEPATH = DATA_FOLDER / 'ratings.csv'

In [4]:
movies = pd.read_csv(MOVIES_FILEPATH)
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [5]:
ratings = pd.read_csv(RATINGS_FILEPATH)
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


#### converting as category

In [6]:
ratings.userId = ratings.userId.astype('category').cat.codes.values 
ratings.movieId = ratings.movieId.astype('category').cat.codes.values

In [7]:
train, test = train_test_split(ratings, test_size=0.2)


In [8]:
train.columns

Index(['userId', 'movieId', 'rating', 'timestamp'], dtype='object')

In [9]:
import tensorflow as tf
from tensorflow import keras
from keras.optimizers import Adam

Using TensorFlow backend.


In [10]:
n_users, n_movies = len(ratings.userId.unique()), len(ratings.movieId.unique()) #taking the count of movies and users
n_latent_factors = 20 # k value

#### Building the layers

In [11]:
movie_input = keras.layers.Input(shape=[1],name='movie')
movie_embedding = keras.layers.Embedding(n_movies + 1, n_latent_factors, name='Movie-Embedding')(movie_input)
movie_vec = keras.layers.Flatten(name='FlattenMovies')(movie_embedding) #taking the embeddings and flattening the input
 
user_input = keras.layers.Input(shape=[1],name='User')
user_vec = keras.layers.Flatten(name='FlattenUsers')(keras.layers.Embedding(n_users + 1, n_latent_factors,name='User-Embedding')(user_input))
 
prod = keras.layers.dot([movie_vec, user_vec], axes=1,name='DotProduct')
model = keras.Model([user_input, movie_input], prod)#model using inputs as user_input and movie_input, output as prod

In [12]:
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae', 'mse'])

In [14]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
movie (InputLayer)              [(None, 1)]          0                                            
__________________________________________________________________________________________________
User (InputLayer)               [(None, 1)]          0                                            
__________________________________________________________________________________________________
Movie-Embedding (Embedding)     (None, 1, 20)        194500      movie[0][0]                      
__________________________________________________________________________________________________
User-Embedding (Embedding)      (None, 1, 20)        12220       User[0][0]                       
______________________________________________________________________________________________

In [15]:
history = model.fit([train.userId, train.movieId], train.rating, epochs=100, verbose=0)

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: Bad argument number for Name: 4, expecting 3
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: Bad argument number for Name: 4, expecting 3


In [16]:
results = model.evaluate((test.userId, test.movieId), test.rating, batch_size=1)

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: Bad argument number for Name: 4, expecting 3
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: Bad argument number for Name: 4, expecting 3


In [17]:
movie_embedding_learnt = model.get_layer(name='Movie-Embedding').get_weights()[0] #taking the weights from the embedding layer. [1]for bias
pd.DataFrame(movie_embedding_learnt).describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
count,9725.0,9725.0,9725.0,9725.0,9725.0,9725.0,9725.0,9725.0,9725.0,9725.0,9725.0,9725.0,9725.0,9725.0,9725.0,9725.0,9725.0,9725.0,9725.0,9725.0
mean,0.274392,-0.27609,0.275836,0.194063,0.198458,-0.277091,-0.278327,-0.270145,0.276235,0.268449,-0.272086,0.253619,0.275843,-0.256721,0.269351,0.201775,-0.265423,0.249132,-0.248116,0.272122
std,0.241268,0.239343,0.230014,0.298006,0.334532,0.23536,0.226697,0.237728,0.241221,0.241803,0.27797,0.276214,0.242011,0.253987,0.235515,0.299639,0.263032,0.286011,0.286714,0.26288
min,-1.202378,-2.332898,-0.942256,-1.545462,-1.84733,-1.5964,-2.058715,-1.906422,-1.140967,-1.500451,-1.978323,-1.181476,-1.416301,-2.104303,-1.29786,-1.587173,-2.037366,-1.30141,-2.115776,-1.248034
25%,0.15074,-0.377625,0.16228,0.03402,-5.4e-05,-0.378714,-0.380843,-0.370774,0.148171,0.139756,-0.394016,0.099921,0.158399,-0.366582,0.15096,0.038916,-0.383201,0.077526,-0.378496,0.130606
50%,0.279183,-0.281434,0.288628,0.241813,0.228097,-0.278843,-0.291117,-0.275171,0.287418,0.282136,-0.27703,0.262735,0.286027,-0.258045,0.283418,0.249638,-0.275844,0.272292,-0.248234,0.278569
75%,0.3815,-0.156782,0.377733,0.354969,0.372803,-0.155897,-0.164733,-0.153203,0.387473,0.380094,-0.102527,0.377035,0.376938,-0.116555,0.376747,0.362331,-0.114427,0.383153,-0.066991,0.38378
max,1.78525,1.17108,1.688953,1.732763,2.443241,1.297406,1.374755,1.197117,2.07415,1.695162,1.544013,2.144409,1.935503,1.153653,2.042787,1.914645,1.366353,2.288879,1.705622,2.119372


In [18]:
user_embedding_learnt = model.get_layer(name='User-Embedding').get_weights()[0]

In [22]:
def recommend(user_id, number_of_movies=5):
  movies = user_embedding_learnt[user_id]@movie_embedding_learnt.T
  mids = np.argpartition(movies, -number_of_movies)[-number_of_movies:]#to get in partitioned order
  return mids

In [23]:
recommend(user_id = 1) #5 movies ids for user_id=1

array([6756, 7841, 1668, 1708, 7647], dtype=int64)