In [1]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Embedding, Flatten, Concatenate
from tensorflow.keras.optimizers import Adam, SGD

from sklearn.utils import shuffle

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt



  return f(*args, **kwds)
  return f(*args, **kwds)


In [2]:
# Read the ratings CSV into a DataFrame; the path is relative to the
# notebook's working directory.
ratings_path = "ml-20m/ratings.csv"
df = pd.read_csv(ratings_path)

In [3]:
# Peek at the first five rows to check the columns that were loaded.
df.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,2,3.5,1112486027
1,1,29,3.5,1112484676
2,1,32,3.5,1112484819
3,1,47,3.5,1112484727
4,1,50,3.5,1112484580


In [4]:
# Peek at the last five rows (shows how large the raw ids get).
df.tail(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
20000258,138493,68954,4.5,1258126920
20000259,138493,69526,4.5,1259865108
20000260,138493,69644,3.0,1260209457
20000261,138493,70286,5.0,1258126944
20000262,138493,71619,2.5,1255811136


## VVI

We need to create our own user and movie IDs, because the IDs provided in the dataset might not be consecutive.
Non-consecutive IDs cause problems with the embedding layer, which is indexed directly by ID.
E.g. if we have only 3 IDs, say 1, 2 and 20000, the embedding layer would still need (20000 + 1) rows, because a row must exist for every index from 0 up to the largest ID.

In [5]:
# Re-encode the raw user ids as a pandas categorical, then store the integer
# category codes (0-based, consecutive) as the id the model will use.
df["userId"] = pd.Categorical(df["userId"])
df["new_user_id"] = df["userId"].cat.codes

In [6]:
# Same re-encoding for movies: categorical column, then its integer codes
# (0-based, consecutive) as the id the model will use.
df["movieId"] = pd.Categorical(df["movieId"])
df["new_movie_id"] = df["movieId"].cat.codes

In [7]:
# Confirm the two new id columns were added next to the raw ids.
df.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp,new_user_id,new_movie_id
0,1,2,3.5,1112486027,0,1
1,1,29,3.5,1112484676,0,28
2,1,32,3.5,1112484819,0,31
3,1,47,3.5,1112484727,0,46
4,1,50,3.5,1112484580,0,49


In [8]:
# Extract the model inputs (re-encoded ids) and the regression target as
# NumPy arrays.
user_ids, movie_ids = df["new_user_id"].values, df["new_movie_id"].values
ratings = df["rating"].values

In [11]:
# Sizes of the two embedding tables.
# An Embedding layer needs input_dim = (largest index + 1). Taking the max of
# the integer codes gives exactly that, and avoids building a Python set over
# every rating row (one entry per row, which is slow and memory-hungry).
# The cast to int happens before the +1 so the addition is done on a Python
# int rather than on the array's (possibly narrow) integer dtype.
N = int(user_ids.max()) + 1   # number of users
M = int(movie_ids.max()) + 1  # number of movies

In [10]:
K = 10  # embedding dimension: length of each user / movie embedding vector

In [13]:
# Two inputs, each carrying a single integer id per sample.
u = Input(shape=(1,))
m = Input(shape=(1,))

# Look up a K-dimensional vector for every id -> (n_samples, 1, K).
# (Same layout as N, T, D with a sequence length of T = 1.)
u_emb = Embedding(input_dim=N, output_dim=K)(u)
m_emb = Embedding(input_dim=M, output_dim=K)(m)

# Drop the length-1 axis -> (n_samples, K).
u_emb = Flatten()(u_emb)
m_emb = Flatten()(m_emb)

# Join user and movie vectors -> (n_samples, 2K), pass them through one
# ReLU hidden layer, and finish with a single linear unit for the rating.
x = Concatenate()([u_emb, m_emb])
x = Dense(units=1024, activation="relu")(x)
x = Dense(units=1)(x)

In [14]:
# Tie the (user, movie) inputs to the scalar output tensor.
model = Model(
    inputs=[u, m],
    outputs=x,
)

In [15]:
# Mean-squared-error regression trained with SGD + momentum.
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# in tf.keras and rejected by newer Keras releases.
model.compile(
    loss="mse",
    optimizer=SGD(learning_rate=0.08, momentum=0.9),
)

In [16]:
# Shuffle the three arrays with one shared permutation so rows stay aligned.
# A fixed random_state makes the train/validation split below identical on
# every Restart & Run All.
user_ids, movie_ids, ratings = shuffle(
    user_ids, movie_ids, ratings, random_state=42
)

In [19]:
# Index where the training portion ends: the first 80% of the shuffled rows.
N_train = int(len(ratings) * 0.8)

In [23]:
# Training split: everything before the N_train cut-off.
train_user, train_movie, train_ratings = (
    column[:N_train] for column in (user_ids, movie_ids, ratings)
)

In [24]:
# Held-out split: everything from the N_train cut-off onwards.
test_user, test_movie, test_ratings = (
    column[N_train:] for column in (user_ids, movie_ids, ratings)
)

In [25]:
# Centre the targets on the mean of the TRAINING ratings only, so no
# information from the held-out rows leaks into the preprocessing.
avg_rating = train_ratings.mean()

# Subtract out-of-place: train_ratings / test_ratings are slices of `ratings`,
# so an in-place `-=` would also rewrite the `ratings` array they view.
train_ratings = train_ratings - avg_rating
test_ratings = test_ratings - avg_rating

In [27]:
# Fit on the training split and evaluate on the held-out split after each
# epoch; `r` keeps the returned training history object.
fit_inputs = [train_user, train_movie]
held_out = ([test_user, test_movie], test_ratings)

r = model.fit(
    x=fit_inputs,
    y=train_ratings,
    batch_size=1024,
    epochs=25,
    validation_data=held_out,
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [28]:
# Number of distinct movies (used as input_dim of the movie embedding).
M

26744

In [29]:
# Number of distinct users (used as input_dim of the user embedding).
N

138493

In [30]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, 1, 10)        1384930     input_3[0][0]                    
__________________________________________________________________________________________________
embedding_3 (Embedding)         (None, 1, 10)        267440      input_4[0][0]                    
______________________________________________________________________________________________