In [27]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [28]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from keras.layers import  Input, dot, concatenate
from keras.models import Model
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.layers import Activation, Dense, Dropout, Embedding, Flatten, Conv1D, MaxPooling1D, LSTM

In [29]:
movies = pd.read_csv('./movielens_dataset/u.item' , header = None , sep = "|" , encoding='latin-1')
movies.columns = ['movie_id' , 'movie_title' , 'release_date' , 'video_release_date' ,
             'IMDb_URL' , 'unknown' , 'Action' , 'Adventure' , 'Animation' ,
             'Childrens' , 'Comedy' , 'Crime' , 'Documentary' , 'Drama' , 'Fantasy' ,
             'Film_Noir' , 'Horror' , 'Musical' , 'Mystery' , 'Romance' , 'Sci_Fi' ,
             'Thriller' , 'War' , 'Western']

users = pd.read_csv('./movielens_dataset/u.user', header= None , sep = '|')
users.columns = ['user_id' , 'age' , 'gender' , 'occupation' , 'zip_code']

ratings = pd.read_csv('./movielens_dataset/u.data', header= None , sep = '\t')
ratings.columns = ['user_id' , 'movie_id' , 'rating' , 'timestamp']

In [30]:
data = ratings.merge(users , on='user_id')
data = data.merge(movies , on='movie_id')

In [31]:
data = data.sample(frac = 1)
data_train_x = np.array(data[['user_id' , 'movie_id']].values)
data_train_y = np.array(data['rating'].values)
x_train, x_test, y_train, y_test = train_test_split(data_train_x, data_train_y, test_size = 0.2, random_state = 98)
n_factors = 50
n_users = len(np.unique(data['user_id']))
n_movies = len(np.unique(data['movie_id']))

In [32]:
user_input = Input(shape=(1,))
user_embeddings = Embedding(input_dim = n_users+1, output_dim=n_factors, input_length=1)(user_input)
user_vector = Flatten()(user_embeddings)

movie_input = Input(shape = (1,))
movie_embeddings = Embedding(input_dim = n_movies+1 , output_dim = n_factors , input_length = 1)(movie_input)
movie_vector = Flatten()(movie_embeddings)

merged_vectors = concatenate([user_vector, movie_vector])
dense_layer_1 = Dense(100 , activation = 'relu')(merged_vectors)
dense_layer_3 = Dropout(.5)(dense_layer_1)
dense_layer_2 = Dense(1)(dense_layer_3)
model = Model([user_input, movie_input], dense_layer_2)

In [36]:
model.compile(loss='mean_squared_error', optimizer='adam' ,metrics = ['accuracy'] )
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, 1, 50)        47200       input_3[0][0]                    
__________________________________________________________________________________________________
embedding_3 (Embedding)         (None, 1, 50)        84150       input_4[0][0]                    
____________________________________________________________________________________________

In [37]:
history = model.fit(
    x = [x_train[:,0] , x_train[:,1]] , 
    y =y_train , batch_size = 128 , 
    epochs = 20 , 
    validation_data = ([x_test[:,0] , x_test[:,1]] , y_test)
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [38]:
loss , val_loss , accuracy , val_accuracy = history.history['loss'],history.history['val_loss'],history.history['accuracy'],history.history['val_accuracy']

In [26]:
plt.figure(figsize = (12,10))
plt.plot( loss, 'r--')
plt.plot(val_loss, 'b-')
plt.plot( accuracy, 'g--')
plt.plot(val_accuracy,'-')
plt.legend(['Training Loss', 'Validation Loss' , 'Training Accuracy' , 'Validation Accuracy'])
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.show()

NameError: name 'loss' is not defined

<Figure size 864x720 with 0 Axes>

In [13]:
score = model.evaluate([x_test[:,0], x_test[:,1]], y_test)
print(np.sqrt(score))

[0.93088483 0.2541653 ]


(100000, 1)

[[array([395]), array([866])]]

(1,)

(20000,)