# Task 4 - Keras

# Imports

In [17]:
import numpy as np
import pandas as pd
import os
import tensorflow as T
import keras as keras
from keras import backend as K
from keras import initializers
from keras.initializers import RandomNormal
from keras.models import Sequential, Model, load_model, save_model
from keras.layers.core import Dense, Lambda, Activation
from keras.layers import Embedding, Input, Dense, merge, Reshape,  Flatten, Dropout
from keras.optimizers import Adagrad, Adam, SGD, RMSprop, Adamax
from keras.regularizers import l2
from keras.layers import Multiply, Concatenate
from keras.callbacks import Callback, EarlyStopping, ModelCheckpoint
import time
import multiprocessing as mp
import sys
import math
import argparse
import matplotlib.pyplot as plt

In [18]:
RATING_DATA_TEST_FILE = "u1.test"
RATING_DATA_TRAIN_FILE = "u1.base"
MODEL_WEIGHTS_FILE = "u_emb_weights.h5"

In [19]:
#import of the data

m_cols = ['user_id','movie_id','rating','timestamp']

df_train = pd.read_csv(RATING_DATA_TRAIN_FILE, sep='\t', engine='python', encoding='latin-1',names=m_cols)
df_test = pd.read_csv(RATING_DATA_TEST_FILE, sep='\t', engine='python', encoding='latin-1',names=m_cols)

print(df_train.head())

max_userid = df_train['user_id'].drop_duplicates().max()
max_movieid = df_train['movie_id'].drop_duplicates().max()
df_train['user_emb_id'] = df_train['user_id'] - 1
df_train['movie_emb_id'] = df_train['movie_id'] - 1
df_test['user_emb_id'] = df_test['user_id'] - 1
df_test['movie_emb_id'] = df_test['movie_id'] - 1


Train_Users = df_train['user_emb_id'].values
Train_Movies = df_train['movie_emb_id'].values
Train_Ratings = df_train['rating'].values

Test_Users = df_test['user_emb_id'].values
Test_Movies = df_test['movie_emb_id'].values
Test_Ratings = df_test['rating'].values

   user_id  movie_id  rating  timestamp
0        1         1       5  874965758
1        1         2       3  876893171
2        1         3       4  878542960
3        1         4       3  876893119
4        1         5       3  889751712


# First Model

In [20]:
def get_ncf_model1(num_users, num_items, latent_dim,hidden_dim,do):
    # Input variables
    user_input = Input(shape=(1,), dtype='int32', name = 'user_input')
    item_input = Input(shape=(1,), dtype='int32', name = 'item_input')

    NCF_Embedding_User = Embedding(input_dim = num_users, output_dim = latent_dim, name = 'user_embedding', input_length=1)
    NCF_Embedding_Item = Embedding(input_dim = num_items, output_dim = latent_dim, name = 'item_embedding', input_length=1)   
    
    # Crucial to flatten an embedding vector!
    user_latent = Flatten()(NCF_Embedding_User(user_input))
    item_latent = Flatten()(NCF_Embedding_Item(item_input))
    
    # Element-wise product of user and item embeddings
    conc = Concatenate()([user_latent, item_latent])
    drop = Dropout(0.3)(conc)
    hid1 = Dense(hidden_dim, activation='relu')(conc)
    drop2  = Dropout(do)(hid1)
    prediction = Dense(1, activation='relu', kernel_initializer='lecun_uniform', name = 'prediction')(drop2)
    
    
    model = Model(inputs=[user_input, item_input], outputs=prediction)
    print("ncf model1")
    model.summary()

    return model

# Compile

In [21]:
K_LATENT = 20
hidden_dim = 20
do = 0.3
NCF_model1 = get_ncf_model1(max_userid,max_movieid,K_LATENT,hidden_dim,do)
NCF_model1.compile(loss='mse',optimizer=Adamax(),metrics=['mae'])

ncf model1
Model: "model_4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
user_input (InputLayer)         (None, 1)            0                                            
__________________________________________________________________________________________________
item_input (InputLayer)         (None, 1)            0                                            
__________________________________________________________________________________________________
user_embedding (Embedding)      (None, 1, 20)        18860       user_input[0][0]                 
__________________________________________________________________________________________________
item_embedding (Embedding)      (None, 1, 20)        33640       item_input[0][0]                 
_________________________________________________________________________________

# Fit

In [22]:
learnTime1 = time.time()
callbacks = [EarlyStopping('val_loss', patience=5), ModelCheckpoint(MODEL_WEIGHTS_FILE, save_best_only=True)]
history = NCF_model1.fit([Train_Users, Train_Movies], Train_Ratings, epochs=100, validation_split=.2, verbose=1, callbacks=callbacks, batch_size = 32)
learnTime1 = time.time() - learnTime1

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 64000 samples, validate on 16000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100


# Predict

In [23]:
preddict_model1 = NCF_model1.predict([Test_Users,Test_Movies])
test_predict1 = pd.DataFrame(data=preddict_model1, columns=['Prediction'])
test_predict1['Real_Rating'] = Test_Ratings

MAE1 = np.sum(abs(test_predict1['Real_Rating']-test_predict1['Prediction']))/test_predict1.shape[0]
                                
print("MAE1 = "+ str(MAE1))

MAE1 = 0.749746728515625


# Second Model

In [24]:
def get_ncf_model2(num_users, num_items, latent_dim,hidden_dim,do):
    # Input variables
    user_input = Input(shape=(1,), dtype='int32', name = 'user_input')
    item_input = Input(shape=(1,), dtype='int32', name = 'item_input')

    NCF_Embedding_User = Embedding(input_dim = num_users, output_dim = latent_dim, name = 'user_embedding', input_length=1)
    NCF_Embedding_Item = Embedding(input_dim = num_items, output_dim = latent_dim, name = 'item_embedding', input_length=1)   
    
    # Crucial to flatten an embedding vector!
    user_latent = Flatten()(NCF_Embedding_User(user_input))
    item_latent = Flatten()(NCF_Embedding_Item(item_input))
    
    # Element-wise product of user and item embeddings
    conc = Concatenate()([user_latent, item_latent])
    drop = Dropout(0.3)(conc)
    hid1 = Dense(hidden_dim, activation='sigmoid')(conc)
    drop2  = Dropout(do)(hid1)
    prediction = Dense(1, activation='sigmoid', kernel_initializer='lecun_uniform', name = 'prediction')(drop2)
    
    
    model = Model(inputs=[user_input, item_input], outputs=prediction)
    print("ncf model2")
    model.summary()

    return model

# Compile

In [25]:
K_LATENT = 20
hidden_dim = 20
do = 0.3
NCF_model2 = get_ncf_model2(max_userid,max_movieid,K_LATENT,hidden_dim,do)
NCF_model2.compile(loss='mse',optimizer=Adamax(),metrics=['mae'])

ncf model2
Model: "model_5"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
user_input (InputLayer)         (None, 1)            0                                            
__________________________________________________________________________________________________
item_input (InputLayer)         (None, 1)            0                                            
__________________________________________________________________________________________________
user_embedding (Embedding)      (None, 1, 20)        18860       user_input[0][0]                 
__________________________________________________________________________________________________
item_embedding (Embedding)      (None, 1, 20)        33640       item_input[0][0]                 
_________________________________________________________________________________

# Fit

In [26]:
learnTime2 = time.time()
callbacks = [EarlyStopping('val_loss', patience=5), ModelCheckpoint(MODEL_WEIGHTS_FILE, save_best_only=True)]
history = NCF_model2.fit([Train_Users, Train_Movies], Train_Ratings, epochs=100, validation_split=.2, verbose=1, callbacks=callbacks, batch_size = 32)
learnTime2 = time.time() - learnTime2

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 64000 samples, validate on 16000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100


# Predict

In [27]:
preddict_model2 = NCF_model2.predict([Test_Users,Test_Movies])
test_predict2 = pd.DataFrame(data=preddict_model2, columns=['Prediction'])
test_predict2['Real_Rating'] = Test_Ratings

MAE2 = np.sum(abs(test_predict2['Real_Rating']-test_predict2['Prediction']))/test_predict2.shape[0]
                                
print("MAE2 = "+ str(MAE2))

MAE2 = 2.5359


# Third Model

In [31]:
def get_ncf_model3(num_users, num_items, latent_dim,hidden_dim,do):
    # Input variables
    user_input = Input(shape=(1,), dtype='int32', name = 'user_input')
    item_input = Input(shape=(1,), dtype='int32', name = 'item_input')

    NCF_Embedding_User = Embedding(input_dim = num_users, output_dim = latent_dim, name = 'user_embedding', input_length=1)
    NCF_Embedding_Item = Embedding(input_dim = num_items, output_dim = latent_dim, name = 'item_embedding', input_length=1)   
    
    # Crucial to flatten an embedding vector!
    user_latent = Flatten()(NCF_Embedding_User(user_input))
    item_latent = Flatten()(NCF_Embedding_Item(item_input))
    
    # Element-wise product of user and item embeddings
    conc = Concatenate()([user_latent, item_latent])
    drop = Dropout(0.3)(conc)
    hid1 = Dense(hidden_dim, activation='relu')(conc)
    drop2  = Dropout(do)(hid1)
    prediction = Dense(1, activation='relu', kernel_initializer='lecun_uniform', name = 'prediction')(drop2)
    
    
    model = Model(inputs=[user_input, item_input], outputs=prediction)
    print("ncf model3")
    model.summary()

    return model

# Compile

In [32]:
K_LATENT = 20
hidden_dim = 20
do = 0.2
NCF_model3 = get_ncf_model3(max_userid,max_movieid,K_LATENT,hidden_dim,do)
NCF_model3.compile(loss='mse',optimizer=Adam(),metrics=['mae'])

ncf model3
Model: "model_7"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
user_input (InputLayer)         (None, 1)            0                                            
__________________________________________________________________________________________________
item_input (InputLayer)         (None, 1)            0                                            
__________________________________________________________________________________________________
user_embedding (Embedding)      (None, 1, 20)        18860       user_input[0][0]                 
__________________________________________________________________________________________________
item_embedding (Embedding)      (None, 1, 20)        33640       item_input[0][0]                 
_________________________________________________________________________________

# Fit

In [33]:
learnTime3 = time.time()
callbacks = [EarlyStopping('val_loss', patience=5), ModelCheckpoint(MODEL_WEIGHTS_FILE, save_best_only=True)]
history = NCF_model3.fit([Train_Users, Train_Movies], Train_Ratings, epochs=100, validation_split=.2, verbose=1, callbacks=callbacks, batch_size = 32)
learnTime3 = time.time() - learnTime3

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 64000 samples, validate on 16000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100


# Predict

In [34]:
preddict_model3 = NCF_model3.predict([Test_Users,Test_Movies])
test_predict3 = pd.DataFrame(data=preddict_model3, columns=['Prediction'])
test_predict3['Real_Rating'] = Test_Ratings

MAE3 = np.sum(abs(test_predict3['Real_Rating']-test_predict3['Prediction']))/test_predict3.shape[0]
                                
print("MAE3 = "+ str(MAE3))

MAE3 = 0.74768330078125


# Compare of models

In [36]:
print("MAE1 = "+ str(MAE1))
print("Running Time: " + str(learnTime1) + "\n")
print("MAE2 = "+ str(MAE2))
print("Running Time: " + str(learnTime2) + "\n")
print("MAE3 = "+ str(MAE3))
print("Running Time: " + str(learnTime3) + "\n")

print("The first and third models usually gets the best MAE score, but their learing is usually the longest, their model setup is")
print("Model 1")
print("activation function: relu")
print("loss function: mse")
print("optimizer: adamax")
print("number of hidden layer: 1")
print("Model 3")
print("activation function: relu")
print("loss function: mse")
print("optimizer: adam")
print("number of hidden layer: 1 but Dropout is 0.2")

MAE1 = 0.749746728515625
Running Time: 34.50171446800232

MAE2 = 2.5359
Running Time: 30.887426614761353

MAE3 = 0.74768330078125
Running Time: 38.842615365982056

The first and third models usually gets the best MAE score, but their learing is usually the longest, their model setup is
Model 1
activation function: relu
loss function: mse
optimizer: adamax
number of hidden layer: 1
Model 3
activation function: relu
loss function: mse
optimizer: adam
number of hidden layer: 1 but Dropout is 0.2
