In [1]:
import sys
sys.path.append("../")
import pickle

import time
import pandas as pd

import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)

from reco_utils.recommender.ncf.ncf_singlenode import NCF
from reco_utils.recommender.ncf.dataset import Dataset as NCFDataset
from reco_utils.evaluation.python_evaluation import rmse
from sklearn.metrics import mean_squared_error
from math import sqrt

# Record the interpreter and key library versions so the run can be reproduced.
version_report = [
    "System version: {}".format(sys.version),
    "Pandas version: {}".format(pd.__version__),
    "Tensorflow version: {}".format(tf.__version__),
]
print("\n".join(version_report))

System version: 3.7.6 (default, Dec 19 2019, 23:50:13) 
[GCC 7.4.0]
Pandas version: 0.25.3
Tensorflow version: 1.15.2


In [2]:
# Restrict TensorFlow logging to ERROR-level messages.
# NOTE(review): the first cell already makes this exact call right after importing
# tensorflow, so this cell looks redundant — confirm before removing.
tf.logging.set_verbosity(tf.logging.ERROR)

In [3]:
# top k items to recommend
# NOTE(review): TOP_K is not referenced by any later cell visible in this notebook.
TOP_K = 10

# Model parameters
# NOTE(review): EPOCHS is never passed to the model — both NCF(...) constructions
# hard-code n_epochs=10, so changing this value currently has no effect.
EPOCHS = 20
# Passed to NCF(...) as batch_size.
BATCH_SIZE = 32

# Passed as `seed` to both the NCFDataset and the NCF constructors.
SEED = 42

In [4]:
# Both CSVs are read with the same explicit column names. Because `names` is given
# without `header`, pandas treats the first line of each file as data, not a header.
RATING_COLUMNS = ['user_id', 'profile_id', 'rating']
train = pd.read_csv('../Data/train.csv', names=RATING_COLUMNS)
test = pd.read_csv('../Data/test.csv', names=RATING_COLUMNS)

In [5]:
# Wrap the train/test frames in the NCF dataset helper, naming the columns that
# hold the user and item identifiers.
data = NCFDataset(
    train=train,
    test=test,
    col_user='user_id',
    col_item='profile_id',
    seed=SEED,
)

In [6]:
# Hyperparameters for the "NeuMF" model type; the user and item counts are taken
# from the dataset object built above.
ncf_params = dict(
    n_users=data.n_users,
    n_items=data.n_items,
    model_type="NeuMF",
    n_factors=8,
    layer_sizes=[32, 16, 8],
    n_epochs=10,
    batch_size=BATCH_SIZE,
    learning_rate=1e-3,
    verbose=1,
    seed=SEED,
)
model = NCF(**ncf_params)

In [7]:
# Train the model on the prepared dataset and report the wall-clock duration.
start_time = time.time()

model.fit(data)

# Elapsed seconds, measured with time.time() before and after fit().
train_time = time.time() - start_time

print("Took {} seconds for training.".format(train_time))

Epoch 1 [12.83s]: train_loss = 0.359371 
Epoch 2 [12.55s]: train_loss = 0.279874 
Epoch 3 [15.99s]: train_loss = 0.254373 
Epoch 4 [15.76s]: train_loss = 0.239457 
Epoch 5 [14.57s]: train_loss = 0.230343 
Epoch 6 [15.11s]: train_loss = 0.222742 
Epoch 7 [15.71s]: train_loss = 0.216937 
Epoch 8 [17.54s]: train_loss = 0.212384 
Epoch 9 [16.41s]: train_loss = 0.208327 
Epoch 10 [16.33s]: train_loss = 0.205183 
Took 152.85115957260132 seconds for training.


In [8]:
# Score every test row: one model.predict(user_id, profile_id) call per row.
# NOTE(review): row-wise apply issues a separate predict call for each row; if
# NCF.predict accepts list inputs, a single batched call would be faster — confirm.
test['predictions'] = test.apply(lambda x: model.predict(x.user_id, x.profile_id), axis=1)

In [9]:
# Scale the model scores by 10 before they are compared with the `rating` column.
# NOTE(review): assumes model.predict returns values in [0, 1] and ratings go up to 10 — TODO confirm.
# NOTE(review): this in-place update is not idempotent; re-running this cell on its
# own multiplies the column by 10 again.
test['predictions'] *= 10

In [10]:
# RMSE on the test split: mean squared error first, then its square root.
test_mse = mean_squared_error(y_true=test['rating'].values, y_pred=test['predictions'].values)
rms = sqrt(test_mse)

In [11]:
# Show the RMSE that was just computed on the test split.
print(rms)

4.313171179775356


In [12]:
# Score every training row: one model.predict(user_id, profile_id) call per row.
# NOTE(review): row-wise apply issues a separate predict call for each row; if
# NCF.predict accepts list inputs, a single batched call would be faster — confirm.
train['predictions'] = train.apply(lambda x: model.predict(x.user_id, x.profile_id), axis=1)

In [13]:
# Scale the model scores by 10 before they are compared with the `rating` column.
# NOTE(review): assumes model.predict returns values in [0, 1] and ratings go up to 10 — TODO confirm.
# NOTE(review): this in-place update is not idempotent; re-running this cell on its
# own multiplies the column by 10 again.
train['predictions'] *= 10

In [14]:
# RMSE on the training split. `rms` is reused here, so the value computed for the
# test split is overwritten.
train_mse = mean_squared_error(y_true=train['rating'].values, y_pred=train['predictions'].values)
rms = sqrt(train_mse)

In [15]:
# Display the RMSE that was just computed on the training split.
rms

4.15754817832538

In [16]:
# Save the trained model under the 'model' path.
# NOTE(review): the following cells write pickles into 'model/', so this call is
# presumably what creates that directory — confirm.
model.save('model')

In [17]:
# Persist the fitted model's user2id mapping next to the saved model so it can be
# reattached after reloading. The context manager closes the file even if pickling fails.
with open('model/user2id_embedding.pickle', 'wb') as f:
    pickle.dump(model.user2id, f)

In [18]:
# Persist the fitted model's item2id mapping next to the saved model so it can be
# reattached after reloading. The context manager closes the file even if pickling fails.
with open('model/item2id_embedding.pickle', 'wb') as f:
    pickle.dump(model.item2id, f)

In [19]:
# Persist the fitted model's id2user mapping next to the saved model so it can be
# reattached after reloading. The context manager closes the file even if pickling fails.
with open('model/id2user_embedding.pickle', 'wb') as f:
    pickle.dump(model.id2user, f)

In [20]:
# Persist the fitted model's id2item mapping next to the saved model so it can be
# reattached after reloading. The context manager closes the file even if pickling fails.
with open('model/id2item_embedding.pickle', 'wb') as f:
    pickle.dump(model.id2item, f)

In [21]:
# Read the user2id mapping back from disk; the context manager closes the file even
# if unpickling fails. pickle.load can execute arbitrary code, so only load files
# written by a trusted source (here: the dump cell of this notebook).
with open('model/user2id_embedding.pickle', 'rb') as f:
    user2id_embedding = pickle.load(f)

In [22]:
# Read the item2id mapping back from disk; the context manager closes the file even
# if unpickling fails. pickle.load can execute arbitrary code, so only load files
# written by a trusted source (here: the dump cell of this notebook).
with open('model/item2id_embedding.pickle', 'rb') as f:
    item2id_embedding = pickle.load(f)

In [23]:
# Read the id2user mapping back from disk; the context manager closes the file even
# if unpickling fails. pickle.load can execute arbitrary code, so only load files
# written by a trusted source (here: the dump cell of this notebook).
with open('model/id2user_embedding.pickle', 'rb') as f:
    id2user_embedding = pickle.load(f)

In [24]:
# Read the id2item mapping back from disk; the context manager closes the file even
# if unpickling fails. pickle.load can execute arbitrary code, so only load files
# written by a trusted source (here: the dump cell of this notebook).
with open('model/id2item_embedding.pickle', 'rb') as f:
    id2item_embedding = pickle.load(f)

In [25]:
# Build a second, untrained NCF instance to receive the saved weights. The
# hyperparameters repeat the values used for the first model's construction.
reload_params = dict(
    n_users=data.n_users,
    n_items=data.n_items,
    model_type="NeuMF",
    n_factors=8,
    layer_sizes=[32, 16, 8],
    n_epochs=10,
    batch_size=BATCH_SIZE,
    learning_rate=1e-3,
    verbose=1,
    seed=SEED,
)
model1 = NCF(**reload_params)

In [26]:
# Load the saved NeuMF model from the 'model' directory into the fresh instance.
model1.load(neumf_dir='model')
# Reattach the ID mappings that were unpickled in the preceding cells.
# NOTE(review): presumably load() does not restore these mappings and predict()
# needs them to resolve raw user/item IDs — confirm against the NCF implementation.
model1.user2id = user2id_embedding
model1.item2id = item2id_embedding
model1.id2user = id2user_embedding
model1.id2item = id2item_embedding

In [27]:
# Score the training rows with the reloaded model, scale by 10 exactly as was done
# for the original model's predictions, and recompute the RMSE for comparison.
reloaded_scores = train.apply(lambda row: model1.predict(row.user_id, row.profile_id), axis=1)
train['predictions1'] = reloaded_scores * 10

reloaded_mse = mean_squared_error(train['rating'].values, train['predictions1'].values)
rms = sqrt(reloaded_mse)

In [28]:
# Display the training RMSE obtained from the reloaded model's predictions.
rms

4.15754817832538