In [1]:
import pickle
import sys
sys.path.append("../")
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import tensorflow as tf

from sklearn.metrics import mean_squared_error, mean_absolute_error
from math import sqrt
from surprise import accuracy
from reco_utils.dataset.python_splitters import python_stratified_split
from reco_utils.evaluation.python_evaluation import rmse
from reco_utils.recommender.sar import SAR
from reco_utils.recommender.ncf.ncf_singlenode import NCF
from reco_utils.recommender.surprise.surprise_utils import predict
from keras.models import load_model


Using TensorFlow backend.


In [2]:
# Raise TensorFlow's logging threshold to ERROR so lower-severity messages are not printed.
tf.logging.set_verbosity(tf.logging.ERROR)

In [3]:
# Load the test set; column names are supplied explicitly via `names`.
# Every model's predictions are later attached to this frame as extra columns.
test = pd.read_csv('../Data/test.csv', names=['user_id', 'profile_id', 'rating'])

In [4]:
# Separate copy of the test set for the kNN model, with both id columns cast to str.
test_knn = pd.read_csv(
    '../Data/test.csv',
    names=['user_id', 'profile_id', 'rating'],
).astype({'user_id': str, 'profile_id': str})

# One (user_id, profile_id, rating) tuple per row of the frame.
tuples = list(map(tuple, test_knn.to_numpy()))

In [5]:
# Load the pickled kNN model from disk.
# The context manager closes the file even if unpickling raises, which the
# previous open()/close() pair did not guarantee.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load model files that come from a trusted source.
with open('../kNN/model/KNN.pickle', 'rb') as f:
    loaded_model = pickle.load(f)

In [6]:
# Run the loaded kNN model over the (user_id, profile_id, rating) tuples.
# NOTE(review): presumably a Surprise algorithm whose `test` returns one
# prediction object per tuple (the next cell reads `.est` from each) — confirm.
predictions = loaded_model.test(tuples)

In [7]:
# Keep only the `est` attribute of each kNN prediction object.
predictions_knn = [prediction.est for prediction in predictions]

In [8]:
# Attach the kNN estimates to the shared test frame (a plain list, so assignment is by position).
test['pred_knn'] = predictions_knn

In [28]:
# Root-mean-squared error of the kNN predictions against the true ratings.
rms = sqrt(
    mean_squared_error(
        y_true=test['rating'].to_numpy(),
        y_pred=test['pred_knn'].to_numpy(),
    )
)
rms

2.019823515221608

In [9]:
# Load the pickled SAR model from disk.
# The context manager closes the file even if unpickling raises, which the
# previous open()/close() pair did not guarantee.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load model files that come from a trusted source.
with open('../SAR/model/SAR.pickle', 'rb') as f:
    loaded_sar = pickle.load(f)

In [10]:
# Score the test frame with the loaded SAR model.
# NOTE(review): the exact effect of `normalize=True` is defined by the SAR
# implementation and is not visible here — confirm the resulting score scale.
predictions = loaded_sar.predict(test, normalize=True)

Items found in test not seen during training, new items will have score of 0


In [11]:
# Replace any +inf / -inf entries in the SAR output with 0.
predictions = predictions.replace([np.inf, -np.inf], 0)

In [12]:
# Attach the SAR scores to the shared test frame.
# NOTE(review): assigning a Series aligns on index, whereas the SVD cell assigns
# `.values` positionally; this assumes `predictions` shares `test`'s index — confirm.
test['pred_sar'] = predictions['prediction']

In [27]:
# Root-mean-squared error of the SAR predictions against the true ratings.
rms = sqrt(
    mean_squared_error(
        y_true=test['rating'].to_numpy(),
        y_pred=test['pred_sar'].to_numpy(),
    )
)
rms

2.861099430521089

In [13]:
# Load the saved Keras model from the MF directory (presumably the matrix-factorisation model).
loaded_MF = load_model('../MF/mf-model')

In [14]:
# Predict from the (user_id, profile_id) inputs, multiply by 10 and round to
# whole numbers.
# NOTE(review): the factor 10 presumably maps a scaled model output back onto
# the rating range — confirm against the training notebook.
y_hat = np.round(
    loaded_MF.predict([test['user_id'], test['profile_id']]) * 10,
    decimals=0,
)

test['predictions_mf'] = y_hat

In [26]:
# Root-mean-squared error of the MF predictions against the true ratings.
rms = sqrt(
    mean_squared_error(
        y_true=test['rating'].to_numpy(),
        y_pred=test['predictions_mf'].to_numpy(),
    )
)
rms

2.5851321472759263

In [15]:
# Load the pickled SVD model from disk.
# The context manager closes the file even if unpickling raises, which the
# previous open()/close() pair did not guarantee.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load model files that come from a trusted source.
with open('../SVD/model/SVD.pickle', 'rb') as f:
    loaded_svd = pickle.load(f)

In [16]:
# Fresh copy of the test set for the SVD model, with both id columns cast to str.
test_svd = pd.read_csv(
    '../Data/test.csv',
    names=['user_id', 'profile_id', 'rating'],
).astype({'user_id': str, 'profile_id': str})

# Score the frame with the loaded SVD model via the reco_utils `predict` helper.
predictions = predict(
    loaded_svd,
    test_svd,
    usercol='user_id',
    itemcol='profile_id',
)

In [17]:
# Attach the SVD predictions to the shared test frame; `.values` drops the
# index, so the assignment is positional rather than index-aligned.
test['pred_svd'] = predictions['prediction'].values

In [25]:
# Root-mean-squared error of the SVD predictions against the true ratings.
rms = sqrt(
    mean_squared_error(
        y_true=test['rating'].to_numpy(),
        y_pred=test['pred_svd'].to_numpy(),
    )
)
rms

2.1151723153332824

In [18]:
# Weighted blend of the kNN (0.4) and SVD (0.6) predictions, scored by RMSE.

test['ensemble'] = 0.4 * test['pred_knn'] + 0.6 * test['pred_svd']

rms = sqrt(
    mean_squared_error(
        y_true=test['rating'].to_numpy(),
        y_pred=test['ensemble'].to_numpy(),
    )
)

print(rms)

1.9965370031762195


In [19]:
# Equal-weight blend of the kNN and MF predictions, scored by RMSE.

test['ensemble'] = 0.5 * test['pred_knn'] + 0.5 * test['predictions_mf']

rms = sqrt(
    mean_squared_error(
        y_true=test['rating'].to_numpy(),
        y_pred=test['ensemble'].to_numpy(),
    )
)

print(rms)

2.1450310320535806


In [75]:
# Equal-weight blend of the SVD and MF predictions, scored by RMSE.

test['ensemble'] = 0.5 * test['pred_svd'] + 0.5 * test['predictions_mf']

rms = sqrt(
    mean_squared_error(
        y_true=test['rating'].to_numpy(),
        y_pred=test['ensemble'].to_numpy(),
    )
)

print(rms)

1.6677913976306813


In [30]:
# ensemble knn, mf and svd (weights 0.4 / 0.3 / 0.3), scored by RMSE
# NOTE(review): this comment previously named "sar" as the third model, but the
# expression blends pred_svd and pred_sar is not used — confirm which was intended.

test['ensemble'] = test['pred_knn'] * 0.4 + test['predictions_mf'] * 0.3 + test['pred_svd'] * 0.3

rms = sqrt(mean_squared_error(test['rating'].values, test['ensemble'].values))

print(rms)

2.0567617043299746


In [21]:
# Mean absolute error of the kNN predictions against the true ratings.
mean_absolute_error(
    y_true=test['rating'].to_numpy(),
    y_pred=test['pred_knn'].to_numpy(),
)

1.4962389907881706

In [22]:
# Mean absolute error of the MF predictions against the true ratings.
mean_absolute_error(
    y_true=test['rating'].to_numpy(),
    y_pred=test['predictions_mf'].to_numpy(),
)

2.0145701620521765

In [23]:
# Mean absolute error of the SVD predictions against the true ratings.
mean_absolute_error(
    y_true=test['rating'].to_numpy(),
    y_pred=test['pred_svd'].to_numpy(),
)

1.6412310874144493

In [24]:
# Mean absolute error of the SAR predictions against the true ratings.
mean_absolute_error(
    y_true=test['rating'].to_numpy(),
    y_pred=test['pred_sar'].to_numpy(),
)

2.3765798664460775