In [43]:
#import libraries
import pickle
import random
import pandas as pd
import numpy as np
import tensorflow as tf

In [60]:
# define error metric
def _error(actual: np.ndarray, predicted: np.ndarray):
    """ Simple error """
    return actual - predicted


def mae(actual: np.ndarray, predicted: np.ndarray):
    """ Mean Absolute Error """
    return np.mean(np.abs(_error(actual, predicted)))

def nmae(actual: np.ndarray, predicted: np.ndarray):
    """ Normalized Mean Absolute Error """
    return mae(actual, predicted) / (actual.max() - actual.min())

In [61]:
#load saved models
xgb_svd_model = pickle.load(open('xgb_svd.pickle', 'rb'))
svd_model = pickle.load(open('svd.pickle', 'rb'))
xgb_bsl_model = pickle.load(open('xgb_bsl.pickle', 'rb'))
knn_bsl_u_model = pickle.load(open('knn_bsl_u.pickle', 'rb'))
bsl_algo_model = pickle.load(open('bsl_algo.pickle', 'rb'))
dataframe = pickle.load(open('dataframe.pickle', 'rb'))
train_df_structured = pickle.load(open('train_df_structured.pickle', 'rb'))
test_df_structured = pickle.load(open('test_df_structured.pickle', 'rb'))
train_df_structured_target = pickle.load(open('train_df_structured_target.pickle', 'rb'))
test_df_structured_target = pickle.load(open('test_df_structured_target.pickle', 'rb'))
global_model_name = pickle.load(open('global_model_name.pickle', 'rb'))
first_best_model = tf.keras.models.load_model('weights-improvement-1.hdf5')
min_max_scaler_X = pickle.load(open('min_max_scaler_X.pickle', 'rb'))

In [58]:
user_id = random.randint(0,24983)
user_id = user_id + 1

user_actual_data = dataframe.iloc[user_id]
user_actual_rating = user_actual_data.values[1:]

data_prepared = [(user_id, i+1 , j) for i,j in enumerate (user_actual_rating)]
test_preds_bsl_algo = bsl_algo_model.test(data_prepared)
test_preds_bsl_algo = [i.est for i in test_preds_bsl_algo]
test_preds_knn_bsl_u = knn_bsl_u_model.test(data_prepared)
test_preds_knn_bsl_u = [i.est for i in test_preds_knn_bsl_u]
test_preds_svd = svd_model.test(data_prepared)
test_preds_svd = [i.est for i in test_preds_svd]
user_avg = train_df_structured[train_df_structured['user']==user_id].iloc[0]['user_avg']
joke_avg = []
for i in range(1,101):
    joke_avg.append(train_df_structured[train_df_structured['joke']==i].iloc[0]['joke_avg'])

In [65]:
user_data_frame = pd.DataFrame()

user_data_frame['user_avg'] = user_avg
user_data_frame['joke_avg'] = joke_avg
user_data_frame['gavg'] = 0.73979
user_data_frame['BaselineOnly'] = test_preds_bsl_algo
user_data_frame['KnnBaseline_joke'] = test_preds_knn_bsl_u
user_data_frame['user_avg'] = user_avg
user_data_frame['SVD'] = test_preds_svd
user_data_frame['special_feature'] = user_data_frame['user_avg']+user_data_frame['joke_avg']-user_data_frame['gavg']
user_data_frame['special_feature_1'] = user_data_frame['special_feature']**2
user_data_frame_scaled = min_max_scaler_X.transform(user_data_frame)
user_rating_predicted = first_best_model.predict(user_data_frame_scaled)
final_user_predicted = []
final_user_actual = []
for i in range(100):
    if(user_actual_rating[i]==99.0):
        pass
    else:
        final_user_predicted.append(user_rating_predicted[i])
        final_user_actual.append(user_actual_rating[i])
        
print(nmae(np.array(final_user_actual), np.array(final_user_predicted)))
for i in range(0, len(final_user_predicted)):
    print(final_user_actual[i],final_user_predicted[i][0])

0.18830800169255246
1.02 0.97502613
-0.83 0.1457119
5.05 0.193254
0.58 -1.6425452
-9.51 0.28639126
5.39 1.4870381
2.91 -0.5749135
-3.01 -0.7990527
0.63 -0.61582327
-1.17 1.2790899
-0.73 1.8861613
5.63 1.6639681
-0.83 -2.5672688
4.9 1.4790325
-1.99 -2.646646
-8.3 -4.1257243
-3.2 -1.1482968
-6.75 -0.75017023
-0.19 0.19587278
3.64 -1.028048
2.91 2.5500631
3.2 0.9311142
-1.89 0.19923162
0.19 -2.4814315
3.11 0.45348692
4.03 1.2932429
2.91 3.4368691
-0.58 1.5159049
1.31 3.1738338
1.99 -0.479012
2.38 2.295967
2.86 3.4939938
2.52 -1.5943074
3.4 1.0465555
1.65 3.3130112
3.74 3.4355044
2.52 -1.6247382
4.71 1.5095024
1.6 1.2082119
-0.1 1.1615896
-0.34 -0.500937
3.98 1.9287496
3.98 -0.7464156
0.68 -2.5954967
3.88 1.1458974
5.78 1.390408
6.31 1.445859
4.95 2.0102682
2.04 3.079002
5.19 3.8661966
1.75 -0.6767192
-1.65 -0.3028016
4.76 3.1011834
4.85 2.9337573
1.65 0.36842155
3.11 1.88552
-3.69 -2.5790772
-3.69 -5.1205163
1.65 -0.5677285
1.94 -0.4739251
3.2 2.522922
3.69 3.199677
3.88 0.27615166
3.74 -

`As we can see a random user from test data set gives 18% NMAE`