# Hybrid Models for Recommendation Systems

Load Pandas, we are going to need it for manipulating data

In [1]:
import pandas as pd
import numpy as np
from IPython.display import Image
np.set_printoptions(precision = 3)

Now load the data

In [10]:
user_ratings_df = pd.read_csv("user_ratings.csv")
user_features_df = pd.read_csv("user_features.csv")
item_features_df = pd.read_csv("item_features.csv")

user_features_df["key"] = 0
user_features_df["user_id"] = range(0,user_features_df.shape[0])
item_features_df["key"] = 0
item_features_df["item_id"] = range(0,item_features_df.shape[0])
print ("user_features_df:\n", user_features_df)
print ("\nitem_features_df:\n", item_features_df)

merged_df = pd.merge(user_features_df, item_features_df,left_index=True,on="key")
merged_df[["item_id", "user_id"]]

merged_df["rating"] = map(lambda ids: user_ratings_df.values[ids[1]][ids[2]], 
                          merged_df[["user_id", "item_id"]].itertuples())

print ("\nmerged_df:\n", merged_df)

train = merged_df.dropna()

test = merged_df[merged_df.isnull().any(axis=1)]

print ("\ntest:\n", test.to_latex())

user_features_df:
    Sex   Over60  key  user_id
0  1.0      0.0    0        0
1  0.0      1.0    0        1
2  0.0      0.0    0        2
3  1.0      0.0    0        3
4  0.0      1.0    0        4
5  0.0      0.0    0        5
6  0.0      0.0    0        6
7  1.0      0.0    0        7
8  0.0      1.0    0        8
9  1.0      0.0    0        9

item_features_df:
    Critic0   Critic1  key  item_id
0      0.3       0.9    0        0
1      0.9       0.3    0        1
2      0.6       0.4    0        2
3      0.2       0.1    0        3
4      0.7       0.8    0        4

merged_df:
    Sex   Over60  key  user_id  Critic0   Critic1  item_id  \
0  1.0      0.0    0        0      0.3       0.9        0   
1  1.0      0.0    0        0      0.9       0.3        1   
2  1.0      0.0    0        0      0.6       0.4        2   
3  1.0      0.0    0        0      0.2       0.1        3   
4  1.0      0.0    0        0      0.7       0.8        4   
0  0.0      1.0    0        1      0.3    

In [20]:
n_latent_features = 2

user_ratings = user_ratings_df.values
latent_user_preferences = np.random.random((user_ratings.shape[0], n_latent_features))
latent_item_features = np.random.random((user_ratings.shape[1],n_latent_features))

user_features = user_features_df.values
item_features = item_features_df.values
print (item_features_df.to_latex())

print ("\nuser_features:\n", user_features)
print ("\nitem_features:\n", user_features)

user_features = np.concatenate([np.ones(shape = (user_features.shape[0],1)), user_features], axis = 1)
item_features = np.concatenate([np.ones(shape = (item_features.shape[0],1)), item_features], axis = 1)

print ("\nInsert ones in the first colums -->")
print ("user_features:\n", user_features)
print ("\nitem_features:\n", user_features)

user_features_weights = np.random.random((user_ratings.shape[0], user_features.shape[1] ))
item_features_weights = np.random.random((user_ratings.shape[1], item_features.shape[1] ))

print ("\nuser_features_weights:\n", user_features)
print ("\nitem_features_weights:\n", user_features)
# print user_features

\begin{tabular}{lrrrr}
\toprule
{} &  Critic0 &   Critic1 &  key &  item\_id \\
\midrule
0 &      0.3 &       0.9 &    0 &        0 \\
1 &      0.9 &       0.3 &    0 &        1 \\
2 &      0.6 &       0.4 &    0 &        2 \\
3 &      0.2 &       0.1 &    0 &        3 \\
4 &      0.7 &       0.8 &    0 &        4 \\
\bottomrule
\end{tabular}


user_features:
 [[1. 0. 0. 0.]
 [0. 1. 0. 1.]
 [0. 0. 0. 2.]
 [1. 0. 0. 3.]
 [0. 1. 0. 4.]
 [0. 0. 0. 5.]
 [0. 0. 0. 6.]
 [1. 0. 0. 7.]
 [0. 1. 0. 8.]
 [1. 0. 0. 9.]]

item_features:
 [[1. 0. 0. 0.]
 [0. 1. 0. 1.]
 [0. 0. 0. 2.]
 [1. 0. 0. 3.]
 [0. 1. 0. 4.]
 [0. 0. 0. 5.]
 [0. 0. 0. 6.]
 [1. 0. 0. 7.]
 [0. 1. 0. 8.]
 [1. 0. 0. 9.]]

Insert ones in the first colums -->
user_features:
 [[1. 1. 0. 0. 0.]
 [1. 0. 1. 0. 1.]
 [1. 0. 0. 0. 2.]
 [1. 1. 0. 0. 3.]
 [1. 0. 1. 0. 4.]
 [1. 0. 0. 0. 5.]
 [1. 0. 0. 0. 6.]
 [1. 1. 0. 0. 7.]
 [1. 0. 1. 0. 8.]
 [1. 1. 0. 0. 9.]]

item_features:
 [[1. 1. 0. 0. 0.]
 [1. 0. 1. 0. 1.]
 [1. 0. 0. 0. 2.]
 [1. 1. 0. 0.

In [25]:
def predict_rating(user_id,item_id):
    """ Predict a rating given a user_id and an item_id.
    """
    user_preference = latent_user_preferences[user_id]
    item_preference = latent_item_features[item_id]
    
    user_score = user_features_weights[user_id].dot(user_features[user_id])
    item_score = item_features_weights[item_id].dot(item_features[item_id])
    #print user_preference.dot(item_preference), user_score, item_score
    return user_preference.dot(item_preference) + user_score + item_score

def train(user_id, item_id, rating,alpha = 0.001, 
                                   latent_feature_weight_decay = 0.1, 
                                   user_weight_decay = 0.01,
                                   item_weight_decay = 0.0001):
    
    #print item_id
    prediction_rating = predict_rating(user_id, item_id)
    err =  ( prediction_rating - rating );
    
    #print err
    user_pref_values = latent_user_preferences[user_id][:]
    latent_user_preferences[user_id] -= alpha * err *  \
        (latent_item_features[item_id] + latent_feature_weight_decay*latent_user_preferences[user_id])
    latent_item_features[item_id] -= alpha * err * \
        (user_pref_values + latent_feature_weight_decay*latent_item_features[item_id])
    
    user_features_weights[user_id] -=alpha * err * \
        (user_features[user_id] + user_weight_decay* user_features_weights[user_id])
    item_features_weights[item_id] -=alpha * err * \
        (item_features_weights[item_id] + item_weight_decay* item_features_weights[item_id])
    
    return err
    
def sgd(iterations = 30000):
    """ Iterate over all users and all items and train for 
        a certain number of iterations
    """
    for iteration in range(0, iterations):
        print ("\rIteration: %6d/%d" % (iteration, iterations), end ="")
        error = []
        for user_id in range(0,latent_user_preferences.shape[0]):
            for item_id in range(0,latent_item_features.shape[0]):
                rating = user_ratings[user_id][item_id]
                if(not np.isnan(rating)):
                    err = train(user_id,item_id,rating)
                    error.append(err)
    mse = (np.array(error) ** 2).mean()          
    print ("\nmse = %5f"% mse)

In [26]:
for _ in range(0,10): 
    sgd()

Iteration:  29999/30000
mse = 0.290201
Iteration:  29999/30000
mse = 0.288487
Iteration:  29999/30000
mse = 0.287344
Iteration:  29999/30000
mse = 0.286531
Iteration:  29999/30000
mse = 0.285927
Iteration:  29999/30000
mse = 0.285467
Iteration:  29999/30000
mse = 0.285112
Iteration:  29999/30000
mse = 0.284836
Iteration:  29999/30000
mse = 0.284622
Iteration:  29999/30000
mse = 0.284458


In [27]:
predictions = np.zeros(shape = (latent_user_preferences.shape[0], latent_item_features.shape[0]) )
#print latent_user_preferences
#print user_features_weights
#print item_features_weights
for user_id in range(0,latent_user_preferences.shape[0]):
            for item_id in range(0,latent_item_features.shape[0]):
                predictions[user_id,item_id] =  predict_rating(user_id,item_id) 

In [36]:
values = [zip(user_ratings[i], predictions[i]) for i in range(0,predictions.shape[0])]
comparison_data = pd.DataFrame(values)
comparison_data.columns = user_ratings_df.columns

try:
    comparison_data.applymap(lambda x,y: "(%2.3f|%2.3f)"%(x,y))
except:
    import traceback
    traceback.print_exc()

Traceback (most recent call last):
  File "<ipython-input-36-beda21834f26>", line 6, in <module>
    comparison_data.applymap(lambda x,y: "(%2.3f|%2.3f)"%(x,y))
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/frame.py", line 6562, in applymap
    return self.apply(infer)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/frame.py", line 6496, in apply
    return op.get_result()
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/apply.py", line 151, in get_result
    return self.apply_standard()
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/apply.py", line 257, in apply_standard
    self.apply_series_generator()
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/apply.py", line 286, in apply_series_generator
    results[i] = self.f(v)
  File "/Libr

In [37]:
comparison_data

Unnamed: 0,The Call of Cthulhu,Frankenstein,Dracula,Neuromancer,Space Odyssey
0,"(8.0, 7.920440234691004)","(2.0, 2.2990238569371875)","(nan, -40.18429664108626)","(5.0, 4.787570200711672)","(4.0, 3.99546399379134)"
1,"(3.0, 2.89519519299483)","(2.0, 2.45087346039751)","(nan, 99.33803256820667)","(7.0, 6.662947608541815)","(7.0, 6.991678119135645)"
2,"(9.0, 8.850684527284331)","(nan, 4.998764881541236)","(7.0, 6.994355595960251)","(8.0, 8.141738120721566)","(5.0, 5.011427411958908)"
3,"(nan, 9.049313055911647)","(nan, 5.0172431509207795)","(7.0, 6.999842012909504)","(8.0, 8.00014135963323)","(9.0, 9.000065735936678)"
4,"(nan, 4.446810647225432)","(1.0, 0.5098213010040722)","(8.0, 7.990836673612405)","(3.0, 3.463552635018541)","(7.0, 7.017269248767451)"
5,"(2.0, 2.0125608985885)","(3.0, 2.9896723945081227)","(5.0, 5.000238758633483)","(nan, 10.430446192843482)","(nan, -103.70047008756161)"
6,"(4.0, 4.43952070536973)","(2.0, 0.30578351380846097)","(nan, 2.8722900505356748)","(2.0, 3.186107677891592)","(7.0, 7.026560862450027)"
7,"(7.0, 6.915243557987358)","(1.0, 2.6813801062834584)","(2.0, 2.0790711967951445)","(7.0, 5.538167306771719)","(9.0, 8.948914658661534)"
8,"(3.0, 3.065275224799624)","(3.0, 2.775501409268461)","(nan, 98.59532219634839)","(7.0, 7.147221317872702)","(3.0, 3.0030498668639916)"
9,"(4.0, 3.898977696292559)","(nan, 0.025672603554185325)","(5.0, 4.9883280903980864)","(3.0, 3.0882059079747672)","(3.0, 3.0070994447582238)"


In [38]:
d = comparison_data.to_latex()
text_file = open("comparison.txt", "w")
text_file.write(d)
text_file.close()