# Hybrid Models for Recommendation Systems

Load Pandas, we are going to need it for manipulating data

In [1]:
import pandas as pd
import numpy as np
from IPython.display import Image
np.set_printoptions(precision = 3)

Now load the data

In [5]:
user_ratings_df = pd.read_csv("user_ratings.csv")
user_features_df = pd.read_csv("user_features.csv")
item_features_df = pd.read_csv("item_features.csv")


user_features_df["key"] = 0
user_features_df["user_id"] = range(0,user_features_df.shape[0])
item_features_df["key"] = 0
item_features_df["item_id"] = range(0,item_features_df.shape[0])

merged_df = pd.merge(user_features_df, item_features_df,left_index=True,on="key")
merged_df[["item_id", "user_id"]]




merged_df["rating"] = map(lambda ids: user_ratings_df.values[ids[1]][ids[2]], 
                          merged_df[["user_id", "item_id"]].itertuples())

train = merged_df.dropna()

test = merged_df[merged_df.isnull().any(axis=1)]

print(test.to_latex())



\begin{tabular}{lrrrrrrrl}
\toprule
Empty DataFrame
Columns: Index(['Sex', ' Over60', 'key', 'user\_id', 'Critic0', ' Critic1', 'item\_id',
       'rating'],
      dtype='object')
Index: Int64Index([], dtype='int64') \\
\bottomrule
\end{tabular}



In [8]:
n_latent_features = 2

user_ratings = user_ratings_df.values
latent_user_preferences = np.random.random((user_ratings.shape[0], n_latent_features))
latent_item_features = np.random.random((user_ratings.shape[1],n_latent_features))

user_features = user_features_df.values
item_features = item_features_df.values

print(item_features_df.to_latex())


user_features = np.concatenate([np.ones(shape = (user_features.shape[0],1)), user_features], axis = 1)
item_features = np.concatenate([np.ones(shape = (item_features.shape[0],1)), item_features], axis = 1)



user_features_weights = np.random.random((user_ratings.shape[0], user_features.shape[1] ))
item_features_weights = np.random.random((user_ratings.shape[1],item_features.shape[1] ))



# print user_features

\begin{tabular}{lrrrr}
\toprule
{} &  Critic0 &   Critic1 &  key &  item\_id \\
\midrule
0 &      0.3 &       0.9 &    0 &        0 \\
1 &      0.9 &       0.3 &    0 &        1 \\
2 &      0.6 &       0.4 &    0 &        2 \\
3 &      0.2 &       0.1 &    0 &        3 \\
4 &      0.7 &       0.8 &    0 &        4 \\
\bottomrule
\end{tabular}



In [11]:
def predict_rating(user_id,item_id):
    """ Predict a rating given a user_id and an item_id.
    """
    user_preference = latent_user_preferences[user_id]
    item_preference = latent_item_features[item_id]
    
    user_score = user_features_weights[user_id].dot(user_features[user_id])
    item_score = item_features_weights[item_id].dot(item_features[item_id])
    #print user_preference.dot(item_preference), user_score, item_score
    return user_preference.dot(item_preference) + user_score + item_score

def train(user_id, item_id, rating,alpha = 0.001, 
                                   latent_feature_weight_decay = 0.1, 
                                   user_weight_decay = 0.01,
                                   item_weight_decay = 0.0001):
    
    #print item_id
    prediction_rating = predict_rating(user_id, item_id)
    err =  ( prediction_rating - rating );
    #print err
    user_pref_values = latent_user_preferences[user_id][:]
    latent_user_preferences[user_id] -= alpha * err *  ( latent_item_features[item_id] + latent_feature_weight_decay*latent_user_preferences[user_id])
    latent_item_features[item_id] -= alpha * err * ( user_pref_values + latent_feature_weight_decay*latent_item_features[item_id])
    
    user_features_weights[user_id] -=alpha * err *(  user_features[user_id] + user_weight_decay* user_features_weights[user_id])
    item_features_weights[item_id] -=alpha * err * ( item_features_weights[item_id] + item_weight_decay* item_features_weights[item_id])
    
    
    return err
    


def sgd(iterations = 30000):
    """ Iterate over all users and all items and train for 
        a certain number of iterations
    """
    for iteration in range(0,iterations):
        error = []
        for user_id in range(0,latent_user_preferences.shape[0]):
            for item_id in range(0,latent_item_features.shape[0]):
                rating = user_ratings[user_id][item_id]
                if(not np.isnan(rating)):
                    err = train(user_id,item_id,rating)
                    error.append(err)
    mse = (np.array(error) ** 2).mean()          
    print(mse)

In [12]:
for _ in range(0,10): 
    sgd()

0.28367714939398486
0.27800628884752643
0.2763571860545079
0.2755829072424352
0.27515393078575495
0.2749025918119836
0.2747587613054194
0.2746880821221305
0.2746720251174935
0.27470004908107726


In [13]:
predictions = np.zeros(shape = (latent_user_preferences.shape[0], latent_item_features.shape[0]) )
#print latent_user_preferences
print(user_features_weights)
print(item_features_weights)
for user_id in range(0,latent_user_preferences.shape[0]):
            for item_id in range(0,latent_item_features.shape[0]):
                predictions[user_id,item_id] =  predict_rating(user_id,item_id)

  

[[ 2.61   2.671  0.416  0.479  0.706]
 [-1.241  0.562 -0.7    0.711 -1.336]
 [ 1.257  0.487  0.192  0.022  1.718]
 [ 1.126  0.73   0.484  0.136  1.165]
 [ 0.724  0.16   0.535  0.705  0.35 ]
 [ 0.116  0.839  0.979  0.787 -2.04 ]
 [ 0.477  0.018  0.578  0.574  0.139]
 [ 0.763 -0.042  0.152  0.976  0.306]
 [ 0.305  0.502  0.74   0.905 -0.558]
 [ 0.889  0.116  0.725  0.75  -0.018]]
[[1.879 3.204 1.319 3.577 1.599]
 [0.09  0.064 0.108 0.093 0.082]
 [1.11  1.09  0.281 0.479 0.418]
 [0.554 4.833 1.811 1.695 0.565]
 [0.031 0.028 0.018 0.035 0.008]]


In [15]:
values = [zip(user_ratings[i], predictions[i]) for i in range(0,predictions.shape[0])]
comparison_data = pd.DataFrame(values)
comparison_data.columns = user_ratings_df.columns
comparison_data.applymap(lambda x: "(%2.3f|%2.3f)"%(x[0],x[1]))

Unnamed: 0,The Call of Cthulhu,Frankenstein,Dracula,Neuromancer,Space Odyssey
0,(8.000|7.871),(2.000|2.404),(nan|17.229),(5.000|4.729),(4.000|3.997)
1,(3.000|2.914),(2.000|2.276),(nan|-22.008),(7.000|6.813),(7.000|6.998)
2,(9.000|8.747),(nan|5.027),(7.000|7.043),(8.000|8.181),(5.000|5.022)
3,(nan|8.973),(nan|5.002),(7.000|7.000),(8.000|7.999),(9.000|9.000)
4,(nan|5.491),(1.000|0.688),(8.000|8.014),(3.000|3.274),(7.000|7.010)
5,(2.000|2.010),(3.000|2.991),(5.000|4.999),(nan|9.079),(nan|-71.163)
6,(4.000|4.546),(2.000|0.286),(nan|3.194),(2.000|3.120),(7.000|7.011)
7,(7.000|6.515),(1.000|2.752),(2.000|2.038),(7.000|5.822),(9.000|8.983)
8,(3.000|3.125),(3.000|2.610),(nan|-20.499),(7.000|7.249),(3.000|3.002)
9,(4.000|4.264),(nan|-0.030),(5.000|4.975),(3.000|2.826),(3.000|2.979)


In [16]:
comparison_data

Unnamed: 0,The Call of Cthulhu,Frankenstein,Dracula,Neuromancer,Space Odyssey
0,"(8.0, 7.870882868944827)","(2.0, 2.4037355534583913)","(nan, 17.228672080328376)","(5.0, 4.729418151082328)","(4.0, 3.997301581843029)"
1,"(3.0, 2.914057615244747)","(2.0, 2.2762903086537816)","(nan, -22.008230829411747)","(7.0, 6.813458287264119)","(7.0, 6.998275736558438)"
2,"(9.0, 8.746599583196783)","(nan, 5.027423867494672)","(7.0, 7.04325223287071)","(8.0, 8.181233914499426)","(5.0, 5.021871299876603)"
3,"(nan, 8.972896330410045)","(nan, 5.002383526416432)","(7.0, 7.000086237167638)","(8.0, 7.998866397531094)","(9.0, 9.000037561315283)"
4,"(nan, 5.490954419448839)","(1.0, 0.6881610197427666)","(8.0, 8.014269474475887)","(3.0, 3.274491260133513)","(7.0, 7.010423599044714)"
5,"(2.0, 2.0103807327342196)","(3.0, 2.9906673301561697)","(5.0, 4.998627995999344)","(nan, 9.079024667902399)","(nan, -71.16270875434697)"
6,"(4.0, 4.546281700308219)","(2.0, 0.2858722679965226)","(nan, 3.1943461185765503)","(2.0, 3.1200527805797815)","(7.0, 7.01077431087073)"
7,"(7.0, 6.515144510545583)","(1.0, 2.7515978314391907)","(2.0, 2.038352362235766)","(7.0, 5.821571026364966)","(9.0, 8.9828286543988)"
8,"(3.0, 3.1246414840339396)","(3.0, 2.6102026149523723)","(nan, -20.499408147266248)","(7.0, 7.248550531712029)","(3.0, 3.002326373622121)"
9,"(4.0, 4.264167385445144)","(nan, -0.03042866205909084)","(5.0, 4.975424612400049)","(3.0, 2.8255393511415177)","(3.0, 2.9789699614987706)"


In [18]:
d = comparison_data.to_latex()
text_file = open("comparison2.txt", "w")
text_file.write(d)
text_file.close()