In [1]:
%matplotlib inline

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models
import torchvision
import math
from torch.autograd import Variable
import copy
import torchvision.transforms as transforms

In [3]:
import pandas as pd 
import numpy as np

In [4]:
rating_dir = './RCdata/rating_final.csv'
rest_profile_dir = './RCdata/geoplaces2.csv'
rest_cuisine_dir = './RCdata/chefmozcuisine.csv'
rest_payment_dir = './RCdata/chefmozaccepts.csv'
rest_parking_dir = './RCdata/chefmozparking.csv'
user_cuisine_dir = './RCdata/usercuisine.csv'
user_payment_dir = './RCdata/userpayment.csv'
user_profile_dir = './RCdata/userprofile.csv'

In [5]:
rest_profile = pd.read_csv(rest_profile_dir, encoding = 'cp1252')
rest_cuisine = pd.read_csv(rest_cuisine_dir) #encoding = 'cp1252')
rest_payment = pd.read_csv(rest_payment_dir)
rest_parking = pd.read_csv(rest_parking_dir)

In [6]:
user_profile = pd.read_csv(user_profile_dir)
user_payment = pd.read_csv(user_payment_dir)
user_cuisine = pd.read_csv(user_cuisine_dir)

In [7]:
n_users = len(user_profile.userID)
print (n_users)

138


## Get Contexts

In [8]:
n_rests = len(rest_profile.placeID)
print (n_rests)

130


In [9]:
rests = np.array(rest_profile.placeID)

In [10]:
cuisine_data = [0 for i in range(n_rests)]

In [11]:
for i in range(len(cuisine_data)):
    cuisines = rest_cuisine[rest_cuisine['placeID'] == rests[i]]['Rcuisine']
    this_one = ''
    if len(cuisines)>0: 
        for j in range(len(cuisines)):
            this_one = this_one + cuisines.iloc[j] + ' '        
    cuisine_data[i] = this_one

In [12]:
rest_profile.loc[:,'Cuisine'] = pd.Series(cuisine_data, index=rest_profile.index)

In [13]:
payment_data = [0 for i in range(n_rests)]

In [14]:
for i in range(len(payment_data)):
    payment = rest_payment[rest_payment['placeID'] == rests[i]]['Rpayment']
    this_one = ''
    if len(payment)>0: 
        for j in range(len(payment)):
            this_one = this_one + payment.iloc[j] + ' '        
    payment_data[i] = this_one

In [15]:
rest_profile.loc[:,'Payment'] = pd.Series(payment_data, index=rest_profile.index)

In [16]:
parking_data = [0 for i in range(n_rests)]

In [17]:
for i in range(len(parking_data)):
    parking = rest_parking[rest_parking['placeID'] == rests[i]]['parking_lot']
    this_one = ''
    if len(parking)>0: 
        for j in range(len(parking)):
            this_one = this_one + parking.iloc[j] + ' '        
    parking_data[i] = this_one

In [18]:
rest_profile.loc[:,'Parking'] = pd.Series(parking_data, index=rest_profile.index)

In [19]:
rest_profile.columns

Index(['placeID', 'latitude', 'longitude', 'the_geom_meter', 'name', 'address',
       'city', 'state', 'country', 'fax', 'zip', 'alcohol', 'smoking_area',
       'dress_code', 'accessibility', 'price', 'url', 'Rambience', 'franchise',
       'area', 'other_services', 'Cuisine', 'Payment', 'Parking'],
      dtype='object')

In [20]:
rest_profile

Unnamed: 0,placeID,latitude,longitude,the_geom_meter,name,address,city,state,country,fax,...,accessibility,price,url,Rambience,franchise,area,other_services,Cuisine,Payment,Parking
0,134999,18.915421,-99.184871,0101000020957F000088568DE356715AC138C0A525FC46...,Kiku Cuernavaca,Revolucion,Cuernavaca,Morelos,Mexico,?,...,no_accessibility,medium,kikucuernavaca.com.mx,familiar,f,closed,none,Japanese,,none
1,132825,22.147392,-100.983092,0101000020957F00001AD016568C4858C1243261274BA5...,puesto de tacos,esquina santos degollado y leon guzman,s.l.p.,s.l.p.,mexico,?,...,completely,low,?,familiar,f,open,none,Mexican,cash,none
2,135106,22.149709,-100.976093,0101000020957F0000649D6F21634858C119AE9BF528A3...,El Rincón de San Francisco,Universidad 169,San Luis Potosi,San Luis Potosi,Mexico,?,...,partially,medium,?,familiar,f,open,none,Mexican,cash VISA MasterCard-Eurocard,none
3,132667,23.752697,-99.163359,0101000020957F00005D67BCDDED8157C1222A2DC8D84D...,little pizza Emilio Portes Gil,calle emilio portes gil,victoria,tamaulipas,?,?,...,completely,low,?,familiar,t,closed,none,Armenian,cash,none
4,132613,23.752903,-99.165076,0101000020957F00008EBA2D06DC8157C194E03B7B504E...,carnitas_mata,lic. Emilio portes gil,victoria,Tamaulipas,Mexico,?,...,completely,medium,?,familiar,t,closed,none,Mexican,cash,yes
5,135040,22.135617,-100.969709,0101000020957F00001B552189B84A58C15A2AAEFD2CA2...,Restaurant los Compadres,Camino a Simon Diaz 155 Centro,San Luis Potosi,SLP,Mexico,?,...,no_accessibility,high,?,familiar,f,closed,none,,cash VISA MasterCard-Eurocard,yes
6,132732,23.754357,-99.171288,0101000020957F00008A20E615808157C16272FECBF84F...,Taqueria EL amigo,Calle Mezquite Fracc Framboyanes,Cd Victoria,Tamaulipas,Mexico,?,...,completely,low,?,familiar,f,open,none,Mexican,cash,none
7,132875,22.149901,-100.993779,0101000020957F00008A2A0747DE4758C11EB31D2A31A8...,shi ro ie,?,?,?,?,?,...,no_accessibility,high,?,familiar,t,open,Internet,Japanese,cash VISA MasterCard-Eurocard American_Express...,valet parking
8,132609,23.760268,-99.165865,0101000020957F0000A478418BBA8057C133851EB22C4E...,Pollo_Frito_Buenos_Aires,tampico,victoria,Tamaulipas,Mexico,?,...,completely,low,?,quiet,t,closed,none,Fast_Food,cash,yes
9,135082,22.151448,-100.915099,0101000020957F0000A29FAF95CD4958C1FEEEBB73A991...,la Estrella de Dimas,Villa de Pozos 192 Villa de Pozos,San Luis Potosi,SLP,Mexico,?,...,no_accessibility,medium,?,familiar,f,closed,none,,cash,none


In [21]:
user_data = user_profile
users = np.array(user_data.userID)

In [22]:
cuisine_data = [0 for i in range(n_users)]

In [23]:
for i in range(len(cuisine_data)):
    cuisines = user_cuisine[user_cuisine['userID'] == users[i]]['Rcuisine']
    this_one = ''
    if len(cuisines)>0: 
        for j in range(len(cuisines)):
            this_one = this_one + cuisines.iloc[j] + ' '        
    cuisine_data[i] = this_one

In [24]:
user_data.loc[:,'Cuisine'] = pd.Series(cuisine_data, index=user_data.index)

In [25]:
payment_data = [0 for i in range(n_users)]

In [26]:
for i in range(len(payment_data)):
    payment = user_payment[user_payment['userID'] == users[i]]['Upayment']
    this_one = ''
    if len(payment)>0: 
        for j in range(len(payment)):
            this_one = this_one + payment.iloc[j] + ' '        
    payment_data[i] = this_one

In [27]:
user_data.loc[:,'Payment'] = pd.Series(payment_data, index=user_data.index)

In [28]:
user_data.columns

Index(['userID', 'latitude', 'longitude', 'smoker', 'drink_level',
       'dress_preference', 'ambience', 'transport', 'marital_status', 'hijos',
       'birth_year', 'interest', 'personality', 'religion', 'activity',
       'color', 'weight', 'budget', 'height', 'Cuisine', 'Payment'],
      dtype='object')

In [29]:
user_data

Unnamed: 0,userID,latitude,longitude,smoker,drink_level,dress_preference,ambience,transport,marital_status,hijos,...,interest,personality,religion,activity,color,weight,budget,height,Cuisine,Payment
0,U1001,22.139997,-100.978803,false,abstemious,informal,family,on foot,single,independent,...,variety,thrifty-protector,none,student,black,69,medium,1.77,American,cash
1,U1002,22.150087,-100.983325,false,abstemious,informal,family,public,single,independent,...,technology,hunter-ostentatious,Catholic,student,red,40,low,1.87,Mexican,cash
2,U1003,22.119847,-100.946527,false,social drinker,formal,family,public,single,independent,...,none,hard-worker,Catholic,student,blue,60,low,1.69,Mexican,cash
3,U1004,18.867000,-99.183000,false,abstemious,informal,family,public,single,independent,...,variety,hard-worker,none,professional,green,44,medium,1.53,Bakery Breakfast-Brunch Japanese Contemporary ...,cash bank_debit_cards
4,U1005,22.183477,-100.959891,false,abstemious,no preference,family,public,single,independent,...,none,thrifty-protector,Catholic,student,black,65,medium,1.69,American,cash
5,U1006,22.150000,-100.983000,true,social drinker,no preference,friends,car owner,single,independent,...,variety,hard-worker,none,student,blue,75,medium,1.80,Mexican,cash
6,U1007,22.118464,-100.938256,false,casual drinker,informal,solitary,public,single,independent,...,variety,thrifty-protector,Catholic,student,purple,60,low,1.59,Family,cash
7,U1008,22.122989,-100.923811,false,social drinker,formal,solitary,public,single,independent,...,technology,hard-worker,Catholic,student,green,68,low,1.72,Cafe-Coffee_Shop Mexican Juice Hawaiian Hot_Do...,cash
8,U1009,22.159427,-100.990448,false,abstemious,formal,family,on foot,single,kids,...,variety,thrifty-protector,Catholic,student,green,75,medium,1.78,Diner Fast_Food Family Cafe-Coffee_Shop Deli-S...,cash
9,U1010,22.190889,-100.998669,false,social drinker,no preference,friends,car owner,married,kids,...,technology,hard-worker,none,student,green,40,medium,1.67,Mexican,cash


In [30]:
def get_context(user, rest, user_profile, rest_profile):
    this_user = user_profile[user_profile['userID']==user]
    this_rest = rest_profile[rest_profile['placeID'] == rest]
    
    #print (this_user)
    #print (this_rest)
    contexts = 0
    
    if this_user['smoker'].item() == 'true' and (this_rest['smoking_area'].item() == 'section'or this_rest['smoking_area'].item() == 'permitted' or this_rest['smoking_area'].item() == 'only at bar'):
        contexts = contexts + 1
    
    if (this_user['drink_level'].item() == 'casual drinker' or this_user['drink_level'].item() == 'social_drinker') and this_rest['alcohol'].item() == 'Wine-Beer' or this_rest['alcohol'].item() == 'Full_Bar':
        contexts = contexts + 1 
    
    if (this_user['dress_preference'].item() == 'formal' or this_user['dress_preference'].item() == 'elegant') and (this_rest['dress_code'].item() == 'formal' or this_rest['Rambience'].item() == 'quiet' or this_rest['franchise'].item()== 'f'):
        contexts = contexts + 1 
        
    if (this_user['dress_preference'].item() == 'informal') and (this_rest['dress_code'].item() == 'informal' or this_rest['dress_code'].item() == 'casual'):
        contexts = contexts + 1 
    
    if (this_user['ambience'].item() == 'family' or this_user['ambience'].item() == 'friends') and (this_rest['Rambience'].item() == 'familiar'):
        contexts = contexts + 1 
    
    if (this_user['ambience'].item() == 'solitary') and (this_rest['Rambience'].item() == 'quiet'):
        contexts = contexts + 1
    
    if (this_user['transport'].item() == 'car owner') and (this_rest['Parking'].item() == 'yes' or this_rest['Parking'].item() == 'public' or this_rest['Parking'].item() == 'valet parking'):
        contexts = contexts + 1 
        
    if (this_user['activity'].item() == 'student' or this_user['activity'].item() == 'working class') and (this_rest['price'].item() == 'low' or this_rest['price'].item() == 'medium'):
        contexts = contexts + 1 
    
    if (this_user['activity'].item() == 'unemployed') and (this_rest['price'].item() == 'low'):
        contexts = contexts + 1 
    
    if (this_user['activity'].item() == 'professional') and (this_rest['price'].item() == 'medium' or this_rest['price'].item() == 'high'):
        contexts = contexts + 1 
    
    if (this_user['budget'].item() == 'high') and (this_rest['price'].item() == 'high'):
        contexts = contexts + 1 
    
    if (this_user['budget'].item() == 'medium') and (this_rest['price'].item() == 'medium'):
        contexts = contexts + 1 
    
    if (this_user['budget'].item() == 'low') and (this_rest['price'].item() == 'low' or this_rest['franchise'].item() == 't'):
        contexts + contexts + 1 
        
    this_user_cuisines = this_user['Cuisine'].item().split(' ')
    this_rest_cuisines = this_rest['Cuisine'].item()
    
    this_user_payment = this_user['Payment'].item().split(' ')
    this_rest_payment = this_rest['Payment'].item()
    
    for j in range(len(this_user_cuisines)-1):
        if this_user_cuisines[j] in this_rest_cuisines:
            contexts = contexts + 1
    
    for m in range(len(this_user_payment)-1):
        if this_user_payment[m] in this_rest_payment:
            contexts = contexts + 1 
        
    return contexts
      

In [31]:
from scipy.sparse import rand as sprand
from scipy.sparse import lil_matrix

In [32]:
n_users = len(user_data.userID.unique())
n_items = len(rest_profile.placeID.unique())
print (n_users)
print (n_items)

138
130


In [33]:
user_list = np.array(user_data.userID.unique())
item_list = np.array(rest_profile.placeID.unique())

In [34]:
contexts_full = lil_matrix((n_users, n_items), dtype = float)
for i in range(len(user_list)):
    for j in range(len(item_list)):
        contexts_full[i, j] = get_context(user_list[i], item_list[j], user_data, rest_profile)

In [35]:
contexts_norm = lil_matrix((n_users, n_items), dtype = float)
for i in range(len(user_list)):
    for j in range(len(item_list)):
        contexts_norm[i, j] = (get_context(user_list[i], item_list[j], user_data, rest_profile))/11.0

## Standardizing the user profile

In [36]:
user_profile = pd.read_csv(user_profile_dir)

In [37]:
users = np.array(user_profile.userID)
cuisine_types = np.array(user_cuisine.Rcuisine.unique())

In [38]:
len(cuisine_types)

103

In [39]:
cuisine_data = [[0 for i in range(len(cuisine_types)) ] for j in range(len(users))]

In [40]:
for i in range(len(cuisine_data)):
    cuisines = user_cuisine[user_cuisine['userID'] == users[i]]['Rcuisine']
    if len(cuisines) > 0:
        for j in range(len(cuisines)):
            this_cuisine = cuisines.iloc[j]
            here, = np.where(cuisine_types == this_cuisine)
            cuisine_data[i][here[0]] = 1 

In [41]:
#create a pandas dataframe with the cuisines
cuisinedf = pd.DataFrame(data = cuisine_data, columns = cuisine_types)

In [42]:
user_profile = pd.concat([user_profile, cuisinedf], axis=1)

In [43]:
payment_types = np.array(user_payment.Upayment.unique())
len(payment_types)

5

In [44]:
payment_data = [[0 for i in range(len(payment_types))] for j in range(len(users))]

In [45]:
for i in range(len(payment_data)):
    payment = user_payment[user_payment['userID'] == users[i]]['Upayment']
    if len(payment)>0:
        for j in range(len(payment)):
            this_payment = payment.iloc[j]
            here, = np.where(payment_types == this_payment)
            payment_data[i][here[0]] = 1

In [46]:
paymentdf = pd.DataFrame(data = payment_data, columns = payment_types)

In [47]:
user_profile = pd.concat([user_profile, paymentdf], axis =1)

In [127]:
len(user_profile.columns)

152

In [48]:
user_columns = user_profile.columns[:30]
print (user_columns)

Index(['userID', 'latitude', 'longitude', 'smoker', 'drink_level',
       'dress_preference', 'ambience', 'transport', 'marital_status', 'hijos',
       'birth_year', 'interest', 'personality', 'religion', 'activity',
       'color', 'weight', 'budget', 'height', 'American', 'Mexican', 'Bakery',
       'Breakfast-Brunch', 'Japanese', 'Contemporary', 'Bagels',
       'Cafe-Coffee_Shop', 'Continental-European', 'Cafeteria', 'Family'],
      dtype='object')


In [49]:
#replace smoking
user_profile['smoker'] = user_profile['smoker'].replace(['false', '?'], 0)
user_profile['smoker'] = user_profile['smoker'].replace(['true'], 1)

In [50]:
#replace hijos 
user_profile['hijos'] = user_profile['hijos'].replace(['independent', '?'], 0)
user_profile['hijos'] = user_profile['hijos'].replace(['kids', 'dependent'], 1)

In [51]:
#replace married
user_profile['marital_status'] = user_profile['marital_status'].replace(['single', '?', 'widow'], 0)
user_profile['marital_status'] = user_profile['marital_status'].replace(['married'], 1)

In [52]:
#drop latitude, longitude, weight, height, color
#encode age? 

In [53]:
user_profile= pd.get_dummies(user_profile, columns=["drink_level", "dress_preference","ambience", "transport", "interest", "personality", 
                                     "religion", "activity", "budget"])

In [54]:
user_profile = user_profile.drop(['latitude', 'longitude', 'weight', 'height', 'color'], axis = 1)

## Calculating Similarity Matrices Among Users - Euclidean and Pearson

In [55]:
user_matrix = user_profile.drop(['userID'], axis = 1)

In [56]:
user_matrix_val = user_matrix.values

In [57]:
euclidean_mat = [[0 for i in range(len(users)) ] for j in range(len(users))]

In [58]:
#find the euclidean distance between users
for i in range(len(users)):
    for j in range(len(users)):
        if i==j:
            euclidean_mat[i][j] = 0
        else:
            a = user_matrix_val[i]
            b = user_matrix_val[j]
            euclidean_mat[i][j] = -1*np.linalg.norm(a-b)

In [59]:
#sort and get the indicies of top? 
sorted_euclidean = np.argsort(euclidean_mat, axis=1)

In [60]:
neighbors = 15

In [61]:
top_e = sorted_euclidean[:,:neighbors]
top_e

array([[105, 121,  23, ..., 134,  50,  82],
       [ 23, 121, 105, ...,  50, 134,  82],
       [121, 105,  23, ...,  50, 134,  82],
       ...,
       [121,  23, 105, ...,  50, 134,  82],
       [ 23, 121, 105, ...,  50, 134,  82],
       [121,  23, 105, ...,  50, 134,  82]])

In [62]:
import scipy.stats

In [63]:
pearson_mat = [[0 for i in range(len(users)) ] for j in range(len(users))]

In [64]:
for i in range(len(users)):
    for j in range(len(users)):
        if i==j:
            pearson_mat[i][j] = 0
        else:
            a = user_matrix_val[i]
            b = user_matrix_val[j]
            pearson_mat[i][j] = -1*scipy.stats.pearsonr(a,b)[0]

In [65]:
sorted_pearson = np.argsort(pearson_mat, axis=1)

In [66]:
top_p = sorted_pearson[:,:neighbors]
top_p

array([[109,   4,  46, ...,  86,  49,  27],
       [ 25,  65,  80, ..., 118,  53,  93],
       [136,  97,  35, ...,  31,  55,  87],
       ...,
       [ 26,  79,  37, ...,  22,  24, 130],
       [  2,  95,  53, ..., 108,  31,  62],
       [117,  39,  53, ..., 118,  69,  99]])

## Get the Ratings + the new Ratings based on the weighted average of "close" people 

In [67]:
ratings = pd.read_csv(rating_dir)

In [68]:
from sklearn.model_selection import train_test_split
rating_train, rating_test = train_test_split(ratings, test_size=0.3)   

In [96]:
interactions_full_orig = lil_matrix((n_users, n_items), dtype = float)
interactions_full_SIM_P = lil_matrix((n_users, n_items), dtype = float)
interactions_full_SIM_E = lil_matrix((n_users, n_items), dtype = float)
for row in ratings.itertuples():
    user, = np.where(user_list == row[1])
    item, = np.where(item_list == row[2])
    interactions_full_orig[user[0], item[0]] = row[3] 
    interactions_full_SIM_P[user[0], item[0]] = row[3]
    interactions_full_SIM_E[user[0], item[0]] = row[3]

In [70]:
interactions_train_orig = lil_matrix((n_users, n_items), dtype = float)
interactions_train_SIM_E = lil_matrix((n_users, n_items), dtype = float)
interactions_train_SIM_P = lil_matrix((n_users, n_items), dtype = float)
for row in rating_train.itertuples():
    user, = np.where(user_list == row[1])
    item, = np.where(item_list == row[2])
    interactions_train_orig[user[0], item[0]] = row[3]
    interactions_train_SIM_E[user[0], item[0]] = row[3]
    interactions_train_SIM_P[user[0], item[0]] = row[3]

In [71]:
interactions_test_orig = lil_matrix((n_users, n_items), dtype = float)
interactions_test_SIM_E = lil_matrix((n_users, n_items), dtype = float)
interactions_test_SIM_P = lil_matrix((n_users, n_items), dtype = float)
for row in rating_test.itertuples():
    user, = np.where(user_list == row[1])
    item, = np.where(item_list == row[2])
    interactions_test_orig[user[0], item[0]] = row[3]
    interactions_test_SIM_E[user[0], item[0]] = row[3]
    interactions_test_SIM_P[user[0], item[0]] = row[3]

In [72]:
rat_train_dense = interactions_train_orig.todense()
rat_test_dense = interactions_test_orig.todense()
rat_full_dense = interactions_full_orig.todense()

In [73]:
for i in range(interactions_train_orig.shape[0]):
    top = top_e[i]
    
    for m in range(n_items):
        if m not in interactions_train_orig[0].rows[0]:
            other = interactions_train_orig[j]
            total = 0.0
            count = 0.0
            for j in range(neighbors):
                if m in interactions_train_orig[top[j]].rows[0]:
                    total = total + (rat_train_dense[top[j],m])* -1* (euclidean_mat[top[j]][m])
                    count = count + (-1* (euclidean_mat[top[j]][m]))
        
            if count == 0:
                nm = 0
            else:
                nm = total/count
            
            interactions_train_SIM_E[i,m] = nm

In [74]:
for i in range(interactions_test_orig.shape[0]):
    top = top_e[i]
    
    for m in range(n_items):
        if m not in interactions_test_orig[0].rows[0]:
            other = interactions_test_orig[j]
            total = 0.0
            count = 0.0
            for j in range(neighbors):
                if m in interactions_test_orig[top[j]].rows[0]:
                    total = total + (rat_test_dense[top[j],m])* -1* (euclidean_mat[top[j]][m])
                    count = count + 1 * -1* (euclidean_mat[top[j]][m])
        
            if count == 0:
                nm = 0
            else:
                nm = total/count
            
            interactions_test_SIM_E[i,m] = nm

In [75]:
for i in range(interactions_train_orig.shape[0]):
    top = top_p[i]
    
    for m in range(n_items):
        if m not in interactions_train_orig[0].rows[0]:
            other = interactions_train_orig[j]
            total = 0.0
            count = 0.0
            for j in range(neighbors):
                if m in interactions_train_orig[top[j]].rows[0]:
                    total = total + (rat_train_dense[top[j],m])* -1* (pearson_mat[top[j]][m])
                    count = count + 1*( -1* (pearson_mat[top[j]][m]))
        
            if count == 0:
                nm = 0
            else:
                nm = total/count
            
            interactions_train_SIM_P[i,m] = nm

In [76]:
for i in range(interactions_test_orig.shape[0]):
    top = top_p[i]
    
    for m in range(n_items):
        if m not in interactions_test_orig[0].rows[0]:
            other = interactions_test_orig[j]
            total = 0.0
            count = 0.0
            for j in range(neighbors):
                if m in interactions_test_orig[top[j]].rows[0]:
                    total = total + (rat_test_dense[top[j],m])* -1* (pearson_mat[top[j]][m])
                    count = count + 1*( -1* (pearson_mat[top[j]][m]))
        
            if count == 0:
                nm = 0
            else:
                nm = total/count
            
            interactions_test_SIM_P[i,m] = nm

In [77]:
for i in range(interactions_full_orig.shape[0]):
    top = top_p[i]
    
    for m in range(n_items):
        if m not in interactions_full_orig[0].rows[0]:
            other = interactions_full_orig[j]
            total = 0.0
            count = 0.0
            for j in range(neighbors):
                if m in interactions_full_orig[top[j]].rows[0]:
                    total = total + (rat_full_dense[top[j],m])* -1* (pearson_mat[top[j]][m])
                    count = count + 1*( -1* (pearson_mat[top[j]][m]))
        
            if count == 0:
                nm = 0
            else:
                nm = total/count
            
            interactions_full_SIM_P[i,m] = nm

In [97]:
for i in range(interactions_full_orig.shape[0]):
    top = top_p[i]
    
    for m in range(n_items):
        if m not in interactions_full_orig[0].rows[0]:
            other = interactions_full_orig[j]
            total = 0.0
            count = 0.0
            for j in range(neighbors):
                if m in interactions_full_orig[top[j]].rows[0]:
                    total = total + (rat_full_dense[top[j],m])* -1* (euclidean_mat[top[j]][m])
                    count = count + 1*( -1* (euclidean_mat[top[j]][m]))
        
            if count == 0:
                nm = 0
            else:
                nm = total/count
            
            interactions_full_SIM_E[i,m] = nm

## Begin the Model

In [78]:
class MatrixFactorization(torch.nn.Module):
    
    def __init__(self, n_users, n_items, n_factors=5):
        super().__init__()
        self.user_factors = torch.nn.Embedding(n_users,
                                               n_factors,
                                               sparse=False)
        self.item_factors = torch.nn.Embedding(n_items,n_factors,sparse=False)
                  
    
    def forward(self, users, items, context):
        #return (self.user_factors(user) * self.item_factors(item)).sum(1)
    
        orig = torch.mm(self.user_factors(users),torch.transpose(self.item_factors(items),0,1))
        
        #here = torch.from_numpy(context)
        #context = torch.mm(self.user_context(users), torch.transpose(self.item_context(items),0,1))
        
        #fused_context = context * self.fusion.expand_as(context)
        
        mat = torch.add(orig, context)
        return mat
        #return torch.mm(self.user_factors(users),torch.transpose(self.item_factors(items),0,1))

In [79]:
class BiasedMatrixFactorization(torch.nn.Module):
    
    def __init__(self, n_users, n_items,mean, n_factors=5):
        super().__init__()
        self.user_factors = torch.nn.Embedding(n_users,
                                               n_factors,
                                               sparse=False)
        self.item_factors = torch.nn.Embedding(n_items, n_factors,sparse=False)
        self.user_biases = torch.nn.Embedding(n_users, 1, sparse = False)
        
        self.item_biases = torch.nn.Embedding(n_items,1, sparse = False)
                                               
        self.mu = mean
    
    
    def forward(self, users, items, context):
        
        item_means = []
        user_means = []
        for i in range(len(users)):
            item_means.append(torch.t(self.item_biases(items)))
            
        for j in range(len(items)):
            user_means.append(torch.t(self.user_biases(users)))
        
        it_means = torch.cat(item_means, 0)
        us_means = torch.cat(user_means, 0)
        us_means = torch.t(us_means)
        
        orig = torch.mm(self.user_factors(users),torch.transpose(self.item_factors(items),0,1))
        
        y = torch.add(orig, it_means)
        
        f = torch.add(y,us_means)
        
        result = torch.add(f,self.mu)
        
        mat = torch.add(result, context)
        
        return mat

In [80]:
def get_batch(batch_size,ratings):
    # Sort our data and scramble it
    rows, cols = ratings.shape
    p = np.random.permutation(rows)
    
    # create batches
    sindex = 0
    eindex = batch_size
    while eindex < rows:
        batch = p[sindex:eindex]
        temp = eindex
        eindex = eindex + batch_size
        sindex = temp
        yield batch

    if eindex >= rows:
        batch = range(sindex,rows)
        yield batch

In [81]:
def load_model(model,load_path):
    load_dict = torch.load(load_path)
    val_loss = load_dict['val_loss']
    model.load_state_dict(load_dict['model_state_dict'])


def checkpoint_model(val_loss, model,save_path):
    save_dict = dict(
                     val_loss=val_loss,
                     model_state_dict=model.state_dict())
                     #opt_state_dict=predictions.state_dict())
    torch.save(save_dict, save_path)

In [82]:
def run_validation(model, test_ratings_SIM, test_ratings, BATCH_SIZE, load_path):
    load_model(model, load_path)
    running_loss = 0.0
    loss_func = torch.nn.MSELoss()
    accur = 0
    for i,batch in enumerate(get_batch(BATCH_SIZE, test_ratings_SIM)):
        
        interactions = Variable(torch.FloatTensor(test_ratings_SIM[batch, :].toarray()))
        rows = Variable(torch.LongTensor(batch))
        cols = Variable(torch.LongTensor(np.arange(test_ratings_SIM.shape[1])))
        
        predictions = model(rows, cols)
        loss = loss_func(predictions, interactions)
        
        running_loss += np.sqrt(loss.data[0])*BATCH_SIZE
        
        correct = get_accuracy(test_ratings_SIM,test_ratings, model, rows)
        accur = accur+ correct
    
    epoch_loss = running_loss/test_ratings_SIM.shape[0]
    return epoch_loss,accur

In [83]:
def run_validation_context(model, test_ratings_SIM, test_ratings, context_mat, fusion, BATCH_SIZE, load_path):
    load_model(model, load_path)
    running_loss = 0.0
    loss_func = torch.nn.MSELoss()
    accur = 0
    for i,batch in enumerate(get_batch(BATCH_SIZE, test_ratings_SIM)):
        
        interactions = Variable(torch.FloatTensor(test_ratings_SIM[batch, :].toarray()))
        rows = Variable(torch.LongTensor(batch))
        cols = Variable(torch.LongTensor(np.arange(test_ratings_SIM.shape[1])))
        
        first = (context_mat[batch,:].toarray())*fusion
        contexts = Variable(torch.FloatTensor(first))
        
        
        predictions = model(rows, cols, contexts)
        loss = loss_func(predictions, interactions)
        
        running_loss += np.sqrt(loss.data[0])*BATCH_SIZE
        
        correct = get_accuracy_context(test_ratings_SIM,test_ratings, model, rows, fusion, context_mat)
        accur = accur+ correct
    
    epoch_loss = running_loss/test_ratings_SIM.shape[0]
    return epoch_loss,accur

In [84]:
def get_accuracy(interactions_SIM, interactions_orig,model,rows):
    
    checks = np.nonzero(interactions_orig)
    check_these = checks[0]
    
    batch = range(interactions_SIM.shape[0])
    interactions = Variable(torch.FloatTensor(interactions_SIM[batch,:].toarray()))
    users = Variable(torch.LongTensor(batch))
    cols = Variable(torch.LongTensor(np.arange(interactions_SIM.shape[1])))
    predictions = model(users, cols)
    
    x = predictions.data.numpy()
    
    correct = 0
    
    for u in range(len(rows)):
        user = rows.data[u]
        where = np.where(check_these==user)
        index = where[0]
        if len(index) > 0:
            for c in range(len(index)):
                rest = checks[1][index[c]]
                prediction = predictions[u][rest]
                   
                val = prediction.data[0]
                
                if val <= np.percentile(x,22):
                    this_val = 0.0
                elif np.percentile(x,22) < val < np.percentile(x, 58):
                    this_val = 1.0
                elif val >= np.percentile(x, 58):
                     this_val= 2.0
        
                
                if this_val == interactions_orig[user,rest]:
                    correct = correct + 1 
            else:
                next
    return correct

In [85]:
def get_accuracy_context(interactions_SIM, interactions_orig,model,rows,fusion, contexts_full):
    
    checks = np.nonzero(interactions_orig)
    check_these = checks[0]
    
    batch = range(interactions_SIM.shape[0])
    interactions = Variable(torch.FloatTensor(interactions_SIM[batch,:].toarray()))
    users = Variable(torch.LongTensor(batch))
    cols = Variable(torch.LongTensor(np.arange(interactions_SIM.shape[1])))
    
    first = (contexts_full[batch,:].toarray())*fusion
    contexts = Variable(torch.FloatTensor(first))
    
    predictions = model(users, cols, contexts)
    
    x = predictions.data.numpy()
    
    correct = 0
    
    for u in range(len(rows)):
        user = rows.data[u]
        where = np.where(check_these==user)
        index = where[0]
        if len(index) > 0:
            for c in range(len(index)):
                rest = checks[1][index[c]]
                prediction = predictions[u][rest]
                   
                val = prediction.data[0]
                
                if val <= np.percentile(x,22):
                    this_val = 0.0
                elif np.percentile(x,22) < val < np.percentile(x, 58):
                    this_val = 1.0
                elif val >= np.percentile(x, 58):
                     this_val= 2.0
        
                
                if this_val == interactions_orig[user,rest]:
                    correct = correct + 1 
            else:
                next
    return correct

In [86]:
def generate_predictions(model, contexts_full, interactions_sim, interactions_full, fusion, load_path):
    load_model(model, load_path)

    batch = range(interactions_sim.shape[0])
    interactions = Variable(torch.FloatTensor(interactions_sim[batch,:].toarray()))
    rows = Variable(torch.LongTensor(batch))
    cols = Variable(torch.LongTensor(np.arange(interactions_sim.shape[1])))
    first = (contexts_full[batch,:].toarray())*fusion
        
    #contexts = Variable(torch.FloatTensor(contexts_full[batch,:].toarray()))
    contexts = Variable(torch.FloatTensor(first))
        
    predictions = model(rows, cols, contexts)
    
    interactions2 = Variable(torch.FloatTensor(interactions_full[batch,:].toarray()))
    
    predictions[interactions2!=0] = 0
        
    val, index = torch.sort(predictions,1, descending = True)
        
    top5 = index[:,:5]
        
    return top5

In [87]:
def give_top5_cuisines(recs, name_list, cuisine_list, user_list, rest_list):
    your_rests = [[0 for j in range(11)] for u in range(len(user_list))]
    for i in range(len(user_list)):
        user, = np.where(user_list == user_list[i])
        your_rests[i][0] = user_list[i]
        count = 1
        for r in range(5):
            this_rest = recs[user[0]][r].data[0]
            rest = rest_list[this_rest]
            x = name_list.loc[name_list['placeID'] == rest]
            name = x['name'].item()
            
            your_rests[i][count] = name
            count = count +1
            
            y = cuisine_list.loc[cuisine_list['placeID'] == rest]['Rcuisine']
            
            cuisine_types = ''
            for m in y:
                cuisine_types = cuisine_types + m + ', '
            
            your_rests[i][count] = cuisine_types
            count = count + 1
            
    return your_rests

In [88]:
def give_number_of_contexts(recs, context, this_user,user_list, rest_list):
   
    contexts = [0 for i in range(5)]
    
    user, = np.where(this_user == user_list)
    print (user)
    
    for u in range(5):
        this_rest = recs[user[0]][u].data[0]
        contexts[u] = context[user[0], this_rest]
    
    return contexts

In [89]:
batch_size = 10
save_path = './project_models/model.ckpt'
save_path_val = './project_models/modelv.ckpt'
factors = [2,3,4,5]
fusion_arr = [0.0,1.0]

In [90]:
checks = np.nonzero(interactions_train_orig)
checks_v = np.nonzero(interactions_test_orig)

In [92]:
#Euclidean Distance
best_val_loss = 1000.0
best_val_accuracy = 0.0
#FOCUS ON FACTOR 2
for x in range(1):
    print ('Factor ' + str(factors[x]))
    for f in range(1):  
        fusion = 1.0
        print ('FUSION: ' + str(fusion))
        best_train_loss = 1000.0
        #accur = 0
        model = MatrixFactorization(interactions_train_orig.shape[0], interactions_train_orig.shape[1], n_factors=factors[x])
        loss_func = torch.nn.MSELoss()
        reg_loss_func = torch.optim.Adam(model.parameters(), lr=1e-6, weight_decay=0.001)
        for i in range(5):
            print ("Epoch: " + str(i))
            running_loss = 0.0
            accur = 0
            for m, batch in enumerate(get_batch(batch_size, interactions_train_SIM_E)):
                reg_loss_func.zero_grad()
                interactions = Variable(torch.FloatTensor(interactions_train_SIM_E[batch,:].toarray()))
                rows = Variable(torch.LongTensor(batch))
                cols = Variable(torch.LongTensor(np.arange(interactions_train_SIM_E.shape[1])))
                first = (contexts_full[batch,:].toarray())*fusion
        
                contexts = Variable(torch.FloatTensor(first))
                #predictions = model(rows, cols)
                predictions = model(rows, cols, contexts)
        
                loss = loss_func(predictions, interactions)
                running_loss += np.sqrt(loss.data[0])*batch_size
                
                #correct = get_accuracy_test(interactions_train_orig, model, rows, fusion, contexts_full)
                correct = get_accuracy_context(interactions_train_SIM_E, interactions_train_orig, model, rows, fusion, contexts_full)
            #correct = get_accuracy(interactions_train_SIM_E, interactions_train_orig, model, rows)
                accur = accur + correct
                
                loss.backward()
            reg_loss_func.step()
    
            epoch_loss = running_loss/interactions_train_SIM_E.shape[0]
            print ('train_loss: ' + str(epoch_loss))
            
            print ('accuracy:' + str(accur/len(checks[0])))
            
            if epoch_loss < best_train_loss:
                best_train_loss = epoch_loss
                print ('checkpoint model with train loss: ' + str(best_train_loss))
                checkpoint_model(epoch_loss, model, save_path)
        
            #val_loss, val_accuracy = run_validation(model,interactions_test_orig, contexts_full,fusion, batch_size, save_path)
            val_loss, val_accuracy = run_validation_context(model, interactions_test_SIM_E, interactions_test_orig, contexts_full, fusion,batch_size, save_path)
            print ('val loss: ' + str(val_loss))
            val_percent = val_accuracy/len(checks_v[0])
            print ('val accuracy: ' + str(val_accuracy/len(checks_v[0])))
        
        
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                print ('checkpointing_model with val loss: ' + str(best_val_loss))
                checkpoint_model(val_loss, model, save_path_val)
            if val_percent > best_val_accuracy:
                best_val_accuracy = val_percent
                checkpoint_model(val_loss, model, save_path_val)
                print ('New Best Val Accuracy: ' + str(best_val_accuracy)+ ' with fusion: ' + str(fusion) + ' and factor :' + str(factors[x]))

Factor 2
FUSION: 1.0
Epoch: 0
train_loss: 3.8151748981124483
accuracy:0.43285939968404424
checkpoint model with train loss: 3.8151748981124483
val loss: 3.930516918778681
val accuracy: 0.4306569343065693
checkpointing_model with val loss: 3.930516918778681
New Best Val Accuracy: 0.4306569343065693 with fusion: 1.0 and factor :2
Epoch: 1
train_loss: 3.7943076296584204
accuracy:0.41232227488151657
checkpoint model with train loss: 3.7943076296584204
val loss: 3.939097655510871
val accuracy: 0.3759124087591241
Epoch: 2
train_loss: 3.823497646222488
accuracy:0.4470774091627172
val loss: 3.9348100172761478
val accuracy: 0.4233576642335766
Epoch: 3
train_loss: 3.80264606693316
accuracy:0.4060031595576619
val loss: 3.97464476063009
val accuracy: 0.40145985401459855
Epoch: 4
train_loss: 3.8345290558052905
accuracy:0.4423380726698262
val loss: 3.9327578391290103
val accuracy: 0.40145985401459855


In [761]:
#Euclidean Distance
#context_norm
best_val_loss = 1000.0
best_val_accuracy = 0.0
#FOCUS ON FACTOR 2
for x in range(1):
    print ('Factor ' + str(factors[x]))
    for f in range(len(fusion_arr)):  
        fusion = 0
        print ('FUSION: ' + str(fusion))
        best_train_loss = 1000.0
        #accur = 0
        model = MatrixFactorization(interactions_train_orig.shape[0], interactions_train_orig.shape[1], n_factors=factors[x])
        loss_func = torch.nn.MSELoss()
        reg_loss_func = torch.optim.Adam(model.parameters(), lr=1e-6, weight_decay=0.001)
        for i in range(5):
            print ("Epoch: " + str(i))
            running_loss = 0.0
            accur = 0
            for m, batch in enumerate(get_batch(batch_size, interactions_train_SIM_E)):
                reg_loss_func.zero_grad()
                interactions = Variable(torch.FloatTensor(interactions_train_SIM_E[batch,:].toarray()))
                rows = Variable(torch.LongTensor(batch))
                cols = Variable(torch.LongTensor(np.arange(interactions_train_SIM_E.shape[1])))
                first = (contexts_norm[batch,:].toarray())*fusion
                #print (first)
        
                contexts = Variable(torch.FloatTensor(first))
                #predictions = model(rows, cols)
                predictions = model(rows, cols, contexts)
        
                loss = loss_func(predictions, interactions)
                running_loss += np.sqrt(loss.data[0])*batch_size
                
                #correct = get_accuracy_test(interactions_train_orig, model, rows, fusion, contexts_full)
                correct = get_accuracy_context(interactions_train_SIM_E, interactions_train_orig, model, rows, fusion, contexts_norm)
            #correct = get_accuracy(interactions_train_SIM_E, interactions_train_orig, model, rows)
                accur = accur + correct
                
                loss.backward()
            reg_loss_func.step()
    
            epoch_loss = running_loss/interactions_train_SIM_E.shape[0]
            print ('train_loss: ' + str(epoch_loss))
            
            print ('accuracy:' + str(accur/len(checks[0])))
            
            if epoch_loss < best_train_loss:
                best_train_loss = epoch_loss
                print ('checkpoint model with train loss: ' + str(best_train_loss))
                checkpoint_model(epoch_loss, model, save_path)
        
            #val_loss, val_accuracy = run_validation(model,interactions_test_orig, contexts_full,fusion, batch_size, save_path)
            val_loss, val_accuracy = run_validation_context(model, interactions_test_SIM_E, interactions_test_orig, contexts_norm, fusion,batch_size, save_path)
            print ('val loss: ' + str(val_loss))
            val_percent = val_accuracy/len(checks_v[0])
            print ('val accuracy: ' + str(val_accuracy/len(checks_v[0])))
        
        
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                print ('checkpointing_model with val loss: ' + str(best_val_loss))
                checkpoint_model(val_loss, model, save_path_val)
            if val_percent > best_val_accuracy:
                best_val_accuracy = val_percent
                checkpoint_model(val_loss, model, save_path_val)
                print ('New Best Val Accuracy: ' + str(best_val_accuracy)+ ' with fusion: ' + str(fusion) + ' and factor :' + str(factors[x]))

Factor 2
FUSION: 0
Epoch: 0
train_loss: 1.4751177309534327
accuracy:0.4195583596214511
checkpoint model with train loss: 1.4751177309534327
val loss: 1.4956723884934258
val accuracy: 0.43223443223443225
checkpointing_model with val loss: 1.4956723884934258
New Best Val Accuracy: 0.43223443223443225 with fusion: 0 and factor :2
Epoch: 1
train_loss: 1.4881473243211203
accuracy:0.3832807570977918
val loss: 1.5161030360947874
val accuracy: 0.43956043956043955
New Best Val Accuracy: 0.43956043956043955 with fusion: 0 and factor :2
Epoch: 2
train_loss: 1.530080225960882
accuracy:0.40536277602523657
val loss: 1.5000531755389261
val accuracy: 0.3882783882783883
Epoch: 3
train_loss: 1.517843250059643
accuracy:0.3943217665615142
val loss: 1.50407163933605
val accuracy: 0.39194139194139194
Epoch: 4
train_loss: 1.5300708909035556
accuracy:0.37381703470031546
val loss: 1.492402444988429
val accuracy: 0.3882783882783883
checkpointing_model with val loss: 1.492402444988429
FUSION: 0
Epoch: 0
train_lo

In [102]:
col_names = ['User','Rec1', 'Rec1_Cuisines', 'Rec2', 'Rec2_Cuisines', 'Rec3', 
             'Rec3_Cuisines', 'Rec4','Rec4_Cuisines',  'Rec5', 'Rec5_Cuisines']

In [832]:
#Pearson Coefficient
best_val_loss = 1000.0
best_val_accuracy = 0.0
#FOCUS ON FACTOR 2
for x in range(1):
    print ('Factor ' + str(factors[x]))
    for f in range(1):  
        #fusion = fusion_arr[f]
        fusion = 1.0
        print ('FUSION: ' + str(fusion))
        best_train_loss = 1000.0
        #accur = 0
        model = BiasedMatrixFactorization(interactions_train_orig.shape[0], interactions_train_orig.shape[1], mean, n_factors=factors[x])
        loss_func = torch.nn.MSELoss()
        reg_loss_func = torch.optim.Adam(model.parameters(), lr=1e-6, weight_decay=0.001)
        for i in range(5):
            print ("Epoch: " + str(i))
            running_loss = 0.0
            accur = 0
            for m, batch in enumerate(get_batch(batch_size, interactions_train_SIM_P)):
                reg_loss_func.zero_grad()
                interactions = Variable(torch.FloatTensor(interactions_train_SIM_P[batch,:].toarray()))
                rows = Variable(torch.LongTensor(batch))
                cols = Variable(torch.LongTensor(np.arange(interactions_train_SIM_P.shape[1])))
                first = (contexts_full[batch,:].toarray())*fusion
        
                contexts = Variable(torch.FloatTensor(first))
                #predictions = model(rows, cols)
                predictions = model(rows, cols, contexts)
        
                loss = loss_func(predictions, interactions)
                running_loss += np.sqrt(loss.data[0])*batch_size
                
                #correct = get_accuracy_test(interactions_train_orig, model, rows, fusion, contexts_full)
                correct = get_accuracy_context(interactions_train_SIM_P, interactions_train_orig, model, rows, fusion, contexts_full)
            #correct = get_accuracy(interactions_train_SIM_E, interactions_train_orig, model, rows)
                accur = accur + correct
                
                loss.backward()
            reg_loss_func.step()
    
            epoch_loss = running_loss/interactions_train_SIM_P.shape[0]
            print ('train_loss: ' + str(epoch_loss))
            
            print ('accuracy:' + str(accur/len(checks[0])))
            
            if epoch_loss < best_train_loss:
                best_train_loss = epoch_loss
                print ('checkpoint model with train loss: ' + str(best_train_loss))
                checkpoint_model(epoch_loss, model, save_path)
        
            #val_loss, val_accuracy = run_validation(model,interactions_test_orig, contexts_full,fusion, batch_size, save_path)
            val_loss, val_accuracy = run_validation_context(model, interactions_test_SIM_P, interactions_test_orig, contexts_full, fusion,batch_size, save_path)
            print ('val loss: ' + str(val_loss))
            val_percent = val_accuracy/len(checks_v[0])
            print ('val accuracy: ' + str(val_accuracy/len(checks_v[0])))
        
        
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                print ('checkpointing_model with val loss: ' + str(best_val_loss))
                checkpoint_model(val_loss, model, save_path_val)
            if val_percent > best_val_accuracy:
                best_val_accuracy = val_percent
                checkpoint_model(val_loss, model, save_path_val)
                print ('New Best Val Accuracy: ' + str(best_val_accuracy)+ ' with fusion: ' + str(fusion) + ' and factor :' + str(factors[x]))

Factor 2
FUSION: 1.0
Epoch: 0
train_loss: 3.820659435960233
accuracy:0.47791798107255523
checkpoint model with train loss: 3.820659435960233
val loss: 4.023977354701263
val accuracy: 0.5201465201465202
checkpointing_model with val loss: 4.023977354701263
New Best Val Accuracy: 0.5201465201465202 with fusion: 1.0 and factor :2
Epoch: 1
train_loss: 3.862341302218013
accuracy:0.4605678233438486
val loss: 4.02268299870417
val accuracy: 0.45787545787545786
checkpointing_model with val loss: 4.02268299870417
Epoch: 2
train_loss: 3.8717096118867453
accuracy:0.4605678233438486
val loss: 4.088140383082364
val accuracy: 0.46153846153846156
Epoch: 3
train_loss: 3.865314164747259
accuracy:0.501577287066246
val loss: 4.064666958739246
val accuracy: 0.4468864468864469
Epoch: 4
train_loss: 3.8583422771996094
accuracy:0.4794952681388013
val loss: 4.092608682109769
val accuracy: 0.4542124542124542


In [98]:
recs = generate_predictions(model, contexts_full, interactions_full_orig, interactions_full_SIM_E, fusion, save_path_val)
results = give_top5_cuisines(recs, rest_profile, rest_cuisine, user_list, item_list)

In [114]:
give_number_of_contexts(recs, contexts_full, 'U1093', user_list,item_list)

[92]


[6.0, 6.0, 2.0, 6.0, 5.0]

In [115]:
give_number_of_contexts(recs, contexts_full, 'U1077', user_list,item_list)

[76]


[7.0, 5.0, 7.0, 6.0, 5.0]

In [118]:
df = pd.DataFrame(results,columns = col_names)

In [126]:
len(df['Rec5'].value_counts().index.tolist())

46

In [120]:
df[df['User']== 'U1093'].values

array([['U1093', 'Restaurant El Muladar de Calzada', '', 'Vips', '',
        'Restaurante y Pescaderia Tampico', 'Seafood, ',
        'El Rincón de San Francisco', 'Mexican, ', 'vips', 'Mexican, ']],
      dtype=object)

In [121]:
df[df['User']== 'U1077'].values

array([['U1077', 'vips', 'Mexican, ', 'cafe punta del cielo',
        'Cafeteria, ', 'El Rincón de San Francisco', 'Mexican, ',
        'Restaurant El Muladar de Calzada', '',
        'Restaurante de Mariscos la Langosta', 'Seafood, ']], dtype=object)

In [646]:
#Pearson 
#context_norm
best_val_loss = 1000.0
best_val_accuracy = 0.0

for x in range(1):
    print ('Factor ' + str(factors[x]))
    for f in range(len(fusion_arr)):  
        fusion = fusion_arr[f]
        print ('FUSION: ' + str(fusion))
        best_train_loss = 1000.0
        #accur = 0
        model = MatrixFactorization(interactions_train_orig.shape[0], interactions_train_orig.shape[1], n_factors=factors[x])
        loss_func = torch.nn.MSELoss()
        reg_loss_func = torch.optim.Adam(model.parameters(), lr=1e-6, weight_decay=0.001)
        for i in range(5):
            print ("Epoch: " + str(i))
            running_loss = 0.0
            accur = 0
            for m, batch in enumerate(get_batch(batch_size, interactions_train_SIM_P)):
                reg_loss_func.zero_grad()
                interactions = Variable(torch.FloatTensor(interactions_train_SIM_P[batch,:].toarray()))
                rows = Variable(torch.LongTensor(batch))
                cols = Variable(torch.LongTensor(np.arange(interactions_train_SIM_P.shape[1])))
                first = (contexts_norm[batch,:].toarray())*fusion
        
                contexts = Variable(torch.FloatTensor(first))
                #predictions = model(rows, cols)
                predictions = model(rows, cols, contexts)
        
                loss = loss_func(predictions, interactions)
                running_loss += np.sqrt(loss.data[0])*batch_size
                
                #correct = get_accuracy_test(interactions_train_orig, model, rows, fusion, contexts_full)
                correct = get_accuracy_context(interactions_train_SIM_P, interactions_train_orig, model, rows, fusion, contexts_norm)
            #correct = get_accuracy(interactions_train_SIM_E, interactions_train_orig, model, rows)
                accur = accur + correct
                
                loss.backward()
            reg_loss_func.step()
    
            epoch_loss = running_loss/interactions_train_SIM_P.shape[0]
            print ('train_loss: ' + str(epoch_loss))
            
            print ('accuracy:' + str(accur/len(checks[0])))
            
            if epoch_loss < best_train_loss:
                best_train_loss = epoch_loss
                print ('checkpoint model with train loss: ' + str(best_train_loss))
                checkpoint_model(epoch_loss, model, save_path)
        
            #val_loss, val_accuracy = run_validation(model,interactions_test_orig, contexts_full,fusion, batch_size, save_path)
            val_loss, val_accuracy = run_validation_context(model, interactions_test_SIM_P, interactions_test_orig, contexts_norm, fusion,batch_size, save_path)
            print ('val loss: ' + str(val_loss))
            val_percent = val_accuracy/len(checks_v[0])
            print ('val accuracy: ' + str(val_accuracy/len(checks_v[0])))
        
        
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                print ('checkpointing_model with val loss: ' + str(best_val_loss))
                checkpoint_model(val_loss, model, save_path_val)
            if val_percent > best_val_accuracy:
                best_val_accuracy = val_percent
                checkpoint_model(val_loss, model, save_path_val)
                print ('New Best Val Accuracy: ' + str(best_val_accuracy)+ ' with fusion: ' + str(fusion) + ' and factor :' + str(factors[x]))

Factor 2
FUSION: 0.0
Epoch: 0
train_loss: 1.9097777606599595
accuracy:0.37066246056782337
checkpoint model with train loss: 1.9097777606599595
val loss: 1.7664581341810683
val accuracy: 0.3663003663003663
checkpointing_model with val loss: 1.7664581341810683
New Best Val Accuracy: 0.3663003663003663 with fusion: 0.0 and factor :2
Epoch: 1
train_loss: 1.8995913041085437
accuracy:0.4085173501577287
checkpoint model with train loss: 1.8995913041085437
val loss: 1.755335653178724
val accuracy: 0.4358974358974359
checkpointing_model with val loss: 1.755335653178724
New Best Val Accuracy: 0.4358974358974359 with fusion: 0.0 and factor :2
Epoch: 2
train_loss: 1.9076858343906742
accuracy:0.3943217665615142
val loss: 1.7383997527298298
val accuracy: 0.42124542124542125
checkpointing_model with val loss: 1.7383997527298298
Epoch: 3
train_loss: 1.9083596851535243
accuracy:0.42586750788643535
val loss: 1.757681261423319
val accuracy: 0.41025641025641024
Epoch: 4
train_loss: 1.8669805415957454
accu

In [111]:
mean = np.mean(interactions_train_orig)
save_path_b = './project_models/bmodel.ckpt'
save_path_val_b = './project_models/bmodelv.ckpt'

In [112]:
#Euclidean Distance
best_val_loss = 1000.0
best_val_accuracy = 0.0
#FOCUS ON FACTOR 2
for x in range(1):
    print ('Factor ' + str(factors[x]))
    for f in range(len(fusion_arr)):  
        fusion = 1.0
        print ('FUSION: ' + str(fusion))
        best_train_loss = 1000.0
        #accur = 0
        model = BiasedMatrixFactorization(interactions_train_orig.shape[0], interactions_train_orig.shape[1],mean, n_factors=factors[x])
        loss_func = torch.nn.MSELoss()
        reg_loss_func = torch.optim.Adam(model.parameters(), lr=1e-6, weight_decay=0.001)
        for i in range(5):
            print ("Epoch: " + str(i))
            running_loss = 0.0
            accur = 0
            for m, batch in enumerate(get_batch(batch_size, interactions_train_SIM_E)):
                reg_loss_func.zero_grad()
                interactions = Variable(torch.FloatTensor(interactions_train_SIM_E[batch,:].toarray()))
                rows = Variable(torch.LongTensor(batch))
                cols = Variable(torch.LongTensor(np.arange(interactions_train_SIM_E.shape[1])))
                first = (contexts_full[batch,:].toarray())*fusion
        
                contexts = Variable(torch.FloatTensor(first))
                #predictions = model(rows, cols)
                predictions = model(rows, cols, contexts)
        
                loss = loss_func(predictions, interactions)
                running_loss += np.sqrt(loss.data[0])*batch_size
                
                #correct = get_accuracy_test(interactions_train_orig, model, rows, fusion, contexts_full)
                correct = get_accuracy_context(interactions_train_SIM_E, interactions_train_orig, model, rows, fusion, contexts_full)
            #correct = get_accuracy(interactions_train_SIM_E, interactions_train_orig, model, rows)
                accur = accur + correct
                
                loss.backward()
            reg_loss_func.step()
    
            epoch_loss = running_loss/interactions_train_SIM_E.shape[0]
            print ('train_loss: ' + str(epoch_loss))
            
            print ('accuracy:' + str(accur/len(checks[0])))
            
            if epoch_loss < best_train_loss:
                best_train_loss = epoch_loss
                print ('checkpoint model with train loss: ' + str(best_train_loss))
                checkpoint_model(epoch_loss, model, save_path_b)
        
            #val_loss, val_accuracy = run_validation(model,interactions_test_orig, contexts_full,fusion, batch_size, save_path)
            val_loss, val_accuracy = run_validation_context(model, interactions_test_SIM_E, interactions_test_orig, contexts_full, fusion,batch_size, save_path_b)
            print ('val loss: ' + str(val_loss))
            val_percent = val_accuracy/len(checks_v[0])
            print ('val accuracy: ' + str(val_accuracy/len(checks_v[0])))
        
        
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                print ('checkpointing_model with val loss: ' + str(best_val_loss))
                checkpoint_model(val_loss, model, save_path_val_b)
            if val_percent > best_val_accuracy:
                best_val_accuracy = val_percent
                checkpoint_model(val_loss, model, save_path_val_b)
                print ('New Best Val Accuracy: ' + str(best_val_accuracy)+ ' with fusion: ' + str(fusion) + ' and factor :' + str(factors[x]))

Factor 2
FUSION: 1.0
Epoch: 0
train_loss: 3.937960452319388
accuracy:0.43759873617693523
checkpoint model with train loss: 3.937960452319388
val loss: 4.104613756372536
val accuracy: 0.4306569343065693
checkpointing_model with val loss: 4.104613756372536
New Best Val Accuracy: 0.4306569343065693 with fusion: 1.0 and factor :2
Epoch: 1
train_loss: 3.9685611770404816
accuracy:0.4312796208530806
val loss: 4.050210139011982
val accuracy: 0.4489051094890511
checkpointing_model with val loss: 4.050210139011982
New Best Val Accuracy: 0.4489051094890511 with fusion: 1.0 and factor :2
Epoch: 2
train_loss: 3.8947109154080612
accuracy:0.40916271721958924
checkpoint model with train loss: 3.8947109154080612
val loss: 4.11121239502249
val accuracy: 0.4927007299270073
New Best Val Accuracy: 0.4927007299270073 with fusion: 1.0 and factor :2
Epoch: 3
train_loss: 3.982972099040826
accuracy:0.4028436018957346
val loss: 4.073445658255646
val accuracy: 0.48905109489051096
Epoch: 4
train_loss: 3.9454013677

In [113]:
recs = generate_predictions(model, contexts_full, interactions_full_orig, interactions_full_SIM_E, fusion, save_path_val_b)
results = give_top5_cuisines(recs, rest_profile, rest_cuisine, user_list, item_list)

In [689]:
#Pearson Coefficient
best_val_loss = 1000.0
best_val_accuracy = 0.0

for x in range(1):
    print ('Factor ' + str(factors[x]))
    for f in range(len(fusion_arr)):  
        fusion = fusion_arr[f]
        print ('FUSION: ' + str(fusion))
        best_train_loss = 1000.0
        #accur = 0
        model = BiasedMatrixFactorization(interactions_train_orig.shape[0], interactions_train_orig.shape[1],mean, n_factors=factors[x])
        loss_func = torch.nn.MSELoss()
        reg_loss_func = torch.optim.Adam(model.parameters(), lr=1e-6, weight_decay=0.001)
        for i in range(5):
            print ("Epoch: " + str(i))
            running_loss = 0.0
            accur = 0
            for m, batch in enumerate(get_batch(batch_size, interactions_train_SIM_P)):
                reg_loss_func.zero_grad()
                interactions = Variable(torch.FloatTensor(interactions_train_SIM_P[batch,:].toarray()))
                rows = Variable(torch.LongTensor(batch))
                cols = Variable(torch.LongTensor(np.arange(interactions_train_SIM_P.shape[1])))
                first = (contexts_full[batch,:].toarray())*fusion
        
                contexts = Variable(torch.FloatTensor(first))
                #predictions = model(rows, cols)
                predictions = model(rows, cols, contexts)
        
                loss = loss_func(predictions, interactions)
                running_loss += np.sqrt(loss.data[0])*batch_size
                
                #correct = get_accuracy_test(interactions_train_orig, model, rows, fusion, contexts_full)
                correct = get_accuracy_context(interactions_train_SIM_P, interactions_train_orig, model, rows, fusion, contexts_full)
            #correct = get_accuracy(interactions_train_SIM_E, interactions_train_orig, model, rows)
                accur = accur + correct
                
                loss.backward()
            reg_loss_func.step()
    
            epoch_loss = running_loss/interactions_train_SIM_P.shape[0]
            print ('train_loss: ' + str(epoch_loss))
            
            print ('accuracy:' + str(accur/len(checks[0])))
            
            if epoch_loss < best_train_loss:
                best_train_loss = epoch_loss
                print ('checkpoint model with train loss: ' + str(best_train_loss))
                checkpoint_model(epoch_loss, model, save_path_b)
        
            #val_loss, val_accuracy = run_validation(model,interactions_test_orig, contexts_full,fusion, batch_size, save_path)
            val_loss, val_accuracy = run_validation_context(model, interactions_test_SIM_P, interactions_test_orig, contexts_full, fusion,batch_size, save_path_b)
            print ('val loss: ' + str(val_loss))
            val_percent = val_accuracy/len(checks_v[0])
            print ('val accuracy: ' + str(val_accuracy/len(checks_v[0])))
        
        
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                print ('checkpointing_model with val loss: ' + str(best_val_loss))
                checkpoint_model(val_loss, model, save_path_val_b)
            if val_percent > best_val_accuracy:
                best_val_accuracy = val_percent
                checkpoint_model(val_loss, model, save_path_val_b)
                print ('New Best Val Accuracy: ' + str(best_val_accuracy)+ ' with fusion: ' + str(fusion) + ' and factor :' + str(factors[x]))

Factor 2
FUSION: 0.0
Epoch: 0
train_loss: 2.002863560526983
accuracy:0.46845425867507884
checkpoint model with train loss: 2.002863560526983
val loss: 1.9439555707660658
val accuracy: 0.5091575091575091
checkpointing_model with val loss: 1.9439555707660658
New Best Val Accuracy: 0.5091575091575091 with fusion: 0.0 and factor :2
Epoch: 1
train_loss: 1.9924860288800985
accuracy:0.44637223974763407
checkpoint model with train loss: 1.9924860288800985
val loss: 1.9273554510875326
val accuracy: 0.4945054945054945
checkpointing_model with val loss: 1.9273554510875326
Epoch: 2
train_loss: 2.006573524603645
accuracy:0.4794952681388013
val loss: 1.9424919387049477
val accuracy: 0.4468864468864469
Epoch: 3
train_loss: 2.004109039709521
accuracy:0.49369085173501576
val loss: 1.936815102128787
val accuracy: 0.4652014652014652
Epoch: 4
train_loss: 1.9804826683995744
accuracy:0.4668769716088328
checkpoint model with train loss: 1.9804826683995744
val loss: 1.9088662272715131
val accuracy: 0.48351648

In [729]:
#Euclidean Distance
#context norm
best_val_loss = 1000.0
best_val_accuracy = 0.0
#FOCUS ON FACTOR 2
for x in range(1):
    print ('Factor ' + str(factors[x]))
    for f in range(len(fusion_arr)):  
        fusion = fusion_arr[f]
        print ('FUSION: ' + str(fusion))
        best_train_loss = 1000.0
        #accur = 0
        model = BiasedMatrixFactorization(interactions_train_orig.shape[0], interactions_train_orig.shape[1],mean, n_factors=factors[x])
        loss_func = torch.nn.MSELoss()
        reg_loss_func = torch.optim.Adam(model.parameters(), lr=1e-6, weight_decay=0.001)
        for i in range(5):
            print ("Epoch: " + str(i))
            running_loss = 0.0
            accur = 0
            for m, batch in enumerate(get_batch(batch_size, interactions_train_SIM_E)):
                reg_loss_func.zero_grad()
                interactions = Variable(torch.FloatTensor(interactions_train_SIM_E[batch,:].toarray()))
                rows = Variable(torch.LongTensor(batch))
                cols = Variable(torch.LongTensor(np.arange(interactions_train_SIM_E.shape[1])))
                first = (contexts_norm[batch,:].toarray())*fusion
        
                contexts = Variable(torch.FloatTensor(first))
                #predictions = model(rows, cols)
                predictions = model(rows, cols, contexts)
        
                loss = loss_func(predictions, interactions)
                running_loss += np.sqrt(loss.data[0])*batch_size
                
                #correct = get_accuracy_test(interactions_train_orig, model, rows, fusion, contexts_full)
                correct = get_accuracy_context(interactions_train_SIM_E, interactions_train_orig, model, rows, fusion, contexts_norm)
            #correct = get_accuracy(interactions_train_SIM_E, interactions_train_orig, model, rows)
                accur = accur + correct
                
                loss.backward()
            reg_loss_func.step()
    
            epoch_loss = running_loss/interactions_train_SIM_E.shape[0]
            print ('train_loss: ' + str(epoch_loss))
            
            print ('accuracy:' + str(accur/len(checks[0])))
            
            if epoch_loss < best_train_loss:
                best_train_loss = epoch_loss
                print ('checkpoint model with train loss: ' + str(best_train_loss))
                checkpoint_model(epoch_loss, model, save_path_b)
        
            #val_loss, val_accuracy = run_validation(model,interactions_test_orig, contexts_full,fusion, batch_size, save_path)
            val_loss, val_accuracy = run_validation_context(model, interactions_test_SIM_E, interactions_test_orig, contexts_norm, fusion,batch_size, save_path_b)
            print ('val loss: ' + str(val_loss))
            val_percent = val_accuracy/len(checks_v[0])
            print ('val accuracy: ' + str(val_accuracy/len(checks_v[0])))
        
        
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                print ('checkpointing_model with val loss: ' + str(best_val_loss))
                checkpoint_model(val_loss, model, save_path_val_b)
            if val_percent > best_val_accuracy:
                best_val_accuracy = val_percent
                checkpoint_model(val_loss, model, save_path_val_b)
                print ('New Best Val Accuracy: ' + str(best_val_accuracy)+ ' with fusion: ' + str(fusion) + ' and factor :' + str(factors[x]))

Factor 2
FUSION: 0.0
Epoch: 0
train_loss: 2.4029043794643496
accuracy:0.44637223974763407
checkpoint model with train loss: 2.4029043794643496
val loss: 2.3402342688550526
val accuracy: 0.3956043956043956
checkpointing_model with val loss: 2.3402342688550526
New Best Val Accuracy: 0.3956043956043956 with fusion: 0.0 and factor :2
Epoch: 1
train_loss: 2.397502189143252
accuracy:0.4227129337539432
checkpoint model with train loss: 2.397502189143252
val loss: 2.3444545780547386
val accuracy: 0.3553113553113553
Epoch: 2
train_loss: 2.3906152354635397
accuracy:0.40063091482649843
checkpoint model with train loss: 2.3906152354635397
val loss: 2.324585135799029
val accuracy: 0.4139194139194139
checkpointing_model with val loss: 2.324585135799029
New Best Val Accuracy: 0.4139194139194139 with fusion: 0.0 and factor :2
Epoch: 3
train_loss: 2.398682487261816
accuracy:0.4195583596214511
val loss: 2.3423919226331797
val accuracy: 0.43223443223443225
New Best Val Accuracy: 0.43223443223443225 with 

In [741]:
#Pearson Coefficient
#context norm
best_val_loss = 1000.0
best_val_accuracy = 0.0

for x in range(1):
    print ('Factor ' + str(factors[x]))
    for f in range(len(fusion_arr)):  
        fusion = fusion_arr[f]
        print ('FUSION: ' + str(fusion))
        best_train_loss = 1000.0
        #accur = 0
        model = BiasedMatrixFactorization(interactions_train_orig.shape[0], interactions_train_orig.shape[1],mean, n_factors=factors[x])
        loss_func = torch.nn.MSELoss()
        reg_loss_func = torch.optim.Adam(model.parameters(), lr=1e-6, weight_decay=0.001)
        for i in range(5):
            print ("Epoch: " + str(i))
            running_loss = 0.0
            accur = 0
            for m, batch in enumerate(get_batch(batch_size, interactions_train_SIM_P)):
                reg_loss_func.zero_grad()
                interactions = Variable(torch.FloatTensor(interactions_train_SIM_P[batch,:].toarray()))
                rows = Variable(torch.LongTensor(batch))
                cols = Variable(torch.LongTensor(np.arange(interactions_train_SIM_P.shape[1])))
                first = (contexts_norm[batch,:].toarray())*fusion
        
                contexts = Variable(torch.FloatTensor(first))
                #predictions = model(rows, cols)
                predictions = model(rows, cols, contexts)
        
                loss = loss_func(predictions, interactions)
                running_loss += np.sqrt(loss.data[0])*batch_size
                
                #correct = get_accuracy_test(interactions_train_orig, model, rows, fusion, contexts_full)
                correct = get_accuracy_context(interactions_train_SIM_P, interactions_train_orig, model, rows, fusion, contexts_norm)
            #correct = get_accuracy(interactions_train_SIM_E, interactions_train_orig, model, rows)
                accur = accur + correct
                
                loss.backward()
            reg_loss_func.step()
    
            epoch_loss = running_loss/interactions_train_SIM_P.shape[0]
            print ('train_loss: ' + str(epoch_loss))
            
            print ('accuracy:' + str(accur/len(checks[0])))
            
            if epoch_loss < best_train_loss:
                best_train_loss = epoch_loss
                print ('checkpoint model with train loss: ' + str(best_train_loss))
                checkpoint_model(epoch_loss, model, save_path_b)
        
            #val_loss, val_accuracy = run_validation(model,interactions_test_orig, contexts_full,fusion, batch_size, save_path)
            val_loss, val_accuracy = run_validation_context(model, interactions_test_SIM_P, interactions_test_orig, contexts_norm, fusion,batch_size, save_path_b)
            print ('val loss: ' + str(val_loss))
            val_percent = val_accuracy/len(checks_v[0])
            print ('val accuracy: ' + str(val_accuracy/len(checks_v[0])))
        
        
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                print ('checkpointing_model with val loss: ' + str(best_val_loss))
                checkpoint_model(val_loss, model, save_path_val_b)
            if val_percent > best_val_accuracy:
                best_val_accuracy = val_percent
                checkpoint_model(val_loss, model, save_path_val_b)
                print ('New Best Val Accuracy: ' + str(best_val_accuracy)+ ' with fusion: ' + str(fusion) + ' and factor :' + str(factors[x]))

Factor 2
FUSION: 0.0
Epoch: 0
train_loss: 2.081429011434301
accuracy:0.32018927444794953
checkpoint model with train loss: 2.081429011434301
val loss: 2.0440156789468977
val accuracy: 0.32234432234432236
checkpointing_model with val loss: 2.0440156789468977
New Best Val Accuracy: 0.32234432234432236 with fusion: 0.0 and factor :2
Epoch: 1
train_loss: 2.1095082961199005
accuracy:0.3501577287066246
val loss: 2.031054048921989
val accuracy: 0.2600732600732601
checkpointing_model with val loss: 2.031054048921989
Epoch: 2
train_loss: 2.0833406844326126
accuracy:0.3264984227129338
val loss: 2.062031146787092
val accuracy: 0.3333333333333333
New Best Val Accuracy: 0.3333333333333333 with fusion: 0.0 and factor :2
Epoch: 3
train_loss: 2.101612329808129
accuracy:0.33753943217665616
val loss: 2.0423580262184355
val accuracy: 0.3516483516483517
New Best Val Accuracy: 0.3516483516483517 with fusion: 0.0 and factor :2
Epoch: 4
train_loss: 2.096481106798703
accuracy:0.3643533123028391
val loss: 2.07